feat(monitoring): general fixup

- added healthcheck for every service
- added nginx second "server" block for monitoring
    all monitoring services are now behind this nginx reverse proxy
- fixed logging driver not present for chat service
This commit is contained in:
Maix0 2025-12-17 19:23:47 +01:00
parent 67c8a9cbd1
commit 8a3481ea8b
10 changed files with 209 additions and 48 deletions

View file

@ -33,8 +33,10 @@ services:
restart: always
networks:
- transcendance-network
- monitoring
ports:
- '8888:443'
- '9090:8443'
volumes:
# if you need to share files with nginx, you do it here.
- static-volume:/volumes/static
@ -47,6 +49,7 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# AUTH #
###############
@ -73,26 +76,6 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# TIC-TAC-TOE #
###############
# tic-tac-toe:
# build:
# context: ./src/
# args:
# - SERVICE=tic-tac-toe
# - EXTRA_FILES=tic-tac-toe/extra
# container_name: tic-tac-toe
# restart: unless-stopped
# networks:
# - transcendance-network
# volumes:
# - sqlite-volume:/volumes/database
# - static-volume:/volumes/static
# environment:
# - JWT_SECRET=KRUGKIDROVUWG2ZAMJZG653OEBTG66BANJ2W24DTEBXXMZLSEB2GQZJANRQXU6JA
# - DATABASE_DIR=/volumes/database
###############
# CHAT #
###############
@ -114,6 +97,11 @@ services:
- DATABASE_DIR=/volumes/database
- PROVIDER_FILE=/extra/providers.toml
- SESSION_MANAGER=${SESSION_MANAGER}
logging:
driver: gelf
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# USER #
@ -123,7 +111,6 @@ services:
context: ./src/
args:
- SERVICE=user
# - EXTRA_FILES=user/extra
container_name: user
restart: always
networks:
@ -154,8 +141,6 @@ services:
- monitoring
depends_on:
- prometheus
ports:
- '3000:3000'
volumes:
- ./monitoring/grafana/alerting:/etc/grafana/provisioning/alerting
- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
@ -166,7 +151,7 @@ services:
# This can stay the same for development: the domain below is an alias to `localhost`.
- NGINX_DOMAIN=local.maix.me
- GF_LOG_LEVEL=warn
- GF_SERVER_ROOT_URL=http://local.maix.me:3000
- GF_SERVER_ROOT_URL=https://local.maix.me:9090/grafana/
- GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER}
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASS}
logging:
@ -174,6 +159,12 @@ services:
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
healthcheck:
test: ["CMD-SHELL", "curl -f -s http://localhost:3000/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
prometheus:
image: prom/prometheus:latest
@ -189,14 +180,22 @@ services:
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
networks:
- monitoring
container_name: monitoring-cadvisor
ports:
- '8080:8080'
command:
- '-url_base_prefix=/cadvisor'
environment:
- CADVISOR_HEALTHCHECK_URL=http://localhost:8080/cadvisor/healthz
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
@ -209,20 +208,23 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
blackbox:
image: prom/blackbox-exporter:latest
container_name: monitoring-blackbox
networks:
- transcendance-network
ports:
- "9115:9115"
restart: unless-stopped
logging:
driver: gelf
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9115/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
@ -242,10 +244,14 @@ services:
volumes:
- elastic-data:/usr/share/elasticsearch/data
- ./logs/elasticsearch:/setup
ports:
- "9200:9200"
command: ["/setup/bootstrap.sh"]
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9200"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
logstash:
image: docker.elastic.co/logstash/logstash:7.17.23
@ -259,6 +265,12 @@ services:
ports:
- "12201:12201/udp"
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9600"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
kibana:
image: docker.elastic.co/kibana/kibana:7.17.23
@ -267,17 +279,24 @@ services:
- elasticsearch
networks:
- monitoring
- transcendance-network
environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- SERVER_PUBLICBASEURL=http://local.maix.me:5601
- SERVER_PUBLICBASEURL=https://local.maix.me:9090/kibana
- SERVER_BASEPATH=/kibana
- SERVER_REWRITEBASEPATH=true
- ELASTICSEARCH_USERNAME=elastic
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
ports:
- "5601:5601"
volumes:
- ./logs/kibana:/setup
command: ["/setup/bootstrap.sh"]
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:5601/kibana/api/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
volumes:
sqlite-volume:

View file

@ -2,11 +2,11 @@
kibana_setup() {
set -xe
until curl -s -f "localhost:5601/api/status"; do
until curl -s -f "localhost:5601/kibana/api/status"; do
sleep 2
done
curl -v -X POST "localhost:5601/api/saved_objects/_import?overwrite=true" \
curl -v -X POST "localhost:5601/kibana/api/saved_objects/_import?overwrite=true" \
-H "kbn-xsrf: true" \
--form file='@/setup/export.ndjson'
exit 0

View file

@ -17,10 +17,10 @@ scrape_configs:
static_configs:
- targets:
- http://nginx/monitoring/ok
- http://nginx:8080/ok
- http://auth/monitoring
- http://user/monitoring
- http://icons/monitoring
- http://chat/monitoring
relabel_configs:
- source_labels: [__address__]

View file

@ -27,3 +27,9 @@ COPY ./15-local-resolvers.envsh /docker-entrypoint.d/
COPY ./17-add-template-prefix.sh /docker-entrypoint.d/
COPY ./conf /etc/nginx/templates
COPY ./monitoring.index.html /var/share/www/monitoring/
RUN chmod -R +r /var/share/www/monitoring/;
HEALTHCHECK --interval=30s --timeout=3s \
CMD curl -f -s http://localhost:8080/ok?docker || exit 1;

View file

@ -1,10 +1,15 @@
# please make sure you want to edit this file...
# Derive the value of the outgoing `Connection` header from the client's
# `Upgrade` header: an empty/missing Upgrade header yields "close", anything
# else yields "upgrade" (used when proxying WebSocket connections).
map $http_upgrade $connection_upgrade {
    ''      close;
    default upgrade;
}
# this allows the redirection of `http://domain/URL` to `https://domain/URL`
server {
charset UTF-8;
listen 80;
listen [::]:80;
#listen [::]:80;
resolver $NGINX_RESOLVERS;
server_name $NGINX_DOMAIN;
@ -14,16 +19,18 @@ server {
server {
charset UTF-8;
listen [::]:443 ssl;
#listen [::]:443 ssl;
listen 443 ssl;
resolver $NGINX_RESOLVERS;
server_name $NGINX_DOMAIN;
ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;
ssl_protocols TLSv1.3;
ssl_protocols TLSv1.3;
proxy_set_header X-Forwarded true;
error_page 497 https://$http_host$request_uri;
error_page 497 https://$http_host$request_uri;
include conf.d/locations/*.conf;
}
include conf.d/monitoring/server.conf;

View file

@ -1,4 +0,0 @@
location /monitoring/ok {
add_header Content-Type text/plain;
return 200 'healthy';
}

View file

@ -0,0 +1,39 @@
# Reverse-proxy locations for the monitoring tools (Kibana, cAdvisor, Grafana), plus a health endpoint and a static index page.
location /kibana {
    # The upstream is held in a variable so that nginx resolves the name at
    # request time (through the enclosing server's `resolver`) rather than
    # once at startup.
    set $upstream_kibana kibana:5601;

    proxy_set_header Host $host;

    # No URI part on proxy_pass: the request URI, including the /kibana
    # prefix, is forwarded to the upstream unchanged.
    proxy_pass http://$upstream_kibana;
}
location /cadvisor {
    # Variable upstream: the name is resolved per request via the enclosing
    # server's `resolver` instead of once at startup.
    set $upstream_cadvisor cadvisor:8080;

    proxy_set_header Host $host;

    # The /cadvisor prefix is kept on the forwarded request URI.
    proxy_pass http://$upstream_cadvisor;
}
location /grafana {
    # NOTE: `set` must come BEFORE `rewrite ... break`. The `break` flag stops
    # processing of all remaining ngx_http_rewrite_module directives in this
    # location, and `set` is one of them: placed after the rewrite, the
    # variable would stay empty and proxy_pass would have no upstream host.
    set $upstream_grafana grafana:3000;

    proxy_set_header Host $host;

    # Strip the /grafana prefix from the request URI before proxying.
    rewrite ^/grafana/?(.*) /$1 break;

    # Variable upstream: resolved per request via the enclosing server's
    # `resolver`; the rewritten URI is what gets forwarded.
    proxy_pass http://$upstream_grafana;
}
# Proxy Grafana Live WebSocket connections.
location /grafana/api/live/ {
    # NOTE: `set` must come BEFORE `rewrite ... break`. The `break` flag stops
    # processing of all remaining ngx_http_rewrite_module directives in this
    # location, and `set` is one of them: placed after the rewrite, the
    # variable would stay empty and proxy_pass would have no upstream host.
    set $upstream_grafana grafana:3000;

    # WebSocket upgrade requires HTTP/1.1 and explicit forwarding of the
    # hop-by-hop Upgrade/Connection headers.
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection $connection_upgrade;
    proxy_set_header Host $host;

    # Strip the /grafana prefix from the request URI before proxying.
    rewrite ^/grafana/?(.*) /$1 break;
    proxy_pass http://$upstream_grafana;
}
# Health endpoint: always answers 200 with the body "healthy".
location /ok {
    # Use default_type rather than `add_header Content-Type ...`: add_header
    # appends an extra header, so the response would carry two Content-Type
    # headers (the one derived from default_type plus the added one).
    default_type text/plain;
    return 200 'healthy';
}
# Everything not matched by a more specific location is served as static
# content, with monitoring.index.html as the directory index.
location / {
    index monitoring.index.html;
    root /var/share/www/monitoring/;
}

View file

@ -0,0 +1,26 @@
# Plain-HTTP monitoring server on port 8080.
# $NGINX_RESOLVERS / $NGINX_DOMAIN are placeholders - presumably substituted
# from the environment when the template is rendered; confirm against the
# image entrypoint.
server {
    listen 8080;
    # IPv6 listener currently disabled.
    #listen [::]:8080;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    resolver $NGINX_RESOLVERS;

    include conf.d/monitoring/locations.conf;
}
# TLS monitoring server on port 8443, serving the same locations as the
# plain-HTTP monitoring server.
server {
    listen 8443 ssl;
    # IPv6 listener currently disabled (port kept in sync with the line above).
    #listen [::]:8443 ssl;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    resolver $NGINX_RESOLVERS;

    ssl_protocols TLSv1.3;
    ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
    ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;

    # 497 = a plain HTTP request was sent to this HTTPS port; redirect the
    # client to the same host and URI over https.
    error_page 497 https://$http_host$request_uri;

    include conf.d/monitoring/locations.conf;
}

View file

@ -0,0 +1,64 @@
<!DOCTYPE html>
<!-- Static landing page linking to the monitoring tools exposed by the reverse proxy. -->
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <title>Service Dashboard</title>
    <style>
      /* Page background and base font. */
      body {
        font-family: Arial, Helvetica, sans-serif;
        background-color: #f4f6f8;
        margin: 0;
        padding: 0;
      }

      /* Centered card holding the list of links. */
      .container {
        max-width: 400px;
        margin: 100px auto;
        padding: 20px;
        background-color: #ffffff;
        border-radius: 6px;
        box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
      }

      h1 {
        font-size: 20px;
        text-align: center;
        margin-bottom: 20px;
      }

      /* Unstyled list: one full-width button per service. */
      ul {
        list-style: none;
        padding: 0;
        margin: 0;
      }

      li {
        margin-bottom: 10px;
      }

      a {
        display: block;
        padding: 12px;
        text-decoration: none;
        color: #333;
        background-color: #e9ecef;
        border-radius: 4px;
        text-align: center;
      }

      a:hover {
        background-color: #dfe3e6;
      }
    </style>
  </head>
  <body>
    <div class="container">
      <h1>Services</h1>
      <!-- Each link targets a path prefix on this same host. -->
      <ul>
        <li><a href="/kibana">Kibana</a></li>
        <li><a href="/cadvisor">cAdvisor</a></li>
        <li><a href="/grafana">Grafana</a></li>
      </ul>
    </div>
  </body>
</html>

View file

@ -1,6 +1,7 @@
FROM node:22-alpine AS pnpm_base
RUN npm install --global pnpm@10 --no-fund -q;
RUN apk add make python3 gcc clang build-base musl-dev;
RUN apk add --no-cache curl
FROM pnpm_base AS deps
WORKDIR /build
@ -50,5 +51,8 @@ COPY --from=deps /build/node_modules /src/node_modules
COPY ${EXTRA_FILES} /extra
ENTRYPOINT [ "/src/entrypoint.sh" ]
HEALTHCHECK --interval=30s --timeout=3s \
CMD curl -f -s http://localhost/monitoring?docker || exit 1
CMD ["node", "/src/run.cjs"]