feat(monitoring): general fixup

- added healthcheck for every service
- added nginx second "server" block for monitoring
    all monitoring services are now behind this nginx reverse proxy
- fixed logging driver not present for chat service
This commit is contained in:
Maix0 2025-12-17 19:23:47 +01:00
parent 67c8a9cbd1
commit 8a3481ea8b
10 changed files with 209 additions and 48 deletions

View file

@ -33,8 +33,10 @@ services:
restart: always restart: always
networks: networks:
- transcendance-network - transcendance-network
- monitoring
ports: ports:
- '8888:443' - '8888:443'
- '9090:8443'
volumes: volumes:
# if you need to share files with nginx, you do it here. # if you need to share files with nginx, you do it here.
- static-volume:/volumes/static - static-volume:/volumes/static
@ -47,6 +49,7 @@ services:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
############### ###############
# AUTH # # AUTH #
############### ###############
@ -73,26 +76,6 @@ services:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
###############
# TIC-TAC-TOE #
###############
# tic-tac-toe:
# build:
# context: ./src/
# args:
# - SERVICE=tic-tac-toe
# - EXTRA_FILES=tic-tac-toe/extra
# container_name: tic-tac-toe
# restart: unless-stopped
# networks:
# - transcendance-network
# volumes:
# - sqlite-volume:/volumes/database
# - static-volume:/volumes/static
# environment:
# - JWT_SECRET=KRUGKIDROVUWG2ZAMJZG653OEBTG66BANJ2W24DTEBXXMZLSEB2GQZJANRQXU6JA
# - DATABASE_DIR=/volumes/database
############### ###############
# CHAT # # CHAT #
############### ###############
@ -114,6 +97,11 @@ services:
- DATABASE_DIR=/volumes/database - DATABASE_DIR=/volumes/database
- PROVIDER_FILE=/extra/providers.toml - PROVIDER_FILE=/extra/providers.toml
- SESSION_MANAGER=${SESSION_MANAGER} - SESSION_MANAGER=${SESSION_MANAGER}
logging:
driver: gelf
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
############### ###############
# USER # # USER #
@ -123,7 +111,6 @@ services:
context: ./src/ context: ./src/
args: args:
- SERVICE=user - SERVICE=user
# - EXTRA_FILES=user/extra
container_name: user container_name: user
restart: always restart: always
networks: networks:
@ -154,8 +141,6 @@ services:
- monitoring - monitoring
depends_on: depends_on:
- prometheus - prometheus
ports:
- '3000:3000'
volumes: volumes:
- ./monitoring/grafana/alerting:/etc/grafana/provisioning/alerting - ./monitoring/grafana/alerting:/etc/grafana/provisioning/alerting
- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
@ -166,7 +151,7 @@ services:
# this can stay the same for development. This is an alias to `localhost` # this can stay the same for development. This is an alias to `localhost`
- NGINX_DOMAIN=local.maix.me - NGINX_DOMAIN=local.maix.me
- GF_LOG_LEVEL=warn - GF_LOG_LEVEL=warn
- GF_SERVER_ROOT_URL=http://local.maix.me:3000 - GF_SERVER_ROOT_URL=https://local.maix.me:9090/grafana/
- GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER} - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER}
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASS} - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASS}
logging: logging:
@ -174,6 +159,12 @@ services:
options: options:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
healthcheck:
test: ["CMD-SHELL", "curl -f -s http://localhost:3000/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
@ -189,14 +180,22 @@ services:
options: options:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
cadvisor: cadvisor:
image: gcr.io/cadvisor/cadvisor:latest image: gcr.io/cadvisor/cadvisor:latest
networks: networks:
- monitoring - monitoring
container_name: monitoring-cadvisor container_name: monitoring-cadvisor
ports: command:
- '8080:8080' - '-url_base_prefix=/cadvisor'
environment:
- CADVISOR_HEALTHCHECK_URL=http://localhost:8080/cadvisor/healthz
volumes: volumes:
- /:/rootfs:ro - /:/rootfs:ro
- /var/run:/var/run:ro - /var/run:/var/run:ro
@ -209,20 +208,23 @@ services:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
blackbox: blackbox:
image: prom/blackbox-exporter:latest image: prom/blackbox-exporter:latest
container_name: monitoring-blackbox container_name: monitoring-blackbox
networks: networks:
- transcendance-network - transcendance-network
ports:
- "9115:9115"
restart: unless-stopped restart: unless-stopped
logging: logging:
driver: gelf driver: gelf
options: options:
gelf-address: "udp://127.0.0.1:12201" gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}" tag: "{{.Name}}"
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9115/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
@ -242,10 +244,14 @@ services:
volumes: volumes:
- elastic-data:/usr/share/elasticsearch/data - elastic-data:/usr/share/elasticsearch/data
- ./logs/elasticsearch:/setup - ./logs/elasticsearch:/setup
ports:
- "9200:9200"
command: ["/setup/bootstrap.sh"] command: ["/setup/bootstrap.sh"]
restart: unless-stopped restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9200"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
logstash: logstash:
image: docker.elastic.co/logstash/logstash:7.17.23 image: docker.elastic.co/logstash/logstash:7.17.23
@ -259,6 +265,12 @@ services:
ports: ports:
- "12201:12201/udp" - "12201:12201/udp"
restart: unless-stopped restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9600"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
kibana: kibana:
image: docker.elastic.co/kibana/kibana:7.17.23 image: docker.elastic.co/kibana/kibana:7.17.23
@ -267,17 +279,24 @@ services:
- elasticsearch - elasticsearch
networks: networks:
- monitoring - monitoring
- transcendance-network
environment: environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200 - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- SERVER_PUBLICBASEURL=http://local.maix.me:5601 - SERVER_PUBLICBASEURL=https://local.maix.me:9090/kibana
- SERVER_BASEPATH=/kibana
- SERVER_REWRITEBASEPATH=true
- ELASTICSEARCH_USERNAME=elastic - ELASTICSEARCH_USERNAME=elastic
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD} - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
ports:
- "5601:5601"
volumes: volumes:
- ./logs/kibana:/setup - ./logs/kibana:/setup
command: ["/setup/bootstrap.sh"] command: ["/setup/bootstrap.sh"]
restart: unless-stopped restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:5601/kibana/api/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
volumes: volumes:
sqlite-volume: sqlite-volume:

View file

@ -2,11 +2,11 @@
kibana_setup() { kibana_setup() {
set -xe set -xe
until curl -s -f "localhost:5601/api/status"; do until curl -s -f "localhost:5601/kibana/api/status"; do
sleep 2 sleep 2
done done
curl -v -X POST "localhost:5601/api/saved_objects/_import?overwrite=true" \ curl -v -X POST "localhost:5601/kibana/api/saved_objects/_import?overwrite=true" \
-H "kbn-xsrf: true" \ -H "kbn-xsrf: true" \
--form file='@/setup/export.ndjson' --form file='@/setup/export.ndjson'
exit 0 exit 0

View file

@ -17,10 +17,10 @@ scrape_configs:
static_configs: static_configs:
- targets: - targets:
- http://nginx/monitoring/ok - http://nginx:8080/ok
- http://auth/monitoring - http://auth/monitoring
- http://user/monitoring - http://user/monitoring
- http://icons/monitoring - http://chat/monitoring
relabel_configs: relabel_configs:
- source_labels: [__address__] - source_labels: [__address__]

View file

@ -27,3 +27,9 @@ COPY ./15-local-resolvers.envsh /docker-entrypoint.d/
COPY ./17-add-template-prefix.sh /docker-entrypoint.d/ COPY ./17-add-template-prefix.sh /docker-entrypoint.d/
COPY ./conf /etc/nginx/templates COPY ./conf /etc/nginx/templates
COPY ./monitoring.index.html /var/share/www/monitoring/
RUN chmod -R +r /var/share/www/monitoring/;
HEALTHCHECK --interval=30s --timeout=3s \
CMD curl -f -s http://localhost:8080/ok?docker || exit 1;

View file

@ -1,10 +1,15 @@
# please make sure you want to edit this file... # please make sure you want to edit this file...
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
# this allows the redirection of `http://domain/URL` to `https://domain/URL` # this allows the redirection of `http://domain/URL` to `https://domain/URL`
server { server {
charset UTF-8; charset UTF-8;
listen 80; listen 80;
listen [::]:80; #listen [::]:80;
resolver $NGINX_RESOLVERS; resolver $NGINX_RESOLVERS;
server_name $NGINX_DOMAIN; server_name $NGINX_DOMAIN;
@ -14,7 +19,7 @@ server {
server { server {
charset UTF-8; charset UTF-8;
listen [::]:443 ssl; #listen [::]:443 ssl;
listen 443 ssl; listen 443 ssl;
resolver $NGINX_RESOLVERS; resolver $NGINX_RESOLVERS;
server_name $NGINX_DOMAIN; server_name $NGINX_DOMAIN;
@ -27,3 +32,5 @@ server {
error_page 497 https://$http_host$request_uri; error_page 497 https://$http_host$request_uri;
include conf.d/locations/*.conf; include conf.d/locations/*.conf;
} }
include conf.d/monitoring/server.conf;

View file

@ -1,4 +0,0 @@
location /monitoring/ok {
add_header Content-Type text/plain;
return 200 'healthy';
}

View file

@ -0,0 +1,39 @@
# Reverse-proxy locations for the monitoring services (Kibana, cAdvisor, Grafana).
# Kibana: requests keep their /kibana prefix when forwarded upstream.
location /kibana {
    # Holding the upstream in a variable makes nginx look the name up at
    # request time instead of once at startup.
    set $upstream_kibana kibana:5601;
    proxy_pass http://$upstream_kibana;
    proxy_set_header Host $host;
}
# cAdvisor: requests keep their /cadvisor prefix when forwarded upstream.
location /cadvisor {
    # Holding the upstream in a variable makes nginx look the name up at
    # request time instead of once at startup.
    set $upstream_cadvisor cadvisor:8080;
    proxy_pass http://$upstream_cadvisor;
    proxy_set_header Host $host;
}
# Grafana: the /grafana prefix is stripped before the request is proxied.
location /grafana {
    # `set` has to come before `rewrite ... break`: the `break` flag stops
    # every remaining rewrite-module directive in this location, and `set`
    # is one of them. Placed after the rewrite it would never run, leaving
    # $upstream_grafana empty when proxy_pass is evaluated.
    set $upstream_grafana grafana:3000;
    rewrite ^/grafana/?(.*) /$1 break;
    proxy_set_header Host $host;
    proxy_pass http://$upstream_grafana;
}
# Proxy Grafana Live WebSocket connections.
# The /grafana prefix is stripped and the connection is upgraded when the
# client asks for it.
location /grafana/api/live/ {
    # `set` has to come before `rewrite ... break`: the `break` flag stops
    # every remaining rewrite-module directive in this location, and `set`
    # is one of them. Placed after the rewrite it would never run, leaving
    # $upstream_grafana empty when proxy_pass is evaluated.
    set $upstream_grafana grafana:3000;
    rewrite ^/grafana/?(.*) /$1 break;

    # WebSocket upgrade requires HTTP/1.1 and the hop-by-hop headers below.
    # $connection_upgrade is not defined in this file; it is expected to be
    # provided by a `map $http_upgrade ...` declared at http level.
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection $connection_upgrade;
    proxy_set_header Host $host;
    proxy_pass http://$upstream_grafana;
}
# Liveness endpoint: always answers 200 with the body "healthy".
location /ok {
    # default_type sets the Content-Type of the `return` body. Using
    # add_header here would append a second Content-Type header next to
    # the one nginx already emits from default_type.
    default_type text/plain;
    return 200 'healthy';
}
# Fallback: serve the static landing page from the monitoring web root.
location / {
    index monitoring.index.html;
    root /var/share/www/monitoring/;
}

View file

@ -0,0 +1,26 @@
# Plain-HTTP entry point for the monitoring locations (port 8080).
server {
    listen 8080;
    #listen [::]:8080;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    # Needed by the locations that proxy to a variable upstream.
    resolver $NGINX_RESOLVERS;

    include conf.d/monitoring/locations.conf;
}
# TLS entry point for the monitoring locations (port 8443).
server {
    listen 8443 ssl;
    #listen [::]:8443 ssl;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    # Needed by the locations that proxy to a variable upstream.
    resolver $NGINX_RESOLVERS;

    # Self-signed certificate; only TLS 1.3 is accepted.
    ssl_protocols TLSv1.3;
    ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
    ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;

    # 497 = plain HTTP request sent to the TLS port: redirect it to https.
    error_page 497 https://$http_host$request_uri;

    include conf.d/monitoring/locations.conf;
}

View file

@ -0,0 +1,64 @@
<!DOCTYPE html>
<!-- Static landing page linking to the proxied monitoring services. -->
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <title>Service Dashboard</title>
    <style>
      /* Page background and base font */
      body {
        font-family: Arial, Helvetica, sans-serif;
        background-color: #f4f6f8;
        margin: 0;
        padding: 0;
      }

      /* Centered white card holding the link list */
      .container {
        max-width: 400px;
        margin: 100px auto;
        padding: 20px;
        background-color: #ffffff;
        border-radius: 6px;
        box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
      }

      h1 {
        font-size: 20px;
        text-align: center;
        margin-bottom: 20px;
      }

      /* Unstyled list: one full-width button per service */
      ul {
        list-style: none;
        padding: 0;
        margin: 0;
      }

      li {
        margin-bottom: 10px;
      }

      a {
        display: block;
        padding: 12px;
        text-decoration: none;
        color: #333;
        background-color: #e9ecef;
        border-radius: 4px;
        text-align: center;
      }

      a:hover {
        background-color: #dfe3e6;
      }
    </style>
  </head>
  <body>
    <div class="container">
      <h1>Services</h1>
      <!-- Each link targets a path handled by the monitoring reverse proxy. -->
      <ul>
        <li><a href="/kibana">Kibana</a></li>
        <li><a href="/cadvisor">cAdvisor</a></li>
        <li><a href="/grafana">Grafana</a></li>
      </ul>
    </div>
  </body>
</html>

View file

@ -1,6 +1,7 @@
FROM node:22-alpine AS pnpm_base FROM node:22-alpine AS pnpm_base
RUN npm install --global pnpm@10 --no-fund -q; RUN npm install --global pnpm@10 --no-fund -q;
RUN apk add make python3 gcc clang build-base musl-dev; RUN apk add make python3 gcc clang build-base musl-dev;
RUN apk add --no-cache curl
FROM pnpm_base AS deps FROM pnpm_base AS deps
WORKDIR /build WORKDIR /build
@ -50,5 +51,8 @@ COPY --from=deps /build/node_modules /src/node_modules
COPY ${EXTRA_FILES} /extra COPY ${EXTRA_FILES} /extra
ENTRYPOINT [ "/src/entrypoint.sh" ] ENTRYPOINT [ "/src/entrypoint.sh" ]
HEALTHCHECK --interval=30s --timeout=3s \
CMD curl -f -s http://localhost/monitoring?docker || exit 1
CMD ["node", "/src/run.cjs"] CMD ["node", "/src/run.cjs"]