feat(monitoring): general fixup

- added a healthcheck for every service
- added a second nginx "server" block for monitoring;
    all monitoring services are now behind this nginx reverse proxy
- fixed the missing logging driver for the chat service
This commit is contained in:
Maix0 2025-12-17 19:23:47 +01:00
parent 67c8a9cbd1
commit 8a3481ea8b
10 changed files with 209 additions and 48 deletions

View file

@ -33,8 +33,10 @@ services:
restart: always
networks:
- transcendance-network
- monitoring
ports:
- '8888:443'
- '9090:8443'
volumes:
# If you need to share files with nginx, mount them here.
- static-volume:/volumes/static
@ -47,6 +49,7 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# AUTH #
###############
@ -73,26 +76,6 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# TIC-TAC-TOE #
###############
# tic-tac-toe:
# build:
# context: ./src/
# args:
# - SERVICE=tic-tac-toe
# - EXTRA_FILES=tic-tac-toe/extra
# container_name: tic-tac-toe
# restart: unless-stopped
# networks:
# - transcendance-network
# volumes:
# - sqlite-volume:/volumes/database
# - static-volume:/volumes/static
# environment:
# - JWT_SECRET=KRUGKIDROVUWG2ZAMJZG653OEBTG66BANJ2W24DTEBXXMZLSEB2GQZJANRQXU6JA
# - DATABASE_DIR=/volumes/database
###############
# CHAT #
###############
@ -114,6 +97,11 @@ services:
- DATABASE_DIR=/volumes/database
- PROVIDER_FILE=/extra/providers.toml
- SESSION_MANAGER=${SESSION_MANAGER}
logging:
driver: gelf
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
###############
# USER #
@ -123,7 +111,6 @@ services:
context: ./src/
args:
- SERVICE=user
# - EXTRA_FILES=user/extra
container_name: user
restart: always
networks:
@ -154,8 +141,6 @@ services:
- monitoring
depends_on:
- prometheus
ports:
- '3000:3000'
volumes:
- ./monitoring/grafana/alerting:/etc/grafana/provisioning/alerting
- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
@ -166,7 +151,7 @@ services:
# This can stay the same for development: the domain is an alias for `localhost`.
- NGINX_DOMAIN=local.maix.me
- GF_LOG_LEVEL=warn
- GF_SERVER_ROOT_URL=http://local.maix.me:3000
- GF_SERVER_ROOT_URL=https://local.maix.me:9090/grafana/
- GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER}
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASS}
logging:
@ -174,6 +159,12 @@ services:
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
healthcheck:
test: ["CMD-SHELL", "curl -f -s http://localhost:3000/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
prometheus:
image: prom/prometheus:latest
@ -189,14 +180,22 @@ services:
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
networks:
- monitoring
container_name: monitoring-cadvisor
ports:
- '8080:8080'
command:
- '-url_base_prefix=/cadvisor'
environment:
- CADVISOR_HEALTHCHECK_URL=http://localhost:8080/cadvisor/healthz
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
@ -209,20 +208,23 @@ services:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
# Blackbox exporter service, built from the prom/blackbox-exporter image and
# run as the container `monitoring-blackbox`.
# NOTE(review): this view interleaves removed and added diff lines without
# markers; confirm which of the lines below (notably `ports`) are still
# present in the resulting compose file.
blackbox:
image: prom/blackbox-exporter:latest
container_name: monitoring-blackbox
networks:
- transcendance-network
ports:
- "9115:9115"
restart: unless-stopped
# Container logs are sent through the gelf driver to udp://127.0.0.1:12201,
# tagged with the container name.
logging:
driver: gelf
options:
gelf-address: "udp://127.0.0.1:12201"
tag: "{{.Name}}"
# Health probe: `wget --spider` against /-/healthy on port 9115 from inside
# the container, every 30s with a 5s timeout and 3 retries; start_period is 10s.
healthcheck:
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9115/-/healthy"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
@ -242,10 +244,14 @@ services:
volumes:
- elastic-data:/usr/share/elasticsearch/data
- ./logs/elasticsearch:/setup
ports:
- "9200:9200"
command: ["/setup/bootstrap.sh"]
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9200"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
logstash:
image: docker.elastic.co/logstash/logstash:7.17.23
@ -259,6 +265,12 @@ services:
ports:
- "12201:12201/udp"
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:9600"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
kibana:
image: docker.elastic.co/kibana/kibana:7.17.23
@ -267,17 +279,24 @@ services:
- elasticsearch
networks:
- monitoring
- transcendance-network
environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- SERVER_PUBLICBASEURL=http://local.maix.me:5601
- SERVER_PUBLICBASEURL=https://local.maix.me:9090/kibana
- SERVER_BASEPATH=/kibana
- SERVER_REWRITEBASEPATH=true
- ELASTICSEARCH_USERNAME=elastic
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
ports:
- "5601:5601"
volumes:
- ./logs/kibana:/setup
command: ["/setup/bootstrap.sh"]
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "-s", "localhost:5601/kibana/api/status"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
volumes:
sqlite-volume: