feat(monitoring): general fixup
- added healthcheck for every service
- added nginx second "server" block for monitoring
all monitoring services are now behind this nginx reverse proxy
- fixed logging driver not present for chat service
This commit is contained in:
parent
67c8a9cbd1
commit
8a3481ea8b
10 changed files with 209 additions and 48 deletions
|
|
@ -33,8 +33,10 @@ services:
|
|||
restart: always
|
||||
networks:
|
||||
- transcendance-network
|
||||
- monitoring
|
||||
ports:
|
||||
- '8888:443'
|
||||
- '9090:8443'
|
||||
volumes:
|
||||
# if you need to share files with nginx, you do it here.
|
||||
- static-volume:/volumes/static
|
||||
|
|
@ -47,6 +49,7 @@ services:
|
|||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
|
||||
|
||||
###############
|
||||
# AUTH #
|
||||
###############
|
||||
|
|
@ -73,26 +76,6 @@ services:
|
|||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
|
||||
###############
|
||||
# TIC-TAC-TOE #
|
||||
###############
|
||||
# tic-tac-toe:
|
||||
# build:
|
||||
# context: ./src/
|
||||
# args:
|
||||
# - SERVICE=tic-tac-toe
|
||||
# - EXTRA_FILES=tic-tac-toe/extra
|
||||
# container_name: tic-tac-toe
|
||||
# restart: unless-stopped
|
||||
# networks:
|
||||
# - transcendance-network
|
||||
# volumes:
|
||||
# - sqlite-volume:/volumes/database
|
||||
# - static-volume:/volumes/static
|
||||
# environment:
|
||||
# - JWT_SECRET=KRUGKIDROVUWG2ZAMJZG653OEBTG66BANJ2W24DTEBXXMZLSEB2GQZJANRQXU6JA
|
||||
# - DATABASE_DIR=/volumes/database
|
||||
|
||||
###############
|
||||
# CHAT #
|
||||
###############
|
||||
|
|
@ -114,6 +97,11 @@ services:
|
|||
- DATABASE_DIR=/volumes/database
|
||||
- PROVIDER_FILE=/extra/providers.toml
|
||||
- SESSION_MANAGER=${SESSION_MANAGER}
|
||||
logging:
|
||||
driver: gelf
|
||||
options:
|
||||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
|
||||
###############
|
||||
# USER #
|
||||
|
|
@ -123,7 +111,6 @@ services:
|
|||
context: ./src/
|
||||
args:
|
||||
- SERVICE=user
|
||||
# - EXTRA_FILES=user/extra
|
||||
container_name: user
|
||||
restart: always
|
||||
networks:
|
||||
|
|
@ -154,8 +141,6 @@ services:
|
|||
- monitoring
|
||||
depends_on:
|
||||
- prometheus
|
||||
ports:
|
||||
- '3000:3000'
|
||||
volumes:
|
||||
- ./monitoring/grafana/alerting:/etc/grafana/provisioning/alerting
|
||||
- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
|
||||
|
|
@ -166,7 +151,7 @@ services:
|
|||
# This can stay the same for development; it is an alias for `localhost`.
|
||||
- NGINX_DOMAIN=local.maix.me
|
||||
- GF_LOG_LEVEL=warn
|
||||
- GF_SERVER_ROOT_URL=http://local.maix.me:3000
|
||||
- GF_SERVER_ROOT_URL=https://local.maix.me:9090/grafana/
|
||||
- GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER}
|
||||
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASS}
|
||||
logging:
|
||||
|
|
@ -174,6 +159,12 @@ services:
|
|||
options:
|
||||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f -s http://localhost:3000/api/health || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
|
|
@ -189,14 +180,22 @@ services:
|
|||
options:
|
||||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9090/-/healthy"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
networks:
|
||||
- monitoring
|
||||
container_name: monitoring-cadvisor
|
||||
ports:
|
||||
- '8080:8080'
|
||||
command:
|
||||
- '-url_base_prefix=/cadvisor'
|
||||
environment:
|
||||
- CADVISOR_HEALTHCHECK_URL=http://localhost:8080/cadvisor/healthz
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
|
|
@ -209,20 +208,23 @@ services:
|
|||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
|
||||
|
||||
blackbox:
|
||||
image: prom/blackbox-exporter:latest
|
||||
container_name: monitoring-blackbox
|
||||
networks:
|
||||
- transcendance-network
|
||||
ports:
|
||||
- "9115:9115"
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: gelf
|
||||
options:
|
||||
gelf-address: "udp://127.0.0.1:12201"
|
||||
tag: "{{.Name}}"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--quiet", "--spider", "http://localhost:9115/-/healthy"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
|
||||
|
||||
|
|
@ -242,10 +244,14 @@ services:
|
|||
volumes:
|
||||
- elastic-data:/usr/share/elasticsearch/data
|
||||
- ./logs/elasticsearch:/setup
|
||||
ports:
|
||||
- "9200:9200"
|
||||
command: ["/setup/bootstrap.sh"]
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "-s", "localhost:9200"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
logstash:
|
||||
image: docker.elastic.co/logstash/logstash:7.17.23
|
||||
|
|
@ -259,6 +265,12 @@ services:
|
|||
ports:
|
||||
- "12201:12201/udp"
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "-s", "localhost:9600"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
kibana:
|
||||
image: docker.elastic.co/kibana/kibana:7.17.23
|
||||
|
|
@ -267,17 +279,24 @@ services:
|
|||
- elasticsearch
|
||||
networks:
|
||||
- monitoring
|
||||
- transcendance-network
|
||||
environment:
|
||||
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
|
||||
- SERVER_PUBLICBASEURL=http://local.maix.me:5601
|
||||
- SERVER_PUBLICBASEURL=https://local.maix.me:9090/kibana
|
||||
- SERVER_BASEPATH=/kibana
|
||||
- SERVER_REWRITEBASEPATH=true
|
||||
- ELASTICSEARCH_USERNAME=elastic
|
||||
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
|
||||
ports:
|
||||
- "5601:5601"
|
||||
volumes:
|
||||
- ./logs/kibana:/setup
|
||||
command: ["/setup/bootstrap.sh"]
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "-s", "localhost:5601/kibana/api/status"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
volumes:
|
||||
sqlite-volume:
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@
|
|||
|
||||
kibana_setup() {
|
||||
set -xe
|
||||
until curl -s -f "localhost:5601/api/status"; do
|
||||
until curl -s -f "localhost:5601/kibana/api/status"; do
|
||||
sleep 2
|
||||
done
|
||||
|
||||
curl -v -X POST "localhost:5601/api/saved_objects/_import?overwrite=true" \
|
||||
curl -v -X POST "localhost:5601/kibana/api/saved_objects/_import?overwrite=true" \
|
||||
-H "kbn-xsrf: true" \
|
||||
--form file='@/setup/export.ndjson'
|
||||
exit 0
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ scrape_configs:
|
|||
|
||||
static_configs:
|
||||
- targets:
|
||||
- http://nginx/monitoring/ok
|
||||
- http://nginx:8080/ok
|
||||
- http://auth/monitoring
|
||||
- http://user/monitoring
|
||||
- http://icons/monitoring
|
||||
- http://chat/monitoring
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
|
|
|
|||
|
|
@ -27,3 +27,9 @@ COPY ./15-local-resolvers.envsh /docker-entrypoint.d/
|
|||
COPY ./17-add-template-prefix.sh /docker-entrypoint.d/
|
||||
|
||||
COPY ./conf /etc/nginx/templates
|
||||
COPY ./monitoring.index.html /var/share/www/monitoring/
|
||||
|
||||
RUN chmod -R +r /var/share/www/monitoring/;
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=3s \
|
||||
CMD curl -f -s http://localhost:8080/ok?docker || exit 1;
|
||||
|
|
@ -1,10 +1,15 @@
|
|||
# please make sure you want to edit this file...
|
||||
|
||||
map $http_upgrade $connection_upgrade {
|
||||
default upgrade;
|
||||
'' close;
|
||||
}
|
||||
|
||||
# this allows the redirection of `http://domain/URL` to `https://domain/URL`
|
||||
server {
|
||||
charset UTF-8;
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
#listen [::]:80;
|
||||
resolver $NGINX_RESOLVERS;
|
||||
server_name $NGINX_DOMAIN;
|
||||
|
||||
|
|
@ -14,16 +19,18 @@ server {
|
|||
|
||||
server {
|
||||
charset UTF-8;
|
||||
listen [::]:443 ssl;
|
||||
#listen [::]:443 ssl;
|
||||
listen 443 ssl;
|
||||
resolver $NGINX_RESOLVERS;
|
||||
server_name $NGINX_DOMAIN;
|
||||
|
||||
ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
|
||||
ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
|
||||
ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;
|
||||
ssl_protocols TLSv1.3;
|
||||
|
||||
ssl_protocols TLSv1.3;
|
||||
|
||||
proxy_set_header X-Forwarded true;
|
||||
error_page 497 https://$http_host$request_uri;
|
||||
error_page 497 https://$http_host$request_uri;
|
||||
include conf.d/locations/*.conf;
|
||||
}
|
||||
|
||||
include conf.d/monitoring/server.conf;
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
location /monitoring/ok {
|
||||
add_header Content-Type text/plain;
|
||||
return 200 'healthy';
|
||||
}
|
||||
39
nginx/conf/monitoring/locations.conf
Normal file
39
nginx/conf/monitoring/locations.conf
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Reverse-proxy locations for the monitoring services (Kibana, cAdvisor, Grafana).
|
||||
location /kibana {
    # Kibana is configured with SERVER_BASEPATH=/kibana and
    # SERVER_REWRITEBASEPATH=true, so the request URI is forwarded unchanged.
    # The upstream is kept in a variable so nginx resolves the name at request
    # time through the `resolver` of the enclosing server block.
    set $upstream_kibana kibana:5601;

    proxy_set_header Host $host;
    proxy_pass http://$upstream_kibana;
}
|
||||
|
||||
location /cadvisor {
    # cAdvisor is started with `-url_base_prefix=/cadvisor`, so the request URI
    # is forwarded unchanged (no prefix stripping here).
    # The upstream is kept in a variable so nginx resolves the name at request
    # time through the `resolver` of the enclosing server block.
    set $upstream_cadvisor cadvisor:8080;

    proxy_set_header Host $host;
    proxy_pass http://$upstream_cadvisor;
}
|
||||
|
||||
location /grafana {
    # `set` MUST come before `rewrite ... break`: the `break` flag stops
    # processing of every following ngx_http_rewrite_module directive in this
    # location, and `set` is one of them. With `set` placed after the rewrite,
    # $upstream_grafana stays empty and proxy_pass has no upstream host.
    set $upstream_grafana grafana:3000;

    proxy_set_header Host $host;

    # Strip the /grafana prefix before handing the request to Grafana.
    rewrite ^/grafana/?(.*) /$1 break;

    # No URI part in proxy_pass, so the rewritten request URI is what is sent.
    proxy_pass http://$upstream_grafana;
}
|
||||
# Proxy Grafana Live WebSocket connections.
|
||||
location /grafana/api/live/ {
    # `set` MUST come before `rewrite ... break`: the `break` flag stops
    # processing of every following ngx_http_rewrite_module directive in this
    # location, and `set` is one of them. With `set` placed after the rewrite,
    # $upstream_grafana stays empty and proxy_pass has no upstream host.
    set $upstream_grafana grafana:3000;

    # WebSocket upgrade: HTTP/1.1 plus the Upgrade/Connection hop-by-hop
    # headers. $connection_upgrade comes from the `map $http_upgrade ...`
    # block of the main nginx configuration.
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection $connection_upgrade;
    proxy_set_header Host $host;

    # Strip the /grafana prefix before handing the request to Grafana.
    rewrite ^/grafana/?(.*) /$1 break;

    # No URI part in proxy_pass, so the rewritten request URI is what is sent.
    proxy_pass http://$upstream_grafana;
}
|
||||
|
||||
location /ok {
    # Liveness endpoint: always answers 200 with a plain-text body.
    # `default_type` sets the single Content-Type header of the response;
    # `add_header Content-Type ...` would instead append a second Content-Type
    # next to the one nginx already generates from default_type.
    default_type text/plain;
    return 200 'healthy';
}
|
||||
|
||||
location / {
    # Landing page listing the monitoring services; the nginx Dockerfile
    # copies monitoring.index.html into this directory.
    index monitoring.index.html;
    root /var/share/www/monitoring/;
}
|
||||
26
nginx/conf/monitoring/server.conf
Normal file
26
nginx/conf/monitoring/server.conf
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
server {
    # Plain-HTTP listener for the monitoring locations. This is the port the
    # image HEALTHCHECK (http://localhost:8080/ok) and the Prometheus target
    # http://nginx:8080/ok talk to.
    listen 8080;
    #listen [::]:8080;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    resolver $NGINX_RESOLVERS;

    include conf.d/monitoring/locations.conf;
}
|
||||
|
||||
|
||||
server {
    # TLS listener for the monitoring locations; docker-compose publishes it
    # on the host as 9090 ('9090:8443').
    listen 8443 ssl;
    #listen [::]:8443 ssl;
    server_name $NGINX_DOMAIN;

    charset UTF-8;
    resolver $NGINX_RESOLVERS;

    ssl_protocols TLSv1.3;
    ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
    ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;

    # 497 = plain HTTP request sent to the HTTPS port: redirect to https.
    error_page 497 https://$http_host$request_uri;

    include conf.d/monitoring/locations.conf;
}
|
||||
64
nginx/monitoring.index.html
Normal file
64
nginx/monitoring.index.html
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
<!DOCTYPE html>
<html lang="en">
<!-- Static landing page linking to the monitoring services proxied by nginx. -->
<head>
    <meta charset="UTF-8">
    <title>Service Dashboard</title>
    <style>
        /* Page background and base font */
        body {
            margin: 0;
            padding: 0;
            background-color: #f4f6f8;
            font-family: Arial, Helvetica, sans-serif;
        }

        /* Centered card holding the list of links */
        .container {
            margin: 100px auto;
            max-width: 400px;
            padding: 20px;
            border-radius: 6px;
            background-color: #ffffff;
            box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
        }

        h1 {
            margin-bottom: 20px;
            font-size: 20px;
            text-align: center;
        }

        /* Plain list: no bullets, no default spacing */
        ul {
            margin: 0;
            padding: 0;
            list-style: none;
        }

        li {
            margin-bottom: 10px;
        }

        /* Each link is rendered as a full-width button */
        a {
            display: block;
            padding: 12px;
            border-radius: 4px;
            color: #333;
            background-color: #e9ecef;
            text-align: center;
            text-decoration: none;
        }

        a:hover {
            background-color: #dfe3e6;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Services</h1>
        <ul>
            <li><a href="/kibana">Kibana</a></li>
            <li><a href="/cadvisor">cAdvisor</a></li>
            <li><a href="/grafana">Grafana</a></li>
        </ul>
    </div>
</body>
</html>
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
FROM node:22-alpine AS pnpm_base
|
||||
RUN npm install --global pnpm@10 --no-fund -q;
|
||||
RUN apk add make python3 gcc clang build-base musl-dev;
|
||||
RUN apk add --no-cache curl
|
||||
|
||||
FROM pnpm_base AS deps
|
||||
WORKDIR /build
|
||||
|
|
@ -50,5 +51,8 @@ COPY --from=deps /build/node_modules /src/node_modules
|
|||
COPY ${EXTRA_FILES} /extra
|
||||
ENTRYPOINT [ "/src/entrypoint.sh" ]
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=3s \
|
||||
CMD curl -f -s http://localhost/monitoring?docker || exit 1
|
||||
|
||||
CMD ["node", "/src/run.cjs"]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue