monitoring(monitoring/grafana): setting up the rules for the notification
This commit is contained in:
parent
20cac0e42b
commit
9e98ff2d13
1 changed files with 782 additions and 0 deletions
782
monitoring/grafana/alerting/rules.yaml
Normal file
782
monitoring/grafana/alerting/rules.yaml
Normal file
|
|
@ -0,0 +1,782 @@
|
|||
# Grafana alert-rule provisioning (monitoring/grafana/alerting/rules.yaml).
# Each rule in this file is labelled __converted_prometheus_rule__: "true" and
# uses the same three-step pipeline:
#   query           - instant PromQL query against datasource uid "prometheus"
#   prometheus_math - maps every returned sample (number, NaN or Inf) to 1
#   threshold       - alert condition: prometheus_math > 0
apiVersion: 1
groups:
# Group "availability": scrape-target liveness, evaluated every minute.
- orgId: 1
  name: availability
  folder: alert_rules.yml
  interval: 1m
  rules:
  # InstanceDown (critical): a target's `up` series equals 0 for the 1m pending period.
  - uid: 14db4fe7-faf3-5629-9ee1-c5c189d75fec
    title: InstanceDown
    condition: threshold
    data:
    # Step 1: instant PromQL query; the `== 0` filter returns only failing targets.
    - refId: query
      queryType: prometheus
      # presumably seconds relative to evaluation time (11 min ago .. 1 min ago) - confirm
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: up == 0
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0, i.e. the query returned the series.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    # An empty result is the healthy case for a filtered query, hence OK.
    # NOTE(review): execErrState OK also hides query failures - confirm this is intended.
    noDataState: OK
    execErrState: OK
    for: 1m
    annotations:
      description: |
        Instance {{ $labels.instance }} (job={{ $labels.job }}) has not responded to Prometheus scrapes for more than one minute.
      # Name the failing instance (was the job label only), matching the description.
      summary: Instance {{ $labels.instance }} (job={{ $labels.job }}) down
    labels:
      __converted_prometheus_rule__: "true"
      severity: critical
    isPaused: false
    missing_series_evals_to_resolve: 1
# Group "blackbox-probes": probe failure, latency and HTTP status checks,
# evaluated every minute.
- orgId: 1
  name: blackbox-probes
  folder: alert_rules.yml
  interval: 1m
  rules:
  # BlackboxProbeFailed (critical): probe_success equals 0.
  - uid: c549c658-ce15-5d56-9842-07730bb11e15
    title: BlackboxProbeFailed
    condition: threshold
    data:
    # Step 1: instant PromQL query; only failing probes are returned.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: probe_success == 0
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    # Was 30s, shorter than the 1m group interval. The rule is only evaluated
    # once per interval, so the pending period is stated as one full interval;
    # the alert still fires on the second consecutive failing evaluation.
    for: 1m
    annotations:
      description: |
        The Blackbox probe for {{ $labels.instance }} has failed (probe_success = 0).
      summary: Blackbox probe failed
    labels:
      __converted_prometheus_rule__: "true"
      severity: critical
    isPaused: false
    missing_series_evals_to_resolve: 1
  # BlackboxHighLatency (warning): probe_duration_seconds above 1 for 2m.
  - uid: 78a2ece6-4f7a-5496-9a59-6de4a56db201
    title: BlackboxHighLatency
    condition: threshold
    data:
    # Step 1: instant PromQL query; only probes slower than 1 second are returned.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: probe_duration_seconds > 1
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 2m
    annotations:
      description: |
        The Blackbox probe to {{ $labels.instance }} has been taking more than 1 second to respond for over 2 minutes.
      summary: High latency on a Blackbox probe
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
  # BlackboxBadHTTPStatus (warning): probe_http_status_code differs from 200 for 1m.
  - uid: 00b5d799-0eef-59e9-9371-2a0bfb7df19b
    title: BlackboxBadHTTPStatus
    condition: threshold
    data:
    # Step 1: instant PromQL query; only probes with a non-200 status are returned.
    # NOTE(review): this also matches other 2xx/3xx codes - confirm that is wanted.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: probe_http_status_code != 200
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 1m
    annotations:
      description: |
        The Blackbox probe to {{ $labels.instance }} is returning HTTP status {{ $value }} different from 200.
      summary: Bad HTTP status code on a Blackbox probe
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
# Group "container-resources": aggregate CPU and memory usage over all
# container series, evaluated every minute.
- orgId: 1
  name: container-resources
  folder: alert_rules.yml
  interval: 1m
  rules:
  # HighGlobalCPUUsage (warning): summed user-CPU rate * 100 above 80 for 5m.
  - uid: 985c697f-e309-524c-9cd4-650a2045c279
    title: HighGlobalCPUUsage
    condition: threshold
    data:
    # Step 1: instant PromQL query; returns a sample only while the sum exceeds 80.
    # NOTE(review): rate() of a CPU-seconds counter is measured in cores, so 80
    # here means 0.8 core in total, not 80% of a multi-core host. The sibling
    # memory rule divides by a machine total; consider dividing by a core-count
    # metric here as well - confirm the intended meaning before changing.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: (sum(rate(container_cpu_user_seconds_total[5m])) * 100) > 80
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 5m
    annotations:
      description: |
        Global CPU usage of containers has been above 80% for more than 5 minutes. Check which services are consuming the most resources.
      summary: High global CPU usage for containers
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
  # HighGlobalMemoryUsage (warning): summed container memory above 80% of
  # summed machine memory for 5m.
  - uid: 635d0ad1-10f2-51f4-9226-baf56557d870
    title: HighGlobalMemoryUsage
    condition: threshold
    data:
    # Step 1: instant PromQL query; returns a sample only while the ratio exceeds 80.
    # NOTE(review): the sum covers every container_memory_usage_bytes series;
    # presumably nested or parent cgroup series would be counted twice - verify.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: (sum(container_memory_usage_bytes) / sum(machine_memory_bytes)) * 100 > 80
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 5m
    annotations:
      description: |
        Global memory usage of containers has been above 80% for more than 5 minutes.
      summary: High global memory usage for containers
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
# Group "per-container-resources": per-series CPU and memory thresholds,
# evaluated every minute.
- orgId: 1
  name: per-container-resources
  folder: alert_rules.yml
  interval: 1m
  rules:
  # HighContainerCPUUsage (warning): a series' user-CPU rate * 100 above 80 for 5m.
  - uid: 3daf3f51-d4ad-5169-ace2-cdc1c43d8e4e
    title: HighContainerCPUUsage
    condition: threshold
    data:
    # Step 1: instant PromQL query; one sample per series currently above the limit.
    # NOTE(review): rate() of a CPU-seconds counter is in cores, so 80 means
    # 0.8 core for that series - confirm this is the intended "80% CPU".
    # NOTE(review): there is no label filter; presumably series without a
    # `name` label would alert with an empty container name - verify.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: rate(container_cpu_user_seconds_total[5m]) * 100 > 80
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 5m
    annotations:
      description: |
        Container {{ $labels.name }} has been using more than 80% CPU for more than 5 minutes.
      summary: High CPU usage on a container
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
  # HighContainerMemoryUsage (warning): a series' memory usage above
  # 500 * 1024 * 1024 bytes (500 MiB) for 5m.
  - uid: 3202077e-ba84-5401-86fe-0fe6b0a4c26d
    title: HighContainerMemoryUsage
    condition: threshold
    data:
    # Step 1: instant PromQL query; one sample per series currently above the limit.
    - refId: query
      queryType: prometheus
      relativeTimeRange:
        from: 660
        to: 60
      datasourceUid: prometheus
      model:
        datasource:
          type: prometheus
          uid: prometheus
        expr: container_memory_usage_bytes > 500 * 1024 * 1024
        instant: true
        intervalMs: 1000
        maxDataPoints: 43200
        range: false
        refId: query
    # Step 2: turn every returned sample (number, NaN or Inf) into 1.
    - refId: prometheus_math
      queryType: math
      datasourceUid: __expr__
      model:
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: is_number($query) || is_nan($query) || is_inf($query)
        intervalMs: 1000
        maxDataPoints: 43200
        refId: prometheus_math
        type: math
    # Step 3: alert condition - prometheus_math > 0.
    - refId: threshold
      queryType: threshold
      datasourceUid: __expr__
      model:
        conditions:
        - evaluator:
            params:
            - 0
            type: gt
        datasource:
          IsPrunable: false
          access: ""
          apiVersion: ""
          basicAuth: false
          basicAuthUser: ""
          created: "0001-01-01T00:00:00Z"
          database: ""
          id: -100
          isDefault: false
          jsonData: {}
          name: __expr__
          readOnly: false
          secureJsonData: {}
          type: __expr__
          uid: __expr__
          updated: "0001-01-01T00:00:00Z"
          url: ""
          user: ""
          withCredentials: false
        expression: prometheus_math
        intervalMs: 1000
        maxDataPoints: 43200
        refId: threshold
        type: threshold
    noDataState: OK
    execErrState: OK
    for: 5m
    annotations:
      description: |
        Container {{ $labels.name }} has been using more than 500 MB of RAM for more than 5 minutes. Adjust the threshold if necessary.
      summary: High memory usage on a container
    labels:
      __converted_prometheus_rule__: "true"
      severity: warning
    isPaused: false
    missing_series_evals_to_resolve: 1
Loading…
Add table
Add a link
Reference in a new issue