Unverified Commit f452eea6 authored by Ben Kochie's avatar Ben Kochie Committed by GitHub

Merge pull request #281 from jamtur01/patch-1

Updated rules for Prometheus 2.0
parents 5504ce58 84b8482c
......@@ -22,102 +22,69 @@ job:mysql_transactions:rate5m = sum(rate(mysql_global_status_commands_total{comm
###
# Galera Alerts
# Alert: Galera node is not "ready".
ALERT MySQLGaleraNotReady
IF mysql_global_status_wsrep_ready != 1
FOR 5m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Galera cluster node not ready",
description = "{{$labels.job}} on {{$labels.instance}} is not ready.",
}
# Alert: Galera node state is not synced.
ALERT MySQLGaleraOutOfSync
IF (mysql_global_status_wsrep_local_state != 4 AND mysql_global_variables_wsrep_desync == 0)
FOR 5m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Galera cluster node out of sync",
description = "{{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}} != 4).",
}
# Alert: Galera node is in "doner" state, and is behind applying transactions.
ALERT MySQLGaleraDonorFallingBehind
IF (mysql_global_status_wsrep_local_state == 2 AND mysql_global_status_wsrep_local_recv_queue > 100)
FOR 5m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "xtradb cluster donor node falling behind",
description = "{{$labels.job}} on {{$labels.instance}} is a donor (hotbackup) and is falling behind (queue size {{$value}}).",
}
###
# Replication Alerts
# Alert: The replication IO or SQL threads are stopped.
ALERT MySQLReplicationNotRunning
IF mysql_slave_status_slave_io_running == 0 OR mysql_slave_status_slave_sql_running == 0
FOR 2m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "Slave replication is not running",
description = "Slave replication (IO or SQL) has been down for more than 2 minutes.",
}
# Alert: The replication lag is non-zero and it predicted to not recover within
# 2 minutes. This allows for a small amount of replication lag.
# NOTE: This alert depends on the recording rule at the top of the file.
ALERT MySQLReplicationLag
IF
(mysql_slave_lag_seconds > 30)
AND on (instance)
(predict_linear(mysql_slave_lag_seconds[5m], 60*2) > 0)
FOR 1m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "MySQL slave replication is lagging",
description = "The mysql slave replication has fallen behind and is not recovering",
}
# Alert: The replication lag is non-zero and it predicted to not recover within
# 2 minutes. This allows for a small amount of replication lag.
# NOTE: This alert depends on the recording rule at the top of the file.
ALERT MySQLReplicationLag
IF
(mysql_heartbeat_lag_seconds > 30)
AND on (instance)
(predict_linear(mysql_heartbeat_lag_seconds[5m], 60*2) > 0)
FOR 1m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "MySQL slave replication is lagging",
description = "The mysql slave replication has fallen behind and is not recovering",
}
###
# Performance Alerts
# Alert: InnoDB log writes are stalling.
ALERT MySQLInnoDBLogWaits
IF rate(mysql_global_status_innodb_log_waits[15m]) > 10
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "MySQL innodb log writes stalling",
description = "The innodb logs are waiting for disk at a rate of {{$value}} / second",
}
groups:
- name: GaleraAlerts
rules:
- alert: MySQLGaleraNotReady
expr: mysql_global_status_wsrep_ready != 1
for: 5m
labels:
severity: warning
annotations:
description: '{{$labels.job}} on {{$labels.instance}} is not ready.'
summary: Galera cluster node not ready
- alert: MySQLGaleraOutOfSync
expr: (mysql_global_status_wsrep_local_state != 4 and mysql_global_variables_wsrep_desync
== 0)
for: 5m
labels:
severity: warning
annotations:
description: '{{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}}
!= 4).'
summary: Galera cluster node out of sync
- alert: MySQLGaleraDonorFallingBehind
expr: (mysql_global_status_wsrep_local_state == 2 and mysql_global_status_wsrep_local_recv_queue
> 100)
for: 5m
labels:
severity: warning
annotations:
description: '{{$labels.job}} on {{$labels.instance}} is a donor (hotbackup)
and is falling behind (queue size {{$value}}).'
summary: xtradb cluster donor node falling behind
- alert: MySQLReplicationNotRunning
expr: mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running
== 0
for: 2m
labels:
severity: critical
annotations:
description: Slave replication (IO or SQL) has been down for more than 2 minutes.
summary: Slave replication is not running
- alert: MySQLReplicationLag
expr: (mysql_slave_lag_seconds > 30) and on(instance) (predict_linear(mysql_slave_lag_seconds[5m],
60 * 2) > 0)
for: 1m
labels:
severity: critical
annotations:
description: The mysql slave replication has fallen behind and is not recovering
summary: MySQL slave replication is lagging
- alert: MySQLReplicationLag
expr: (mysql_heartbeat_lag_seconds > 30) and on(instance) (predict_linear(mysql_heartbeat_lag_seconds[5m],
60 * 2) > 0)
for: 1m
labels:
severity: critical
annotations:
description: The mysql slave replication has fallen behind and is not recovering
summary: MySQL slave replication is lagging
- alert: MySQLInnoDBLogWaits
expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
labels:
severity: warning
annotations:
description: The innodb logs are waiting for disk at a rate of {{$value}} /
second
summary: MySQL innodb log writes stalling
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment