|
|
@@ -1,167 +0,0 @@
|
|
|
-{
|
|
|
- "alert": {
|
|
|
- "name": "Majority servers down",
|
|
|
- "frequency": 60,
|
|
|
- "notify": ["group1", "group2"],
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "left": [
|
|
|
- {
|
|
|
- "type": "query",
|
|
|
- "refId": "A",
|
|
|
- "timeRange": {"from": "5m", "to": "now-1m"},
|
|
|
- },
|
|
|
- {
|
|
|
- "type": "function",
|
|
|
- "name": "max"
|
|
|
- }
|
|
|
- ],
|
|
|
- "operator": ">",
|
|
|
- "right": [
|
|
|
- {
|
|
|
- "type": "constant",
|
|
|
- "value": 100
|
|
|
- }
|
|
|
- ],
|
|
|
- "level": 2,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
-
|
|
|
- "alert": {
|
|
|
- "name": "Majority servers down take2",
|
|
|
- "frequency": 60,
|
|
|
- "notify": ["group1", "group2"],
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "left": [
|
|
|
- {
|
|
|
- "type": "query",
|
|
|
- "refId": "A",
|
|
|
- "timeRange": {"from": "5m", "to": "now-1m"},
|
|
|
- },
|
|
|
- {
|
|
|
- "type": "function",
|
|
|
- "name": "max"
|
|
|
- }
|
|
|
- ],
|
|
|
- "operator": ">",
|
|
|
- "right": [
|
|
|
- {
|
|
|
- "type": "query",
|
|
|
- "refId": "A",
|
|
|
- "timeRange": {"from": "now-1d-5m", "to": "now-1d"},
|
|
|
- },
|
|
|
- {
|
|
|
- "type": "function",
|
|
|
- "name": "max"
|
|
|
- }
|
|
|
- ],
|
|
|
- "level": 2,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "CPU usage last 5min above 90%",
|
|
|
- "frequency": 60,
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "expr": "query(#A, 5m, now, avg)",
|
|
|
- "operator": ">",
|
|
|
- "critLevel": 90,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "Series count above 10",
|
|
|
- "frequency": "1m",
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "expr": "query(#A, 5m, now, avg) | countSeries()",
|
|
|
- "operator": ">",
|
|
|
- "critLevel": 10,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "Disk Free Zero in 3 days",
|
|
|
- "frequency": "1d",
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "expr": "query(#A, 1d, now, trend(3d))",
|
|
|
- "operator": ">",
|
|
|
- "critLevel": 0,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "Server requests is zero for more than 10min",
|
|
|
- "frequency": "1d",
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "expr": "query(#A, 10m, now, sum)",
|
|
|
- "operator": "=",
|
|
|
- "critLevel": 0,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "Timeouts should not be more than 0.1% of requests",
|
|
|
- "frequency": "1d",
|
|
|
- "expressions": [
|
|
|
- {
|
|
|
- "expr": "query(#A, 10m, now, sum) | subtract | query(#B, 10m, now, sum)",
|
|
|
- "operator": ">",
|
|
|
- "critLevel": 0,
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
|
|
|
- "frequency": "1m",
|
|
|
- "value": "query(#A, 5m, now, avg)",
|
|
|
- "operator": "percent change",
|
|
|
- "threshold": "query(#A, 1d, now, avg)",
|
|
|
- },
|
|
|
-
|
|
|
- "alert": {
|
|
|
- "name": "CPU higher than 90%",
|
|
|
- "frequency": "1m",
|
|
|
- "valueExpr": "query(#A, 5m, now, avg)",
|
|
|
- "evalType": "greater than",
|
|
|
- "critLevel": 20,
|
|
|
- "warnLevel": 10,
|
|
|
- },
|
|
|
-
|
|
|
- "alert": {
|
|
|
- "name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
|
|
|
- "frequency": "1m",
|
|
|
- "expr": "query(#A, 5m, now, avg) percentGreaterThan()",
|
|
|
- "evalType": "percent change",
|
|
|
- "evalExpr": "query(#A, 1d, now, avg)",
|
|
|
- "critLevel": 20,
|
|
|
- "warnLevel": 10,
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
|
|
|
- "frequency": "1m",
|
|
|
- "valueQuery": "query(#A, 5m, now, avg) ",
|
|
|
- "evalType": "simple", "// other options are: percent change, trend"
|
|
|
- "evalQuery": "query(#A, 1d, now, avg)",
|
|
|
- "comparison": "greater than",
|
|
|
- "critLevel": 20,
|
|
|
- "warnLevel": 10,
|
|
|
- },
|
|
|
- "alert": {
|
|
|
- "name": "CPU usage last 5min changed by more than 20% compared to last 24hours",
|
|
|
- "frequency": "1m",
|
|
|
- "valueQuery": "query(#A, 5m, now, avg) | Evaluate Against: Static Threshold | >200 Warn | >300 Critical",
|
|
|
- "valueQuery": "query(#A, 5m, now, avg) | Evaluate Against: Percent Change Compared To | query(#B, 5m, now, avg) | >200 Warn | >300 Critical",
|
|
|
- "valueQuery": "query(#A, 5m, now, trend) | Evaluate Against: Forecast | 7days | >200 Warn | >300 Critical",
|
|
|
- "evalType": "simple", "// other options are: percent change, trend"
|
|
|
- "evalQuery": "query(#A, 1d, now, avg)",
|
|
|
- "comparison": "greater than",
|
|
|
- "critLevel": 20,
|
|
|
- "warnLevel": 10,
|
|
|
- },
|
|
|
-}
|