Explorar o código

fix(alerting): measure state result instead of severity

bergquist hai 9 anos
pai
achega
c624f3d470
Modificáronse 2 ficheiros con 51 adicións e 42 borrados
  1. 37 32
      pkg/metrics/metrics.go
  2. 14 10
      pkg/services/alerting/result_handler.go

+ 37 - 32
pkg/metrics/metrics.go

@@ -9,34 +9,36 @@ func init() {
 }
 }
 
 
 var (
 var (
-	M_Instance_Start                     Counter
-	M_Page_Status_200                    Counter
-	M_Page_Status_500                    Counter
-	M_Page_Status_404                    Counter
-	M_Api_Status_500                     Counter
-	M_Api_Status_404                     Counter
-	M_Api_User_SignUpStarted             Counter
-	M_Api_User_SignUpCompleted           Counter
-	M_Api_User_SignUpInvite              Counter
-	M_Api_Dashboard_Save                 Timer
-	M_Api_Dashboard_Get                  Timer
-	M_Api_Dashboard_Search               Timer
-	M_Api_Admin_User_Create              Counter
-	M_Api_Login_Post                     Counter
-	M_Api_Login_OAuth                    Counter
-	M_Api_Org_Create                     Counter
-	M_Api_Dashboard_Snapshot_Create      Counter
-	M_Api_Dashboard_Snapshot_External    Counter
-	M_Api_Dashboard_Snapshot_Get         Counter
-	M_Models_Dashboard_Insert            Counter
-	M_Alerting_Result_Critical           Counter
-	M_Alerting_Result_Warning            Counter
-	M_Alerting_Result_Info               Counter
-	M_Alerting_Result_Ok                 Counter
-	M_Alerting_Active_Alerts             Counter
-	M_Alerting_Notification_Sent_Slack   Counter
-	M_Alerting_Notification_Sent_Email   Counter
-	M_Alerting_Notification_Sent_Webhook Counter
+	M_Instance_Start                       Counter
+	M_Page_Status_200                      Counter
+	M_Page_Status_500                      Counter
+	M_Page_Status_404                      Counter
+	M_Api_Status_500                       Counter
+	M_Api_Status_404                       Counter
+	M_Api_User_SignUpStarted               Counter
+	M_Api_User_SignUpCompleted             Counter
+	M_Api_User_SignUpInvite                Counter
+	M_Api_Dashboard_Save                   Timer
+	M_Api_Dashboard_Get                    Timer
+	M_Api_Dashboard_Search                 Timer
+	M_Api_Admin_User_Create                Counter
+	M_Api_Login_Post                       Counter
+	M_Api_Login_OAuth                      Counter
+	M_Api_Org_Create                       Counter
+	M_Api_Dashboard_Snapshot_Create        Counter
+	M_Api_Dashboard_Snapshot_External      Counter
+	M_Api_Dashboard_Snapshot_Get           Counter
+	M_Models_Dashboard_Insert              Counter
+	M_Alerting_Result_State_Critical       Counter
+	M_Alerting_Result_State_Warning        Counter
+	M_Alerting_Result_State_Ok             Counter
+	M_Alerting_Result_State_Paused         Counter
+	M_Alerting_Result_State_Pending        Counter
+	M_Alerting_Result_State_ExecutionError Counter
+	M_Alerting_Active_Alerts               Counter
+	M_Alerting_Notification_Sent_Slack     Counter
+	M_Alerting_Notification_Sent_Email     Counter
+	M_Alerting_Notification_Sent_Webhook   Counter
 
 
 	// Timers
 	// Timers
 	M_DataSource_ProxyReq_Timer Timer
 	M_DataSource_ProxyReq_Timer Timer
@@ -75,10 +77,13 @@ func initMetricVars(settings *MetricSettings) {
 
 
 	M_Models_Dashboard_Insert = RegCounter("models.dashboard.insert")
 	M_Models_Dashboard_Insert = RegCounter("models.dashboard.insert")
 
 
-	M_Alerting_Result_Critical = RegCounter("alerting.result", "severity", "critical")
-	M_Alerting_Result_Warning = RegCounter("alerting.result", "severity", "warning")
-	M_Alerting_Result_Info = RegCounter("alerting.result", "severity", "info")
-	M_Alerting_Result_Ok = RegCounter("alerting.result", "severity", "ok")
+	M_Alerting_Result_State_Critical = RegCounter("alerting.result", "state", "critical")
+	M_Alerting_Result_State_Warning = RegCounter("alerting.result", "state", "warning")
+	M_Alerting_Result_State_Ok = RegCounter("alerting.result", "state", "ok")
+	M_Alerting_Result_State_Paused = RegCounter("alerting.result", "state", "paused")
+	M_Alerting_Result_State_Pending = RegCounter("alerting.result", "state", "pending")
+	M_Alerting_Result_State_ExecutionError = RegCounter("alerting.result", "state", "execution_error")
+
 	M_Alerting_Active_Alerts = RegCounter("alerting.active_alerts")
 	M_Alerting_Active_Alerts = RegCounter("alerting.active_alerts")
 	M_Alerting_Notification_Sent_Slack = RegCounter("alerting.notifications_sent", "type", "slack")
 	M_Alerting_Notification_Sent_Slack = RegCounter("alerting.notifications_sent", "type", "slack")
 	M_Alerting_Notification_Sent_Email = RegCounter("alerting.notifications_sent", "type", "email")
 	M_Alerting_Notification_Sent_Email = RegCounter("alerting.notifications_sent", "type", "email")

+ 14 - 10
pkg/services/alerting/result_handler.go

@@ -41,7 +41,7 @@ func (handler *DefaultResultHandler) Handle(ctx *EvalContext) {
 		ctx.Rule.State = m.AlertStateOK
 		ctx.Rule.State = m.AlertStateOK
 	}
 	}
 
 
-	countSeverity(ctx.Rule.Severity)
+	countStateResult(ctx.Rule.State)
 	if ctx.Rule.State != oldState {
 	if ctx.Rule.State != oldState {
 		handler.log.Info("New state change", "alertId", ctx.Rule.Id, "newState", ctx.Rule.State, "oldState", oldState)
 		handler.log.Info("New state change", "alertId", ctx.Rule.Id, "newState", ctx.Rule.State, "oldState", oldState)
 
 
@@ -78,15 +78,19 @@ func (handler *DefaultResultHandler) Handle(ctx *EvalContext) {
 	}
 	}
 }
 }
 
 
-func countSeverity(state m.AlertSeverityType) {
+func countStateResult(state m.AlertStateType) {
 	switch state {
 	switch state {
-	case m.AlertSeverityOK:
-		metrics.M_Alerting_Result_Ok.Inc(1)
-	case m.AlertSeverityInfo:
-		metrics.M_Alerting_Result_Info.Inc(1)
-	case m.AlertSeverityWarning:
-		metrics.M_Alerting_Result_Warning.Inc(1)
-	case m.AlertSeverityCritical:
-		metrics.M_Alerting_Result_Critical.Inc(1)
+	case m.AlertStateCritical:
+		metrics.M_Alerting_Result_State_Critical.Inc(1)
+	case m.AlertStateWarning:
+		metrics.M_Alerting_Result_State_Warning.Inc(1)
+	case m.AlertStateOK:
+		metrics.M_Alerting_Result_State_Ok.Inc(1)
+	case m.AlertStatePaused:
+		metrics.M_Alerting_Result_State_Paused.Inc(1)
+	case m.AlertStatePending:
+		metrics.M_Alerting_Result_State_Pending.Inc(1)
+	case m.AlertStateExeuctionError:
+		metrics.M_Alerting_Result_State_ExecutionError.Inc(1)
 	}
 	}
 }
 }