
tech(alerting): minor refactoring and code style

bergquist 9 years ago
parent
commit 2cf797b567

+ 9 - 5
pkg/models/alerts.go

@@ -110,11 +110,11 @@ type GetAlertChangesQuery struct {
 }
 
 type AlertJob struct {
-	Offset  int64
-	Delay   bool
-	Running bool
-	Retry   int
-	Rule    AlertRule
+	Offset     int64
+	Delay      bool
+	Running    bool
+	RetryCount int
+	Rule       AlertRule
 }
 
 type AlertResult struct {
@@ -125,3 +125,7 @@ type AlertResult struct {
 	Description string
 	AlertJob    *AlertJob
 }
+
+func (ar *AlertResult) IsResultIncomplete() bool {
+	return ar.State == AlertStatePending
+}
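
For context, a minimal, self-contained sketch of how the renamed RetryCount field and the new IsResultIncomplete helper are meant to be used together. The types and the AlertStatePending constant below are reduced stand-ins for the real pkg/models definitions, not code from this commit:

```go
package main

import "fmt"

// Reduced stand-in for the pkg/models state constant used by this commit.
const AlertStatePending = "PENDING"

type AlertJob struct {
	Offset     int64
	Delay      bool
	Running    bool
	RetryCount int
}

type AlertResult struct {
	Id       int64
	State    string
	AlertJob *AlertJob
}

// Same shape as the helper added above: a pending result means the
// check has not produced a final state yet.
func (ar *AlertResult) IsResultIncomplete() bool {
	return ar.State == AlertStatePending
}

func main() {
	res := &AlertResult{Id: 1, State: AlertStatePending, AlertJob: &AlertJob{}}
	if res.IsResultIncomplete() {
		// An incomplete result is counted and re-queued by the scheduler.
		res.AlertJob.RetryCount++
	}
	fmt.Println(res.AlertJob.RetryCount) // 1
}
```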

+ 3 - 25
pkg/services/alerting/alert_rule_reader.go

@@ -5,6 +5,7 @@ import (
 	"time"
 
 	"github.com/grafana/grafana/pkg/bus"
+	"github.com/grafana/grafana/pkg/log"
 	m "github.com/grafana/grafana/pkg/models"
 )
 
@@ -51,31 +52,6 @@ func (arr *AlertRuleReader) updateRules() {
 	arr.Lock()
 	defer arr.Unlock()
 
-	/*
-		rules = []m.AlertRule{
-			//{Id: 1, Title: "alert rule 1", Interval: "10s", Frequency: 10},
-			//{Id: 2, Title: "alert rule 2", Interval: "10s", Frequency: 10},
-			//{Id: 3, Title: "alert rule 3", Interval: "10s", Frequency: 10},
-			//{Id: 4, Title: "alert rule 4", Interval: "10s", Frequency: 5},
-			//{Id: 5, Title: "alert rule 5", Interval: "10s", Frequency: 5},
-			{
-				Id:           1,
-				OrgId:        1,
-				Title:        "alert rule 1",
-				Frequency:    3,
-				DatasourceId: 1,
-				WarnOperator: ">",
-				WarnLevel:    3,
-				CritOperator: ">",
-				CritLevel:    4,
-				Aggregator:   "avg",
-				//Query:        `{"refId":"A","target":"statsd.fakesite.counters.session_start.*.count","textEditor":true}"`,
-				Query:        `{"hide":false,"refId":"A","target":"aliasByNode(statsd.fakesite.counters.session_start.*.count, 4)","textEditor":false}`,
-				QueryRange:   3600,
-			},
-		}
-	*/
-
 	cmd := &m.GetAlertsQuery{
 		OrgId: 1,
 	}
@@ -83,6 +59,8 @@ func (arr *AlertRuleReader) updateRules() {
 
 	if err == nil {
 		alertJobs = cmd.Result
+	} else {
+		log.Error(1, "AlertRuleReader: Could not load alerts")
 	}
 }
 

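The change above drops the commented-out hard-coded rule fixtures and logs a failed GetAlertsQuery dispatch instead of ignoring it. A minimal sketch of that pattern, using stand-in types and the standard library logger rather than the real pkg/bus and pkg/log APIs: keep the previously loaded rules on failure, but make the failure visible.

```go
package main

import (
	"errors"
	"log"
)

// Stand-in for m.GetAlertsQuery; the real Result holds alert rules.
type getAlertsQuery struct {
	OrgId  int64
	Result []string
}

// Stand-in for bus.Dispatch; it always fails here to show the error path.
func dispatch(cmd *getAlertsQuery) error {
	return errors.New("database unavailable")
}

var alertJobs []string

func updateRules() {
	cmd := &getAlertsQuery{OrgId: 1}
	if err := dispatch(cmd); err == nil {
		alertJobs = cmd.Result
	} else {
		// Keep the last known rules, but report the failed load.
		log.Printf("AlertRuleReader: could not load alerts: %v", err)
	}
}

func main() {
	updateRules()
	log.Printf("loaded %d alert rules", len(alertJobs))
}
```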
+ 16 - 12
pkg/services/alerting/alerting.go

@@ -74,8 +74,8 @@ func (scheduler *Scheduler) updateJobs(alertRuleFn func() []m.AlertRule) {
 			job = scheduler.jobs[rule.Id]
 		} else {
 			job = &m.AlertJob{
-				Running: false,
-				Retry:   0,
+				Running:    false,
+				RetryCount: 0,
 			}
 		}
 
@@ -110,24 +110,28 @@ func (scheduler *Scheduler) executor(executor Executor) {
 
 func (scheduler *Scheduler) handleResponses() {
 	for response := range scheduler.responseQueue {
-		log.Info("Response: alert(%d) status(%s) actual(%v) retry(%d) running(%v)", response.Id, response.State, response.ActualValue, response.AlertJob.Retry, response.AlertJob.Running)
+		log.Info("Response: alert(%d) status(%s) actual(%v) retry(%d)", response.Id, response.State, response.ActualValue, response.AlertJob.RetryCount)
 		response.AlertJob.Running = false
 
-		if response.State == m.AlertStatePending {
-			response.AlertJob.Retry++
-			if response.AlertJob.Retry > maxRetries {
-				response.State = m.AlertStateCritical
-				response.Description = fmt.Sprintf("Failed to run check after %d retires", maxRetries)
-				scheduler.saveState(response)
+		if response.IsResultIncomplete() {
+			response.AlertJob.RetryCount++
+			if response.AlertJob.RetryCount < maxRetries {
+				scheduler.runQueue <- response.AlertJob
+			} else {
+				saveState(&m.AlertResult{
+					Id:          response.Id,
+					State:       m.AlertStateCritical,
+					Description: fmt.Sprintf("Failed to run check after %d retries", maxRetries),
+				})
 			}
 		} else {
-			response.AlertJob.Retry = 0
-			scheduler.saveState(response)
+			response.AlertJob.RetryCount = 0
+			saveState(response)
 		}
 	}
 }
 
-func (scheduler *Scheduler) saveState(response *m.AlertResult) {
+func saveState(response *m.AlertResult) {
 	cmd := &m.UpdateAlertStateCommand{
 		AlertId:  response.Id,
 		NewState: response.State,