
feat(alerting): work on alerting

Torkel Ödegaard, 9 years ago
parent commit f13b869aa4

+ 15 - 17
pkg/api/api.go

@@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) {
 		// metrics
 		r.Get("/metrics", wrap(GetInternalMetrics))
 
-		r.Group("/alerting", func() {
-			r.Group("/rules", func() {
-				r.Get("/:alertId/states", wrap(GetAlertStates))
-				//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
-				r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
-				//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
-				r.Get("/", wrap(GetAlerts))
-			})
-
-			r.Get("/notifications", wrap(GetAlertNotifications))
-
-			r.Group("/notification", func() {
-				r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
-				r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
-				r.Get("/:notificationId", wrap(GetAlertNotificationById))
-				r.Delete("/:notificationId", wrap(DeleteAlertNotification))
-			}, reqOrgAdmin)
+		r.Group("/alerts", func() {
+			r.Get("/:alertId/states", wrap(GetAlertStates))
+			//r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
+			r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
+			//r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle dashboard updates
+			r.Get("/", wrap(GetAlerts))
 		})
 
+		r.Get("/alert-notifications", wrap(GetAlertNotifications))
+
+		r.Group("/alert-notifications", func() {
+			r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
+			r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
+			r.Get("/:notificationId", wrap(GetAlertNotificationById))
+			r.Delete("/:notificationId", wrap(DeleteAlertNotification))
+		}, reqOrgAdmin)
+
 		// error test
 		r.Get("/metrics/error", wrap(GenerateError))
 

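With the regrouping above, alert rules now live under `/api/alerts` and notifications under `/api/alert-notifications`; the list route is registered outside the `reqOrgAdmin` group, which appears to be so that reads do not require org-admin rights. Below is a minimal sketch of exercising the new GET routes from Go with only the standard library; the base URL, API key, and rule id are placeholders, not part of this commit:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	base := "http://localhost:3000"  // hypothetical local instance
	apiKey := "REPLACE_WITH_API_KEY" // hypothetical credential

	// The endpoints as registered after this commit.
	paths := []string{
		"/api/alerts",              // was /api/alerting/rules
		"/api/alerts/1/states",     // state log for a rule (id 1 as an example)
		"/api/alert-notifications", // was /api/alerting/notifications
	}

	for _, p := range paths {
		req, err := http.NewRequest(http.MethodGet, base+p, nil)
		if err != nil {
			panic(err)
		}
		req.Header.Set("Authorization", "Bearer "+apiKey)

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		fmt.Printf("GET %s -> %d (%d bytes)\n", p, resp.StatusCode, len(body))
	}
}
```
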
+ 3 - 0
pkg/models/alert.go

@@ -18,6 +18,9 @@ type Alert struct {
 	Enabled     bool
 	Frequency   int64
 
+	CreatedBy int64
+	UpdatedBy int64
+
 	Created time.Time
 	Updated time.Time
 

+ 6 - 8
pkg/services/alerting/alertstates/states.go

@@ -5,14 +5,12 @@ var (
 		Ok,
 		Warn,
 		Critical,
-		Acknowledged,
-		Maintenance,
+		Unknown,
 	}
 
-	Ok           = "OK"
-	Warn         = "WARN"
-	Critical     = "CRITICAL"
-	Acknowledged = "ACKNOWLEDGED"
-	Maintenance  = "MAINTENANCE"
-	Pending      = "PENDING"
+	Ok       = "OK"
+	Warn     = "WARN"
+	Critical = "CRITICAL"
+	Pending  = "PENDING"
+	Unknown  = "UNKNOWN"
 )

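Acknowledged and Maintenance are dropped and Unknown joins the set; Pending stays declared but, per the hunk, is not part of the valid-states slice (whose name is truncated out of the context above). A self-contained sketch of checking a state against the new set, with the slice reconstructed locally:

```go
package main

import "fmt"

// Reconstructed from the diff: the post-commit set of persistable states.
// PENDING is still declared in the package but excluded from this slice.
var validStates = []string{"OK", "WARN", "CRITICAL", "UNKNOWN"}

func isValidState(s string) bool {
	for _, v := range validStates {
		if v == s {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(isValidState("UNKNOWN"))      // true: added in this commit
	fmt.Println(isValidState("ACKNOWLEDGED")) // false: removed in this commit
}
```
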
+ 13 - 10
pkg/services/alerting/engine.go

@@ -19,6 +19,7 @@ type Engine struct {
 	ruleReader      RuleReader
 	log             log.Logger
 	responseHandler ResultHandler
+	alertJobTimeout time.Duration
 }
 
 func NewEngine() *Engine {
@@ -31,6 +32,7 @@ func NewEngine() *Engine {
 		ruleReader:      NewRuleReader(),
 		log:             log.New("alerting.engine"),
 		responseHandler: NewResultHandler(),
+		alertJobTimeout: time.Second * 5,
 	}
 
 	return e
@@ -87,24 +89,25 @@ func (e *Engine) execDispatch() {
 }
 
 func (e *Engine) executeJob(job *AlertJob) {
-	now := time.Now()
+	startTime := time.Now()
 
 	resultChan := make(chan *AlertResult, 1)
 	go e.handler.Execute(job, resultChan)
 
 	select {
-	case <-time.After(time.Second * 5):
+	case <-time.After(e.alertJobTimeout):
 		e.resultQueue <- &AlertResult{
-			State:         alertstates.Pending,
-			Duration:      float64(time.Since(now).Nanoseconds()) / float64(1000000),
-			Error:         fmt.Errorf("Timeout"),
-			AlertJob:      job,
-			ExeuctionTime: time.Now(),
+			State:     alertstates.Pending,
+			Error:     fmt.Errorf("timeout"),
+			AlertJob:  job,
+			StartTime: startTime,
+			EndTime:   time.Now(),
 		}
+		// no close(resultChan) here: the handler goroutine may still send on it, and a send on a closed channel panics
 		e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
 	case result := <-resultChan:
-		result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
-		e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id)
+		duration := float64(result.EndTime.Sub(result.StartTime).Nanoseconds()) / float64(1000000)
+		e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id)
 		e.resultQueue <- result
 	}
 }
@@ -117,7 +120,7 @@ func (e *Engine) resultHandler() {
 	}()
 
 	for result := range e.resultQueue {
-		e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount)
+		e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount)
 
 		result.AlertJob.Running = false
 

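Two details in executeJob are easy to miss: the result channel is buffered with capacity 1, so the handler goroutine can still complete its send after the timeout branch has won the select (no goroutine left blocked forever), and the channel must not be closed from the receiving side, since a later send on a closed channel panics. A self-contained sketch of the same race-a-worker-against-a-deadline pattern, with hypothetical names:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

type Result struct {
	State string
	Err   error
}

// runWithTimeout mirrors Engine.executeJob: race a worker against a
// deadline. The buffer of 1 lets the worker finish its send even after
// the timeout fires, so it never blocks on an abandoned channel.
func runWithTimeout(work func() Result, timeout time.Duration) Result {
	ch := make(chan Result, 1)
	go func() { ch <- work() }()

	select {
	case <-time.After(timeout):
		// Deliberately no close(ch): the worker may still send, and a
		// send on a closed channel panics. The buffered channel is
		// simply garbage-collected once the worker finishes.
		return Result{State: "PENDING", Err: errors.New("timeout")}
	case r := <-ch:
		return r
	}
}

func main() {
	slow := func() Result { time.Sleep(time.Second); return Result{State: "OK"} }
	fmt.Println(runWithTimeout(slow, 100*time.Millisecond)) // {PENDING timeout}
}
```

Elapsed time is now derived from the StartTime/EndTime pair with `EndTime.Sub(StartTime)`; `Time.Nanosecond()` only reports the offset within the current second, so it cannot be used for durations.
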
+ 17 - 11
pkg/services/alerting/handler.go

@@ -26,18 +26,24 @@ func NewHandler() *HandlerImpl {
 }
 
 func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
+	startTime := time.Now()
+
 	timeSeries, err := e.executeQuery(job)
 	if err != nil {
 		resultQueue <- &AlertResult{
-			Error:         err,
-			State:         alertstates.Pending,
-			AlertJob:      job,
-			ExeuctionTime: time.Now(),
+			Error:     err,
+			State:     alertstates.Pending,
+			AlertJob:  job,
+			StartTime: startTime,
+			EndTime:   time.Now(),
 		}
+		return
 	}
 
 	result := e.evaluateRule(job.Rule, timeSeries)
 	result.AlertJob = job
+	result.StartTime = startTime
+	result.EndTime = time.Now()
+
 	resultQueue <- result
 }
 
@@ -108,9 +114,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
 		e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
 		if critResult {
 			triggeredAlert = append(triggeredAlert, &TriggeredAlert{
-				State:       alertstates.Critical,
-				ActualValue: transformedValue,
-				Name:        serie.Name,
+				State:  alertstates.Critical,
+				Value:  transformedValue,
+				Metric: serie.Name,
 			})
 			continue
 		}
@@ -120,9 +126,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
 		e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
 		if warnResult {
 			triggeredAlert = append(triggeredAlert, &TriggeredAlert{
-				State:       alertstates.Warn,
-				ActualValue: transformedValue,
-				Name:        serie.Name,
+				State:  alertstates.Warn,
+				Value:  transformedValue,
+				Metric: serie.Name,
 			})
 		}
 	}
@@ -138,5 +144,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
 		}
 	}
 
-	return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()}
+	return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert}
 }

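evaluateRule gives the critical level precedence: a series that crosses it emits a single CRITICAL TriggeredAlert and skips the warn check via continue, so each series triggers at most once. A minimal sketch of that precedence, with hypothetical names and a plain `>` comparison standing in for the rule's configurable condition operators:

```go
package main

import "fmt"

type triggered struct {
	State  string
	Metric string
	Value  float64
}

// evaluate mirrors the crit-before-warn precedence in evaluateRule:
// a series that crosses the critical level never also emits a warning.
func evaluate(series map[string]float64, warnLevel, critLevel float64) []triggered {
	var out []triggered
	for name, v := range series {
		if v > critLevel {
			out = append(out, triggered{State: "CRITICAL", Metric: name, Value: v})
			continue // crit wins; skip the warn check
		}
		if v > warnLevel {
			out = append(out, triggered{State: "WARN", Metric: name, Value: v})
		}
	}
	return out
}

func main() {
	series := map[string]float64{"cpu.user": 95, "cpu.system": 72, "cpu.idle": 10}
	fmt.Println(evaluate(series, 70, 90))
}
```
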
+ 9 - 8
pkg/services/alerting/models.go

@@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() {
 
 type AlertResult struct {
 	State           string
-	ActualValue     float64
-	Duration        float64
 	TriggeredAlerts []*TriggeredAlert
-	Description     string
 	Error           error
-	AlertJob        *AlertJob
-	ExeuctionTime   time.Time
+	Description     string
+	StartTime       time.Time
+	EndTime         time.Time
+
+	AlertJob *AlertJob
 }
 
 type TriggeredAlert struct {
-	ActualValue float64
-	Name        string
-	State       string
+	Value  float64
+	Metric string
+	State  string
+	Tags   map[string]string
 }
 
 type Level struct {

+ 1 - 1
pkg/services/alerting/result_handler.go

@@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool {
 	}
 
 	lastExecution := query.Result.Created
-	asdf := result.ExeuctionTime.Add(time.Minute * -15)
+	cutoff := result.StartTime.Add(time.Minute * -15)
 	olderThen15Min := lastExecution.Before(cutoff)
 	changedState := query.Result.NewState != result.State
 

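shouldUpdateState throttles writes: judging by the variable names, a result is persisted only when the last saved execution is more than 15 minutes old or the state changed (the return combining the two booleans sits below this hunk, so the OR is an assumption). A standalone sketch of that predicate:

```go
package main

import (
	"fmt"
	"time"
)

// shouldUpdate mirrors the predicate above: write when the stored state
// is stale (older than 15 minutes at the start of this execution) or
// when the state transitioned.
func shouldUpdate(lastSaved time.Time, lastState, newState string, execStart time.Time) bool {
	cutoff := execStart.Add(-15 * time.Minute)
	return lastSaved.Before(cutoff) || lastState != newState
}

func main() {
	now := time.Now()
	fmt.Println(shouldUpdate(now.Add(-20*time.Minute), "OK", "OK", now))       // true: stale
	fmt.Println(shouldUpdate(now.Add(-1*time.Minute), "OK", "CRITICAL", now)) // true: changed
	fmt.Println(shouldUpdate(now.Add(-1*time.Minute), "OK", "OK", now))       // false: recent and unchanged
}
```
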
+ 14 - 7
pkg/services/sqlstore/alert.go

@@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error {
 
 func SaveAlerts(cmd *m.SaveAlertsCommand) error {
 	return inTransaction(func(sess *xorm.Session) error {
-		alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
+		existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
 		if err != nil {
 			return err
 		}
 
-		upsertAlerts(alerts, cmd, sess)
-		deleteMissingAlerts(alerts, cmd, sess)
+		if err := upsertAlerts(existingAlerts, cmd, sess); err != nil {
+			return err
+		}
+
+		if err := deleteMissingAlerts(existingAlerts, cmd, sess); err != nil {
+			return err
+		}
 
 		return nil
 	})
 }
 
-func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
+func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
 	for _, alert := range cmd.Alerts {
 		update := false
 		var alertToUpdate *m.Alert
 
-		for _, k := range alerts {
+		for _, k := range existingAlerts {
 			if alert.PanelId == k.PanelId {
 				update = true
 				alert.Id = k.Id
@@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio
 
 				sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
 			}
-
 		} else {
 			alert.Updated = time.Now()
 			alert.Created = time.Now()
-			alert.State = "OK"
+			alert.State = "UNKNOWN"
+			alert.CreatedBy = cmd.UserId
+			alert.UpdatedBy = cmd.UserId
+
 			_, err := sess.Insert(alert)
 			if err != nil {
 				return err

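SaveAlerts is a reconcile: alerts in the command that match an existing PanelId are updated, new ones are inserted with CreatedBy/UpdatedBy stamped and an initial UNKNOWN state, and deleteMissingAlerts (not shown) removes stored alerts absent from the command. A minimal in-memory sketch of that shape, without the xorm session; stamping UpdatedBy on the update path is an assumption here, since the hunk only shows the stamps on insert:

```go
package main

import "fmt"

type alert struct {
	Id, PanelId, CreatedBy, UpdatedBy int64
	State                             string
}

// reconcile mirrors upsertAlerts + deleteMissingAlerts: match on PanelId,
// update hits, insert misses, drop leftovers.
func reconcile(existing, desired []alert, userId int64) (updates, inserts, deletes []alert) {
	byPanel := make(map[int64]alert, len(existing))
	for _, e := range existing {
		byPanel[e.PanelId] = e
	}
	for _, d := range desired {
		if e, ok := byPanel[d.PanelId]; ok {
			d.Id = e.Id
			d.UpdatedBy = userId // assumed; the hunk shows stamps on insert only
			updates = append(updates, d)
			delete(byPanel, d.PanelId)
		} else {
			d.State = "UNKNOWN" // new alerts start out unknown
			d.CreatedBy, d.UpdatedBy = userId, userId
			inserts = append(inserts, d)
		}
	}
	for _, e := range byPanel {
		deletes = append(deletes, e) // stored but no longer in the command
	}
	return
}

func main() {
	existing := []alert{{Id: 1, PanelId: 10}, {Id: 2, PanelId: 11}}
	desired := []alert{{PanelId: 10}, {PanelId: 12}}
	u, i, d := reconcile(existing, desired, 42)
	fmt.Println(len(u), len(i), len(d)) // 1 1 1
}
```
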
+ 2 - 2
public/app/features/alerting/alert_log_ctrl.ts

@@ -20,7 +20,7 @@ export class AlertLogCtrl {
   }
 
   loadAlertLogs(alertId: number) {
-    this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => {
+    this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => {
       this.alertLogs = _.map(result, log => {
         log.iconCss = alertDef.getCssForState(log.newState);
         log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
@@ -28,7 +28,7 @@ export class AlertLogCtrl {
       });
     });
 
-    this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => {
+    this.backendSrv.get(`/api/alerts/${alertId}`).then(result => {
       this.alert = result;
     });
   }

+ 1 - 1
public/app/features/alerting/alerts_ctrl.ts

@@ -49,7 +49,7 @@ export class AlertListCtrl {
       state: stats
     };
 
-    this.backendSrv.get('/api/alerts/rules', params).then(result => {
+    this.backendSrv.get('/api/alerts', params).then(result => {
       this.alerts = _.map(result, alert => {
         alert.iconCss = alertDef.getCssForState(alert.state);
         return alert;

+ 3 - 3
public/app/features/alerting/notification_edit_ctrl.ts

@@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl {
   }
 
   loadNotification(notificationId) {
-    this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => {
+    this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => {
       console.log(result);
       this.notification = result;
     });
@@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl {
   save() {
     if (this.notification.id) {
       console.log('this.notification: ', this.notification);
-      this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification)
+      this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification)
         .then(result => {
           this.notification = result;
          this.$scope.appEvent('alert-success', ['Notification updated!', '']);
@@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl {
          this.$scope.appEvent('alert-error', ['Unable to update notification.', '']);
         });
     } else {
-      this.backendSrv.post(`/api/alerts/notification`, this.notification)
+      this.backendSrv.post(`/api/alert-notifications`, this.notification)
         .then(result => {
           this.notification = result;
          this.$scope.appEvent('alert-success', ['Notification created!', '']);

+ 2 - 2
public/app/features/alerting/notifications_list_ctrl.ts

@@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl {
   }
 
   loadNotifications() {
-    this.backendSrv.get(`/api/alerts/notifications`).then(result => {
+    this.backendSrv.get(`/api/alert-notifications`).then(result => {
       this.notifications = result;
     });
   }
 
   deleteNotification(notificationId) {
-    this.backendSrv.delete(`/api/alerts/notification/${notificationId}`)
+    this.backendSrv.delete(`/api/alert-notifications/${notificationId}`)
       .then(() => {
         this.notifications = this.notifications.filter(notification => {
           return notification.id !== notificationId;

+ 7 - 0
public/app/plugins/datasource/grafana-live/plugin.json

@@ -0,0 +1,7 @@
+{
+  "type": "datasource",
+  "name": "Grafana Live",
+  "id": "grafana-live",
+
+  "metrics": true
+}

+ 2 - 2
public/app/plugins/panel/graph/partials/tab_alerting.html

@@ -123,14 +123,14 @@
     <h5 class="section-heading">Information</h5>
     <div class="gf-form">
       <span class="gf-form-label width-10">Alert name</span>
-      <input type="text" class="gf-form-input width-22" ng-model="ctrl.panel.alerting.name">
+      <input type="text" class="gf-form-input width-22" ng-model="ctrl.alert.name">
     </div>
     <div class="gf-form-inline">
       <div class="gf-form">
         <span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
       </div>
       <div class="gf-form">
-        <textarea rows="5" ng-model="ctrl.panel.alerting.description" class="gf-form-input width-22"></textarea>
+        <textarea rows="5" ng-model="ctrl.alert.description" class="gf-form-input width-22"></textarea>
       </div>
     </div>
   </div>