feat(alerting): testing alert is starting to work

Torkel Ödegaard committed 9 years ago
commit f6a160b270

pkg/api/alerting.go  +7 -0

@@ -83,6 +83,9 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
 	}
 
 	if err := bus.Dispatch(&backendCmd); err != nil {
+		if validationErr, ok := err.(alerting.AlertValidationError); ok {
+			return ApiError(422, validationErr.Error(), nil)
+		}
 		return ApiError(500, "Failed to test rule", err)
 	}
 
@@ -96,6 +99,10 @@ func AlertTest(c *middleware.Context, dto dtos.AlertTestCommand) Response {
 		dtoRes.Error = res.Error.Error()
 	}
 
+	for _, log := range res.Logs {
+		dtoRes.Logs = append(dtoRes.Logs, &dtos.AlertTestResultLog{Message: log.Message, Data: log.Data})
+	}
+
 	dtoRes.Timing = fmt.Sprintf("%1.3fs", res.GetDurationSeconds())
 
 	return Json(200, dtoRes)

pkg/api/dtos/alerting.go  +9 -3

@@ -40,7 +40,13 @@ type AlertTestCommand struct {
 }
 
 type AlertTestResult struct {
-	Triggered bool   `json:"triggerd"`
-	Timing    string `json:"timing"`
-	Error     string `json:"error"`
+	Triggered bool                  `json:"triggerd"`
+	Timing    string                `json:"timing"`
+	Error     string                `json:"error"`
+	Logs      []*AlertTestResultLog `json:"logs"`
+}
+
+type AlertTestResultLog struct {
+	Message string      `json:"message"`
+	Data    interface{} `json:"data"`
 }
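
For reference, a minimal standalone sketch (not part of the commit) of the JSON this DTO serializes to, with made-up values and the struct shapes copied from the diff above. Note that the tag on Triggered is spelled "triggerd" in the source.

package main

import (
	"encoding/json"
	"fmt"
)

// Struct shapes copied from pkg/api/dtos/alerting.go for illustration only.
type AlertTestResultLog struct {
	Message string      `json:"message"`
	Data    interface{} `json:"data"`
}

type AlertTestResult struct {
	Triggered bool                  `json:"triggerd"`
	Timing    string                `json:"timing"`
	Error     string                `json:"error"`
	Logs      []*AlertTestResultLog `json:"logs"`
}

func main() {
	// Made-up values, only to show the wire format returned by /api/alerts/test.
	res := AlertTestResult{
		Triggered: true,
		Timing:    "0.042s",
		Logs: []*AlertTestResultLog{
			{Message: "Query Condition Query Result", Data: []string{"series A", "series B"}},
		},
	}
	out, _ := json.MarshalIndent(res, "", "  ")
	fmt.Println(string(out))
}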

pkg/services/alerting/alert_rule.go  +8 -0

@@ -23,6 +23,14 @@ type AlertRule struct {
 	Notifications []int64
 }
 
+type AlertValidationError struct {
+	Reason string
+}
+
+func (e AlertValidationError) Error() string {
+	return e.Reason
+}
+
 var (
 	ValueFormatRegex = regexp.MustCompile("^\\d+")
 	UnitFormatRegex  = regexp.MustCompile("\\w{1}$")
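
Since AlertValidationError satisfies the error interface, callers can separate validation failures from other errors with a plain type assertion; the API handler above uses exactly that branch to answer 422 instead of 500. A minimal standalone sketch of the pattern (the validate helper is hypothetical, not in the commit):

package main

import "fmt"

// Shape of the new error type, copied here for illustration.
type AlertValidationError struct {
	Reason string
}

func (e AlertValidationError) Error() string {
	return e.Reason
}

// validate is a hypothetical helper that fails validation.
func validate() error {
	return AlertValidationError{Reason: "Evaluator missing type property"}
}

func main() {
	err := validate()
	if validationErr, ok := err.(AlertValidationError); ok {
		fmt.Println("validation error (would map to HTTP 422):", validationErr.Error())
		return
	}
	fmt.Println("other error (would map to HTTP 500):", err)
}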

pkg/services/alerting/conditions.go  +12 -6

@@ -2,7 +2,6 @@ package alerting
 
 import (
 	"encoding/json"
-	"errors"
 	"fmt"
 
 	"github.com/grafana/grafana/pkg/bus"
@@ -50,15 +49,22 @@ func (c *QueryCondition) executeQuery(context *AlertResultContext) (tsdb.TimeSer
 
 	resp, err := c.HandleRequest(req)
 	if err != nil {
-		return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() error %v", err)
+		return nil, fmt.Errorf("tsdb.HandleRequest() error %v", err)
 	}
 
 	for _, v := range resp.Results {
 		if v.Error != nil {
-			return nil, fmt.Errorf("Alerting: GetSeries() tsdb.HandleRequest() response error %v", v)
+			return nil, fmt.Errorf("tsdb.HandleRequest() response error %v", v)
 		}
 
 		result = append(result, v.Series...)
+
+		if context.IsTestRun {
+			context.Logs = append(context.Logs, &AlertResultLogEntry{
+				Message: "Query Condition Query Result",
+				Data:    v.Series,
+			})
+		}
 	}
 
 	return result, nil
@@ -154,17 +160,17 @@ func NewDefaultAlertEvaluator(model *simplejson.Json) (*DefaultAlertEvaluator, e
 
 	evaluator.Type = model.Get("type").MustString()
 	if evaluator.Type == "" {
-		return nil, errors.New("Alert evaluator missing type property")
+		return nil, AlertValidationError{Reason: "Evaluator missing type property"}
 	}
 
 	params := model.Get("params").MustArray()
 	if len(params) == 0 {
-		return nil, errors.New("Alert evaluator missing threshold parameter")
+		return nil, AlertValidationError{Reason: "Evaluator missing threshold parameter"}
 	}
 
 	threshold, ok := params[0].(json.Number)
 	if !ok {
-		return nil, errors.New("Alert evaluator has invalid threshold parameter")
+		return nil, AlertValidationError{Reason: "Evaluator has invalid threshold parameter"}
 	}
 
 	evaluator.Threshold, _ = threshold.Float64()

pkg/services/alerting/engine.go  +3 -24

@@ -1,7 +1,6 @@
 package alerting
 
 import (
-	"fmt"
 	"time"
 
 	"github.com/benbjohnson/clock"
@@ -18,7 +17,6 @@ type Engine struct {
 	ruleReader      RuleReader
 	log             log.Logger
 	responseHandler ResultHandler
-	alertJobTimeout time.Duration
 }
 
 func NewEngine() *Engine {
@@ -31,7 +29,6 @@ func NewEngine() *Engine {
 		ruleReader:      NewRuleReader(),
 		log:             log.New("alerting.engine"),
 		responseHandler: NewResultHandler(),
-		alertJobTimeout: time.Second * 5,
 	}
 
 	return e
@@ -82,32 +79,14 @@ func (e *Engine) execDispatch() {
 
 	for job := range e.execQueue {
 		log.Trace("Alerting: engine:execDispatch() starting job %s", job.Rule.Name)
-		job.Running = true
 		e.executeJob(job)
 	}
 }
 
 func (e *Engine) executeJob(job *AlertJob) {
-	startTime := time.Now()
-
-	resultChan := make(chan *AlertResultContext, 1)
-	go e.handler.Execute(job.Rule, resultChan)
-
-	select {
-	case <-time.After(e.alertJobTimeout):
-		e.resultQueue <- &AlertResultContext{
-			Error:     fmt.Errorf("Timeout"),
-			Rule:      job.Rule,
-			StartTime: startTime,
-			EndTime:   time.Now(),
-		}
-		close(resultChan)
-		e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
-	case result := <-resultChan:
-		e.log.Debug("Job Execution done", "timing", result.GetDurationSeconds(), "ruleId", job.Rule.Id)
-		e.resultQueue <- result
-	}
-
+	job.Running = true
+	context := NewAlertResultContext(job.Rule)
+	e.handler.Execute(context)
 	job.Running = false
 }
 

pkg/services/alerting/extractor.go  +1 -1

@@ -129,7 +129,7 @@ func (e *DashAlertExtractor) GetAlerts() ([]*m.Alert, error) {
 				alerts = append(alerts, alert)
 			} else {
 				e.log.Error("Failed to extract alerts from dashboard", "error", err)
-				return nil, errors.New("Failed to extract alerts from dashboard")
+				return nil, err
 			}
 		}
 	}

pkg/services/alerting/handler.go  +25 -15

@@ -1,6 +1,7 @@
 package alerting
 
 import (
+	"fmt"
 	"time"
 
 	"github.com/grafana/grafana/pkg/log"
@@ -11,41 +12,50 @@ var (
 )
 
 type HandlerImpl struct {
-	log log.Logger
+	log             log.Logger
+	alertJobTimeout time.Duration
 }
 
 func NewHandler() *HandlerImpl {
 	return &HandlerImpl{
-		log: log.New("alerting.executor"),
+		log:             log.New("alerting.executor"),
+		alertJobTimeout: time.Second * 5,
 	}
 }
 
-func (e *HandlerImpl) Execute(rule *AlertRule, resultQueue chan *AlertResultContext) {
-	resultQueue <- e.eval(rule)
-}
+func (e *HandlerImpl) Execute(context *AlertResultContext) {
+
+	go e.eval(context)
 
-func (e *HandlerImpl) eval(rule *AlertRule) *AlertResultContext {
-	result := &AlertResultContext{
-		StartTime: time.Now(),
-		Rule:      rule,
+	select {
+	case <-time.After(e.alertJobTimeout):
+		context.Error = fmt.Errorf("Timeout")
+		context.EndTime = time.Now()
+		e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id)
+	case <-context.DoneChan:
+		e.log.Debug("Job Execution done", "timing", context.GetDurationSeconds(), "alertId", context.Rule.Id)
 	}
 
-	for _, condition := range rule.Conditions {
-		condition.Eval(result)
+}
+
+func (e *HandlerImpl) eval(context *AlertResultContext) {
+
+	for _, condition := range context.Rule.Conditions {
+		condition.Eval(context)
 
 		// break if condition could not be evaluated
-		if result.Error != nil {
+		if context.Error != nil {
 			break
 		}
 
 		// break if result has not triggered yet
-		if result.Triggered == false {
+		if context.Triggered == false {
 			break
 		}
 	}
 
-	result.EndTime = time.Now()
-	return result
+	context.EndTime = time.Now()
+	context.DoneChan <- true
 }
 
 // func (e *HandlerImpl) executeQuery(job *AlertJob) (tsdb.TimeSeriesSlice, error) {
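
With this change the handler, not the engine, owns the job timeout: eval runs in a goroutine, signals DoneChan when finished, and Execute selects between that channel and a timer. A minimal standalone sketch of the pattern, using illustrative stand-in types rather than the commit's:

package main

import (
	"fmt"
	"time"
)

// Simplified stand-in for AlertResultContext; only the fields the
// timeout handling needs.
type resultContext struct {
	Error    error
	EndTime  time.Time
	DoneChan chan bool
}

// eval simulates condition evaluation and signals completion on DoneChan,
// mirroring HandlerImpl.eval in the diff above.
func eval(ctx *resultContext) {
	time.Sleep(100 * time.Millisecond) // pretend work
	ctx.EndTime = time.Now()
	ctx.DoneChan <- true
}

func main() {
	ctx := &resultContext{DoneChan: make(chan bool, 1)}
	timeout := 5 * time.Second

	go eval(ctx)

	select {
	case <-time.After(timeout):
		ctx.Error = fmt.Errorf("Timeout")
		ctx.EndTime = time.Now()
		fmt.Println("job timed out")
	case <-ctx.DoneChan:
		fmt.Println("job done in", ctx.EndTime)
	}
}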

pkg/services/alerting/interfaces.go  +1 -1

@@ -7,7 +7,7 @@ import (
 )
 
 type AlertHandler interface {
-	Execute(rule *AlertRule, resultChan chan *AlertResultContext)
+	Execute(context *AlertResultContext)
 }
 
 type Scheduler interface {

pkg/services/alerting/models.go  +19 -0

@@ -28,18 +28,37 @@ func (aj *AlertJob) IncRetry() {
 
 type AlertResultContext struct {
 	Triggered   bool
+	IsTestRun   bool
 	Details     []*AlertResultDetail
+	Logs        []*AlertResultLogEntry
 	Error       error
 	Description string
 	StartTime   time.Time
 	EndTime     time.Time
 	Rule        *AlertRule
+	DoneChan    chan bool
+	CancelChan  chan bool
 }
 
 func (a *AlertResultContext) GetDurationSeconds() float64 {
 	return float64(a.EndTime.Nanosecond()-a.StartTime.Nanosecond()) / float64(1000000000)
 }
 
+func NewAlertResultContext(rule *AlertRule) *AlertResultContext {
+	return &AlertResultContext{
+		StartTime:  time.Now(),
+		Rule:       rule,
+		Logs:       make([]*AlertResultLogEntry, 0),
+		DoneChan:   make(chan bool, 1),
+		CancelChan: make(chan bool, 1),
+	}
+}
+
+type AlertResultLogEntry struct {
+	Message string
+	Data    interface{}
+}
+
 type AlertResultDetail struct {
 	Value  float64
 	Metric string

pkg/services/alerting/test_rule.go  +8 -16

@@ -2,7 +2,6 @@ package alerting
 
 import (
 	"fmt"
-	"time"
 
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/components/simplejson"
@@ -38,28 +37,21 @@ func handleAlertTestCommand(cmd *AlertTestCommand) error {
 				return err
 			}
 
-			if res, err := testAlertRule(rule); err != nil {
-				return err
-			} else {
-				cmd.Result = res
-				return nil
-			}
+			cmd.Result = testAlertRule(rule)
+			return nil
 		}
 	}
 
 	return fmt.Errorf("Could not find alert with panel id %d", cmd.PanelId)
 }
 
-func testAlertRule(rule *AlertRule) (*AlertResultContext, error) {
+func testAlertRule(rule *AlertRule) *AlertResultContext {
 	handler := NewHandler()
 
-	resultChan := make(chan *AlertResultContext, 1)
-	handler.Execute(rule, resultChan)
+	context := NewAlertResultContext(rule)
+	context.IsTestRun = true
 
-	select {
-	case <-time.After(time.Second * 10):
-		return &AlertResultContext{Error: fmt.Errorf("Timeout")}, nil
-	case result := <-resultChan:
-		return result, nil
-	}
+	handler.Execute(context)
+
+	return context
 }

pkg/services/sqlstore/migrations/alert_mig.go  +1 -0

@@ -19,6 +19,7 @@ func addAlertMigrations(mg *Migrator) {
 			{Name: "settings", Type: DB_Text, Nullable: false},
 			{Name: "frequency", Type: DB_BigInt, Nullable: false},
 			{Name: "handler", Type: DB_BigInt, Nullable: false},
+			{Name: "severity", Type: DB_Text, Nullable: false},
 			{Name: "enabled", Type: DB_Bool, Nullable: false},
 			{Name: "created", Type: DB_DateTime, Nullable: false},
 			{Name: "updated", Type: DB_DateTime, Nullable: false},

public/app/core/services/alert_srv.ts  +1 -1

@@ -16,7 +16,7 @@ export class AlertSrv {
 
   init() {
     this.$rootScope.onAppEvent('alert-error', (e, alert) => {
-      this.set(alert[0], alert[1], 'error', 0);
+      this.set(alert[0], alert[1], 'error', 7000);
     }, this.$rootScope);
 
     this.$rootScope.onAppEvent('alert-warning', (e, alert) => {

public/app/plugins/panel/graph/alert_tab_ctrl.ts  +3 -3

@@ -69,7 +69,7 @@ export class AlertTabCtrl {
   initModel() {
     var alert = this.alert = this.panel.alert = this.panel.alert || {};
 
-    alert.conditions = [];
+    alert.conditions = alert.conditions || [];
     if (alert.conditions.length === 0) {
       alert.conditions.push(this.buildDefaultCondition());
     }
@@ -149,8 +149,8 @@ export class AlertTabCtrl {
       panelId: this.panelCtrl.panel.id,
     };
 
-    this.backendSrv.post('/api/alerts/test', payload).then(res => {
-      this.testResult = res;
+    return this.backendSrv.post('/api/alerts/test', payload).then(res => {
+      this.testResult = angular.toJson(res, true);
       this.testing = false;
     });
   }

public/app/plugins/panel/graph/partials/tab_alerting.html  +6 -0

@@ -131,6 +131,12 @@
   Evaluating rule <i class="fa fa-spinner fa-spin"></i>
 </div>
 
+<div class="gf-form-group" ng-if="ctrl.testResult">
+  <pre>
+{{ctrl.testResult}}
+  </pre>
+</div>
+
 <div class="gf-form-group" ng-if="!ctrl.alert.enabled">
   <div class="gf-form-button-row">
     <button class="btn btn-inverse" ng-click="ctrl.enable()">