Browse Source

feat(alerting): add global usage metrics for alerting

closes #5786
bergquist 9 years ago
parent
commit
937726499f

+ 39 - 20
pkg/metrics/metrics.go

@@ -9,29 +9,38 @@ func init() {
 }
 
 var (
-	M_Instance_Start                  Counter
-	M_Page_Status_200                 Counter
-	M_Page_Status_500                 Counter
-	M_Page_Status_404                 Counter
-	M_Api_Status_500                  Counter
-	M_Api_Status_404                  Counter
-	M_Api_User_SignUpStarted          Counter
-	M_Api_User_SignUpCompleted        Counter
-	M_Api_User_SignUpInvite           Counter
-	M_Api_Dashboard_Save              Timer
-	M_Api_Dashboard_Get               Timer
-	M_Api_Dashboard_Search            Timer
-	M_Api_Admin_User_Create           Counter
-	M_Api_Login_Post                  Counter
-	M_Api_Login_OAuth                 Counter
-	M_Api_Org_Create                  Counter
-	M_Api_Dashboard_Snapshot_Create   Counter
-	M_Api_Dashboard_Snapshot_External Counter
-	M_Api_Dashboard_Snapshot_Get      Counter
-	M_Models_Dashboard_Insert         Counter
+	M_Instance_Start                     Counter
+	M_Page_Status_200                    Counter
+	M_Page_Status_500                    Counter
+	M_Page_Status_404                    Counter
+	M_Api_Status_500                     Counter
+	M_Api_Status_404                     Counter
+	M_Api_User_SignUpStarted             Counter
+	M_Api_User_SignUpCompleted           Counter
+	M_Api_User_SignUpInvite              Counter
+	M_Api_Dashboard_Save                 Timer
+	M_Api_Dashboard_Get                  Timer
+	M_Api_Dashboard_Search               Timer
+	M_Api_Admin_User_Create              Counter
+	M_Api_Login_Post                     Counter
+	M_Api_Login_OAuth                    Counter
+	M_Api_Org_Create                     Counter
+	M_Api_Dashboard_Snapshot_Create      Counter
+	M_Api_Dashboard_Snapshot_External    Counter
+	M_Api_Dashboard_Snapshot_Get         Counter
+	M_Models_Dashboard_Insert            Counter
+	M_Alerting_Result_Critical           Counter
+	M_Alerting_Result_Warning            Counter
+	M_Alerting_Result_Info               Counter
+	M_Alerting_Result_Ok                 Counter
+	M_Alerting_Active_Alerts             Counter
+	M_Alerting_Notification_Sent_Slack   Counter
+	M_Alerting_Notification_Sent_Email   Counter
+	M_Alerting_Notification_Sent_Webhook Counter
 
 	// Timers
 	M_DataSource_ProxyReq_Timer Timer
+	M_Alerting_Execution_Time   Timer
 )
 
 func initMetricVars(settings *MetricSettings) {
@@ -66,6 +75,16 @@ func initMetricVars(settings *MetricSettings) {
 
 	M_Models_Dashboard_Insert = RegCounter("models.dashboard.insert")
 
+	M_Alerting_Result_Critical = RegCounter("alerting.result", "severity", "critical")
+	M_Alerting_Result_Warning = RegCounter("alerting.result", "severity", "warning")
+	M_Alerting_Result_Info = RegCounter("alerting.result", "severity", "info")
+	M_Alerting_Result_Ok = RegCounter("alerting.result", "severity", "ok")
+	M_Alerting_Active_Alerts = RegCounter("alerting.active_alerts")
+	M_Alerting_Notification_Sent_Slack = RegCounter("alerting.notifications_sent", "type", "slack")
+	M_Alerting_Notification_Sent_Email = RegCounter("alerting.notifications_sent", "type", "email")
+	M_Alerting_Notification_Sent_Webhook = RegCounter("alerting.notifications_sent", "type", "webhook")
+
 	// Timers
 	M_DataSource_ProxyReq_Timer = RegTimer("api.dataproxy.request.all")
+	M_Alerting_Execution_Time = RegTimer("alerting.execution_time")
 }

+ 3 - 4
pkg/services/alerting/eval_handler.go

@@ -5,10 +5,7 @@ import (
 	"time"
 
 	"github.com/grafana/grafana/pkg/log"
-)
-
-var (
-	descriptionFmt = "Actual value: %1.2f for %s. "
+	"github.com/grafana/grafana/pkg/metrics"
 )
 
 type DefaultEvalHandler struct {
@@ -55,5 +52,7 @@ func (e *DefaultEvalHandler) eval(context *EvalContext) {
 	}
 
 	context.EndTime = time.Now()
+	elapsedTime := context.EndTime.Sub(context.StartTime)
+	metrics.M_Alerting_Execution_Time.Update(elapsedTime)
 	context.DoneChan <- true
 }

+ 1 - 0
pkg/services/alerting/notifier.go

@@ -48,6 +48,7 @@ func (n *RootNotifier) Notify(context *EvalContext) {
 
 	for _, notifier := range notifiers {
 		n.log.Info("Sending notification", "firing", context.Firing, "type", notifier.GetType())
+
 		go notifier.Notify(context)
 	}
 }

+ 2 - 0
pkg/services/alerting/notifiers/email.go

@@ -5,6 +5,7 @@ import (
 
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/log"
+	"github.com/grafana/grafana/pkg/metrics"
 	m "github.com/grafana/grafana/pkg/models"
 	"github.com/grafana/grafana/pkg/services/alerting"
 )
@@ -38,6 +39,7 @@ func NewEmailNotifier(model *m.AlertNotification) (alerting.Notifier, error) {
 
 func (this *EmailNotifier) Notify(context *alerting.EvalContext) {
 	this.log.Info("Sending alert notification to", "addresses", this.Addresses)
+	metrics.M_Alerting_Notification_Sent_Email.Inc(1)
 
 	ruleUrl, err := context.GetRuleUrl()
 	if err != nil {

+ 2 - 0
pkg/services/alerting/notifiers/slack.go

@@ -6,6 +6,7 @@ import (
 
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/log"
+	"github.com/grafana/grafana/pkg/metrics"
 	m "github.com/grafana/grafana/pkg/models"
 	"github.com/grafana/grafana/pkg/services/alerting"
 )
@@ -38,6 +39,7 @@ type SlackNotifier struct {
 
 func (this *SlackNotifier) Notify(context *alerting.EvalContext) {
 	this.log.Info("Executing slack notification", "ruleId", context.Rule.Id, "notification", this.Name)
+	metrics.M_Alerting_Notification_Sent_Slack.Inc(1)
 
 	ruleUrl, err := context.GetRuleUrl()
 	if err != nil {

+ 2 - 0
pkg/services/alerting/notifiers/webhook.go

@@ -4,6 +4,7 @@ import (
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/components/simplejson"
 	"github.com/grafana/grafana/pkg/log"
+	"github.com/grafana/grafana/pkg/metrics"
 	m "github.com/grafana/grafana/pkg/models"
 	"github.com/grafana/grafana/pkg/services/alerting"
 )
@@ -40,6 +41,7 @@ type WebhookNotifier struct {
 
 func (this *WebhookNotifier) Notify(context *alerting.EvalContext) {
 	this.log.Info("Sending webhook")
+	metrics.M_Alerting_Notification_Sent_Webhook.Inc(1)
 
 	bodyJSON := simplejson.New()
 	bodyJSON.Set("title", context.GetNotificationTitle())

+ 2 - 0
pkg/services/alerting/reader.go

@@ -6,6 +6,7 @@ import (
 
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/log"
+	"github.com/grafana/grafana/pkg/metrics"
 	m "github.com/grafana/grafana/pkg/models"
 )
 
@@ -58,6 +59,7 @@ func (arr *DefaultRuleReader) Fetch() []*Rule {
 		}
 	}
 
+	metrics.M_Alerting_Active_Alerts.Inc(int64(len(res)))
 	return res
 }
 

+ 15 - 0
pkg/services/alerting/result_handler.go

@@ -5,6 +5,7 @@ import (
 
 	"github.com/grafana/grafana/pkg/bus"
 	"github.com/grafana/grafana/pkg/log"
+	"github.com/grafana/grafana/pkg/metrics"
 	m "github.com/grafana/grafana/pkg/models"
 	"github.com/grafana/grafana/pkg/services/annotations"
 )
@@ -37,6 +38,7 @@ func (handler *DefaultResultHandler) Handle(ctx *EvalContext) {
 		ctx.Rule.State = m.AlertStateOK
 	}
 
+	countSeverity(ctx.Rule.Severity)
 	if ctx.Rule.State != oldState {
 		handler.log.Info("New state change", "alertId", ctx.Rule.Id, "newState", ctx.Rule.State, "oldState", oldState)
 
@@ -69,3 +71,16 @@ func (handler *DefaultResultHandler) Handle(ctx *EvalContext) {
 		handler.notifier.Notify(ctx)
 	}
 }
+
+func countSeverity(state m.AlertSeverityType) {
+	switch state {
+	case m.AlertSeverityOK:
+		metrics.M_Alerting_Result_Ok.Inc(1)
+	case m.AlertSeverityInfo:
+		metrics.M_Alerting_Result_Info.Inc(1)
+	case m.AlertSeverityWarning:
+		metrics.M_Alerting_Result_Warning.Inc(1)
+	case m.AlertSeverityCritical:
+		metrics.M_Alerting_Result_Critical.Inc(1)
+	}
+}