alerting.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. package alerting
  2. import (
  3. "time"
  4. "fmt"
  5. "github.com/grafana/grafana/pkg/bus"
  6. "github.com/grafana/grafana/pkg/log"
  7. m "github.com/grafana/grafana/pkg/models"
  8. "github.com/grafana/grafana/pkg/setting"
  9. )
  10. func Init() {
  11. if !setting.AlertingEnabled {
  12. return
  13. }
  14. log.Info("Alerting: Initializing scheduler...")
  15. scheduler := NewScheduler()
  16. reader := NewRuleReader()
  17. go scheduler.Dispatch(reader)
  18. go scheduler.Executor(&ExecutorImpl{})
  19. go scheduler.HandleResponses()
  20. }
  21. type Scheduler struct {
  22. jobs map[int64]*m.AlertJob
  23. runQueue chan *m.AlertJob
  24. responseQueue chan *m.AlertResult
  25. alertRuleFetcher RuleReader
  26. }
  27. func NewScheduler() *Scheduler {
  28. return &Scheduler{
  29. jobs: make(map[int64]*m.AlertJob, 0),
  30. runQueue: make(chan *m.AlertJob, 1000),
  31. responseQueue: make(chan *m.AlertResult, 1000),
  32. }
  33. }
  34. func (this *Scheduler) Dispatch(reader RuleReader) {
  35. reschedule := time.NewTicker(time.Second * 10)
  36. secondTicker := time.NewTicker(time.Second)
  37. this.updateJobs(reader.Fetch)
  38. for {
  39. select {
  40. case <-secondTicker.C:
  41. this.queueJobs()
  42. case <-reschedule.C:
  43. this.updateJobs(reader.Fetch)
  44. }
  45. }
  46. }
  47. func (this *Scheduler) updateJobs(f func() []m.AlertJob) {
  48. log.Debug("Scheduler: UpdateJobs()")
  49. jobs := make(map[int64]*m.AlertJob, 0)
  50. rules := f()
  51. for i := 0; i < len(rules); i++ {
  52. rule := rules[i]
  53. rule.Offset = int64(i)
  54. jobs[rule.Rule.Id] = &rule
  55. }
  56. log.Debug("Scheduler: Selected %d jobs", len(jobs))
  57. this.jobs = jobs
  58. }
  59. func (this *Scheduler) queueJobs() {
  60. now := time.Now().Unix()
  61. for _, job := range this.jobs {
  62. if now%job.Rule.Frequency == 0 && job.Running == false {
  63. log.Info("Scheduler: Putting job on to run queue: %s", job.Rule.Title)
  64. this.runQueue <- job
  65. }
  66. }
  67. }
  68. func (this *Scheduler) Executor(executor Executor) {
  69. for job := range this.runQueue {
  70. //log.Info("Executor: queue length %d", len(this.runQueue))
  71. log.Info("Executor: executing %s", job.Rule.Title)
  72. this.jobs[job.Rule.Id].Running = true
  73. this.MeasureAndExecute(executor, job)
  74. }
  75. }
  76. func (this *Scheduler) HandleResponses() {
  77. for response := range this.responseQueue {
  78. log.Info("Response: alert(%d) status(%s) actual(%v)", response.Id, response.State, response.ActualValue)
  79. if this.jobs[response.Id] != nil {
  80. this.jobs[response.Id].Running = false
  81. }
  82. cmd := m.UpdateAlertStateCommand{
  83. AlertId: response.Id,
  84. NewState: response.State,
  85. }
  86. if cmd.NewState != m.AlertStateOk {
  87. cmd.Info = fmt.Sprintf("Actual value: %1.2f", response.ActualValue)
  88. }
  89. if err := bus.Dispatch(&cmd); err != nil {
  90. log.Error(1, "failed to save state", err)
  91. }
  92. }
  93. }
  94. func (this *Scheduler) MeasureAndExecute(exec Executor, job *m.AlertJob) {
  95. now := time.Now()
  96. responseChan := make(chan *m.AlertResult, 1)
  97. go exec.Execute(job, responseChan)
  98. select {
  99. case <-time.After(time.Second * 5):
  100. this.responseQueue <- &m.AlertResult{
  101. Id: job.Rule.Id,
  102. State: "timed out",
  103. Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
  104. Rule: job.Rule,
  105. }
  106. case result := <-responseChan:
  107. result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
  108. log.Info("Schedular: exeuction took %vms", result.Duration)
  109. this.responseQueue <- result
  110. }
  111. }