alerting.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package alerting
  2. import (
  3. "time"
  4. "github.com/grafana/grafana/pkg/bus"
  5. "github.com/grafana/grafana/pkg/log"
  6. m "github.com/grafana/grafana/pkg/models"
  7. "github.com/grafana/grafana/pkg/setting"
  8. )
  9. func Init() {
  10. if !setting.AlertingEnabled {
  11. return
  12. }
  13. log.Info("Alerting: Initializing scheduler...")
  14. scheduler := NewScheduler()
  15. reader := NewRuleReader()
  16. go scheduler.dispatch(reader)
  17. go scheduler.executor(&ExecutorImpl{})
  18. go scheduler.handleResponses()
  19. }
  20. type Scheduler struct {
  21. jobs map[int64]*m.AlertJob
  22. runQueue chan *m.AlertJob
  23. responseQueue chan *m.AlertResult
  24. alertRuleFetcher RuleReader
  25. }
  26. func NewScheduler() *Scheduler {
  27. return &Scheduler{
  28. jobs: make(map[int64]*m.AlertJob, 0),
  29. runQueue: make(chan *m.AlertJob, 1000),
  30. responseQueue: make(chan *m.AlertResult, 1000),
  31. }
  32. }
  33. func (scheduler *Scheduler) dispatch(reader RuleReader) {
  34. reschedule := time.NewTicker(time.Second * 10)
  35. secondTicker := time.NewTicker(time.Second)
  36. scheduler.updateJobs(reader.Fetch)
  37. for {
  38. select {
  39. case <-secondTicker.C:
  40. scheduler.queueJobs()
  41. case <-reschedule.C:
  42. scheduler.updateJobs(reader.Fetch)
  43. }
  44. }
  45. }
  46. func (scheduler *Scheduler) updateJobs(alertRuleFn func() []m.AlertRule) {
  47. log.Debug("Scheduler: UpdateJobs()")
  48. jobs := make(map[int64]*m.AlertJob, 0)
  49. rules := alertRuleFn()
  50. for i := 0; i < len(rules); i++ {
  51. rule := rules[i]
  52. /*
  53. jobs[rule.Id] = &m.AlertJob{
  54. Offset: int64(i),
  55. Running: false,
  56. Rule: rule,
  57. }
  58. */
  59. job := &m.AlertJob{}
  60. if scheduler.jobs[rule.Id] != nil {
  61. job = scheduler.jobs[rule.Id]
  62. }
  63. job.Rule = rule
  64. job.Offset = int64(i)
  65. jobs[rule.Id] = job
  66. }
  67. log.Debug("Scheduler: Selected %d jobs", len(jobs))
  68. scheduler.jobs = jobs
  69. }
  70. func (scheduler *Scheduler) queueJobs() {
  71. now := time.Now().Unix()
  72. for _, job := range scheduler.jobs {
  73. if now%job.Rule.Frequency == 0 && job.Running == false {
  74. log.Info("Scheduler: Putting job on to run queue: %s", job.Rule.Title)
  75. scheduler.runQueue <- job
  76. }
  77. }
  78. }
  79. func (scheduler *Scheduler) executor(executor Executor) {
  80. for job := range scheduler.runQueue {
  81. //log.Info("Executor: queue length %d", len(this.runQueue))
  82. log.Info("Executor: executing %s", job.Rule.Title)
  83. job.Running = true
  84. scheduler.measureAndExecute(executor, job)
  85. }
  86. }
  87. func (scheduler *Scheduler) handleResponses() {
  88. for response := range scheduler.responseQueue {
  89. log.Info("Response: alert(%d) status(%s) actual(%v) running(%v)", response.Id, response.State, response.ActualValue, response.AlertJob.Running)
  90. response.AlertJob.Running = false
  91. cmd := &m.UpdateAlertStateCommand{
  92. AlertId: response.Id,
  93. NewState: response.State,
  94. Info: response.Description,
  95. }
  96. if err := bus.Dispatch(cmd); err != nil {
  97. log.Error(2, "failed to save state %v", err)
  98. }
  99. }
  100. }
  101. func (scheduler *Scheduler) measureAndExecute(exec Executor, job *m.AlertJob) {
  102. now := time.Now()
  103. responseChan := make(chan *m.AlertResult, 1)
  104. go exec.Execute(job, responseChan)
  105. select {
  106. case <-time.After(time.Second * 5):
  107. scheduler.responseQueue <- &m.AlertResult{
  108. Id: job.Rule.Id,
  109. State: "timed out",
  110. Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
  111. AlertJob: job,
  112. }
  113. case result := <-responseChan:
  114. result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
  115. log.Info("Schedular: exeuction took %vms", result.Duration)
  116. scheduler.responseQueue <- result
  117. }
  118. }