cloudwatch.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. package cloudwatch
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "regexp"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/grafana/grafana/pkg/log"
  12. "github.com/grafana/grafana/pkg/models"
  13. "github.com/grafana/grafana/pkg/setting"
  14. "github.com/grafana/grafana/pkg/tsdb"
  15. "golang.org/x/sync/errgroup"
  16. "github.com/aws/aws-sdk-go/aws"
  17. "github.com/aws/aws-sdk-go/aws/awserr"
  18. "github.com/aws/aws-sdk-go/aws/request"
  19. "github.com/aws/aws-sdk-go/service/cloudwatch"
  20. "github.com/aws/aws-sdk-go/service/ec2/ec2iface"
  21. "github.com/grafana/grafana/pkg/components/null"
  22. "github.com/grafana/grafana/pkg/components/simplejson"
  23. "github.com/grafana/grafana/pkg/metrics"
  24. )
// CloudWatchExecutor is the query endpoint registered for the "cloudwatch"
// data source type.
type CloudWatchExecutor struct {
	// DataSource is the data source of the request being served; Query
	// overwrites it on every call.
	*models.DataSource
	// ec2Svc is an EC2 API client. It is not referenced in this part of the
	// file. NOTE(review): presumably set and used by the metric-find code.
	ec2Svc ec2iface.EC2API
}
// DatasourceInfo groups the connection settings of a CloudWatch data source.
//
// NOTE(review): nothing in this part of the file reads or populates this
// type; the field descriptions below are inferred from the field names and
// should be confirmed against the code that builds it.
type DatasourceInfo struct {
	Profile       string // presumably the AWS credentials profile name
	Region        string // presumably the AWS region to query
	AuthType      string // presumably selects how credentials are obtained
	AssumeRoleArn string // presumably the ARN of a role to assume
	Namespace     string // presumably the CloudWatch namespace
	AccessKey     string // presumably the AWS access key ID
	SecretKey     string // presumably the AWS secret access key
}
  38. func NewCloudWatchExecutor(dsInfo *models.DataSource) (tsdb.TsdbQueryEndpoint, error) {
  39. return &CloudWatchExecutor{}, nil
  40. }
  41. var (
  42. plog log.Logger
  43. standardStatistics map[string]bool
  44. aliasFormat *regexp.Regexp
  45. )
  46. func init() {
  47. plog = log.New("tsdb.cloudwatch")
  48. tsdb.RegisterTsdbQueryEndpoint("cloudwatch", NewCloudWatchExecutor)
  49. standardStatistics = map[string]bool{
  50. "Average": true,
  51. "Maximum": true,
  52. "Minimum": true,
  53. "Sum": true,
  54. "SampleCount": true,
  55. }
  56. aliasFormat = regexp.MustCompile(`\{\{\s*(.+?)\s*\}\}`)
  57. }
  58. func (e *CloudWatchExecutor) Query(ctx context.Context, dsInfo *models.DataSource, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
  59. var result *tsdb.Response
  60. e.DataSource = dsInfo
  61. queryType := queryContext.Queries[0].Model.Get("type").MustString("")
  62. var err error
  63. switch queryType {
  64. case "metricFindQuery":
  65. result, err = e.executeMetricFindQuery(ctx, queryContext)
  66. case "annotationQuery":
  67. result, err = e.executeAnnotationQuery(ctx, queryContext)
  68. case "timeSeriesQuery":
  69. fallthrough
  70. default:
  71. result, err = e.executeTimeSeriesQuery(ctx, queryContext)
  72. }
  73. return result, err
  74. }
  75. func (e *CloudWatchExecutor) executeTimeSeriesQuery(ctx context.Context, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
  76. results := &tsdb.Response{
  77. Results: make(map[string]*tsdb.QueryResult),
  78. }
  79. resultChan := make(chan *tsdb.QueryResult, len(queryContext.Queries))
  80. eg, ectx := errgroup.WithContext(ctx)
  81. getMetricDataQueries := make(map[string]map[string]*CloudWatchQuery)
  82. for i, model := range queryContext.Queries {
  83. queryType := model.Model.Get("type").MustString()
  84. if queryType != "timeSeriesQuery" && queryType != "" {
  85. continue
  86. }
  87. RefId := queryContext.Queries[i].RefId
  88. query, err := parseQuery(queryContext.Queries[i].Model)
  89. if err != nil {
  90. results.Results[RefId] = &tsdb.QueryResult{
  91. Error: err,
  92. }
  93. return results, nil
  94. }
  95. query.RefId = RefId
  96. if query.Id != "" {
  97. if _, ok := getMetricDataQueries[query.Region]; !ok {
  98. getMetricDataQueries[query.Region] = make(map[string]*CloudWatchQuery)
  99. }
  100. getMetricDataQueries[query.Region][query.Id] = query
  101. continue
  102. }
  103. if query.Id == "" && query.Expression != "" {
  104. results.Results[query.RefId] = &tsdb.QueryResult{
  105. Error: fmt.Errorf("Invalid query: id should be set if using expression"),
  106. }
  107. return results, nil
  108. }
  109. eg.Go(func() error {
  110. queryRes, err := e.executeQuery(ectx, query, queryContext)
  111. if ae, ok := err.(awserr.Error); ok && ae.Code() == "500" {
  112. return err
  113. }
  114. if err != nil {
  115. resultChan <- &tsdb.QueryResult{
  116. RefId: query.RefId,
  117. Error: err,
  118. }
  119. return nil
  120. }
  121. resultChan <- queryRes
  122. return nil
  123. })
  124. }
  125. if len(getMetricDataQueries) > 0 {
  126. for region, getMetricDataQuery := range getMetricDataQueries {
  127. q := getMetricDataQuery
  128. eg.Go(func() error {
  129. queryResponses, err := e.executeGetMetricDataQuery(ectx, region, q, queryContext)
  130. if ae, ok := err.(awserr.Error); ok && ae.Code() == "500" {
  131. return err
  132. }
  133. for _, queryRes := range queryResponses {
  134. if err != nil {
  135. queryRes.Error = err
  136. }
  137. resultChan <- queryRes
  138. }
  139. return nil
  140. })
  141. }
  142. }
  143. if err := eg.Wait(); err != nil {
  144. return nil, err
  145. }
  146. close(resultChan)
  147. for result := range resultChan {
  148. results.Results[result.RefId] = result
  149. }
  150. return results, nil
  151. }
  152. func (e *CloudWatchExecutor) executeQuery(ctx context.Context, query *CloudWatchQuery, queryContext *tsdb.TsdbQuery) (*tsdb.QueryResult, error) {
  153. client, err := e.getClient(query.Region)
  154. if err != nil {
  155. return nil, err
  156. }
  157. startTime, err := queryContext.TimeRange.ParseFrom()
  158. if err != nil {
  159. return nil, err
  160. }
  161. endTime, err := queryContext.TimeRange.ParseTo()
  162. if err != nil {
  163. return nil, err
  164. }
  165. if endTime.Before(startTime) {
  166. return nil, fmt.Errorf("Invalid time range: End time can't be before start time")
  167. }
  168. params := &cloudwatch.GetMetricStatisticsInput{
  169. Namespace: aws.String(query.Namespace),
  170. MetricName: aws.String(query.MetricName),
  171. Dimensions: query.Dimensions,
  172. Period: aws.Int64(int64(query.Period)),
  173. }
  174. if len(query.Statistics) > 0 {
  175. params.Statistics = query.Statistics
  176. }
  177. if len(query.ExtendedStatistics) > 0 {
  178. params.ExtendedStatistics = query.ExtendedStatistics
  179. }
  180. // 1 minutes resolution metrics is stored for 15 days, 15 * 24 * 60 = 21600
  181. if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
  182. return nil, errors.New("too long query period")
  183. }
  184. var resp *cloudwatch.GetMetricStatisticsOutput
  185. for startTime.Before(endTime) {
  186. params.StartTime = aws.Time(startTime)
  187. if query.HighResolution {
  188. startTime = startTime.Add(time.Duration(1440*query.Period) * time.Second)
  189. } else {
  190. startTime = endTime
  191. }
  192. params.EndTime = aws.Time(startTime)
  193. if setting.Env == setting.DEV {
  194. plog.Debug("CloudWatch query", "raw query", params)
  195. }
  196. partResp, err := client.GetMetricStatisticsWithContext(ctx, params, request.WithResponseReadTimeout(10*time.Second))
  197. if err != nil {
  198. return nil, err
  199. }
  200. if resp != nil {
  201. resp.Datapoints = append(resp.Datapoints, partResp.Datapoints...)
  202. } else {
  203. resp = partResp
  204. }
  205. metrics.M_Aws_CloudWatch_GetMetricStatistics.Inc()
  206. }
  207. queryRes, err := parseResponse(resp, query)
  208. if err != nil {
  209. return nil, err
  210. }
  211. return queryRes, nil
  212. }
  213. func (e *CloudWatchExecutor) executeGetMetricDataQuery(ctx context.Context, region string, queries map[string]*CloudWatchQuery, queryContext *tsdb.TsdbQuery) ([]*tsdb.QueryResult, error) {
  214. queryResponses := make([]*tsdb.QueryResult, 0)
  215. // validate query
  216. for _, query := range queries {
  217. if !(len(query.Statistics) == 1 && len(query.ExtendedStatistics) == 0) &&
  218. !(len(query.Statistics) == 0 && len(query.ExtendedStatistics) == 1) {
  219. return queryResponses, errors.New("Statistics count should be 1")
  220. }
  221. }
  222. client, err := e.getClient(region)
  223. if err != nil {
  224. return queryResponses, err
  225. }
  226. startTime, err := queryContext.TimeRange.ParseFrom()
  227. if err != nil {
  228. return queryResponses, err
  229. }
  230. endTime, err := queryContext.TimeRange.ParseTo()
  231. if err != nil {
  232. return queryResponses, err
  233. }
  234. params := &cloudwatch.GetMetricDataInput{
  235. StartTime: aws.Time(startTime),
  236. EndTime: aws.Time(endTime),
  237. ScanBy: aws.String("TimestampAscending"),
  238. }
  239. for _, query := range queries {
  240. // 1 minutes resolution metrics is stored for 15 days, 15 * 24 * 60 = 21600
  241. if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
  242. return queryResponses, errors.New("too long query period")
  243. }
  244. mdq := &cloudwatch.MetricDataQuery{
  245. Id: aws.String(query.Id),
  246. ReturnData: aws.Bool(query.ReturnData),
  247. }
  248. if query.Expression != "" {
  249. mdq.Expression = aws.String(query.Expression)
  250. } else {
  251. mdq.MetricStat = &cloudwatch.MetricStat{
  252. Metric: &cloudwatch.Metric{
  253. Namespace: aws.String(query.Namespace),
  254. MetricName: aws.String(query.MetricName),
  255. },
  256. Period: aws.Int64(int64(query.Period)),
  257. }
  258. for _, d := range query.Dimensions {
  259. mdq.MetricStat.Metric.Dimensions = append(mdq.MetricStat.Metric.Dimensions,
  260. &cloudwatch.Dimension{
  261. Name: d.Name,
  262. Value: d.Value,
  263. })
  264. }
  265. if len(query.Statistics) == 1 {
  266. mdq.MetricStat.Stat = query.Statistics[0]
  267. } else {
  268. mdq.MetricStat.Stat = query.ExtendedStatistics[0]
  269. }
  270. }
  271. params.MetricDataQueries = append(params.MetricDataQueries, mdq)
  272. }
  273. nextToken := ""
  274. mdr := make(map[string]*cloudwatch.MetricDataResult)
  275. for {
  276. if nextToken != "" {
  277. params.NextToken = aws.String(nextToken)
  278. }
  279. resp, err := client.GetMetricDataWithContext(ctx, params)
  280. if err != nil {
  281. return queryResponses, err
  282. }
  283. metrics.M_Aws_CloudWatch_GetMetricData.Add(float64(len(params.MetricDataQueries)))
  284. for _, r := range resp.MetricDataResults {
  285. if _, ok := mdr[*r.Id]; !ok {
  286. mdr[*r.Id] = r
  287. } else {
  288. mdr[*r.Id].Timestamps = append(mdr[*r.Id].Timestamps, r.Timestamps...)
  289. mdr[*r.Id].Values = append(mdr[*r.Id].Values, r.Values...)
  290. }
  291. }
  292. if resp.NextToken == nil || *resp.NextToken == "" {
  293. break
  294. }
  295. nextToken = *resp.NextToken
  296. }
  297. for i, r := range mdr {
  298. if *r.StatusCode != "Complete" {
  299. return queryResponses, fmt.Errorf("Part of query is failed: %s", *r.StatusCode)
  300. }
  301. queryRes := tsdb.NewQueryResult()
  302. queryRes.RefId = queries[i].RefId
  303. query := queries[*r.Id]
  304. series := tsdb.TimeSeries{
  305. Tags: map[string]string{},
  306. Points: make([]tsdb.TimePoint, 0),
  307. }
  308. for _, d := range query.Dimensions {
  309. series.Tags[*d.Name] = *d.Value
  310. }
  311. s := ""
  312. if len(query.Statistics) == 1 {
  313. s = *query.Statistics[0]
  314. } else {
  315. s = *query.ExtendedStatistics[0]
  316. }
  317. series.Name = formatAlias(query, s, series.Tags)
  318. for j, t := range r.Timestamps {
  319. expectedTimestamp := r.Timestamps[j].Add(time.Duration(query.Period) * time.Second)
  320. if j > 0 && expectedTimestamp.Before(*t) {
  321. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(expectedTimestamp.Unix()*1000)))
  322. }
  323. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(*r.Values[j]), float64((*t).Unix())*1000))
  324. }
  325. queryRes.Series = append(queryRes.Series, &series)
  326. queryRes.Meta = simplejson.New()
  327. queryResponses = append(queryResponses, queryRes)
  328. }
  329. return queryResponses, nil
  330. }
  331. func parseDimensions(model *simplejson.Json) ([]*cloudwatch.Dimension, error) {
  332. var result []*cloudwatch.Dimension
  333. for k, v := range model.Get("dimensions").MustMap() {
  334. kk := k
  335. if vv, ok := v.(string); ok {
  336. result = append(result, &cloudwatch.Dimension{
  337. Name: &kk,
  338. Value: &vv,
  339. })
  340. } else {
  341. return nil, errors.New("failed to parse")
  342. }
  343. }
  344. sort.Slice(result, func(i, j int) bool {
  345. return *result[i].Name < *result[j].Name
  346. })
  347. return result, nil
  348. }
  349. func parseStatistics(model *simplejson.Json) ([]string, []string, error) {
  350. var statistics []string
  351. var extendedStatistics []string
  352. for _, s := range model.Get("statistics").MustArray() {
  353. if ss, ok := s.(string); ok {
  354. if _, isStandard := standardStatistics[ss]; isStandard {
  355. statistics = append(statistics, ss)
  356. } else {
  357. extendedStatistics = append(extendedStatistics, ss)
  358. }
  359. } else {
  360. return nil, nil, errors.New("failed to parse")
  361. }
  362. }
  363. return statistics, extendedStatistics, nil
  364. }
  365. func parseQuery(model *simplejson.Json) (*CloudWatchQuery, error) {
  366. region, err := model.Get("region").String()
  367. if err != nil {
  368. return nil, err
  369. }
  370. namespace, err := model.Get("namespace").String()
  371. if err != nil {
  372. return nil, err
  373. }
  374. metricName, err := model.Get("metricName").String()
  375. if err != nil {
  376. return nil, err
  377. }
  378. id := model.Get("id").MustString("")
  379. expression := model.Get("expression").MustString("")
  380. dimensions, err := parseDimensions(model)
  381. if err != nil {
  382. return nil, err
  383. }
  384. statistics, extendedStatistics, err := parseStatistics(model)
  385. if err != nil {
  386. return nil, err
  387. }
  388. p := model.Get("period").MustString("")
  389. if p == "" {
  390. if namespace == "AWS/EC2" {
  391. p = "300"
  392. } else {
  393. p = "60"
  394. }
  395. }
  396. var period int
  397. if regexp.MustCompile(`^\d+$`).Match([]byte(p)) {
  398. period, err = strconv.Atoi(p)
  399. if err != nil {
  400. return nil, err
  401. }
  402. } else {
  403. d, err := time.ParseDuration(p)
  404. if err != nil {
  405. return nil, err
  406. }
  407. period = int(d.Seconds())
  408. }
  409. alias := model.Get("alias").MustString()
  410. if alias == "" {
  411. alias = "{{metric}}_{{stat}}"
  412. }
  413. returnData := model.Get("returnData").MustBool(false)
  414. highResolution := model.Get("highResolution").MustBool(false)
  415. return &CloudWatchQuery{
  416. Region: region,
  417. Namespace: namespace,
  418. MetricName: metricName,
  419. Dimensions: dimensions,
  420. Statistics: aws.StringSlice(statistics),
  421. ExtendedStatistics: aws.StringSlice(extendedStatistics),
  422. Period: period,
  423. Alias: alias,
  424. Id: id,
  425. Expression: expression,
  426. ReturnData: returnData,
  427. HighResolution: highResolution,
  428. }, nil
  429. }
  430. func formatAlias(query *CloudWatchQuery, stat string, dimensions map[string]string) string {
  431. if len(query.Id) > 0 && len(query.Expression) > 0 {
  432. return query.Id
  433. }
  434. data := map[string]string{}
  435. data["region"] = query.Region
  436. data["namespace"] = query.Namespace
  437. data["metric"] = query.MetricName
  438. data["stat"] = stat
  439. data["period"] = strconv.Itoa(query.Period)
  440. for k, v := range dimensions {
  441. data[k] = v
  442. }
  443. result := aliasFormat.ReplaceAllFunc([]byte(query.Alias), func(in []byte) []byte {
  444. labelName := strings.Replace(string(in), "{{", "", 1)
  445. labelName = strings.Replace(labelName, "}}", "", 1)
  446. labelName = strings.TrimSpace(labelName)
  447. if val, exists := data[labelName]; exists {
  448. return []byte(val)
  449. }
  450. return in
  451. })
  452. return string(result)
  453. }
  454. func parseResponse(resp *cloudwatch.GetMetricStatisticsOutput, query *CloudWatchQuery) (*tsdb.QueryResult, error) {
  455. queryRes := tsdb.NewQueryResult()
  456. queryRes.RefId = query.RefId
  457. var value float64
  458. for _, s := range append(query.Statistics, query.ExtendedStatistics...) {
  459. series := tsdb.TimeSeries{
  460. Tags: map[string]string{},
  461. Points: make([]tsdb.TimePoint, 0),
  462. }
  463. for _, d := range query.Dimensions {
  464. series.Tags[*d.Name] = *d.Value
  465. }
  466. series.Name = formatAlias(query, *s, series.Tags)
  467. lastTimestamp := make(map[string]time.Time)
  468. sort.Slice(resp.Datapoints, func(i, j int) bool {
  469. return (*resp.Datapoints[i].Timestamp).Before(*resp.Datapoints[j].Timestamp)
  470. })
  471. for _, v := range resp.Datapoints {
  472. switch *s {
  473. case "Average":
  474. value = *v.Average
  475. case "Maximum":
  476. value = *v.Maximum
  477. case "Minimum":
  478. value = *v.Minimum
  479. case "Sum":
  480. value = *v.Sum
  481. case "SampleCount":
  482. value = *v.SampleCount
  483. default:
  484. if strings.Index(*s, "p") == 0 && v.ExtendedStatistics[*s] != nil {
  485. value = *v.ExtendedStatistics[*s]
  486. }
  487. }
  488. // terminate gap of data points
  489. timestamp := *v.Timestamp
  490. if _, ok := lastTimestamp[*s]; ok {
  491. nextTimestampFromLast := lastTimestamp[*s].Add(time.Duration(query.Period) * time.Second)
  492. for timestamp.After(nextTimestampFromLast) {
  493. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(nextTimestampFromLast.Unix()*1000)))
  494. nextTimestampFromLast = nextTimestampFromLast.Add(time.Duration(query.Period) * time.Second)
  495. }
  496. }
  497. lastTimestamp[*s] = timestamp
  498. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(value), float64(timestamp.Unix()*1000)))
  499. }
  500. queryRes.Series = append(queryRes.Series, &series)
  501. queryRes.Meta = simplejson.New()
  502. if len(resp.Datapoints) > 0 && resp.Datapoints[0].Unit != nil {
  503. if unit, ok := cloudwatchUnitMappings[*resp.Datapoints[0].Unit]; ok {
  504. queryRes.Meta.Set("unit", unit)
  505. }
  506. }
  507. }
  508. return queryRes, nil
  509. }