cloudwatch.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. package cloudwatch
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "regexp"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/grafana/grafana/pkg/log"
  12. "github.com/grafana/grafana/pkg/models"
  13. "github.com/grafana/grafana/pkg/setting"
  14. "github.com/grafana/grafana/pkg/tsdb"
  15. "golang.org/x/sync/errgroup"
  16. "github.com/aws/aws-sdk-go/aws"
  17. "github.com/aws/aws-sdk-go/aws/request"
  18. "github.com/aws/aws-sdk-go/service/cloudwatch"
  19. "github.com/aws/aws-sdk-go/service/ec2/ec2iface"
  20. "github.com/grafana/grafana/pkg/components/null"
  21. "github.com/grafana/grafana/pkg/components/simplejson"
  22. "github.com/grafana/grafana/pkg/metrics"
  23. )
  24. type CloudWatchExecutor struct {
  25. *models.DataSource
  26. ec2Svc ec2iface.EC2API
  27. }
  28. type DatasourceInfo struct {
  29. Profile string
  30. Region string
  31. AuthType string
  32. AssumeRoleArn string
  33. Namespace string
  34. AccessKey string
  35. SecretKey string
  36. }
  37. func NewCloudWatchExecutor(dsInfo *models.DataSource) (tsdb.TsdbQueryEndpoint, error) {
  38. return &CloudWatchExecutor{}, nil
  39. }
  40. var (
  41. plog log.Logger
  42. standardStatistics map[string]bool
  43. aliasFormat *regexp.Regexp
  44. )
  45. func init() {
  46. plog = log.New("tsdb.cloudwatch")
  47. tsdb.RegisterTsdbQueryEndpoint("cloudwatch", NewCloudWatchExecutor)
  48. standardStatistics = map[string]bool{
  49. "Average": true,
  50. "Maximum": true,
  51. "Minimum": true,
  52. "Sum": true,
  53. "SampleCount": true,
  54. }
  55. aliasFormat = regexp.MustCompile(`\{\{\s*(.+?)\s*\}\}`)
  56. }
  57. func (e *CloudWatchExecutor) Query(ctx context.Context, dsInfo *models.DataSource, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
  58. var result *tsdb.Response
  59. e.DataSource = dsInfo
  60. queryType := queryContext.Queries[0].Model.Get("type").MustString("")
  61. var err error
  62. switch queryType {
  63. case "metricFindQuery":
  64. result, err = e.executeMetricFindQuery(ctx, queryContext)
  65. case "annotationQuery":
  66. result, err = e.executeAnnotationQuery(ctx, queryContext)
  67. case "timeSeriesQuery":
  68. fallthrough
  69. default:
  70. result, err = e.executeTimeSeriesQuery(ctx, queryContext)
  71. }
  72. return result, err
  73. }
  74. func (e *CloudWatchExecutor) executeTimeSeriesQuery(ctx context.Context, queryContext *tsdb.TsdbQuery) (*tsdb.Response, error) {
  75. result := &tsdb.Response{
  76. Results: make(map[string]*tsdb.QueryResult),
  77. }
  78. eg, ectx := errgroup.WithContext(ctx)
  79. getMetricDataQueries := make(map[string]map[string]*CloudWatchQuery)
  80. for i, model := range queryContext.Queries {
  81. queryType := model.Model.Get("type").MustString()
  82. if queryType != "timeSeriesQuery" && queryType != "" {
  83. continue
  84. }
  85. query, err := parseQuery(queryContext.Queries[i].Model)
  86. if err != nil {
  87. return nil, err
  88. }
  89. query.RefId = queryContext.Queries[i].RefId
  90. if query.Id != "" {
  91. if _, ok := getMetricDataQueries[query.Region]; !ok {
  92. getMetricDataQueries[query.Region] = make(map[string]*CloudWatchQuery)
  93. }
  94. getMetricDataQueries[query.Region][query.Id] = query
  95. continue
  96. }
  97. if query.Id == "" && query.Expression != "" {
  98. return nil, fmt.Errorf("Invalid query: id should be set if using expression")
  99. }
  100. eg.Go(func() error {
  101. queryRes, err := e.executeQuery(ectx, query, queryContext)
  102. if err != nil {
  103. return err
  104. }
  105. result.Results[queryRes.RefId] = queryRes
  106. return nil
  107. })
  108. }
  109. if len(getMetricDataQueries) > 0 {
  110. for region, getMetricDataQuery := range getMetricDataQueries {
  111. q := getMetricDataQuery
  112. eg.Go(func() error {
  113. queryResponses, err := e.executeGetMetricDataQuery(ectx, region, q, queryContext)
  114. if err != nil {
  115. return err
  116. }
  117. for _, queryRes := range queryResponses {
  118. result.Results[queryRes.RefId] = queryRes
  119. }
  120. return nil
  121. })
  122. }
  123. }
  124. if err := eg.Wait(); err != nil {
  125. return nil, err
  126. }
  127. return result, nil
  128. }
  129. func (e *CloudWatchExecutor) executeQuery(ctx context.Context, query *CloudWatchQuery, queryContext *tsdb.TsdbQuery) (*tsdb.QueryResult, error) {
  130. client, err := e.getClient(query.Region)
  131. if err != nil {
  132. return nil, err
  133. }
  134. startTime, err := queryContext.TimeRange.ParseFrom()
  135. if err != nil {
  136. return nil, err
  137. }
  138. endTime, err := queryContext.TimeRange.ParseTo()
  139. if err != nil {
  140. return nil, err
  141. }
  142. if endTime.Before(startTime) {
  143. return nil, fmt.Errorf("Invalid time range: End time can't be before start time")
  144. }
  145. params := &cloudwatch.GetMetricStatisticsInput{
  146. Namespace: aws.String(query.Namespace),
  147. MetricName: aws.String(query.MetricName),
  148. Dimensions: query.Dimensions,
  149. Period: aws.Int64(int64(query.Period)),
  150. }
  151. if len(query.Statistics) > 0 {
  152. params.Statistics = query.Statistics
  153. }
  154. if len(query.ExtendedStatistics) > 0 {
  155. params.ExtendedStatistics = query.ExtendedStatistics
  156. }
  157. // 1 minutes resolutin metrics is stored for 15 days, 15 * 24 * 60 = 21600
  158. if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
  159. return nil, errors.New("too long query period")
  160. }
  161. var resp *cloudwatch.GetMetricStatisticsOutput
  162. for startTime.Before(endTime) {
  163. params.StartTime = aws.Time(startTime)
  164. if query.HighResolution {
  165. startTime = startTime.Add(time.Duration(1440*query.Period) * time.Second)
  166. } else {
  167. startTime = endTime
  168. }
  169. params.EndTime = aws.Time(startTime)
  170. if setting.Env == setting.DEV {
  171. plog.Debug("CloudWatch query", "raw query", params)
  172. }
  173. partResp, err := client.GetMetricStatisticsWithContext(ctx, params, request.WithResponseReadTimeout(10*time.Second))
  174. if err != nil {
  175. return nil, err
  176. }
  177. if resp != nil {
  178. resp.Datapoints = append(resp.Datapoints, partResp.Datapoints...)
  179. } else {
  180. resp = partResp
  181. }
  182. metrics.M_Aws_CloudWatch_GetMetricStatistics.Inc()
  183. }
  184. queryRes, err := parseResponse(resp, query)
  185. if err != nil {
  186. return nil, err
  187. }
  188. return queryRes, nil
  189. }
  190. func (e *CloudWatchExecutor) executeGetMetricDataQuery(ctx context.Context, region string, queries map[string]*CloudWatchQuery, queryContext *tsdb.TsdbQuery) ([]*tsdb.QueryResult, error) {
  191. queryResponses := make([]*tsdb.QueryResult, 0)
  192. // validate query
  193. for _, query := range queries {
  194. if !(len(query.Statistics) == 1 && len(query.ExtendedStatistics) == 0) &&
  195. !(len(query.Statistics) == 0 && len(query.ExtendedStatistics) == 1) {
  196. return queryResponses, errors.New("Statistics count should be 1")
  197. }
  198. }
  199. client, err := e.getClient(region)
  200. if err != nil {
  201. return queryResponses, err
  202. }
  203. startTime, err := queryContext.TimeRange.ParseFrom()
  204. if err != nil {
  205. return queryResponses, err
  206. }
  207. endTime, err := queryContext.TimeRange.ParseTo()
  208. if err != nil {
  209. return queryResponses, err
  210. }
  211. params := &cloudwatch.GetMetricDataInput{
  212. StartTime: aws.Time(startTime),
  213. EndTime: aws.Time(endTime),
  214. ScanBy: aws.String("TimestampAscending"),
  215. }
  216. for _, query := range queries {
  217. // 1 minutes resolutin metrics is stored for 15 days, 15 * 24 * 60 = 21600
  218. if query.HighResolution && (((endTime.Unix() - startTime.Unix()) / int64(query.Period)) > 21600) {
  219. return nil, errors.New("too long query period")
  220. }
  221. mdq := &cloudwatch.MetricDataQuery{
  222. Id: aws.String(query.Id),
  223. ReturnData: aws.Bool(query.ReturnData),
  224. }
  225. if query.Expression != "" {
  226. mdq.Expression = aws.String(query.Expression)
  227. } else {
  228. mdq.MetricStat = &cloudwatch.MetricStat{
  229. Metric: &cloudwatch.Metric{
  230. Namespace: aws.String(query.Namespace),
  231. MetricName: aws.String(query.MetricName),
  232. },
  233. Period: aws.Int64(int64(query.Period)),
  234. }
  235. for _, d := range query.Dimensions {
  236. mdq.MetricStat.Metric.Dimensions = append(mdq.MetricStat.Metric.Dimensions,
  237. &cloudwatch.Dimension{
  238. Name: d.Name,
  239. Value: d.Value,
  240. })
  241. }
  242. if len(query.Statistics) == 1 {
  243. mdq.MetricStat.Stat = query.Statistics[0]
  244. } else {
  245. mdq.MetricStat.Stat = query.ExtendedStatistics[0]
  246. }
  247. }
  248. params.MetricDataQueries = append(params.MetricDataQueries, mdq)
  249. }
  250. nextToken := ""
  251. mdr := make(map[string]*cloudwatch.MetricDataResult)
  252. for {
  253. if nextToken != "" {
  254. params.NextToken = aws.String(nextToken)
  255. }
  256. resp, err := client.GetMetricDataWithContext(ctx, params)
  257. if err != nil {
  258. return queryResponses, err
  259. }
  260. metrics.M_Aws_CloudWatch_GetMetricData.Add(float64(len(params.MetricDataQueries)))
  261. for _, r := range resp.MetricDataResults {
  262. if _, ok := mdr[*r.Id]; !ok {
  263. mdr[*r.Id] = r
  264. } else {
  265. mdr[*r.Id].Timestamps = append(mdr[*r.Id].Timestamps, r.Timestamps...)
  266. mdr[*r.Id].Values = append(mdr[*r.Id].Values, r.Values...)
  267. }
  268. }
  269. if resp.NextToken == nil || *resp.NextToken == "" {
  270. break
  271. }
  272. nextToken = *resp.NextToken
  273. }
  274. for i, r := range mdr {
  275. if *r.StatusCode != "Complete" {
  276. return queryResponses, fmt.Errorf("Part of query is failed: %s", *r.StatusCode)
  277. }
  278. queryRes := tsdb.NewQueryResult()
  279. queryRes.RefId = queries[i].RefId
  280. query := queries[*r.Id]
  281. series := tsdb.TimeSeries{
  282. Tags: map[string]string{},
  283. Points: make([]tsdb.TimePoint, 0),
  284. }
  285. for _, d := range query.Dimensions {
  286. series.Tags[*d.Name] = *d.Value
  287. }
  288. s := ""
  289. if len(query.Statistics) == 1 {
  290. s = *query.Statistics[0]
  291. } else {
  292. s = *query.ExtendedStatistics[0]
  293. }
  294. series.Name = formatAlias(query, s, series.Tags)
  295. for j, t := range r.Timestamps {
  296. expectedTimestamp := r.Timestamps[j].Add(time.Duration(query.Period) * time.Second)
  297. if j > 0 && expectedTimestamp.Before(*t) {
  298. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(expectedTimestamp.Unix()*1000)))
  299. }
  300. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(*r.Values[j]), float64((*t).Unix())*1000))
  301. }
  302. queryRes.Series = append(queryRes.Series, &series)
  303. queryResponses = append(queryResponses, queryRes)
  304. }
  305. return queryResponses, nil
  306. }
  307. func parseDimensions(model *simplejson.Json) ([]*cloudwatch.Dimension, error) {
  308. var result []*cloudwatch.Dimension
  309. for k, v := range model.Get("dimensions").MustMap() {
  310. kk := k
  311. if vv, ok := v.(string); ok {
  312. result = append(result, &cloudwatch.Dimension{
  313. Name: &kk,
  314. Value: &vv,
  315. })
  316. } else {
  317. return nil, errors.New("failed to parse")
  318. }
  319. }
  320. sort.Slice(result, func(i, j int) bool {
  321. return *result[i].Name < *result[j].Name
  322. })
  323. return result, nil
  324. }
  325. func parseStatistics(model *simplejson.Json) ([]string, []string, error) {
  326. var statistics []string
  327. var extendedStatistics []string
  328. for _, s := range model.Get("statistics").MustArray() {
  329. if ss, ok := s.(string); ok {
  330. if _, isStandard := standardStatistics[ss]; isStandard {
  331. statistics = append(statistics, ss)
  332. } else {
  333. extendedStatistics = append(extendedStatistics, ss)
  334. }
  335. } else {
  336. return nil, nil, errors.New("failed to parse")
  337. }
  338. }
  339. return statistics, extendedStatistics, nil
  340. }
  341. func parseQuery(model *simplejson.Json) (*CloudWatchQuery, error) {
  342. region, err := model.Get("region").String()
  343. if err != nil {
  344. return nil, err
  345. }
  346. namespace, err := model.Get("namespace").String()
  347. if err != nil {
  348. return nil, err
  349. }
  350. metricName, err := model.Get("metricName").String()
  351. if err != nil {
  352. return nil, err
  353. }
  354. id := model.Get("id").MustString("")
  355. expression := model.Get("expression").MustString("")
  356. dimensions, err := parseDimensions(model)
  357. if err != nil {
  358. return nil, err
  359. }
  360. statistics, extendedStatistics, err := parseStatistics(model)
  361. if err != nil {
  362. return nil, err
  363. }
  364. p := model.Get("period").MustString("")
  365. if p == "" {
  366. if namespace == "AWS/EC2" {
  367. p = "300"
  368. } else {
  369. p = "60"
  370. }
  371. }
  372. var period int
  373. if regexp.MustCompile(`^\d+$`).Match([]byte(p)) {
  374. period, err = strconv.Atoi(p)
  375. if err != nil {
  376. return nil, err
  377. }
  378. } else {
  379. d, err := time.ParseDuration(p)
  380. if err != nil {
  381. return nil, err
  382. }
  383. period = int(d.Seconds())
  384. }
  385. alias := model.Get("alias").MustString()
  386. if alias == "" {
  387. alias = "{{metric}}_{{stat}}"
  388. }
  389. returnData := model.Get("returnData").MustBool(false)
  390. highResolution := model.Get("highResolution").MustBool(false)
  391. return &CloudWatchQuery{
  392. Region: region,
  393. Namespace: namespace,
  394. MetricName: metricName,
  395. Dimensions: dimensions,
  396. Statistics: aws.StringSlice(statistics),
  397. ExtendedStatistics: aws.StringSlice(extendedStatistics),
  398. Period: period,
  399. Alias: alias,
  400. Id: id,
  401. Expression: expression,
  402. ReturnData: returnData,
  403. HighResolution: highResolution,
  404. }, nil
  405. }
  406. func formatAlias(query *CloudWatchQuery, stat string, dimensions map[string]string) string {
  407. if len(query.Id) > 0 && len(query.Expression) > 0 {
  408. return query.Id
  409. }
  410. data := map[string]string{}
  411. data["region"] = query.Region
  412. data["namespace"] = query.Namespace
  413. data["metric"] = query.MetricName
  414. data["stat"] = stat
  415. data["period"] = strconv.Itoa(query.Period)
  416. for k, v := range dimensions {
  417. data[k] = v
  418. }
  419. result := aliasFormat.ReplaceAllFunc([]byte(query.Alias), func(in []byte) []byte {
  420. labelName := strings.Replace(string(in), "{{", "", 1)
  421. labelName = strings.Replace(labelName, "}}", "", 1)
  422. labelName = strings.TrimSpace(labelName)
  423. if val, exists := data[labelName]; exists {
  424. return []byte(val)
  425. }
  426. return in
  427. })
  428. return string(result)
  429. }
  430. func parseResponse(resp *cloudwatch.GetMetricStatisticsOutput, query *CloudWatchQuery) (*tsdb.QueryResult, error) {
  431. queryRes := tsdb.NewQueryResult()
  432. queryRes.RefId = query.RefId
  433. var value float64
  434. for _, s := range append(query.Statistics, query.ExtendedStatistics...) {
  435. series := tsdb.TimeSeries{
  436. Tags: map[string]string{},
  437. Points: make([]tsdb.TimePoint, 0),
  438. }
  439. for _, d := range query.Dimensions {
  440. series.Tags[*d.Name] = *d.Value
  441. }
  442. series.Name = formatAlias(query, *s, series.Tags)
  443. lastTimestamp := make(map[string]time.Time)
  444. sort.Slice(resp.Datapoints, func(i, j int) bool {
  445. return (*resp.Datapoints[i].Timestamp).Before(*resp.Datapoints[j].Timestamp)
  446. })
  447. for _, v := range resp.Datapoints {
  448. switch *s {
  449. case "Average":
  450. value = *v.Average
  451. case "Maximum":
  452. value = *v.Maximum
  453. case "Minimum":
  454. value = *v.Minimum
  455. case "Sum":
  456. value = *v.Sum
  457. case "SampleCount":
  458. value = *v.SampleCount
  459. default:
  460. if strings.Index(*s, "p") == 0 && v.ExtendedStatistics[*s] != nil {
  461. value = *v.ExtendedStatistics[*s]
  462. }
  463. }
  464. // terminate gap of data points
  465. timestamp := *v.Timestamp
  466. if _, ok := lastTimestamp[*s]; ok {
  467. nextTimestampFromLast := lastTimestamp[*s].Add(time.Duration(query.Period) * time.Second)
  468. for timestamp.After(nextTimestampFromLast) {
  469. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFromPtr(nil), float64(nextTimestampFromLast.Unix()*1000)))
  470. nextTimestampFromLast = nextTimestampFromLast.Add(time.Duration(query.Period) * time.Second)
  471. }
  472. }
  473. lastTimestamp[*s] = timestamp
  474. series.Points = append(series.Points, tsdb.NewTimePoint(null.FloatFrom(value), float64(timestamp.Unix()*1000)))
  475. }
  476. queryRes.Series = append(queryRes.Series, &series)
  477. }
  478. return queryRes, nil
  479. }