From 2cb62061e0d98d7fd8c0f5e095abf9d102d9fbb1 Mon Sep 17 00:00:00 2001 From: greg linton Date: Wed, 16 Oct 2019 17:10:01 -0600 Subject: [PATCH 1/5] Add dial and response timeout to cloudwatch input --- plugins/inputs/cloudwatch/README.md | 6 +++ plugins/inputs/cloudwatch/cloudwatch.go | 69 +++++++++++++++++-------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/plugins/inputs/cloudwatch/README.md b/plugins/inputs/cloudwatch/README.md index 369eadbc16290..77f38595d37da 100644 --- a/plugins/inputs/cloudwatch/README.md +++ b/plugins/inputs/cloudwatch/README.md @@ -70,6 +70,12 @@ API endpoint. In the following order the plugin will attempt to authenticate. ## See http://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_limits.html # ratelimit = 25 + ## Timeout for http requests made by the cloudwatch client. + # response_timeout = "5s" + + ## Maximum amount of time a dial will wait for a connect to complete. Default is no timeout. + # dial_timeout = "30s" + ## Namespace-wide statistic filters. These allow fewer queries to be made to ## cloudwatch. # statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ] diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 7aad67f5b5c3c..0644aa311de14 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -3,6 +3,8 @@ package cloudwatch import ( "errors" "fmt" + "net" + "net/http" "strconv" "strings" "sync" @@ -23,16 +25,18 @@ import ( type ( // CloudWatch contains the configuration and cache for the cloudwatch plugin. CloudWatch struct { - Region string `toml:"region"` - AccessKey string `toml:"access_key"` - SecretKey string `toml:"secret_key"` - RoleARN string `toml:"role_arn"` - Profile string `toml:"profile"` - CredentialPath string `toml:"shared_credential_file"` - Token string `toml:"token"` - EndpointURL string `toml:"endpoint_url"` - StatisticExclude []string `toml:"statistic_exclude"` - StatisticInclude []string `toml:"statistic_include"` + Region string `toml:"region"` + AccessKey string `toml:"access_key"` + SecretKey string `toml:"secret_key"` + RoleARN string `toml:"role_arn"` + Profile string `toml:"profile"` + CredentialPath string `toml:"shared_credential_file"` + Token string `toml:"token"` + EndpointURL string `toml:"endpoint_url"` + StatisticExclude []string `toml:"statistic_exclude"` + StatisticInclude []string `toml:"statistic_include"` + ResponseTimeout internal.Duration `toml:"response_timeout"` + DialTimeout internal.Duration `toml:"dial_timeout"` Period internal.Duration `toml:"period"` Delay internal.Duration `toml:"delay"` @@ -133,6 +137,12 @@ func (c *CloudWatch) SampleConfig() string { ## See http://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_limits.html # ratelimit = 25 + ## Timeout for http requests made by the cloudwatch client. + # response_timeout = "5s" + + ## Maximum amount of time a dial will wait for a connect to complete. Default is no timeout. + # dial_timeout = "30s" + ## Namespace-wide statistic filters. These allow fewer queries to be made to ## cloudwatch. # statistic_include = [ "average", "sum", "minimum", "maximum", sample_count" ] @@ -183,10 +193,7 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { return err } - err = c.updateWindow(time.Now()) - if err != nil { - return err - } + c.updateWindow(time.Now()) // Get all of the possible queries so we can send groups of 100. queries, err := c.getDataQueries(filteredMetrics) @@ -235,7 +242,7 @@ func (c *CloudWatch) Gather(acc telegraf.Accumulator) error { return c.aggregateMetrics(acc, results) } -func (c *CloudWatch) initializeCloudWatch() error { +func (c *CloudWatch) initializeCloudWatch() { credentialConfig := &internalaws.CredentialConfig{ Region: c.Region, AccessKey: c.AccessKey, @@ -248,10 +255,28 @@ func (c *CloudWatch) initializeCloudWatch() error { } configProvider := credentialConfig.Credentials() - cfg := &aws.Config{} + cfg := &aws.Config{ + HTTPClient: &http.Client{ + // use the DefaultTransport plus configured dial timeout + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: c.DialTimeout.Duration, + KeepAlive: 30 * time.Second, + DualStack: true, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + }, + Timeout: c.ResponseTimeout.Duration, + }, + } + loglevel := aws.LogOff c.client = cloudwatch.New(configProvider, cfg.WithLogLevel(loglevel)) - return nil } type filteredMetric struct { @@ -370,7 +395,7 @@ func (c *CloudWatch) fetchNamespaceMetrics() ([]*cloudwatch.Metric, error) { return metrics, nil } -func (c *CloudWatch) updateWindow(relativeTo time.Time) error { +func (c *CloudWatch) updateWindow(relativeTo time.Time) { windowEnd := relativeTo.Add(-c.Delay.Duration) if c.windowEnd.IsZero() { @@ -382,8 +407,6 @@ func (c *CloudWatch) updateWindow(relativeTo time.Time) error { } c.windowEnd = windowEnd - - return nil } // getDataQueries gets all of the possible queries so we can maximize the request payload. @@ -533,8 +556,10 @@ func (c *CloudWatch) aggregateMetrics( func init() { inputs.Add("cloudwatch", func() telegraf.Input { return &CloudWatch{ - CacheTTL: internal.Duration{Duration: time.Hour}, - RateLimit: 25, + CacheTTL: internal.Duration{Duration: time.Hour}, + RateLimit: 25, + ResponseTimeout: internal.Duration{Duration: time.Second * 5}, + DialTimeout: internal.Duration{Duration: time.Second * 30}, } }) } From a73e41d48f83d44689b5f6e957fc51033d042cf2 Mon Sep 17 00:00:00 2001 From: greg linton Date: Thu, 17 Oct 2019 13:31:20 -0600 Subject: [PATCH 2/5] Set singular timeout --- plugins/inputs/cloudwatch/README.md | 5 +---- plugins/inputs/cloudwatch/cloudwatch.go | 21 ++++++++------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/plugins/inputs/cloudwatch/README.md b/plugins/inputs/cloudwatch/README.md index 77f38595d37da..3cd098f4706f5 100644 --- a/plugins/inputs/cloudwatch/README.md +++ b/plugins/inputs/cloudwatch/README.md @@ -71,10 +71,7 @@ API endpoint. In the following order the plugin will attempt to authenticate. # ratelimit = 25 ## Timeout for http requests made by the cloudwatch client. - # response_timeout = "5s" - - ## Maximum amount of time a dial will wait for a connect to complete. Default is no timeout. - # dial_timeout = "30s" + # timeout = "5s" ## Namespace-wide statistic filters. These allow fewer queries to be made to ## cloudwatch. diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 0644aa311de14..54c39d003c277 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -35,8 +35,7 @@ type ( EndpointURL string `toml:"endpoint_url"` StatisticExclude []string `toml:"statistic_exclude"` StatisticInclude []string `toml:"statistic_include"` - ResponseTimeout internal.Duration `toml:"response_timeout"` - DialTimeout internal.Duration `toml:"dial_timeout"` + Timeout internal.Duration `toml:"timeout"` Period internal.Duration `toml:"period"` Delay internal.Duration `toml:"delay"` @@ -138,10 +137,7 @@ func (c *CloudWatch) SampleConfig() string { # ratelimit = 25 ## Timeout for http requests made by the cloudwatch client. - # response_timeout = "5s" - - ## Maximum amount of time a dial will wait for a connect to complete. Default is no timeout. - # dial_timeout = "30s" + # timeout = "5s" ## Namespace-wide statistic filters. These allow fewer queries to be made to ## cloudwatch. @@ -257,11 +253,11 @@ func (c *CloudWatch) initializeCloudWatch() { cfg := &aws.Config{ HTTPClient: &http.Client{ - // use the DefaultTransport plus configured dial timeout + // use values from DefaultTransport Transport: &http.Transport{ Proxy: http.ProxyFromEnvironment, DialContext: (&net.Dialer{ - Timeout: c.DialTimeout.Duration, + Timeout: 30 * time.Second, KeepAlive: 30 * time.Second, DualStack: true, }).DialContext, @@ -271,7 +267,7 @@ func (c *CloudWatch) initializeCloudWatch() { TLSHandshakeTimeout: 10 * time.Second, ExpectContinueTimeout: 1 * time.Second, }, - Timeout: c.ResponseTimeout.Duration, + Timeout: c.Timeout.Duration, }, } @@ -556,10 +552,9 @@ func (c *CloudWatch) aggregateMetrics( func init() { inputs.Add("cloudwatch", func() telegraf.Input { return &CloudWatch{ - CacheTTL: internal.Duration{Duration: time.Hour}, - RateLimit: 25, - ResponseTimeout: internal.Duration{Duration: time.Second * 5}, - DialTimeout: internal.Duration{Duration: time.Second * 30}, + CacheTTL: internal.Duration{Duration: time.Hour}, + RateLimit: 25, + Timeout: internal.Duration{Duration: time.Second * 5}, } }) } From 4e07a040a9bbc3c80f3108b7b48f3248fc99880b Mon Sep 17 00:00:00 2001 From: greg linton Date: Thu, 17 Oct 2019 16:18:02 -0600 Subject: [PATCH 3/5] Don't report on individual failed pings --- plugins/inputs/ping/ping.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/plugins/inputs/ping/ping.go b/plugins/inputs/ping/ping.go index ac0e9ebdfdfe2..15c54dc0c719b 100644 --- a/plugins/inputs/ping/ping.go +++ b/plugins/inputs/ping/ping.go @@ -261,8 +261,6 @@ func (p *Ping) pingToURLNative(destination string, acc telegraf.Accumulator) { Seq: seq, }) if err != nil { - acc.AddFields("ping", map[string]interface{}{"result_code": 2}, map[string]string{"url": destination}) - acc.AddError(err) return } From 5b39b68fda30aff0cefa2461ab7307bab7b8daed Mon Sep 17 00:00:00 2001 From: greg linton Date: Mon, 21 Oct 2019 11:13:04 -0600 Subject: [PATCH 4/5] Remove 1.13 field --- plugins/inputs/cloudwatch/cloudwatch.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/inputs/cloudwatch/cloudwatch.go b/plugins/inputs/cloudwatch/cloudwatch.go index 54c39d003c277..5af281cfcc688 100644 --- a/plugins/inputs/cloudwatch/cloudwatch.go +++ b/plugins/inputs/cloudwatch/cloudwatch.go @@ -261,7 +261,6 @@ func (c *CloudWatch) initializeCloudWatch() { KeepAlive: 30 * time.Second, DualStack: true, }).DialContext, - ForceAttemptHTTP2: true, MaxIdleConns: 100, IdleConnTimeout: 90 * time.Second, TLSHandshakeTimeout: 10 * time.Second, From fda86c7a31a7fa66a1e0e514b3b783384ba19e6a Mon Sep 17 00:00:00 2001 From: greg linton Date: Mon, 21 Oct 2019 11:16:17 -0600 Subject: [PATCH 5/5] Revert unintended change --- plugins/inputs/ping/ping.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/inputs/ping/ping.go b/plugins/inputs/ping/ping.go index 15c54dc0c719b..ac0e9ebdfdfe2 100644 --- a/plugins/inputs/ping/ping.go +++ b/plugins/inputs/ping/ping.go @@ -261,6 +261,8 @@ func (p *Ping) pingToURLNative(destination string, acc telegraf.Accumulator) { Seq: seq, }) if err != nil { + acc.AddFields("ping", map[string]interface{}{"result_code": 2}, map[string]string{"url": destination}) + acc.AddError(err) return }