From 1979d7872f4f4584209921c35d9279397cde0e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Fern=C3=A1ndez=20Haro?= Date: Fri, 18 Sep 2020 19:01:18 +0100 Subject: [PATCH] Cherry-pick to 7.9: [Metricbeat][Kibana] Apply backoff when errored at getting usage stats (#20772) (#21162) Co-authored-by: Shaunak Kashyap Co-authored-by: Shaunak Kashyap --- CHANGELOG.next.asciidoc | 1 + metricbeat/module/kibana/stats/stats.go | 14 +++++-- metricbeat/module/kibana/stats/stats_test.go | 44 ++++++++++++++++++-- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 0b799cb12be3..1d5cafcb155b 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -101,6 +101,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix storage metricset to allow config without region/zone. {issue}17623[17623] {pull}17624[17624] - Fix overflow on Prometheus rates when new buckets are added on the go. {pull}17753[17753] - Add a switch to the driver definition on SQL module to use pretty names {pull}17378[17378] +- The Kibana collector applies backoff when errored at getting usage stats {pull}20772[20772] - The `elasticsearch/index` metricset only requests wildcard expansion for hidden indices if the monitored Elasticsearch cluster supports it. {pull}20938[20938] - Fix panic index out of range error when getting AWS account name. {pull}21101[21101] {issue}21095[21095] - Handle missing counters in the application_pool metricset. {pull}21071[21071] diff --git a/metricbeat/module/kibana/stats/stats.go b/metricbeat/module/kibana/stats/stats.go index 526a525951c2..a6e19d50f42c 100644 --- a/metricbeat/module/kibana/stats/stats.go +++ b/metricbeat/module/kibana/stats/stats.go @@ -38,9 +38,10 @@ func init() { } const ( - statsPath = "api/stats" - settingsPath = "api/settings" - usageCollectionPeriod = 24 * time.Hour + statsPath = "api/stats" + settingsPath = "api/settings" + usageCollectionPeriod = 24 * time.Hour + usageCollectionBackoff = 1 * time.Hour ) var ( @@ -57,6 +58,7 @@ type MetricSet struct { statsHTTP *helper.HTTP settingsHTTP *helper.HTTP usageLastCollectedOn time.Time + usageNextCollectOn time.Time isUsageExcludable bool } @@ -165,6 +167,10 @@ func (m *MetricSet) fetchStats(r mb.ReporterV2, now time.Time) error { content, err = m.statsHTTP.FetchContent() if err != nil { + if shouldCollectUsage { + // When errored in collecting the usage stats it may be counterproductive to try again on the next poll, try to collect the stats again after usageCollectionBackoff + m.usageNextCollectOn = now.Add(usageCollectionBackoff) + } return err } @@ -215,5 +221,5 @@ func (m *MetricSet) calculateIntervalMs() int64 { } func (m *MetricSet) shouldCollectUsage(now time.Time) bool { - return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod + return now.Sub(m.usageLastCollectedOn) > usageCollectionPeriod && now.Sub(m.usageNextCollectOn) > 0 } diff --git a/metricbeat/module/kibana/stats/stats_test.go b/metricbeat/module/kibana/stats/stats_test.go index b6757e5ecb21..56cbfc17e1f2 100644 --- a/metricbeat/module/kibana/stats/stats_test.go +++ b/metricbeat/module/kibana/stats/stats_test.go @@ -23,6 +23,7 @@ import ( "net/http" "net/http/httptest" "testing" + "time" "github.com/stretchr/testify/require" @@ -48,12 +49,12 @@ func TestFetchUsage(t *testing.T) { w.WriteHeader(503) case 1: // second call - // Make sure exclude_usage is still false since first call failed - require.Equal(t, "false", excludeUsage) + // Make sure exclude_usage is true since first call failed and it should not try again until usageCollectionBackoff time has passed + require.Equal(t, "true", excludeUsage) w.WriteHeader(200) case 2: // third call - // Make sure exclude_usage is now true since second call succeeded + // Make sure exclude_usage is still true require.Equal(t, "true", excludeUsage) w.WriteHeader(200) } @@ -76,3 +77,40 @@ func TestFetchUsage(t *testing.T) { // Third fetch mbtest.ReportingFetchV2Error(f) } + +func TestShouldCollectUsage(t *testing.T) { + now := time.Now() + + cases := map[string]struct { + usageLastCollectedOn time.Time + usageNextCollectOn time.Time + expectedResult bool + }{ + "within_usage_collection_period": { + usageLastCollectedOn: now.Add(-1 * usageCollectionPeriod), + expectedResult: false, + }, + "after_usage_collection_period_but_before_next_scheduled_collection": { + usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod), + usageNextCollectOn: now.Add(3 * time.Hour), + expectedResult: false, + }, + "after_usage_collection_period_and_after_next_scheduled_collection": { + usageLastCollectedOn: now.Add(-2 * usageCollectionPeriod), + usageNextCollectOn: now.Add(-1 * time.Hour), + expectedResult: true, + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + m := MetricSet{ + usageLastCollectedOn: test.usageLastCollectedOn, + usageNextCollectOn: test.usageNextCollectOn, + } + + actualResult := m.shouldCollectUsage(now) + require.Equal(t, test.expectedResult, actualResult) + }) + } +}