diff --git a/CHANGELOG.md b/CHANGELOG.md index a9277d23c5..ae9f5391d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Added * [#5654](https://github.com/thanos-io/thanos/pull/5654) Query: add `--grpc-compression` flag that controls the compression used in gRPC client. With the flag it is now possible to compress the traffic between Query and StoreAPI nodes - you get lower network usage in exchange for a bit higher CPU/RAM usage. - [#5650](https://github.com/thanos-io/thanos/pull/5650) Query Frontend: Add sharded queries metrics. +- [#5658](https://github.com/thanos-io/thanos/pull/5658) Query Frontend: Introduce new optional parameters (`query-range.min-split-interval`, `query-range.max-split-interval`, `query-range.horizontal-shards`) to implement more dynamic horizontal query splitting. ### Changed diff --git a/cmd/thanos/query_frontend.go b/cmd/thanos/query_frontend.go index e9eee5966a..df7f2707bc 100644 --- a/cmd/thanos/query_frontend.go +++ b/cmd/thanos/query_frontend.go @@ -76,6 +76,17 @@ func registerQueryFrontend(app *extkingpin.App) { cmd.Flag("query-range.split-interval", "Split query range requests by an interval and execute in parallel, it should be greater than 0 when query-range.response-cache-config is configured."). Default("24h").DurationVar(&cfg.QueryRangeConfig.SplitQueriesByInterval) + cmd.Flag("query-range.min-split-interval", "Split query range requests above this interval in query-range.horizontal-shards requests of equal range. "+ + "Using this parameter is not allowed with query-range.split-interval. "+ + "One should also set query-range.split-min-horizontal-shards to a value greater than 1 to enable splitting."). + Default("0").DurationVar(&cfg.QueryRangeConfig.MinQuerySplitInterval) + + cmd.Flag("query-range.max-split-interval", "Split query range below this interval in query-range.horizontal-shards. Queries with a range longer than this value will be split in multiple requests of this length."). + Default("0").DurationVar(&cfg.QueryRangeConfig.MaxQuerySplitInterval) + + cmd.Flag("query-range.horizontal-shards", "Split queries in this many requests when query duration is below query-range.max-split-interval."). + Default("0").Int64Var(&cfg.QueryRangeConfig.HorizontalShards) + cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single query range request; beyond this, the downstream error is returned."). Default("5").IntVar(&cfg.QueryRangeConfig.MaxRetries) diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md index 044f87ce76..bc150cb40f 100644 --- a/docs/components/query-frontend.md +++ b/docs/components/query-frontend.md @@ -280,6 +280,10 @@ Flags: and end with their step for better cache-ability. Note: Grafana dashboards do that by default. + --query-range.horizontal-shards=0 + Split queries in this many requests when query + duration is below + query-range.max-split-interval. --query-range.max-query-length=0 Limit the query time range (end - start time) in the query-frontend, 0 disables it. @@ -290,6 +294,19 @@ Flags: Maximum number of retries for a single query range request; beyond this, the downstream error is returned. + --query-range.max-split-interval=0 + Split query range below this interval in + query-range.horizontal-shards. Queries with a + range longer than this value will be split in + multiple requests of this length. + --query-range.min-split-interval=0 + Split query range requests above this interval + in query-range.horizontal-shards requests of + equal range. Using this parameter is not + allowed with query-range.split-interval. One + should also set + query-range.split-min-horizontal-shards to a + value greater than 1 to enable splitting. --query-range.partial-response Enable partial response for query range requests if no partial_response param is diff --git a/pkg/queryfrontend/config.go b/pkg/queryfrontend/config.go index d4a9847f47..63be47e4b2 100644 --- a/pkg/queryfrontend/config.go +++ b/pkg/queryfrontend/config.go @@ -219,6 +219,9 @@ type QueryRangeConfig struct { AlignRangeWithStep bool RequestDownsampled bool SplitQueriesByInterval time.Duration + MinQuerySplitInterval time.Duration + MaxQuerySplitInterval time.Duration + HorizontalShards int64 MaxRetries int Limits *cortexvalidation.Limits } @@ -242,14 +245,25 @@ type LabelsConfig struct { // Validate a fully initialized config. func (cfg *Config) Validate() error { if cfg.QueryRangeConfig.ResultsCacheConfig != nil { - if cfg.QueryRangeConfig.SplitQueriesByInterval <= 0 { - return errors.New("split queries interval should be greater than 0 when caching is enabled") + if cfg.QueryRangeConfig.SplitQueriesByInterval <= 0 && !cfg.isDynamicSplitSet() { + return errors.New("split queries or split threshold interval should be greater than 0 when caching is enabled") } if err := cfg.QueryRangeConfig.ResultsCacheConfig.Validate(querier.Config{}); err != nil { return errors.Wrap(err, "invalid ResultsCache config for query_range tripperware") } } + if cfg.isDynamicSplitSet() && cfg.isStaticSplitSet() { + return errors.New("split queries interval and dynamic query split interval cannot be set at the same time") + } + + if cfg.isDynamicSplitSet() { + + if err := cfg.validateDynamicSplitParams(); err != nil { + return err + } + } + if cfg.LabelsConfig.ResultsCacheConfig != nil { if cfg.LabelsConfig.SplitQueriesByInterval <= 0 { return errors.New("split queries interval should be greater than 0 when caching is enabled") @@ -269,3 +283,28 @@ func (cfg *Config) Validate() error { return nil } + +func (cfg *Config) validateDynamicSplitParams() error { + if cfg.QueryRangeConfig.HorizontalShards <= 0 { + return errors.New("min horizontal shards should be greater than 0 when query split threshold is enabled") + } + + if cfg.QueryRangeConfig.MaxQuerySplitInterval <= 0 { + return errors.New("max query split interval should be greater than 0 when query split threshold is enabled") + } + + if cfg.QueryRangeConfig.MinQuerySplitInterval <= 0 { + return errors.New("min query split interval should be greater than 0 when query split threshold is enabled") + } + return nil +} + +func (cfg *Config) isStaticSplitSet() bool { + return cfg.QueryRangeConfig.SplitQueriesByInterval != 0 +} + +func (cfg *Config) isDynamicSplitSet() bool { + return cfg.QueryRangeConfig.MinQuerySplitInterval > 0 || + cfg.QueryRangeConfig.HorizontalShards > 0 || + cfg.QueryRangeConfig.MaxQuerySplitInterval > 0 +} diff --git a/pkg/queryfrontend/config_test.go b/pkg/queryfrontend/config_test.go new file mode 100644 index 0000000000..f5bbe21f44 --- /dev/null +++ b/pkg/queryfrontend/config_test.go @@ -0,0 +1,110 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package queryfrontend + +import ( + "fmt" + "testing" + "time" + + "github.com/thanos-io/thanos/internal/cortex/chunk/cache" + "github.com/thanos-io/thanos/internal/cortex/querier/queryrange" + "github.com/thanos-io/thanos/pkg/testutil" +) + +func TestConfig_Validate(t *testing.T) { + + type testCase struct { + name string + config Config + err string + } + + testCases := []testCase{ + { + name: "invalid query range options", + config: Config{ + QueryRangeConfig: QueryRangeConfig{ + SplitQueriesByInterval: 10 * time.Hour, + HorizontalShards: 10, + MinQuerySplitInterval: 1 * time.Hour, + MaxQuerySplitInterval: day, + }, + }, + err: "split queries interval and dynamic query split interval cannot be set at the same time", + }, + { + name: "invalid parameters for dynamic query range split", + config: Config{ + QueryRangeConfig: QueryRangeConfig{ + SplitQueriesByInterval: 0, + HorizontalShards: 0, + MinQuerySplitInterval: 1 * time.Hour, + }, + }, + err: "min horizontal shards should be greater than 0 when query split threshold is enabled", + }, + { + name: "invalid parameters for dynamic query range split - 2", + config: Config{ + QueryRangeConfig: QueryRangeConfig{ + SplitQueriesByInterval: 0, + HorizontalShards: 10, + MaxQuerySplitInterval: 0, + MinQuerySplitInterval: 1 * time.Hour, + }, + }, + err: "max query split interval should be greater than 0 when query split threshold is enabled", + }, + { + name: "invalid parameters for dynamic query range split - 3", + config: Config{ + QueryRangeConfig: QueryRangeConfig{ + SplitQueriesByInterval: 0, + HorizontalShards: 10, + MaxQuerySplitInterval: 1 * time.Hour, + MinQuerySplitInterval: 0, + }, + LabelsConfig: LabelsConfig{ + DefaultTimeRange: day, + }, + }, + err: "min query split interval should be greater than 0 when query split threshold is enabled", + }, + { + name: "valid config with caching", + config: Config{ + DownstreamURL: "localhost:8080", + QueryRangeConfig: QueryRangeConfig{ + SplitQueriesByInterval: 10 * time.Hour, + HorizontalShards: 0, + MaxQuerySplitInterval: 0, + MinQuerySplitInterval: 0, + ResultsCacheConfig: &queryrange.ResultsCacheConfig{ + CacheConfig: cache.Config{}, + Compression: "", + CacheQueryableSamplesStats: false, + }, + }, + LabelsConfig: LabelsConfig{ + DefaultTimeRange: day, + }, + }, + err: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := tc.config.Validate() + if tc.err != "" { + testutil.NotOk(t, err) + testutil.Equals(t, tc.err, err.Error()) + } else { + testutil.Ok(t, err) + fmt.Println(err) + } + }) + } +} diff --git a/pkg/queryfrontend/roundtrip.go b/pkg/queryfrontend/roundtrip.go index a8901ced16..aa9942ff7d 100644 --- a/pkg/queryfrontend/roundtrip.go +++ b/pkg/queryfrontend/roundtrip.go @@ -179,11 +179,9 @@ func newQueryRangeTripperware( ) } - queryIntervalFn := func(_ queryrange.Request) time.Duration { - return config.SplitQueriesByInterval - } + if config.SplitQueriesByInterval != 0 || config.MinQuerySplitInterval != 0 { + queryIntervalFn := dynamicIntervalFn(config) - if config.SplitQueriesByInterval != 0 { queryRangeMiddleware = append( queryRangeMiddleware, queryrange.InstrumentMiddleware("split_by_interval", m), @@ -237,6 +235,28 @@ func newQueryRangeTripperware( }, nil } +func dynamicIntervalFn(config QueryRangeConfig) queryrange.IntervalFn { + return func(r queryrange.Request) time.Duration { + // Use static interval, by default. + if config.SplitQueriesByInterval != 0 { + return config.SplitQueriesByInterval + } + + queryInterval := time.Duration(r.GetEnd()-r.GetStart()) * time.Millisecond + // If the query is multiple of max interval, we use the max interval to split. + if queryInterval/config.MaxQuerySplitInterval >= 2 { + return config.MaxQuerySplitInterval + } + + if queryInterval > config.MinQuerySplitInterval { + // If the query duration is less than max interval, we split it equally in HorizontalShards. + return time.Duration(queryInterval.Milliseconds()/config.HorizontalShards) * time.Millisecond + } + + return config.MinQuerySplitInterval + } +} + // newLabelsTripperware returns a Tripperware for labels and series requests // configured with middlewares of split by interval and retry. func newLabelsTripperware( diff --git a/pkg/queryfrontend/roundtrip_test.go b/pkg/queryfrontend/roundtrip_test.go index 91ef3ce697..cd50c6e964 100644 --- a/pkg/queryfrontend/roundtrip_test.go +++ b/pkg/queryfrontend/roundtrip_test.go @@ -241,12 +241,15 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) { labelsCodec := NewThanosLabelsCodec(true, 2*time.Hour) for _, tc := range []struct { - name string - splitInterval time.Duration - req queryrange.Request - codec queryrange.Codec - handlerFunc func(bool) (*int, http.Handler) - expected int + name string + splitInterval time.Duration + querySplitThreshold time.Duration + maxSplitInterval time.Duration + minHorizontalShards int64 + req queryrange.Request + codec queryrange.Codec + handlerFunc func(bool) (*int, http.Handler) + expected int }{ { name: "split interval == 0, disable split", @@ -280,6 +283,39 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) { splitInterval: 1 * time.Hour, expected: 2, }, + { + name: "split to 4 requests, due to min horizontal shards", + req: testRequest, + handlerFunc: promqlResults, + codec: queryRangeCodec, + splitInterval: 0, + querySplitThreshold: 30 * time.Minute, + maxSplitInterval: 4 * time.Hour, + minHorizontalShards: 4, + expected: 4, + }, + { + name: "split to 2 requests, due to maxSplitInterval", + req: testRequest, + handlerFunc: promqlResults, + codec: queryRangeCodec, + splitInterval: 0, + querySplitThreshold: 30 * time.Minute, + maxSplitInterval: 1 * time.Hour, + minHorizontalShards: 4, + expected: 2, + }, + { + name: "split to 2 requests, due to maxSplitInterval", + req: testRequest, + handlerFunc: promqlResults, + codec: queryRangeCodec, + splitInterval: 0, + querySplitThreshold: 2 * time.Hour, + maxSplitInterval: 4 * time.Hour, + minHorizontalShards: 4, + expected: 1, + }, { name: "labels request won't be split", req: testLabelsRequest, @@ -320,6 +356,9 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) { QueryRangeConfig: QueryRangeConfig{ Limits: defaultLimits, SplitQueriesByInterval: tc.splitInterval, + MinQuerySplitInterval: tc.querySplitThreshold, + MaxQuerySplitInterval: tc.maxSplitInterval, + HorizontalShards: tc.minHorizontalShards, }, LabelsConfig: LabelsConfig{ Limits: defaultLimits,