Max bytes read limit #8670

Merged: 55 commits, Mar 23, 2023
Changes from 10 commits.

Commits
91e7206
Working get index stats
salvacorts Feb 27, 2023
1f81aea
Refactor where matchers are extracted
salvacorts Feb 27, 2023
77895e8
Working check for limit
salvacorts Feb 28, 2023
1e2fc1b
Check for unsupported req type so we don't need a stats roundtripper
salvacorts Feb 28, 2023
f149835
Refactor ExtractMatcher func
salvacorts Mar 1, 2023
2043438
Add querier-level limit
salvacorts Mar 1, 2023
94f941b
Error message can be configured
salvacorts Mar 1, 2023
0aea9e1
Limit other query types
salvacorts Mar 1, 2023
4780298
Rename querier-level limit
salvacorts Mar 1, 2023
798313a
Add comments to `skipRequestType` func
salvacorts Mar 1, 2023
4aa7a34
Rename Subquery to Querier in new limits
salvacorts Mar 2, 2023
2a1df3e
Support queries with multiple matchers
salvacorts Mar 2, 2023
1674e67
Check if index type is TSDB
salvacorts Mar 2, 2023
648589e
Skip middlewares for index stats
salvacorts Mar 7, 2023
aaf90e5
Extract func to get index stats for matcher groups
salvacorts Mar 7, 2023
c9c8610
Reuse ShardingConfigs methods
salvacorts Mar 7, 2023
75b1d54
Refactor roundTripper to reuse roundTripperHandler
salvacorts Mar 7, 2023
af1c0d2
Fix errors
salvacorts Mar 7, 2023
a024f95
Re-use sharding index stats to check querier size limit
salvacorts Mar 7, 2023
db3fc7c
Human-readable bytes in errors
salvacorts Mar 7, 2023
e3eb162
Split index stats req in 24h
salvacorts Mar 8, 2023
d57b79b
Use MaxLookBackPeriod
salvacorts Mar 8, 2023
d5665f2
Minor changes
salvacorts Mar 8, 2023
7bc5eaf
Use skipmiddleware on NewLimitedTripperware
salvacorts Mar 8, 2023
8c26b1e
Check unshardable queries and check bytesPerShard
salvacorts Mar 9, 2023
1818e8d
Remove debug code for unsplit index request
salvacorts Mar 9, 2023
877e39e
Fix test compile
salvacorts Mar 9, 2023
5d87192
Fix test compilation
salvacorts Mar 9, 2023
deeca4a
Merge branch 'main' into salvacorts/max_data_query_limit
salvacorts Mar 9, 2023
43d96bc
Update docs
salvacorts Mar 20, 2023
ddce6aa
Apply formatting suggestions
salvacorts Mar 20, 2023
350e15c
Enforce parallelism of at least 1 in split by interval
salvacorts Mar 20, 2023
226a076
Test query size middleware
salvacorts Mar 20, 2023
28c27fa
Test multi-matchers and offset
salvacorts Mar 20, 2023
954407e
Rename function to create size limiter
salvacorts Mar 20, 2023
3d2fff3
Docs for NewRoundTripperHandler
salvacorts Mar 20, 2023
1704c5c
Test limit on non shardable queries
salvacorts Mar 21, 2023
9b8aa08
Test checkQuerySizeLimit on shardable queries
salvacorts Mar 21, 2023
77b046f
Merge branch 'main' into salvacorts/max_data_query_limit
salvacorts Mar 21, 2023
ff84730
Support per-request limits
salvacorts Mar 21, 2023
c350297
Improve docs.
salvacorts Mar 21, 2023
f1ba7eb
Delete unused function
salvacorts Mar 21, 2023
8ec053c
Add Changelog
salvacorts Mar 21, 2023
f422e0a
Enforce limits on instant/range log and metric queries
salvacorts Mar 21, 2023
54ec3ba
Test limits in roundtrip tests
salvacorts Mar 21, 2023
4f11f25
Configure retry mechanism on skipMiddleware
salvacorts Mar 22, 2023
c482fe9
Create splitByMetrics if metrics is null
salvacorts Mar 22, 2023
db98d7c
Remove MaxQuerierBytesRead from per query limits
salvacorts Mar 23, 2023
8515b43
Add advice to error message
salvacorts Mar 23, 2023
cfcc4e4
Log when limit is applied
salvacorts Mar 23, 2023
6661d30
Log both above and below the limit
salvacorts Mar 23, 2023
9c62ef7
Mapper returns bytesPerShard
salvacorts Mar 23, 2023
17bed47
Codec no longer needed in newASTMapperware
salvacorts Mar 23, 2023
5a591b3
Logging limit result in astMapperware.checkQuerySizeLimit
salvacorts Mar 23, 2023
10f5a43
Merge branch 'main' into salvacorts/max_data_query_limit
salvacorts Mar 23, 2023
23 changes: 23 additions & 0 deletions pkg/logql/syntax/parser.go
@@ -134,6 +134,29 @@ func ParseMatchers(input string) ([]*labels.Matcher, error) {
return matcherExpr.Mts, nil
}

// ExtractMatchersFromQuery extracts the matchers from a query.
// Compared to ParseMatchers, it will not fail if the query contains anything else.
func ExtractMatchersFromQuery(input string) ([]*labels.Matcher, error) {
Member:

There's a function syntax.MatcherGroups which is what you'll want, as it gives [(matchers, time_range)] pairs within a query. This is used in queryrange/shard_resolver.go.

salvacorts (Contributor Author), Mar 2, 2023:

Good call! I didn't think about queries that may contain multiple matchers, e.g. from TestMatcherGroups:

count_over_time({job="foo"}[5m]) / count_over_time({job="bar"}[5m] offset 10m)

IIUC, similarly to shard_resolver.go, we need to create an IndexStats request for each matcher group. Then we need to sum the bytes from each IndexStats response. That's the amount of data the query would read.
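
For illustration, a minimal self-contained sketch of that per-group summation; every type and helper below is a hypothetical stand-in rather than the Loki API:

package main

import (
	"context"
	"fmt"
)

// Hypothetical stand-ins for the real matcher-group and stats types.
type MatcherGroup struct{ Matchers string } // e.g. `{job="foo"}`
type IndexStats struct{ Bytes uint64 }
type statsFn func(ctx context.Context, g MatcherGroup) (IndexStats, error)

// totalBytesForQuery issues one stats lookup per matcher group and sums the
// bytes each group would read; the sum is what the whole query would fetch.
func totalBytesForQuery(ctx context.Context, groups []MatcherGroup, getStats statsFn) (uint64, error) {
	var total uint64
	for _, g := range groups {
		stats, err := getStats(ctx, g)
		if err != nil {
			return 0, err
		}
		total += stats.Bytes
	}
	return total, nil
}

func main() {
	fake := func(_ context.Context, _ MatcherGroup) (IndexStats, error) {
		return IndexStats{Bytes: 1 << 20}, nil // pretend each group reads 1 MiB
	}
	groups := []MatcherGroup{{Matchers: `{job="foo"}`}, {Matchers: `{job="bar"}`}}
	total, err := totalBytesForQuery(context.Background(), groups, fake)
	if err != nil {
		panic(err)
	}
	fmt.Println(total) // 2097152 bytes across the two matcher groups of the query above
}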

salvacorts (Contributor Author):

I implemented this in 2a1df3e. Looks like it works fine. If this is what you meant, I do have some new questions 😅:

  • Shall we set the concurrency to get index stats from a config, as done in shardResolverForConf?
    • I can see some places where the ForEach concurrency is set to non-configurable values.
    • For simplicity, I think it should be OK to leave the concurrency as len(matcherGroups) or a fixed, not-so-high value (e.g. 10). But happy to change my mind.
  • As in dynamicShardResolver.Shards, for log selector queries, should we also subtract the max_look_back_period from the query start time? I don't quite understand why that is needed for sharding.

Tested in loki-dev-009

Member:

max_look_back_period is generally most important in limited instant queries (which are rarely used). If I submit {job="foo"} with start=end, it would always return zero results. Prometheus has a concept of "look back this amount of time for instant queries" since metric data is sampled at some configurable scrape_interval (commonly 15s, 30s, or 1m). We sort of copy that idea and say, "find me logs from the past when start=end".
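
A rough sketch of that adjustment; the helper below is hypothetical, and the real behaviour is driven by Loki's configured max_look_back_period:

package main

import (
	"fmt"
	"time"
)

// widenInstantQuery turns a zero-width [start, end] range into one that looks
// back maxLookBack, so an instant log query can still match recent data.
func widenInstantQuery(start, end time.Time, maxLookBack time.Duration) (time.Time, time.Time) {
	if start.Equal(end) && maxLookBack > 0 {
		return start.Add(-maxLookBack), end
	}
	return start, end
}

func main() {
	now := time.Now()
	s, e := widenInstantQuery(now, now, 30*time.Second)
	fmt.Println(e.Sub(s)) // a 30s window instead of an empty one
}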

Contributor:

Just came across this myself. I don't think we need an extra method.

salvacorts (Contributor Author):

Removed

expr, err := ParseExpr(input)
if err != nil {
return nil, err
}

var matchers []*labels.Matcher
expr.Walk(func(e interface{}) {
switch concrete := e.(type) {
case *MatchersExpr:
matchers = concrete.Matchers()
}
})

if len(matchers) == 0 {
return nil, errors.New("failed to extract matchers from query")
}

return matchers, nil
}

func MatchersString(xs []*labels.Matcher) string {
return newMatcherExpr(xs).String()
}
87 changes: 86 additions & 1 deletion pkg/querier/queryrange/limits.go
@@ -10,6 +10,9 @@ import (

"github.com/go-kit/log/level"
"github.com/grafana/dskit/tenant"
"github.com/grafana/loki/pkg/logql/syntax"
"github.com/grafana/loki/pkg/querier/queryrange/queryrangebase/definitions"
"github.com/grafana/loki/pkg/util/flagext"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/timestamp"
@@ -27,7 +30,9 @@ import (
)

const (
limitErrTmpl = "maximum of series (%d) reached for a single query"
limitErrTmpl = "maximum of series (%d) reached for a single query"
limErrQueryTooManyBytesTmpl = "the query would read too many bytes (query: %s, limit: %s)"
limErrSubqueryTooManyBytesTmpl = "after splitting and sharding, at least one sub-query would read too many bytes (query: %s, limit: %s)"
Member:

Suggested change
limErrSubqueryTooManyBytesTmpl = "after splitting and sharding, at least one sub-query would read too many bytes (query: %s, limit: %s)"
limErrSubqueryTooManyBytesTmpl = "query too large to execute on a single querier, either because parallelization is not enabled or the query is unshardable: (query: %s, limit: %s)"

salvacorts (Contributor Author):

Done, thank you!

)

var (
@@ -45,13 +50,16 @@ type Limits interface {
// TSDBMaxQueryParallelism returns the limit to the number of split queries the
// frontend will process in parallel for TSDB queries.
TSDBMaxQueryParallelism(string) int
MaxQueryBytesRead(u string) int
MaxSubqueryBytesRead(u string) int
Member:

I'd rename this to MaxQuerierBytesRead, as the term subquery is overloaded in Prometheus (and we may eventually add this concept in Loki).

salvacorts (Contributor Author):

Good call, done.

}

type limits struct {
Limits
// Use pointers so nil value can indicate if the value was set.
splitDuration *time.Duration
maxQueryParallelism *int
maxQueryBytesRead *int
}

func (l limits) QuerySplitDuration(user string) time.Duration {
@@ -178,6 +186,83 @@ func (l limitsMiddleware) Do(ctx context.Context, r queryrangebase.Request) (que
return l.next.Do(ctx, r)
}

type querySizeLimiter struct {
next queryrangebase.Handler
maxQueryBytesRead func(string) int
errorTemplate string
}

// NewQuerySizeLimiterMiddleware creates a new Middleware that enforces query size limits.
// The errorTemplate should format two strings: the bytes that would be read and the bytes limit.
func NewQuerySizeLimiterMiddleware(maxQueryBytesRead func(string) int, errorTemplate string) queryrangebase.Middleware {
Member:

Each request will send a request to the index-gws to sample the index. Here are a few things we'll want to take into account:

  • Skip non-TSDB schemas since they don't have support for this.
  • All TSDB requests already query the index-gw for the same data to use in query planning (guessing correct shard factors). Look at how shard_resolver.go implements this, but we'll want to make sure we only pre-query the index once per request. Thinking about this more, we'll need to query the index for the entire query time-range first to figure out if the query is too large. This will effectively double the index load for IndexStats requests traffic, but I think that's OK for now (we can optimize that part later if/when it becomes an issue). We will want to split these first-pass IndexStats requests in a maximum of 24h chunks since our indices are sharded by 24h periods. Take a look at NewSeriesTripperware for an idea of how this works. Note: adding this limit may expose new bottlenecks in our idx-gws+tsdb that we need to address.

salvacorts (Contributor Author):

Skip non-TSDB schemas since they don't have support for this.

Implemented in 1674e67.

We will want to split these first-pass IndexStats requests in a maximum of 24h chunks since our indices are sharded by 24h periods. Take a look at NewSeriesTripperware for an idea of how this works. Note: adding this limit may expose new bottlenecks in our idx-gws+tsdb that we need to address.

By "first-pass IndexStats requests" here: Do you mean the IndexStat requests to get the stats for all the matcher groups in the whole query (before splitting and sharding)?

I'm not sure I understand the 24h part, do we have any docs so I can better understand this 24h sharding at the index level?

salvacorts (Contributor Author):

Done
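
To illustrate the 24h part, a minimal sketch of splitting a first-pass stats request into at-most-24h sub-ranges; the helper is hypothetical, and Loki's real splitting middleware also handles things like per-tenant intervals and step alignment:

package main

import (
	"fmt"
	"time"
)

// splitInto24hRanges breaks [start, end) into sub-ranges of at most 24h, so
// each sub-request lines up with indices that are sharded by 24h periods.
func splitInto24hRanges(start, end time.Time) [][2]time.Time {
	const day = 24 * time.Hour
	var out [][2]time.Time
	for s := start; s.Before(end); s = s.Add(day) {
		e := s.Add(day)
		if e.After(end) {
			e = end
		}
		out = append(out, [2]time.Time{s, e})
	}
	return out
}

func main() {
	start := time.Date(2023, 3, 1, 6, 0, 0, 0, time.UTC)
	end := start.Add(60 * time.Hour) // a 2.5-day query
	for _, r := range splitInto24hRanges(start, end) {
		fmt.Println(r[0].Format(time.RFC3339), "->", r[1].Format(time.RFC3339)) // three sub-requests: 24h, 24h, 12h
	}
}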

return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler {
return &querySizeLimiter{
next: next,
maxQueryBytesRead: maxQueryBytesRead,
errorTemplate: errorTemplate,
}
})
}

// getIndexStatsForRequest return the index stats for the matchers in r's query
func (q *querySizeLimiter) getIndexStatsForRequest(ctx context.Context, r queryrangebase.Request) (*logproto.IndexStatsResponse, error) {
matchers, err := syntax.ExtractMatchersFromQuery(r.GetQuery())
if err != nil {
return nil, err
}

// Get Stats for this query
var indexStatsReq definitions.Request = &logproto.IndexStatsRequest{}
salvacorts (Contributor Author):

IIUC, it's expected that the stats can report more bytes than what is actually read. What's the maximum difference we have seen or are aware of? I'm concerned about a query that would otherwise run fine being refused because the stats misreported how much data it would load.

If we know that the stats values can be up to X% bigger than the true values, maybe we can add that X% as a threshold to the limit set by the user?

Member:

We can also just target the bytes returned by the index, which seems more straightforward

salvacorts (Contributor Author):

We can also just target the bytes returned by the index, which seems more straightforward.

That's what the IndexStatsRequest returns, isn't it? If so, my comment above still applies: IIRC the IndexStatsResponse can report way more bytes than what is actually in the chunks.

Member:

The index will return the bytes in the chunks, but we may not need to query them all (in the case of reaching the limit first). There is some duplication due to replication_factor (which still needs to be deduped at query time) and some chunks being recorded in multiple indices (which are time-sharded by day). I don't think this really matters though. We'll just set the expected limit based on what the index returns rather than the amount of data that ends up being queried, which is harder to measure/not known before we run the query (in the case of limited queries).

For instance, if the IndexStats returns 100GB but we regularly query 70GB of underlying data, that's fine. We'll just set the query.max-bytes=100GB. Basically, base our idea of what the limit should be on the IndexStats response, not the amount of data we ended up querying.

indexStatsReq = indexStatsReq.WithStartEnd(r.GetStart(), r.GetEnd())
indexStatsReq = indexStatsReq.WithQuery(syntax.MatchersString(matchers))

resp, err := q.next.Do(ctx, indexStatsReq)
if err != nil {
return nil, err
}

return resp.(*IndexStatsResponse).Response, nil

}

// skipRequestType returns whether we should enforce the q.maxQueryBytesRead limit
// on the r request type.
// This is needed when we have two instances of this querySizeLimiter in the same middleware pipeline
// since we don't want to compute the stats for the stats request from the upper querySizeLimiter.
func (q *querySizeLimiter) skipRequestType(r queryrangebase.Request) bool {
_, ok := r.(*logproto.IndexStatsRequest)
return ok
}

func (q *querySizeLimiter) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
if q.skipRequestType(r) {
return q.next.Do(ctx, r)
}

log, ctx := spanlogger.New(ctx, "query_size_limits")
defer log.Finish()

tenantIDs, err := tenant.TenantIDs(ctx)
if err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
}

if maxBytesRead := validation.SmallestPositiveNonZeroIntPerTenant(tenantIDs, q.maxQueryBytesRead); maxBytesRead > 0 {
stats, err := q.getIndexStatsForRequest(ctx, r)
if err != nil {
return nil, httpgrpc.Errorf(http.StatusInternalServerError, "Failed to get index stats for query: %s", err.Error())
}

if int(stats.Bytes) > maxBytesRead {
statsBytesStr := flagext.ByteSize(stats.Bytes).String()
maxBytesReadStr := flagext.ByteSize(maxBytesRead).String()
return nil, httpgrpc.Errorf(http.StatusBadRequest, q.errorTemplate, statsBytesStr, maxBytesReadStr)
}
}

return q.next.Do(ctx, r)
}
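
The limit used above is resolved with validation.SmallestPositiveNonZeroIntPerTenant, i.e. the smallest positive non-zero value across the request's tenants; a sketch of that rule as the name implies it (hypothetical code, not the dskit implementation):

package main

import "fmt"

// smallestPositiveNonZero returns the lowest non-zero limit across tenants,
// or 0 when no tenant has a limit configured (0 meaning "do not enforce").
func smallestPositiveNonZero(tenantIDs []string, limit func(string) int) int {
	result := 0
	for _, id := range tenantIDs {
		if v := limit(id); v > 0 && (result == 0 || v < result) {
			result = v
		}
	}
	return result
}

func main() {
	limits := map[string]int{"tenant-a": 0, "tenant-b": 500 << 20, "tenant-c": 100 << 20}
	tenants := []string{"tenant-a", "tenant-b", "tenant-c"}
	// The strictest configured limit wins: 100 MiB (104857600 bytes).
	fmt.Println(smallestPositiveNonZero(tenants, func(id string) int { return limits[id] }))
}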

type seriesLimiter struct {
hashes map[uint64]struct{}
rw sync.RWMutex
12 changes: 12 additions & 0 deletions pkg/querier/queryrange/queryrangebase/results_cache.go
@@ -211,7 +211,19 @@ func NewResultsCacheMiddleware(
}), nil
}

// skipRequestType returns whether we should cache the r request type.
// This is needed when we have middlewares that send different requests types down
// in the pipeline that do not support caching.
func (s *resultsCache) skipRequestType(r Request) bool {
salvacorts (Contributor Author):

This is needed to avoid caching the IndexStats request sent down the middleware pipeline by the querySizeLimiter. The same applies to sharding and splitting, as these panic if the request type is an IndexStats request.

I feel this type-checking solution is a bit too hacky. I think it would be nice if we could set a flag in the context to skip caching. Same for splitting and sharding. In the querySizeLimiter.Do function we would clone the input request context and enable these new flags so the stats request is not cached, split, or sharded.

I don't think implementing that would be too complicated. Moreover, we could set those context flags by looking at the HTTP request headers, which can be handy for debugging and testing. Wdyt?

Member:

Rather than threading this logic through our middleware chain, what if we had a separate skip middleware chain for pre-querying these IndexStats requests? Look at how NewQueryShardMiddleware embeds two different middleware chains depending on whether we should use sharding or not. This would allow us to handle this without subsequent middlewares needing to be aware of this.

Another idea for later: what if we cached IndexStats requests in a results cache? What if we cached other types of index queries in results caches?
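
A sketch of that two-chain idea; the types below are hypothetical stand-ins for queryrangebase.Request and Handler, not the real interfaces:

package queryrange

import "context"

// Hypothetical stand-ins for queryrangebase.Request and queryrangebase.Handler.
type request interface{ isRequest() }

type indexStatsRequest struct{}

func (indexStatsRequest) isRequest() {}

type handler interface {
	Do(ctx context.Context, r request) (interface{}, error)
}

// chainRouter sends IndexStats traffic through a slimmed-down chain (e.g. retry
// only) and everything else through the full chain (cache, split, shard, retry),
// so downstream middlewares never have to special-case the request type.
type chainRouter struct {
	statsChain handler
	fullChain  handler
}

func (c chainRouter) Do(ctx context.Context, r request) (interface{}, error) {
	if _, ok := r.(indexStatsRequest); ok {
		return c.statsChain.Do(ctx, r)
	}
	return c.fullChain.Do(ctx, r)
}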

salvacorts (Contributor Author):

Look at how NewQueryShardMiddleware embeds two different middleware chains depending on whether we should use sharding or not. This would allow us to handle this without subsequent middlewares needing to be aware of this.

IIUC, the pattern used in NewQueryShardMiddleware is useful only for bypassing that middleware itself, not the middlewares above or below it in the pipeline.

In other words, when sharding is disabled, the request will skip the sharding-related middleware (shardingware) configured in NewQueryShardMiddleware, but it will still go through the rest of the middlewares configured in NewMetricTripperware after the sharding middleware (for example, it will go through NewRetryMiddleware). The same applies to the IndexStats request done at dynamicShardResolver.Shards: it skips the sharding mechanism, but not whatever is configured after sharding.

salvacorts (Contributor Author):

I've implemented something similar at 648589e and 75b1d54.

_, ok := r.(*logproto.IndexStatsRequest)
return ok
}

func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) {
if s.skipRequestType(r) {
return s.next.Do(ctx, r)
}

tenantIDs, err := tenant.TenantIDs(ctx)
if err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
19 changes: 19 additions & 0 deletions pkg/querier/queryrange/querysharding.go
@@ -232,7 +232,26 @@ type shardSplitter struct {
now func() time.Time // injectable time.Now
}

// skipRequestType returns whether we should apply sharding on the r request type.
// This is needed when we have middlewares that send different requests types down
// in the pipeline that do not support sharding.
func (splitter *shardSplitter) skipRequestType(r queryrangebase.Request) bool {
if _, ok := r.(*LokiRequest); ok {
return false
}

if _, ok := r.(*LokiInstantRequest); ok {
return false
}

return true
}

func (splitter *shardSplitter) Do(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) {
if splitter.skipRequestType(r) {
return splitter.next.Do(ctx, r)
}

tenantIDs, err := tenant.TenantIDs(ctx)
if err != nil {
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
38 changes: 37 additions & 1 deletion pkg/querier/queryrange/roundtrip.go
@@ -292,6 +292,7 @@ func NewLogFilterTripperware(
StatsCollectorMiddleware(),
NewLimitsMiddleware(limits),
queryrangebase.InstrumentMiddleware("split_by_interval", metrics.InstrumentMiddlewareMetrics),
queryrangebase.InstrumentMiddleware("split_by_interval", metrics.InstrumentMiddlewareMetrics),
SplitByIntervalMiddleware(schema.Configs, limits, codec, splitByTime, metrics.SplitByMetrics),
}

@@ -326,6 +327,12 @@
)
}

// Limit the bytes the sub-queries would fetch after splitting and sharding
queryRangeMiddleware = append(
queryRangeMiddleware,
NewQuerySizeLimiterMiddleware(limits.MaxQueryBytesRead, limErrSubqueryTooManyBytesTmpl),
)

if cfg.MaxRetries > 0 {
queryRangeMiddleware = append(
queryRangeMiddleware, queryrangebase.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics),
@@ -354,6 +361,7 @@ func NewLimitedTripperware(
queryRangeMiddleware := []queryrangebase.Middleware{
StatsCollectorMiddleware(),
NewLimitsMiddleware(limits),
NewQuerySizeLimiterMiddleware(limits.MaxQueryBytesRead, limErrQueryTooManyBytesTmpl),
queryrangebase.InstrumentMiddleware("split_by_interval", metrics.InstrumentMiddlewareMetrics),
// Limited queries only need to fetch up to the requested line limit worth of logs,
// Our defaults for splitting and parallelism are much too aggressive for large customers and result in
@@ -396,6 +404,12 @@
)
}

// Limit the bytes the sub-queries would fetch after splitting and sharding
queryRangeMiddleware = append(
queryRangeMiddleware,
NewQuerySizeLimiterMiddleware(limits.MaxQueryBytesRead, limErrSubqueryTooManyBytesTmpl),
)

if cfg.MaxRetries > 0 {
queryRangeMiddleware = append(
queryRangeMiddleware, queryrangebase.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics),
@@ -506,7 +520,11 @@ func NewMetricTripperware(
metrics *Metrics,
registerer prometheus.Registerer,
) (queryrangebase.Tripperware, error) {
queryRangeMiddleware := []queryrangebase.Middleware{StatsCollectorMiddleware(), NewLimitsMiddleware(limits)}
queryRangeMiddleware := []queryrangebase.Middleware{
StatsCollectorMiddleware(),
NewLimitsMiddleware(limits),
}

if cfg.AlignQueriesWithStep {
queryRangeMiddleware = append(
queryRangeMiddleware,
@@ -515,6 +533,12 @@
)
}

// Limit the bytes the query would fetch regardless of splitting and sharding.
queryRangeMiddleware = append(
queryRangeMiddleware,
NewQuerySizeLimiterMiddleware(limits.MaxQueryBytesRead, limErrQueryTooManyBytesTmpl),
)

queryRangeMiddleware = append(
queryRangeMiddleware,
queryrangebase.InstrumentMiddleware("split_by_interval", metrics.InstrumentMiddlewareMetrics),
@@ -570,6 +594,12 @@
)
}

// Limit the bytes the sub-queries would fetch after splitting and sharding
queryRangeMiddleware = append(
queryRangeMiddleware,
NewQuerySizeLimiterMiddleware(limits.MaxSubqueryBytesRead, limErrSubqueryTooManyBytesTmpl),
)

if cfg.MaxRetries > 0 {
queryRangeMiddleware = append(
queryRangeMiddleware,
@@ -618,6 +648,12 @@ func NewInstantMetricTripperware(
)
}

// Limit the bytes the sub-queries would fetch after sharding
queryRangeMiddleware = append(
queryRangeMiddleware,
NewQuerySizeLimiterMiddleware(limits.MaxSubqueryBytesRead, limErrSubqueryTooManyBytesTmpl),
)

if cfg.MaxRetries > 0 {
queryRangeMiddleware = append(
queryRangeMiddleware,
8 changes: 6 additions & 2 deletions pkg/querier/queryrange/split_by_interval.go
@@ -317,13 +317,17 @@ func reduceSplitIntervalForRangeVector(r queryrangebase.Request, interval time.D
func splitMetricByTime(r queryrangebase.Request, interval time.Duration) ([]queryrangebase.Request, error) {
var reqs []queryrangebase.Request

lokiReq, ok := r.(*LokiRequest)
if !ok {
// If this type of request cannot get split
return []queryrangebase.Request{}, nil
}

interval, err := reduceSplitIntervalForRangeVector(r, interval)
if err != nil {
return nil, err
}

lokiReq := r.(*LokiRequest)

// step align start and end time of the query. Start time is rounded down and end time is rounded up.
stepNs := r.GetStep() * 1e6
startNs := lokiReq.StartTs.UnixNano()
21 changes: 19 additions & 2 deletions pkg/validation/limits.go
@@ -96,8 +96,10 @@ type Limits struct {
QueryTimeout model.Duration `yaml:"query_timeout" json:"query_timeout"`

// Query frontend enforced limits. The default is actually parameterized by the queryrange config.
QuerySplitDuration model.Duration `yaml:"split_queries_by_interval" json:"split_queries_by_interval"`
MinShardingLookback model.Duration `yaml:"min_sharding_lookback" json:"min_sharding_lookback"`
QuerySplitDuration model.Duration `yaml:"split_queries_by_interval" json:"split_queries_by_interval"`
MinShardingLookback model.Duration `yaml:"min_sharding_lookback" json:"min_sharding_lookback"`
MaxQueryBytesRead flagext.ByteSize `yaml:"max_query_bytes_read" json:"max_query_bytes_read"`
MaxSubqueryBytesRead flagext.ByteSize `yaml:"max_subquery_bytes_read" json:"max_subquery_bytes_read"`
salvacorts (Contributor Author), Mar 1, 2023:

We had this open question in the epic:

Where should we enforce the querier-level limit?

I decided to implement this check on the frontend rather than on the querier, since that way the query-level and subquery-level limits are enforced close to each other, in the same component.

Member:

Suggested change
MaxSubqueryBytesRead flagext.ByteSize `yaml:"max_subquery_bytes_read" json:"max_subquery_bytes_read"`
MaxQuerierBytesRead flagext.ByteSize `yaml:"max_querier_bytes_read" json:"max_querier_bytes_read"`

nit: naming as specified earlier

salvacorts (Contributor Author):

Done


// Ruler defaults and limits.
RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
@@ -223,6 +225,11 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
_ = l.MinShardingLookback.Set("0s")
f.Var(&l.MinShardingLookback, "frontend.min-sharding-lookback", "Limit queries that can be sharded. Queries within the time range of now and now minus this sharding lookback are not sharded. The default value of 0s disables the lookback, causing sharding of all queries at all times.")

_ = l.MaxQueryBytesRead.Set("0B")
Member:

Suggested change
_ = l.MaxQueryBytesRead.Set("0B")

nit: 0B is the default when unset

salvacorts (Contributor Author):

Thank you!

f.Var(&l.MaxQueryBytesRead, "frontend.max-query-bytes-read", "TODO: Max number of bytes a query would fetch")
_ = l.MaxSubqueryBytesRead.Set("0B")
Member:

Suggested change
_ = l.MaxSubqueryBytesRead.Set("0B")

nit: 0B is the default zero value when unset

salvacorts (Contributor Author):

Thank you!

f.Var(&l.MaxSubqueryBytesRead, "frontend.max-subquery-bytes-read", "TODO: Max number of bytes a sub query would fetch after splitting and sharding")

_ = l.MaxCacheFreshness.Set("1m")
f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")

@@ -474,6 +481,16 @@ func (o *Overrides) QuerySplitDuration(userID string) time.Duration {
return time.Duration(o.getOverridesForUser(userID).QuerySplitDuration)
}

// MaxQueryBytesRead returns the maximum bytes a query can read.
func (o *Overrides) MaxQueryBytesRead(userID string) int {
return o.getOverridesForUser(userID).MaxQueryBytesRead.Val()
}

// MaxSubqueryBytesRead returns the maximum bytes a sub query can read after splitting and sharding.
func (o *Overrides) MaxSubqueryBytesRead(userID string) int {
return o.getOverridesForUser(userID).MaxSubqueryBytesRead.Val()
}

// MaxConcurrentTailRequests returns the limit to number of concurrent tail requests.
func (o *Overrides) MaxConcurrentTailRequests(userID string) int {
return o.getOverridesForUser(userID).MaxConcurrentTailRequests