From 9b52dc87c76476a71c18543b18a951d783935853 Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sat, 30 Nov 2024 10:44:05 +0300 Subject: [PATCH] perf(chstorage): use keys index for logs attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` │ old │ new │ │ sec/op │ sec/op vs base │ LogQL/Lookup_by_materialzied_attribute 19.65m ± 17% 23.97m ± 31% ~ (p=0.217 n=15) LogQL/Lookup_by_regular_attribute 7501.61m ± 1% 47.10m ± 22% -99.37% (p=0.000 n=15) geomean 384.0m 33.60m -91.25% ``` --- internal/chstorage/querier_logs_query.go | 16 ++++++++++++---- internal/chstorage/schema_logs.go | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/internal/chstorage/querier_logs_query.go b/internal/chstorage/querier_logs_query.go index ed6e98c5..3cf6178e 100644 --- a/internal/chstorage/querier_logs_query.go +++ b/internal/chstorage/querier_logs_query.go @@ -530,8 +530,7 @@ func (q *Querier) logQLLabelMatcher( case logstorage.LabelTraceID: return matchHex(chsql.Ident("trace_id"), m) default: - expr, ok := q.getMaterializedLabelColumn(unmappedLabel) - if ok { + if expr, ok := q.getMaterializedLabelColumn(unmappedLabel); ok { switch m.Op { case logql.OpEq, logql.OpNotEq: return chsql.Eq(expr, chsql.String(m.Value)) @@ -543,11 +542,12 @@ func (q *Querier) logQLLabelMatcher( } exprs := make([]chsql.Expr, 0, 3) + keysExprs := make([]chsql.Expr, 0, cap(exprs)) // Search in all attributes. for _, column := range []string{ colAttrs, - colResource, colScope, + colResource, } { // TODO: how to match integers, booleans, floats, arrays? var ( @@ -563,8 +563,16 @@ func (q *Querier) logQLLabelMatcher( panic(fmt.Sprintf("unexpected label matcher op %v", m.Op)) } exprs = append(exprs, sub) + keysExprs = append(keysExprs, chsql.JSONExtractKeys(chsql.Ident(column))) } - return chsql.JoinOr(exprs...) + // Force ClickHouse to use the attribute_keys index. 
+ return chsql.And( + chsql.Has( + chsql.ArrayConcat(keysExprs...), + chsql.String(labelName), + ), + chsql.JoinOr(exprs...), + ) } } diff --git a/internal/chstorage/schema_logs.go b/internal/chstorage/schema_logs.go index 5f72b056..77281e22 100644 --- a/internal/chstorage/schema_logs.go +++ b/internal/chstorage/schema_logs.go @@ -33,6 +33,7 @@ const ( INDEX idx_trace_id trace_id TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx_body body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1, INDEX idx_ts timestamp TYPE minmax GRANULARITY 8192, + INDEX attribute_keys arrayConcat(JSONExtractKeys(attribute), JSONExtractKeys(scope), JSONExtractKeys(resource)) TYPE set(100), ) ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp)