diff --git a/CHANGELOG.md b/CHANGELOG.md index eb99c02532..5edf441d56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel ### Added ### Changed +- [#2450](https://github.com/thanos-io/thanos/pull/2450) Store: regex-set optimization for `label=~"a|b|c"` matchers. ## [v0.12.0](https://github.com/thanos-io/thanos/releases/tag/v0.12.0) - 2020.04.15 diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index e0a1084312..c7d81098a7 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -1502,6 +1502,15 @@ func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Match return emptyPostingsGroup, nil } + if m.Type == labels.MatchRegexp && len(findSetMatches(m.Value)) > 0 { + vals := findSetMatches(m.Value) + toAdd := make([]labels.Label, 0, len(vals)) + for _, val := range vals { + toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val}) + } + return newPostingGroup(false, toAdd, nil), nil + } + // If the matcher selects an empty value, it selects all the series which don't // have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575 // and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555. 
// regexMetaCharacterBytes is a 128-bit lookup table with one bit per ASCII
// byte. For a byte b, the bit lives in element b%16 at bit position b/16.
// It is filled in by init and consulted by isRegexMetaCharacter.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether b is one of the regular expression
// metacharacters registered in regexMetaCharacterBytes, i.e. whether it
// would need a backslash escape to be matched literally.
func isRegexMetaCharacter(b byte) bool {
	// Bytes at or above utf8.RuneSelf are not single-byte ASCII characters
	// and are never metacharacters; rejecting them first also keeps the
	// shift below within the 8 bits of a table element.
	if b >= utf8.RuneSelf {
		return false
	}
	mask := byte(1) << (b / 16)
	return regexMetaCharacterBytes[b%16]&mask != 0
}

// init sets the table bit of every regex metacharacter.
func init() {
	const metaCharacters = `.+*?()|[]{}^$`
	for i := 0; i < len(metaCharacters); i++ {
		c := metaCharacters[i]
		regexMetaCharacterBytes[c%16] |= 1 << (c / 16)
	}
}
+func findSetMatches(pattern string) []string { + escaped := false + sets := []*strings.Builder{{}} + for i := 0; i < len(pattern); i++ { + if escaped { + switch { + case isRegexMetaCharacter(pattern[i]): + sets[len(sets)-1].WriteByte(pattern[i]) + case pattern[i] == '\\': + sets[len(sets)-1].WriteByte('\\') + default: + return nil + } + escaped = false + } else { + switch { + case isRegexMetaCharacter(pattern[i]): + if pattern[i] == '|' { + sets = append(sets, &strings.Builder{}) + } else { + return nil + } + case pattern[i] == '\\': + escaped = true + default: + sets[len(sets)-1].WriteByte(pattern[i]) + } + } + } + matches := make([]string, 0, len(sets)) + for _, s := range sets { + if s.Len() > 0 { + matches = append(matches, s.String()) + } + } + return matches +} diff --git a/pkg/store/opts_test.go b/pkg/store/opts_test.go new file mode 100644 index 0000000000..e79fdf35bc --- /dev/null +++ b/pkg/store/opts_test.go @@ -0,0 +1,53 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package store + +import ( + "testing" + + "github.com/thanos-io/thanos/pkg/testutil" +) + +// Refer to https://github.com/prometheus/prometheus/issues/2651. +func TestFindSetMatches(t *testing.T) { + cases := []struct { + pattern string + exp []string + }{ + // Simple sets. + { + pattern: "foo|bar|baz", + exp: []string{ + "foo", + "bar", + "baz", + }, + }, + // Simple sets containing escaped characters. + { + pattern: "fo\\.o|bar\\?|\\^baz", + exp: []string{ + "fo.o", + "bar?", + "^baz", + }, + }, + // Simple sets containing special characters without escaping. + { + pattern: "fo.o|bar?|^baz", + exp: nil, + }, + { + pattern: "foo\\|bar\\|baz", + exp: []string{ + "foo|bar|baz", + }, + }, + } + + for _, c := range cases { + matches := findSetMatches(c.pattern) + testutil.Equals(t, c.exp, matches) + } +}