-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
store: Added regex-set optimization to ExpandedPostings (#2450)
* Added regex-set optimization to ExpandedPostings
  Signed-off-by: Peter Štibraný <[email protected]>
* Fixed capitalization.
  Signed-off-by: Peter Štibraný <[email protected]>
* CHANGELOG.md
  Signed-off-by: Peter Štibraný <[email protected]>
* Removed unnecessary change.
  Signed-off-by: Peter Štibraný <[email protected]>
* Remove whitespace
  Signed-off-by: Peter Štibraný <[email protected]>
* Use testutil instead of testify.
  Signed-off-by: Peter Štibraný <[email protected]>
* Added copyright header, from original Prometheus querier.go
  Signed-off-by: Peter Štibraný <[email protected]>
* Use Thanos copyright header. 🤦
  Signed-off-by: Peter Štibraný <[email protected]>
* Added · at the end of the sentence. 🤯. I will randomly add emojis and GitHub emoji markup to commit messages that fix frustrating checks like this one. And intentionally not break the line. Let's see how lint deals with that! Ha.
  Signed-off-by: Peter Štibraný <[email protected]>
- Loading branch information
Showing
5 changed files
with
128 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// Copyright (c) The Thanos Authors. | ||
// Licensed under the Apache License 2.0. | ||
|
||
package store | ||
|
||
import ( | ||
"strings" | ||
"unicode/utf8" | ||
) | ||
|
||
// regexMetaCharacterBytes is the bitmap used by isRegexMetaCharacter to check
// whether a character needs to be escaped. The flag for an ASCII byte b is
// bit b/16 of element b%16. The table is filled in by init.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether byte b needs to be escaped.
// Bytes at or above utf8.RuneSelf (i.e. non-ASCII) are rejected up front.
func isRegexMetaCharacter(b byte) bool {
	return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
}

// init sets the bitmap flag for every character listed in the literal below.
// The backslash is not part of that list.
func init() {
	for _, b := range []byte(`.+*?()|[]{}^$`) {
		regexMetaCharacterBytes[b%16] |= 1 << (b / 16)
	}
}
|
||
// Copied from Prometheus querier.go, removed check for Prometheus wrapper. | ||
// Returns list of values that can regex matches. | ||
func findSetMatches(pattern string) []string { | ||
escaped := false | ||
sets := []*strings.Builder{{}} | ||
for i := 0; i < len(pattern); i++ { | ||
if escaped { | ||
switch { | ||
case isRegexMetaCharacter(pattern[i]): | ||
sets[len(sets)-1].WriteByte(pattern[i]) | ||
case pattern[i] == '\\': | ||
sets[len(sets)-1].WriteByte('\\') | ||
default: | ||
return nil | ||
} | ||
escaped = false | ||
} else { | ||
switch { | ||
case isRegexMetaCharacter(pattern[i]): | ||
if pattern[i] == '|' { | ||
sets = append(sets, &strings.Builder{}) | ||
} else { | ||
return nil | ||
} | ||
case pattern[i] == '\\': | ||
escaped = true | ||
default: | ||
sets[len(sets)-1].WriteByte(pattern[i]) | ||
} | ||
} | ||
} | ||
matches := make([]string, 0, len(sets)) | ||
for _, s := range sets { | ||
if s.Len() > 0 { | ||
matches = append(matches, s.String()) | ||
} | ||
} | ||
return matches | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// Copyright (c) The Thanos Authors. | ||
// Licensed under the Apache License 2.0. | ||
|
||
package store | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/thanos-io/thanos/pkg/testutil" | ||
) | ||
|
||
// Refer to https://github.com/prometheus/prometheus/issues/2651. | ||
func TestFindSetMatches(t *testing.T) { | ||
cases := []struct { | ||
pattern string | ||
exp []string | ||
}{ | ||
// Simple sets. | ||
{ | ||
pattern: "foo|bar|baz", | ||
exp: []string{ | ||
"foo", | ||
"bar", | ||
"baz", | ||
}, | ||
}, | ||
// Simple sets containing escaped characters. | ||
{ | ||
pattern: "fo\\.o|bar\\?|\\^baz", | ||
exp: []string{ | ||
"fo.o", | ||
"bar?", | ||
"^baz", | ||
}, | ||
}, | ||
// Simple sets containing special characters without escaping. | ||
{ | ||
pattern: "fo.o|bar?|^baz", | ||
exp: nil, | ||
}, | ||
{ | ||
pattern: "foo\\|bar\\|baz", | ||
exp: []string{ | ||
"foo|bar|baz", | ||
}, | ||
}, | ||
} | ||
|
||
for _, c := range cases { | ||
matches := findSetMatches(c.pattern) | ||
testutil.Equals(t, c.exp, matches) | ||
} | ||
} |