-
Notifications
You must be signed in to change notification settings - Fork 20
/
match.go
237 lines (221 loc) · 5.37 KB
/
match.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// Package match provides a simple pattern matcher with unicode support.
package match
import (
"unicode/utf8"
)
// Match returns true if str matches pattern. This is a very
// simple wildcard match where '*' matches on any number characters
// and '?' matches on any one character.
//
// pattern:
// { term }
// term:
// '*' matches any sequence of non-Separator characters
// '?' matches any single non-Separator character
// c matches character c (c != '*', '?', '\\')
// '\\' c matches character c
//
func Match(str, pattern string) bool {
if pattern == "*" {
return true
}
return match(str, pattern, 0, nil, -1) == rMatch
}
// MatchLimit is the same as Match but will limit the complexity of the match
// operation. This is to avoid long running matches, specifically to avoid ReDos
// attacks from arbritary inputs.
//
// How it works:
// The underlying match routine is recursive and may call itself when it
// encounters a sandwiched wildcard pattern, such as: `user:*:name`.
// Everytime it calls itself a counter is incremented.
// The operation is stopped when counter > maxcomp*len(str).
func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
if pattern == "*" {
return true, false
}
counter := 0
r := match(str, pattern, len(str), &counter, maxcomp)
if r == rStop {
return false, true
}
return r == rMatch, false
}
type result int
const (
rNoMatch result = iota
rMatch
rStop
)
func match(str, pat string, slen int, counter *int, maxcomp int) result {
// check complexity limit
if maxcomp > -1 {
if *counter > slen*maxcomp {
return rStop
}
*counter++
}
for len(pat) > 0 {
var wild bool
pc, ps := rune(pat[0]), 1
if pc > 0x7f {
pc, ps = utf8.DecodeRuneInString(pat)
}
var sc rune
var ss int
if len(str) > 0 {
sc, ss = rune(str[0]), 1
if sc > 0x7f {
sc, ss = utf8.DecodeRuneInString(str)
}
}
switch pc {
case '?':
if ss == 0 {
return rNoMatch
}
case '*':
// Ignore repeating stars.
for len(pat) > 1 && pat[1] == '*' {
pat = pat[1:]
}
// If this star is the last character then it must be a match.
if len(pat) == 1 {
return rMatch
}
// Match and trim any non-wildcard suffix characters.
var ok bool
str, pat, ok = matchTrimSuffix(str, pat)
if !ok {
return rNoMatch
}
// Check for single star again.
if len(pat) == 1 {
return rMatch
}
// Perform recursive wildcard search.
r := match(str, pat[1:], slen, counter, maxcomp)
if r != rNoMatch {
return r
}
if len(str) == 0 {
return rNoMatch
}
wild = true
default:
if ss == 0 {
return rNoMatch
}
if pc == '\\' {
pat = pat[ps:]
pc, ps = utf8.DecodeRuneInString(pat)
if ps == 0 {
return rNoMatch
}
}
if sc != pc {
return rNoMatch
}
}
str = str[ss:]
if !wild {
pat = pat[ps:]
}
}
if len(str) == 0 {
return rMatch
}
return rNoMatch
}
// matchTrimSuffix matches and trims any non-wildcard suffix characters.
// Returns the trimed string and pattern.
//
// This is called because the pattern contains extra data after the wildcard
// star. Here we compare any suffix characters in the pattern to the suffix of
// the target string. Basically a reverse match that stops when a wildcard
// character is reached. This is a little trickier than a forward match because
// we need to evaluate an escaped character in reverse.
//
// Any matched characters will be trimmed from both the target
// string and the pattern.
func matchTrimSuffix(str, pat string) (string, string, bool) {
// It's expected that the pattern has at least two bytes and the first byte
// is a wildcard star '*'
match := true
for len(str) > 0 && len(pat) > 1 {
pc, ps := utf8.DecodeLastRuneInString(pat)
var esc bool
for i := 0; ; i++ {
if pat[len(pat)-ps-i-1] != '\\' {
if i&1 == 1 {
esc = true
ps++
}
break
}
}
if pc == '*' && !esc {
match = true
break
}
sc, ss := utf8.DecodeLastRuneInString(str)
if !((pc == '?' && !esc) || pc == sc) {
match = false
break
}
str = str[:len(str)-ss]
pat = pat[:len(pat)-ps]
}
return str, pat, match
}
var maxRuneBytes = [...]byte{244, 143, 191, 191}
// Allowable parses the pattern and determines the minimum and maximum allowable
// values that the pattern can represent.
// When the max cannot be determined, 'true' will be returned
// for infinite.
func Allowable(pattern string) (min, max string) {
if pattern == "" || pattern[0] == '*' {
return "", ""
}
minb := make([]byte, 0, len(pattern))
maxb := make([]byte, 0, len(pattern))
var wild bool
for i := 0; i < len(pattern); i++ {
if pattern[i] == '*' {
wild = true
break
}
if pattern[i] == '?' {
minb = append(minb, 0)
maxb = append(maxb, maxRuneBytes[:]...)
} else {
minb = append(minb, pattern[i])
maxb = append(maxb, pattern[i])
}
}
if wild {
r, n := utf8.DecodeLastRune(maxb)
if r != utf8.RuneError {
if r < utf8.MaxRune {
r++
if r > 0x7f {
b := make([]byte, 4)
nn := utf8.EncodeRune(b, r)
maxb = append(maxb[:len(maxb)-n], b[:nn]...)
} else {
maxb = append(maxb[:len(maxb)-n], byte(r))
}
}
}
}
return string(minb), string(maxb)
}
// IsPattern returns true if the string is a pattern.
func IsPattern(str string) bool {
for i := 0; i < len(str); i++ {
if str[i] == '*' || str[i] == '?' {
return true
}
}
return false
}