1// Package match provides a simple pattern matcher with unicode support.
2package match
3
4import (
5 "unicode/utf8"
6)
7
8// Match returns true if str matches pattern. This is a very
9// simple wildcard match where '*' matches on any number characters
10// and '?' matches on any one character.
11//
12// pattern:
13// { term }
14// term:
15// '*' matches any sequence of non-Separator characters
16// '?' matches any single non-Separator character
17// c matches character c (c != '*', '?', '\\')
18// '\\' c matches character c
19//
20func Match(str, pattern string) bool {
21 if pattern == "*" {
22 return true
23 }
24 return match(str, pattern, 0, nil, -1) == rMatch
25}
26
27// MatchLimit is the same as Match but will limit the complexity of the match
28// operation. This is to avoid long running matches, specifically to avoid ReDos
29// attacks from arbritary inputs.
30//
31// How it works:
32// The underlying match routine is recursive and may call itself when it
33// encounters a sandwiched wildcard pattern, such as: `user:*:name`.
34// Everytime it calls itself a counter is incremented.
35// The operation is stopped when counter > maxcomp*len(str).
36func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
37 if pattern == "*" {
38 return true, false
39 }
40 counter := 0
41 r := match(str, pattern, len(str), &counter, maxcomp)
42 if r == rStop {
43 return false, true
44 }
45 return r == rMatch, false
46}
47
// result is the outcome of one run of the internal match routine.
type result int

const (
	// rNoMatch means the string does not match the pattern.
	rNoMatch = result(iota)
	// rMatch means the string matches the pattern.
	rMatch
	// rStop means matching was abandoned because the complexity limit
	// was exceeded.
	rStop
)
55
56func match(str, pat string, slen int, counter *int, maxcomp int) result {
57 // check complexity limit
58 if maxcomp > -1 {
59 if *counter > slen*maxcomp {
60 return rStop
61 }
62 *counter++
63 }
64
65 for len(pat) > 0 {
66 var wild bool
67 pc, ps := rune(pat[0]), 1
68 if pc > 0x7f {
69 pc, ps = utf8.DecodeRuneInString(pat)
70 }
71 var sc rune
72 var ss int
73 if len(str) > 0 {
74 sc, ss = rune(str[0]), 1
75 if sc > 0x7f {
76 sc, ss = utf8.DecodeRuneInString(str)
77 }
78 }
79 switch pc {
80 case '?':
81 if ss == 0 {
82 return rNoMatch
83 }
84 case '*':
85 // Ignore repeating stars.
86 for len(pat) > 1 && pat[1] == '*' {
87 pat = pat[1:]
88 }
89
90 // If this star is the last character then it must be a match.
91 if len(pat) == 1 {
92 return rMatch
93 }
94
95 // Match and trim any non-wildcard suffix characters.
96 var ok bool
97 str, pat, ok = matchTrimSuffix(str, pat)
98 if !ok {
99 return rNoMatch
100 }
101
102 // Check for single star again.
103 if len(pat) == 1 {
104 return rMatch
105 }
106
107 // Perform recursive wildcard search.
108 r := match(str, pat[1:], slen, counter, maxcomp)
109 if r != rNoMatch {
110 return r
111 }
112 if len(str) == 0 {
113 return rNoMatch
114 }
115 wild = true
116 default:
117 if ss == 0 {
118 return rNoMatch
119 }
120 if pc == '\\' {
121 pat = pat[ps:]
122 pc, ps = utf8.DecodeRuneInString(pat)
123 if ps == 0 {
124 return rNoMatch
125 }
126 }
127 if sc != pc {
128 return rNoMatch
129 }
130 }
131 str = str[ss:]
132 if !wild {
133 pat = pat[ps:]
134 }
135 }
136 if len(str) == 0 {
137 return rMatch
138 }
139 return rNoMatch
140}
141
// matchTrimSuffix compares the non-wildcard tail of pat with the tail of str
// and strips the matching characters from both.
//
// It is used when a star in the pattern is followed by more pattern data.
// The comparison walks both strings backwards, one rune at a time, and ends
// at the first unescaped star (or when str runs out, or when only the
// leading star of pat is left). Walking backwards means that an escape has
// to be recognised by looking at the backslashes that precede a rune.
//
// The caller must pass a pattern of at least two bytes whose first byte is
// a star; the backwards scan for backslashes relies on that star to stop.
//
// It returns the trimmed string, the trimmed pattern, and false if a suffix
// character failed to match (true otherwise).
func matchTrimSuffix(str, pat string) (string, string, bool) {
	for len(str) != 0 && len(pat) > 1 {
		pr, pn := utf8.DecodeLastRuneInString(pat)

		// Count the backslashes directly in front of the last pattern rune.
		// An odd count means the rune itself is escaped; the escaping
		// backslash is then consumed together with the rune.
		slashes := 0
		for pat[len(pat)-pn-1-slashes] == '\\' {
			slashes++
		}
		escaped := slashes%2 == 1
		if escaped {
			pn++
		}

		// A real (unescaped) star ends the literal suffix.
		if !escaped && pr == '*' {
			return str, pat, true
		}

		sr, sn := utf8.DecodeLastRuneInString(str)
		// An unescaped '?' accepts any rune; anything else must be equal.
		if pr != sr && (escaped || pr != '?') {
			return str, pat, false
		}
		str, pat = str[:len(str)-sn], pat[:len(pat)-pn]
	}
	return str, pat, true
}
183
// maxRuneBytes is the UTF-8 encoding of utf8.MaxRune (U+10FFFF), the largest
// character a '?' can stand for.
var maxRuneBytes = [...]byte{244, 143, 191, 191}

// Allowable parses the pattern and determines the minimum and maximum
// allowable values that the pattern can represent.
//
// Only the part of the pattern in front of the first unescaped '*' is
// considered:
//
//   - a literal character contributes itself to both min and max;
//   - an escaped character ('\\' c) contributes c itself, without the
//     backslash, to both min and max;
//   - a '?' contributes a zero byte to min and the encoding of utf8.MaxRune
//     to max.
//
// When the scan was ended by a '*', the last character of max is replaced by
// the next higher code point. This is skipped when that character decodes
// as utf8.RuneError or is already utf8.MaxRune.
//
// For an empty pattern, or one that starts with '*', no bounds can be
// determined and two empty strings are returned.
func Allowable(pattern string) (min, max string) {
	if pattern == "" || pattern[0] == '*' {
		return "", ""
	}

	minb := make([]byte, 0, len(pattern))
	maxb := make([]byte, 0, len(pattern))
	var wild bool
scan:
	for i := 0; i < len(pattern); i++ {
		c := pattern[i]
		switch c {
		case '*':
			wild = true
			break scan
		case '?':
			minb = append(minb, 0)
			maxb = append(maxb, maxRuneBytes[:]...)
			continue
		case '\\':
			// An escape makes the following character literal, so the
			// bounds must hold that character rather than the backslash.
			// Only the first byte is taken here; the continuation bytes of
			// a multi-byte character are appended by later iterations.
			// A trailing backslash has nothing to escape and stays as is.
			if i+1 < len(pattern) {
				i++
				c = pattern[i]
			}
		}
		minb = append(minb, c)
		maxb = append(maxb, c)
	}

	if wild {
		// Anything may follow the prefix, so bump its last character to
		// the next code point.
		r, n := utf8.DecodeLastRune(maxb)
		if r != utf8.RuneError && r < utf8.MaxRune {
			var buf [utf8.UTFMax]byte
			nn := utf8.EncodeRune(buf[:], r+1)
			maxb = append(maxb[:len(maxb)-n], buf[:nn]...)
		}
	}
	return string(minb), string(maxb)
}
228
// IsPattern reports whether str contains at least one wildcard character,
// that is a '*' or a '?'.
func IsPattern(str string) bool {
	for _, b := range []byte(str) {
		switch b {
		case '*', '?':
			return true
		}
	}
	return false
}