1// Copyright (c) 2017, Daniel MartΓ <mvdan@mvdan.cc>
2// See LICENSE for licensing information
3
4// Package pattern allows working with shell pattern matching notation, also
5// known as wildcards or globbing.
6//
7// For reference, see
8// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13.
9package pattern
10
11import (
12 "bytes"
13 "fmt"
14 "regexp"
15 "strconv"
16 "strings"
17)
18
19// Mode can be used to supply a number of options to the package's functions.
20// Not all functions change their behavior with all of the options below.
21type Mode uint
22
23type SyntaxError struct {
24 msg string
25 err error
26}
27
28func (e SyntaxError) Error() string { return e.msg }
29
30func (e SyntaxError) Unwrap() error { return e.err }
31
32const (
33 Shortest Mode = 1 << iota // prefer the shortest match.
34 Filenames // "*" and "?" don't match slashes; only "**" does
35 Braces // support "{a,b}" and "{1..4}"
36 EntireString // match the entire string using ^$ delimiters
37 NoGlobCase // Do case-insensitive match (that is, use (?i) in the regexp)
38)
39
40var numRange = regexp.MustCompile(`^([+-]?\d+)\.\.([+-]?\d+)}`)
41
42// Regexp turns a shell pattern into a regular expression that can be used with
43// [regexp.Compile]. It will return an error if the input pattern was incorrect.
44// Otherwise, the returned expression can be passed to [regexp.MustCompile].
45//
46// For example, Regexp(`foo*bar?`, true) returns `foo.*bar.`.
47//
48// Note that this function (and [QuoteMeta]) should not be directly used with file
49// paths if Windows is supported, as the path separator on that platform is the
50// same character as the escaping character for shell patterns.
51func Regexp(pat string, mode Mode) (string, error) {
52 needsEscaping := false
53noopLoop:
54 for _, r := range pat {
55 switch r {
56 // including those that need escaping since they are
57 // regular expression metacharacters
58 case '*', '?', '[', '\\', '.', '+', '(', ')', '|',
59 ']', '{', '}', '^', '$':
60 needsEscaping = true
61 break noopLoop
62 }
63 }
64 if !needsEscaping && mode&EntireString == 0 { // short-cut without a string copy
65 return pat, nil
66 }
67 closingBraces := []int{}
68 var buf bytes.Buffer
69 // Enable matching `\n` with the `.` metacharacter as globs match `\n`
70 buf.WriteString("(?s)")
71 dotMeta := false
72 if mode&NoGlobCase != 0 {
73 buf.WriteString("(?i)")
74 }
75 if mode&EntireString != 0 {
76 buf.WriteString("^")
77 }
78writeLoop:
79 for i := 0; i < len(pat); i++ {
80 switch c := pat[i]; c {
81 case '*':
82 if mode&Filenames != 0 {
83 if i++; i < len(pat) && pat[i] == '*' {
84 if i++; i < len(pat) && pat[i] == '/' {
85 buf.WriteString("(.*/|)")
86 dotMeta = true
87 } else {
88 buf.WriteString(".*")
89 dotMeta = true
90 i--
91 }
92 } else {
93 buf.WriteString("[^/]*")
94 i--
95 }
96 } else {
97 buf.WriteString(".*")
98 dotMeta = true
99 }
100 if mode&Shortest != 0 {
101 buf.WriteByte('?')
102 }
103 case '?':
104 if mode&Filenames != 0 {
105 buf.WriteString("[^/]")
106 } else {
107 buf.WriteByte('.')
108 dotMeta = true
109 }
110 case '\\':
111 if i++; i >= len(pat) {
112 return "", &SyntaxError{msg: `\ at end of pattern`}
113 }
114 buf.WriteString(regexp.QuoteMeta(string(pat[i])))
115 case '[':
116 name, err := charClass(pat[i:])
117 if err != nil {
118 return "", &SyntaxError{msg: "charClass invalid", err: err}
119 }
120 if name != "" {
121 buf.WriteString(name)
122 i += len(name) - 1
123 break
124 }
125 if mode&Filenames != 0 {
126 for _, c := range pat[i:] {
127 if c == ']' {
128 break
129 } else if c == '/' {
130 buf.WriteString("\\[")
131 continue writeLoop
132 }
133 }
134 }
135 buf.WriteByte(c)
136 if i++; i >= len(pat) {
137 return "", &SyntaxError{msg: "[ was not matched with a closing ]"}
138 }
139 switch c = pat[i]; c {
140 case '!', '^':
141 buf.WriteByte('^')
142 if i++; i >= len(pat) {
143 return "", &SyntaxError{msg: "[ was not matched with a closing ]"}
144 }
145 }
146 if c = pat[i]; c == ']' {
147 buf.WriteByte(']')
148 if i++; i >= len(pat) {
149 return "", &SyntaxError{msg: "[ was not matched with a closing ]"}
150 }
151 }
152 rangeStart := byte(0)
153 loopBracket:
154 for ; i < len(pat); i++ {
155 c = pat[i]
156 buf.WriteByte(c)
157 switch c {
158 case '\\':
159 if i++; i < len(pat) {
160 buf.WriteByte(pat[i])
161 }
162 continue
163 case ']':
164 break loopBracket
165 }
166 if rangeStart != 0 && rangeStart > c {
167 return "", &SyntaxError{msg: fmt.Sprintf("invalid range: %c-%c", rangeStart, c)}
168 }
169 if c == '-' {
170 rangeStart = pat[i-1]
171 } else {
172 rangeStart = 0
173 }
174 }
175 if i >= len(pat) {
176 return "", &SyntaxError{msg: "[ was not matched with a closing ]"}
177 }
178 case '{':
179 if mode&Braces == 0 {
180 buf.WriteString(regexp.QuoteMeta(string(c)))
181 break
182 }
183 innerLevel := 1
184 commas := false
185 peekBrace:
186 for j := i + 1; j < len(pat); j++ {
187 switch c := pat[j]; c {
188 case '{':
189 innerLevel++
190 case ',':
191 commas = true
192 case '\\':
193 j++
194 case '}':
195 if innerLevel--; innerLevel > 0 {
196 continue
197 }
198 if !commas {
199 break peekBrace
200 }
201 closingBraces = append(closingBraces, j)
202 buf.WriteString("(?:")
203 continue writeLoop
204 }
205 }
206 if match := numRange.FindStringSubmatch(pat[i+1:]); len(match) == 3 {
207 start, err1 := strconv.Atoi(match[1])
208 end, err2 := strconv.Atoi(match[2])
209 if err1 != nil || err2 != nil || start > end {
210 return "", &SyntaxError{msg: fmt.Sprintf("invalid range: %q", match[0])}
211 }
212 // TODO: can we do better here?
213 buf.WriteString("(?:")
214 for n := start; n <= end; n++ {
215 if n > start {
216 buf.WriteByte('|')
217 }
218 fmt.Fprintf(&buf, "%d", n)
219 }
220 buf.WriteByte(')')
221 i += len(match[0])
222 break
223 }
224 buf.WriteString(regexp.QuoteMeta(string(c)))
225 case ',':
226 if len(closingBraces) == 0 {
227 buf.WriteString(regexp.QuoteMeta(string(c)))
228 } else {
229 buf.WriteByte('|')
230 }
231 case '}':
232 if len(closingBraces) > 0 && closingBraces[len(closingBraces)-1] == i {
233 buf.WriteByte(')')
234 closingBraces = closingBraces[:len(closingBraces)-1]
235 } else {
236 buf.WriteString(regexp.QuoteMeta(string(c)))
237 }
238 default:
239 if c > 128 {
240 buf.WriteByte(c)
241 } else {
242 buf.WriteString(regexp.QuoteMeta(string(c)))
243 }
244 }
245 }
246 if mode&EntireString != 0 {
247 buf.WriteString("$")
248 }
249 // No `.` metacharacters were used, so don't return the (?s) flag.
250 if !dotMeta {
251 return string(buf.Bytes()[4:]), nil
252 }
253 return buf.String(), nil
254}
255
256func charClass(s string) (string, error) {
257 if strings.HasPrefix(s, "[[.") || strings.HasPrefix(s, "[[=") {
258 return "", fmt.Errorf("collating features not available")
259 }
260 if !strings.HasPrefix(s, "[[:") {
261 return "", nil
262 }
263 name := s[3:]
264 end := strings.Index(name, ":]]")
265 if end < 0 {
266 return "", fmt.Errorf("[[: was not matched with a closing :]]")
267 }
268 name = name[:end]
269 switch name {
270 case "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
271 "lower", "print", "punct", "space", "upper", "word", "xdigit":
272 default:
273 return "", fmt.Errorf("invalid character class: %q", name)
274 }
275 return s[:len(name)+6], nil
276}
277
278// HasMeta returns whether a string contains any unescaped pattern
279// metacharacters: '*', '?', or '['. When the function returns false, the given
280// pattern can only match at most one string.
281//
282// For example, HasMeta(`foo\*bar`) returns false, but HasMeta(`foo*bar`)
283// returns true.
284//
285// This can be useful to avoid extra work, like [Regexp]. Note that this
286// function cannot be used to avoid [QuoteMeta], as backslashes are quoted by
287// that function but ignored here.
288func HasMeta(pat string, mode Mode) bool {
289 for i := 0; i < len(pat); i++ {
290 switch pat[i] {
291 case '\\':
292 i++
293 case '*', '?', '[':
294 return true
295 case '{':
296 if mode&Braces != 0 {
297 return true
298 }
299 }
300 }
301 return false
302}
303
304// QuoteMeta returns a string that quotes all pattern metacharacters in the
305// given text. The returned string is a pattern that matches the literal text.
306//
307// For example, QuoteMeta(`foo*bar?`) returns `foo\*bar\?`.
308func QuoteMeta(pat string, mode Mode) string {
309 needsEscaping := false
310loop:
311 for _, r := range pat {
312 switch r {
313 case '{':
314 if mode&Braces == 0 {
315 continue
316 }
317 fallthrough
318 case '*', '?', '[', '\\':
319 needsEscaping = true
320 break loop
321 }
322 }
323 if !needsEscaping { // short-cut without a string copy
324 return pat
325 }
326 var sb strings.Builder
327 for _, r := range pat {
328 switch r {
329 case '*', '?', '[', '\\':
330 sb.WriteByte('\\')
331 case '{':
332 if mode&Braces != 0 {
333 sb.WriteByte('\\')
334 }
335 }
336 sb.WriteRune(r)
337 }
338 return sb.String()
339}