1// Provide string-matching based on fnmatch.3
2package fnmatch
3
4// There are a few issues that I believe to be bugs, but this implementation is
5// based as closely as possible on BSD fnmatch. These bugs are present in the
6// source of BSD fnmatch, and so are replicated here. The issues are as follows:
7//
8// * FNM_PERIOD is no longer observed after the first * in a pattern
9// This only applies to matches done with FNM_PATHNAME as well
10// * FNM_PERIOD doesn't apply to ranges. According to the documentation,
11// a period must be matched explicitly, but a range will match it too
12
13import (
14 "unicode"
15 "unicode/utf8"
16)
17
18const (
19 FNM_NOESCAPE = (1 << iota)
20 FNM_PATHNAME
21 FNM_PERIOD
22
23 FNM_LEADING_DIR
24 FNM_CASEFOLD
25
26 FNM_IGNORECASE = FNM_CASEFOLD
27 FNM_FILE_NAME = FNM_PATHNAME
28)
29
30func unpackRune(str *string) rune {
31 rune, size := utf8.DecodeRuneInString(*str)
32 *str = (*str)[size:]
33 return rune
34}
35
36// Matches the pattern against the string, with the given flags,
37// and returns true if the match is successful.
38// This function should match fnmatch.3 as closely as possible.
39func Match(pattern, s string, flags int) bool {
40 // The implementation for this function was patterned after the BSD fnmatch.c
41 // source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
42 noescape := (flags&FNM_NOESCAPE != 0)
43 pathname := (flags&FNM_PATHNAME != 0)
44 period := (flags&FNM_PERIOD != 0)
45 leadingdir := (flags&FNM_LEADING_DIR != 0)
46 casefold := (flags&FNM_CASEFOLD != 0)
47 // the following is some bookkeeping that the original fnmatch.c implementation did not do
48 // We are forced to do this because we're not keeping indexes into C strings but rather
49 // processing utf8-encoded strings. Use a custom unpacker to maintain our state for us
50 sAtStart := true
51 sLastAtStart := true
52 sLastSlash := false
53 sLastUnpacked := rune(0)
54 unpackS := func() rune {
55 sLastSlash = (sLastUnpacked == '/')
56 sLastUnpacked = unpackRune(&s)
57 sLastAtStart = sAtStart
58 sAtStart = false
59 return sLastUnpacked
60 }
61 for len(pattern) > 0 {
62 c := unpackRune(&pattern)
63 switch c {
64 case '?':
65 if len(s) == 0 {
66 return false
67 }
68 sc := unpackS()
69 if pathname && sc == '/' {
70 return false
71 }
72 if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
73 return false
74 }
75 case '*':
76 // collapse multiple *'s
77 // don't use unpackRune here, the only char we care to detect is ASCII
78 for len(pattern) > 0 && pattern[0] == '*' {
79 pattern = pattern[1:]
80 }
81 if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
82 return false
83 }
84 // optimize for patterns with * at end or before /
85 if len(pattern) == 0 {
86 if pathname {
87 return leadingdir || (strchr(s, '/') == -1)
88 } else {
89 return true
90 }
91 return !(pathname && strchr(s, '/') >= 0)
92 } else if pathname && pattern[0] == '/' {
93 offset := strchr(s, '/')
94 if offset == -1 {
95 return false
96 } else {
97 // we already know our pattern and string have a /, skip past it
98 s = s[offset:] // use unpackS here to maintain our bookkeeping state
99 unpackS()
100 pattern = pattern[1:] // we know / is one byte long
101 break
102 }
103 }
104 // general case, recurse
105 for test := s; len(test) > 0; unpackRune(&test) {
106 // I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
107 // but this follows exactly from how fnmatch.c implements it
108 if Match(pattern, test, (flags &^ FNM_PERIOD)) {
109 return true
110 } else if pathname && test[0] == '/' {
111 break
112 }
113 }
114 return false
115 case '[':
116 if len(s) == 0 {
117 return false
118 }
119 if pathname && s[0] == '/' {
120 return false
121 }
122 sc := unpackS()
123 if !rangematch(&pattern, sc, flags) {
124 return false
125 }
126 case '\\':
127 if !noescape {
128 if len(pattern) > 0 {
129 c = unpackRune(&pattern)
130 }
131 }
132 fallthrough
133 default:
134 if len(s) == 0 {
135 return false
136 }
137 sc := unpackS()
138 switch {
139 case sc == c:
140 case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
141 default:
142 return false
143 }
144 }
145 }
146 return len(s) == 0 || (leadingdir && s[0] == '/')
147}
148
149func rangematch(pattern *string, test rune, flags int) bool {
150 if len(*pattern) == 0 {
151 return false
152 }
153 casefold := (flags&FNM_CASEFOLD != 0)
154 noescape := (flags&FNM_NOESCAPE != 0)
155 if casefold {
156 test = unicode.ToLower(test)
157 }
158 var negate, matched bool
159 if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
160 negate = true
161 (*pattern) = (*pattern)[1:]
162 }
163 for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
164 c := unpackRune(pattern)
165 if !noescape && c == '\\' {
166 if len(*pattern) > 1 {
167 c = unpackRune(pattern)
168 } else {
169 return false
170 }
171 }
172 if casefold {
173 c = unicode.ToLower(c)
174 }
175 if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' {
176 unpackRune(pattern) // skip the -
177 c2 := unpackRune(pattern)
178 if !noescape && c2 == '\\' {
179 if len(*pattern) > 0 {
180 c2 = unpackRune(pattern)
181 } else {
182 return false
183 }
184 }
185 if casefold {
186 c2 = unicode.ToLower(c2)
187 }
188 // this really should be more intelligent, but it looks like
189 // fnmatch.c does simple int comparisons, therefore we will as well
190 if c <= test && test <= c2 {
191 matched = true
192 }
193 } else if c == test {
194 matched = true
195 }
196 }
197 // skip past the rest of the pattern
198 ok := false
199 for !ok && len(*pattern) > 0 {
200 c := unpackRune(pattern)
201 if c == '\\' && len(*pattern) > 0 {
202 unpackRune(pattern)
203 } else if c == ']' {
204 ok = true
205 }
206 }
207 return ok && matched != negate
208}
209
210// define strchr because strings.Index() seems a bit overkill
211// returns the index of c in s, or -1 if there is no match
212func strchr(s string, c rune) int {
213 for i, sc := range s {
214 if sc == c {
215 return i
216 }
217 }
218 return -1
219}