fnmatch.go

  1// Provide string-matching based on fnmatch.3
  2package fnmatch
  3
  4// There are a few issues that I believe to be bugs, but this implementation is
  5// based as closely as possible on BSD fnmatch. These bugs are present in the
  6// source of BSD fnmatch, and so are replicated here. The issues are as follows:
  7//
  8// * FNM_PERIOD is no longer observed after the first * in a pattern
  9//   This only applies to matches done with FNM_PATHNAME as well
 10// * FNM_PERIOD doesn't apply to ranges. According to the documentation,
 11//   a period must be matched explicitly, but a range will match it too
 12
 13import (
 14	"unicode"
 15	"unicode/utf8"
 16)
 17
 18const (
 19	FNM_NOESCAPE = (1 << iota)
 20	FNM_PATHNAME
 21	FNM_PERIOD
 22
 23	FNM_LEADING_DIR
 24	FNM_CASEFOLD
 25
 26	FNM_IGNORECASE = FNM_CASEFOLD
 27	FNM_FILE_NAME  = FNM_PATHNAME
 28)
 29
 30func unpackRune(str *string) rune {
 31	rune, size := utf8.DecodeRuneInString(*str)
 32	*str = (*str)[size:]
 33	return rune
 34}
 35
 36// Matches the pattern against the string, with the given flags,
 37// and returns true if the match is successful.
 38// This function should match fnmatch.3 as closely as possible.
 39func Match(pattern, s string, flags int) bool {
 40	// The implementation for this function was patterned after the BSD fnmatch.c
 41	// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
 42	noescape := (flags&FNM_NOESCAPE != 0)
 43	pathname := (flags&FNM_PATHNAME != 0)
 44	period := (flags&FNM_PERIOD != 0)
 45	leadingdir := (flags&FNM_LEADING_DIR != 0)
 46	casefold := (flags&FNM_CASEFOLD != 0)
 47	// the following is some bookkeeping that the original fnmatch.c implementation did not do
 48	// We are forced to do this because we're not keeping indexes into C strings but rather
 49	// processing utf8-encoded strings. Use a custom unpacker to maintain our state for us
 50	sAtStart := true
 51	sLastAtStart := true
 52	sLastSlash := false
 53	sLastUnpacked := rune(0)
 54	unpackS := func() rune {
 55		sLastSlash = (sLastUnpacked == '/')
 56		sLastUnpacked = unpackRune(&s)
 57		sLastAtStart = sAtStart
 58		sAtStart = false
 59		return sLastUnpacked
 60	}
 61	for len(pattern) > 0 {
 62		c := unpackRune(&pattern)
 63		switch c {
 64		case '?':
 65			if len(s) == 0 {
 66				return false
 67			}
 68			sc := unpackS()
 69			if pathname && sc == '/' {
 70				return false
 71			}
 72			if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
 73				return false
 74			}
 75		case '*':
 76			// collapse multiple *'s
 77			// don't use unpackRune here, the only char we care to detect is ASCII
 78			for len(pattern) > 0 && pattern[0] == '*' {
 79				pattern = pattern[1:]
 80			}
 81			if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
 82				return false
 83			}
 84			// optimize for patterns with * at end or before /
 85			if len(pattern) == 0 {
 86				if pathname {
 87					return leadingdir || (strchr(s, '/') == -1)
 88				} else {
 89					return true
 90				}
 91				return !(pathname && strchr(s, '/') >= 0)
 92			} else if pathname && pattern[0] == '/' {
 93				offset := strchr(s, '/')
 94				if offset == -1 {
 95					return false
 96				} else {
 97					// we already know our pattern and string have a /, skip past it
 98					s = s[offset:] // use unpackS here to maintain our bookkeeping state
 99					unpackS()
100					pattern = pattern[1:] // we know / is one byte long
101					break
102				}
103			}
104			// general case, recurse
105			for test := s; len(test) > 0; unpackRune(&test) {
106				// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
107				// but this follows exactly from how fnmatch.c implements it
108				if Match(pattern, test, (flags &^ FNM_PERIOD)) {
109					return true
110				} else if pathname && test[0] == '/' {
111					break
112				}
113			}
114			return false
115		case '[':
116			if len(s) == 0 {
117				return false
118			}
119			if pathname && s[0] == '/' {
120				return false
121			}
122			sc := unpackS()
123			if !rangematch(&pattern, sc, flags) {
124				return false
125			}
126		case '\\':
127			if !noescape {
128				if len(pattern) > 0 {
129					c = unpackRune(&pattern)
130				}
131			}
132			fallthrough
133		default:
134			if len(s) == 0 {
135				return false
136			}
137			sc := unpackS()
138			switch {
139			case sc == c:
140			case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
141			default:
142				return false
143			}
144		}
145	}
146	return len(s) == 0 || (leadingdir && s[0] == '/')
147}
148
149func rangematch(pattern *string, test rune, flags int) bool {
150	if len(*pattern) == 0 {
151		return false
152	}
153	casefold := (flags&FNM_CASEFOLD != 0)
154	noescape := (flags&FNM_NOESCAPE != 0)
155	if casefold {
156		test = unicode.ToLower(test)
157	}
158	var negate, matched bool
159	if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
160		negate = true
161		(*pattern) = (*pattern)[1:]
162	}
163	for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
164		c := unpackRune(pattern)
165		if !noescape && c == '\\' {
166			if len(*pattern) > 1 {
167				c = unpackRune(pattern)
168			} else {
169				return false
170			}
171		}
172		if casefold {
173			c = unicode.ToLower(c)
174		}
175		if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' {
176			unpackRune(pattern) // skip the -
177			c2 := unpackRune(pattern)
178			if !noescape && c2 == '\\' {
179				if len(*pattern) > 0 {
180					c2 = unpackRune(pattern)
181				} else {
182					return false
183				}
184			}
185			if casefold {
186				c2 = unicode.ToLower(c2)
187			}
188			// this really should be more intelligent, but it looks like
189			// fnmatch.c does simple int comparisons, therefore we will as well
190			if c <= test && test <= c2 {
191				matched = true
192			}
193		} else if c == test {
194			matched = true
195		}
196	}
197	// skip past the rest of the pattern
198	ok := false
199	for !ok && len(*pattern) > 0 {
200		c := unpackRune(pattern)
201		if c == '\\' && len(*pattern) > 0 {
202			unpackRune(pattern)
203		} else if c == ']' {
204			ok = true
205		}
206	}
207	return ok && matched != negate
208}
209
210// define strchr because strings.Index() seems a bit overkill
211// returns the index of c in s, or -1 if there is no match
212func strchr(s string, c rune) int {
213	for i, sc := range s {
214		if sc == c {
215			return i
216		}
217	}
218	return -1
219}