ignore.go

  1/*
  2ignore is a library which returns a new ignorer object which can
  3test against various paths. This is particularly useful when trying
  4to filter files based on a .gitignore document
  5
  6The rules for parsing the input file are the same as the ones listed
  7in the Git docs here: http://git-scm.com/docs/gitignore
  8
  9The summarized version of the same has been copied here:
 10
 11    1. A blank line matches no files, so it can serve as a separator
 12       for readability.
 13    2. A line starting with # serves as a comment. Put a backslash ("\")
 14       in front of the first hash for patterns that begin with a hash.
 15    3. Trailing spaces are ignored unless they are quoted with backslash ("\").
 16    4. An optional prefix "!" which negates the pattern; any matching file
 17       excluded by a previous pattern will become included again. It is not
 18       possible to re-include a file if a parent directory of that file is
 19       excluded. Git doesn’t list excluded directories for performance reasons,
 20       so any patterns on contained files have no effect, no matter where they
 21       are defined. Put a backslash ("\") in front of the first "!" for
 22       patterns that begin with a literal "!", for example, "\!important!.txt".
 23    5. If the pattern ends with a slash, it is removed for the purpose of the
 24       following description, but it would only find a match with a directory.
 25       In other words, foo/ will match a directory foo and paths underneath it,
 26       but will not match a regular file or a symbolic link foo (this is
 27       consistent with the way how pathspec works in general in Git).
 28    6. If the pattern does not contain a slash /, Git treats it as a shell glob
 29       pattern and checks for a match against the pathname relative to the
 30       location of the .gitignore file (relative to the toplevel of the work
 31       tree if not from a .gitignore file).
 32    7. Otherwise, Git treats the pattern as a shell glob suitable for
 33       consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the
 34       pattern will not match a / in the pathname. For example,
 35       "Documentation/*.html" matches "Documentation/git.html" but not
 36       "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html".
 37    8. A leading slash matches the beginning of the pathname. For example,
 38       "/*.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c".
 39    9. Two consecutive asterisks ("**") in patterns matched against full
 40       pathname may have special meaning:
 41        i.   A leading "**" followed by a slash means match in all directories.
 42             For example, "** /foo" matches file or directory "foo" anywhere,
 43             the same as pattern "foo". "** /foo/bar" matches file or directory
 44             "bar" anywhere that is directly under directory "foo".
 45        ii.  A trailing "/**" matches everything inside. For example, "abc/**"
 46             matches all files inside directory "abc", relative to the location
 47             of the .gitignore file, with infinite depth.
 48        iii. A slash followed by two consecutive asterisks then a slash matches
 49             zero or more directories. For example, "a/** /b" matches "a/b",
 50             "a/x/b", "a/x/y/b" and so on.
 51        iv.  Other consecutive asterisks are considered invalid. */
 52package ignore
 53
 54import (
 55	"io/ioutil"
 56	"os"
 57	"regexp"
 58	"strings"
 59)
 60
 61////////////////////////////////////////////////////////////
 62
// IgnoreParser is an interface with the `MatchesPath` and `MatchesPathHow`
// methods. GitIgnore in this file provides both.
type IgnoreParser interface {
	// MatchesPath takes a path f and returns a single boolean verdict for it.
	MatchesPath(f string) bool
	// MatchesPathHow takes a path f and returns the boolean verdict together
	// with an *IgnorePattern giving details about a pattern involved in it.
	MatchesPathHow(f string) (bool, *IgnorePattern)
}
 68
 69////////////////////////////////////////////////////////////
 70
 71// This function pretty much attempts to mimic the parsing rules
 72// listed above at the start of this file
 73func getPatternFromLine(line string) (*regexp.Regexp, bool) {
 74	// Trim OS-specific carriage returns.
 75	line = strings.TrimRight(line, "\r")
 76
 77	// Strip comments [Rule 2]
 78	if strings.HasPrefix(line, `#`) {
 79		return nil, false
 80	}
 81
 82	// Trim string [Rule 3]
 83	// TODO: Handle [Rule 3], when the " " is escaped with a \
 84	line = strings.Trim(line, " ")
 85
 86	// Exit for no-ops and return nil which will prevent us from
 87	// appending a pattern against this line
 88	if line == "" {
 89		return nil, false
 90	}
 91
 92	// TODO: Handle [Rule 4] which negates the match for patterns leading with "!"
 93	negatePattern := false
 94	if line[0] == '!' {
 95		negatePattern = true
 96		line = line[1:]
 97	}
 98
 99	// Handle [Rule 2, 4], when # or ! is escaped with a \
100	// Handle [Rule 4] once we tag negatePattern, strip the leading ! char
101	if regexp.MustCompile(`^(\#|\!)`).MatchString(line) {
102		line = line[1:]
103	}
104
105	// If we encounter a foo/*.blah in a folder, prepend the / char
106	if regexp.MustCompile(`([^\/+])/.*\*\.`).MatchString(line) && line[0] != '/' {
107		line = "/" + line
108	}
109
110	// Handle escaping the "." char
111	line = regexp.MustCompile(`\.`).ReplaceAllString(line, `\.`)
112
113	magicStar := "#$~"
114
115	// Handle "/**/" usage
116	if strings.HasPrefix(line, "/**/") {
117		line = line[1:]
118	}
119	line = regexp.MustCompile(`/\*\*/`).ReplaceAllString(line, `(/|/.+/)`)
120	line = regexp.MustCompile(`\*\*/`).ReplaceAllString(line, `(|.`+magicStar+`/)`)
121	line = regexp.MustCompile(`/\*\*`).ReplaceAllString(line, `(|/.`+magicStar+`)`)
122
123	// Handle escaping the "*" char
124	line = regexp.MustCompile(`\\\*`).ReplaceAllString(line, `\`+magicStar)
125	line = regexp.MustCompile(`\*`).ReplaceAllString(line, `([^/]*)`)
126
127	// Handle escaping the "?" char
128	line = strings.Replace(line, "?", `\?`, -1)
129
130	line = strings.Replace(line, magicStar, "*", -1)
131
132	// Temporary regex
133	var expr = ""
134	if strings.HasSuffix(line, "/") {
135		expr = line + "(|.*)$"
136	} else {
137		expr = line + "(|/.*)$"
138	}
139	if strings.HasPrefix(expr, "/") {
140		expr = "^(|/)" + expr[1:]
141	} else {
142		expr = "^(|.*/)" + expr
143	}
144	pattern, _ := regexp.Compile(expr)
145
146	return pattern, negatePattern
147}
148
149////////////////////////////////////////////////////////////
150
// IgnorePattern encapsulates a pattern and if it is a negated pattern,
// together with the input line it was built from.
type IgnorePattern struct {
	// Pattern is the compiled regular expression that paths are tested against.
	Pattern *regexp.Regexp
	// Negate is true when the source line was marked as negated (leading "!").
	Negate  bool
	// LineNo is the 1-based position of the source line in the input.
	LineNo  int
	// Line is the source line exactly as it was passed in.
	Line    string
}
158
// GitIgnore wraps a list of ignore pattern.
type GitIgnore struct {
	// patterns holds the compiled patterns in the order their lines appeared
	// in the input; matching walks them in that order.
	patterns []*IgnorePattern
}
163
164// CompileIgnoreLines accepts a variadic set of strings, and returns a GitIgnore
165// instance which converts and appends the lines in the input to regexp.Regexp
166// patterns held within the GitIgnore objects "patterns" field.
167func CompileIgnoreLines(lines ...string) *GitIgnore {
168	gi := &GitIgnore{}
169	for i, line := range lines {
170		pattern, negatePattern := getPatternFromLine(line)
171		if pattern != nil {
172			// LineNo is 1-based numbering to match `git check-ignore -v` output
173			ip := &IgnorePattern{pattern, negatePattern, i + 1, line}
174			gi.patterns = append(gi.patterns, ip)
175		}
176	}
177	return gi
178}
179
180// CompileIgnoreFile uses an ignore file as the input, parses the lines out of
181// the file and invokes the CompileIgnoreLines method.
182func CompileIgnoreFile(fpath string) (*GitIgnore, error) {
183	bs, err := ioutil.ReadFile(fpath)
184	if err != nil {
185		return nil, err
186	}
187
188	s := strings.Split(string(bs), "\n")
189	return CompileIgnoreLines(s...), nil
190}
191
192// CompileIgnoreFileAndLines accepts a ignore file as the input, parses the
193// lines out of the file and invokes the CompileIgnoreLines method with
194// additional lines.
195func CompileIgnoreFileAndLines(fpath string, lines ...string) (*GitIgnore, error) {
196	bs, err := ioutil.ReadFile(fpath)
197	if err != nil {
198		return nil, err
199	}
200
201	gi := CompileIgnoreLines(append(strings.Split(string(bs), "\n"), lines...)...)
202	return gi, nil
203}
204
205////////////////////////////////////////////////////////////
206
207// MatchesPath returns true if the given GitIgnore structure would target
208// a given path string `f`.
209func (gi *GitIgnore) MatchesPath(f string) bool {
210	matchesPath, _ := gi.MatchesPathHow(f)
211	return matchesPath
212}
213
214// MatchesPathHow returns true, `pattern` if the given GitIgnore structure would target
215// a given path string `f`.
216// The IgnorePattern has the Line, LineNo fields.
217func (gi *GitIgnore) MatchesPathHow(f string) (bool, *IgnorePattern) {
218	// Replace OS-specific path separator.
219	f = strings.Replace(f, string(os.PathSeparator), "/", -1)
220
221	matchesPath := false
222	var mip *IgnorePattern
223	for _, ip := range gi.patterns {
224		if ip.Pattern.MatchString(f) {
225			// If this is a regular target (not negated with a gitignore
226			// exclude "!" etc)
227			if !ip.Negate {
228				matchesPath = true
229				mip = ip
230			} else if matchesPath {
231				// Negated pattern, and matchesPath is already set
232				matchesPath = false
233			}
234		}
235	}
236	return matchesPath, mip
237}
238
239////////////////////////////////////////////////////////////