1/*
2Package fuzzy provides fuzzy string matching optimized
3for filenames and code symbols in the style of Sublime Text,
4VSCode, IntelliJ IDEA et al.
5*/
6package fuzzy
7
8import (
9 "sort"
10 "unicode"
11 "unicode/utf8"
12)
13
14// Match represents a matched string.
15type Match struct {
16 // The matched string.
17 Str string
18 // The index of the matched string in the supplied slice.
19 Index int
20 // The indexes of matched characters. Useful for highlighting matches.
21 MatchedIndexes []int
22 // Score used to rank matches
23 Score int
24}
25
26const (
27 firstCharMatchBonus = 10
28 matchFollowingSeparatorBonus = 20
29 camelCaseMatchBonus = 20
30 adjacentMatchBonus = 5
31 unmatchedLeadingCharPenalty = -5
32 maxUnmatchedLeadingCharPenalty = -15
33)
34
35var separators = []rune("/-_ .\\")
36
37// Matches is a slice of Match structs
38type Matches []Match
39
40func (a Matches) Len() int { return len(a) }
41func (a Matches) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
42func (a Matches) Less(i, j int) bool { return a[i].Score >= a[j].Score }
43
44// Source represents an abstract source of a list of strings. Source must be iterable type such as a slice.
45// The source will be iterated over till Len() with String(i) being called for each element where i is the
46// index of the element. You can find a working example in the README.
47type Source interface {
48 // The string to be matched at position i.
49 String(i int) string
50 // The length of the source. Typically is the length of the slice of things that you want to match.
51 Len() int
52}
53
54type stringSource []string
55
56func (ss stringSource) String(i int) string {
57 return ss[i]
58}
59
60func (ss stringSource) Len() int { return len(ss) }
61
62/*
63Find looks up pattern in data and returns matches
64in descending order of match quality. Match quality
65is determined by a set of bonus and penalty rules.
66
67The following types of matches apply a bonus:
68
69* The first character in the pattern matches the first character in the match string.
70
71* The matched character is camel cased.
72
73* The matched character follows a separator such as an underscore character.
74
75* The matched character is adjacent to a previous match.
76
77Penalties are applied for every character in the search string that wasn't matched and all leading
78characters upto the first match.
79
80Results are sorted by best match.
81*/
82func Find(pattern string, data []string) Matches {
83 return FindFrom(pattern, stringSource(data))
84}
85
86/*
87FindNoSort is an alternative Find implementation that does not sort
88the results in the end.
89*/
90func FindNoSort(pattern string, data []string) Matches {
91 return FindFromNoSort(pattern, stringSource(data))
92}
93
94/*
95FindFrom is an alternative implementation of Find using a Source
96instead of a list of strings.
97*/
98func FindFrom(pattern string, data Source) Matches {
99 matches := FindFromNoSort(pattern, data)
100 sort.Stable(matches)
101 return matches
102}
103
104/*
105FindFromNoSort is an alternative FindFrom implementation that does
106not sort results in the end.
107*/
108func FindFromNoSort(pattern string, data Source) Matches {
109 if len(pattern) == 0 {
110 return nil
111 }
112 runes := []rune(pattern)
113 var matches Matches
114 var matchedIndexes []int
115 for i := 0; i < data.Len(); i++ {
116 var match Match
117 match.Str = data.String(i)
118 match.Index = i
119 if matchedIndexes != nil {
120 match.MatchedIndexes = matchedIndexes
121 } else {
122 match.MatchedIndexes = make([]int, 0, len(runes))
123 }
124 var score int
125 patternIndex := 0
126 bestScore := -1
127 matchedIndex := -1
128 currAdjacentMatchBonus := 0
129 var last rune
130 var lastIndex int
131 nextc, nextSize := utf8.DecodeRuneInString(data.String(i))
132 var candidate rune
133 var candidateSize int
134 for j := 0; j < len(data.String(i)); j += candidateSize {
135 candidate, candidateSize = nextc, nextSize
136 if equalFold(candidate, runes[patternIndex]) {
137 score = 0
138 if j == 0 {
139 score += firstCharMatchBonus
140 }
141 if unicode.IsLower(last) && unicode.IsUpper(candidate) {
142 score += camelCaseMatchBonus
143 }
144 if j != 0 && isSeparator(last) {
145 score += matchFollowingSeparatorBonus
146 }
147 if len(match.MatchedIndexes) > 0 {
148 lastMatch := match.MatchedIndexes[len(match.MatchedIndexes)-1]
149 bonus := adjacentCharBonus(lastIndex, lastMatch, currAdjacentMatchBonus)
150 score += bonus
151 // adjacent matches are incremental and keep increasing based on previous adjacent matches
152 // thus we need to maintain the current match bonus
153 currAdjacentMatchBonus += bonus
154 }
155 if score > bestScore {
156 bestScore = score
157 matchedIndex = j
158 }
159 }
160 var nextp rune
161 if patternIndex < len(runes)-1 {
162 nextp = runes[patternIndex+1]
163 }
164 if j+candidateSize < len(data.String(i)) {
165 if data.String(i)[j+candidateSize] < utf8.RuneSelf { // Fast path for ASCII
166 nextc, nextSize = rune(data.String(i)[j+candidateSize]), 1
167 } else {
168 nextc, nextSize = utf8.DecodeRuneInString(data.String(i)[j+candidateSize:])
169 }
170 } else {
171 nextc, nextSize = 0, 0
172 }
173 // We apply the best score when we have the next match coming up or when the search string has ended.
174 // Tracking when the next match is coming up allows us to exhaustively find the best match and not necessarily
175 // the first match.
176 // For example given the pattern "tk" and search string "The Black Knight", exhaustively matching allows us
177 // to match the second k thus giving this string a higher score.
178 if equalFold(nextp, nextc) || nextc == 0 {
179 if matchedIndex > -1 {
180 if len(match.MatchedIndexes) == 0 {
181 penalty := matchedIndex * unmatchedLeadingCharPenalty
182 bestScore += max(penalty, maxUnmatchedLeadingCharPenalty)
183 }
184 match.Score += bestScore
185 match.MatchedIndexes = append(match.MatchedIndexes, matchedIndex)
186 score = 0
187 bestScore = -1
188 patternIndex++
189 }
190 }
191 lastIndex = j
192 last = candidate
193 }
194 // apply penalty for each unmatched character
195 penalty := len(match.MatchedIndexes) - len(data.String(i))
196 match.Score += penalty
197 if len(match.MatchedIndexes) == len(runes) {
198 matches = append(matches, match)
199 matchedIndexes = nil
200 } else {
201 matchedIndexes = match.MatchedIndexes[:0] // Recycle match index slice
202 }
203 }
204 return matches
205}
206
207// Taken from strings.EqualFold
208func equalFold(tr, sr rune) bool {
209 if tr == sr {
210 return true
211 }
212 if tr < sr {
213 tr, sr = sr, tr
214 }
215 // Fast check for ASCII.
216 if tr < utf8.RuneSelf {
217 // ASCII, and sr is upper case. tr must be lower case.
218 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
219 return true
220 }
221 return false
222 }
223
224 // General case. SimpleFold(x) returns the next equivalent rune > x
225 // or wraps around to smaller values.
226 r := unicode.SimpleFold(sr)
227 for r != sr && r < tr {
228 r = unicode.SimpleFold(r)
229 }
230 return r == tr
231}
232
233func adjacentCharBonus(i int, lastMatch int, currentBonus int) int {
234 if lastMatch == i {
235 return currentBonus*2 + adjacentMatchBonus
236 }
237 return 0
238}
239
240func isSeparator(s rune) bool {
241 for _, sep := range separators {
242 if s == sep {
243 return true
244 }
245 }
246 return false
247}
248
249func max(x int, y int) int {
250 if x > y {
251 return x
252 }
253 return y
254}