1package spellcheck
2
3import (
4 "sort"
5 "strings"
6 "unicode"
7)
8
9// Suggest returns up to limit candidate corrections for word, ranked by
10// edit distance ascending then alphabetically. Returns nil when the
11// checker has no dictionary loaded or when word is too short.
12func (c *Checker) Suggest(word string, limit int) []string {
13 if c == nil {
14 return nil
15 }
16 c.mu.RLock()
17 defer c.mu.RUnlock()
18 if !c.loaded || len(c.words) == 0 {
19 return nil
20 }
21 if limit <= 0 {
22 limit = 5
23 }
24
25 lower := strings.ToLower(word)
26 wRunes := []rune(lower)
27 if len(wRunes) < 2 {
28 return nil
29 }
30
31 // Allow up to 2 edits for short-to-medium words, 3 for longer ones.
32 maxDist := 2
33 if len(wRunes) >= 8 {
34 maxDist = 3
35 }
36
37 type cand struct {
38 word string
39 dist int
40 }
41 var cands []cand
42
43 for w := range c.words {
44 // Length filter: prune impossible candidates without an alloc.
45 ld := len(w) - len(lower)
46 if ld < 0 {
47 ld = -ld
48 }
49 if ld > maxDist {
50 continue
51 }
52 // First-rune similarity prunes most mismatched candidates cheaply.
53 if !firstRuneClose(w, lower) {
54 continue
55 }
56 d := levenshtein(wRunes, []rune(w), maxDist)
57 if d > maxDist {
58 continue
59 }
60 cands = append(cands, cand{w, d})
61 }
62
63 sort.Slice(cands, func(i, j int) bool {
64 if cands[i].dist != cands[j].dist {
65 return cands[i].dist < cands[j].dist
66 }
67 return cands[i].word < cands[j].word
68 })
69
70 if len(cands) > limit {
71 cands = cands[:limit]
72 }
73 out := make([]string, len(cands))
74 upper := unicode.IsUpper([]rune(word)[0])
75 for i, c := range cands {
76 out[i] = matchCase(c.word, upper)
77 }
78 return out
79}
80
81// firstRuneClose returns true when the first runes of a and b are equal,
82// adjacent on a QWERTY keyboard, or one of them is missing.
83func firstRuneClose(a, b string) bool {
84 if a == "" || b == "" {
85 return true
86 }
87 var ar, br rune
88 for _, r := range a {
89 ar = r
90 break
91 }
92 for _, r := range b {
93 br = r
94 break
95 }
96 if ar == br {
97 return true
98 }
99 return keyboardAdjacent(ar, br)
100}
101
102// keyboardAdjacent returns true when a and b are neighbours on a QWERTY
103// keyboard. Used purely to widen the candidate pool around typos like
104// "guzzy"ā"fuzzy" (gāf) without exploding the cost of suggestion.
105func keyboardAdjacent(a, b rune) bool {
106 neighbours := map[rune]string{
107 'a': "qwsz", 'b': "vghn", 'c': "xdfv", 'd': "serfcx", 'e': "wsdr",
108 'f': "drtgcv", 'g': "ftyhvb", 'h': "gyujnb", 'i': "ujko", 'j': "huikmn",
109 'k': "jiolm", 'l': "kop", 'm': "njk", 'n': "bhjm", 'o': "iklp",
110 'p': "ol", 'q': "wa", 'r': "edft", 's': "awedxz", 't': "rfgy",
111 'u': "yhji", 'v': "cfgb", 'w': "qase", 'x': "zsdc", 'y': "tghu",
112 'z': "asx",
113 }
114 a = unicode.ToLower(a)
115 b = unicode.ToLower(b)
116 if ns, ok := neighbours[a]; ok {
117 return strings.ContainsRune(ns, b)
118 }
119 return false
120}
121
122// levenshtein computes the edit distance between a and b, returning early
123// once the running minimum exceeds cutoff. Two-row dynamic programming
124// keeps the allocation small.
125func levenshtein(a, b []rune, cutoff int) int {
126 la, lb := len(a), len(b)
127 if la == 0 {
128 return lb
129 }
130 if lb == 0 {
131 return la
132 }
133 prev := make([]int, lb+1)
134 curr := make([]int, lb+1)
135 for j := 0; j <= lb; j++ {
136 prev[j] = j
137 }
138 for i := 1; i <= la; i++ {
139 curr[0] = i
140 minRow := curr[0]
141 for j := 1; j <= lb; j++ {
142 cost := 1
143 if a[i-1] == b[j-1] {
144 cost = 0
145 }
146 curr[j] = min3(curr[j-1]+1, prev[j]+1, prev[j-1]+cost)
147 if curr[j] < minRow {
148 minRow = curr[j]
149 }
150 }
151 if minRow > cutoff {
152 return cutoff + 1
153 }
154 prev, curr = curr, prev
155 }
156 return prev[lb]
157}
158
159func min3(a, b, c int) int {
160 m := a
161 if b < m {
162 m = b
163 }
164 if c < m {
165 m = c
166 }
167 return m
168}
169
170// matchCase capitalises the first rune of s when the original word
171// started with an uppercase letter, so suggestions blend back into the
172// user's writing.
173func matchCase(s string, upperFirst bool) string {
174 if !upperFirst || s == "" {
175 return s
176 }
177 r := []rune(s)
178 r[0] = unicode.ToUpper(r[0])
179 return string(r)
180}