1package spellcheck
2
3import (
4 "os"
5 "path/filepath"
6 "strings"
7 "testing"
8)
9
10func newTestChecker(t *testing.T, words ...string) *Checker {
11 t.Helper()
12 dir := t.TempDir()
13 path := filepath.Join(dir, "test.dic")
14 content := []byte("# header\n" + strings.Join(words, "\n") + "\n")
15 if err := os.WriteFile(path, content, 0o600); err != nil {
16 t.Fatalf("write dic: %v", err)
17 }
18 c := NewChecker()
19 if err := c.Load(path, "test"); err != nil {
20 t.Fatalf("load: %v", err)
21 }
22 return c
23}
24
25func TestCheckerCheck(t *testing.T) {
26 c := newTestChecker(t, "hello", "world", "go")
27 if !c.Check("hello") {
28 t.Error("hello should be known")
29 }
30 if !c.Check("Hello") {
31 t.Error("Hello should match case-insensitively")
32 }
33 if c.Check("helo") {
34 t.Error("helo should be unknown")
35 }
36 // Short / numeric / uppercase tokens are skipped.
37 if !c.Check("Z") {
38 t.Error("single rune skipped")
39 }
40 if !c.Check("ABC") {
41 t.Error("short uppercase acronym skipped")
42 }
43 if !c.Check("42") {
44 t.Error("numeric skipped")
45 }
46}
47
48func TestTokenize(t *testing.T) {
49 got := Tokenize("hello, world! it's nice")
50 want := []struct {
51 w string
52 start, end int
53 }{
54 {"hello", 0, 5},
55 {"world", 7, 12},
56 {"it's", 14, 18},
57 {"nice", 19, 23},
58 }
59 if len(got) != len(want) {
60 t.Fatalf("tokens = %d, want %d (%+v)", len(got), len(want), got)
61 }
62 for i, w := range want {
63 if got[i].Word != w.w || got[i].Start != w.start || got[i].End != w.end {
64 t.Errorf("token %d = %+v, want %s [%d:%d]", i, got[i], w.w, w.start, w.end)
65 }
66 }
67}
68
69func TestParseHunspellDicSkipsCountLine(t *testing.T) {
70 dir := t.TempDir()
71 p := filepath.Join(dir, "x.dic")
72 // First line is a count, words follow, with hunspell-style flags.
73 body := "3\nfoo/AB\nbar\nbaz\n"
74 if err := os.WriteFile(p, []byte(body), 0o600); err != nil {
75 t.Fatal(err)
76 }
77 w, _, err := parseHunspellDic(p)
78 if err != nil {
79 t.Fatal(err)
80 }
81 for _, k := range []string{"foo", "bar", "baz"} {
82 if _, ok := w[k]; !ok {
83 t.Errorf("missing %q", k)
84 }
85 }
86}
87
88func TestHighlightWrapsMisspelled(t *testing.T) {
89 c := newTestChecker(t, "hello", "world", "abcdefghijklmnopqrstuvwxyz")
90 out := Highlight("hello wurld", c, -1)
91 if !strings.Contains(out, "wurld") {
92 t.Fatalf("output missing word: %q", out)
93 }
94 if !strings.Contains(out, openSGR) || !strings.Contains(out, closeSGR) {
95 t.Errorf("expected SGR markers, got %q", out)
96 }
97 // "hello" is correct and must not be wrapped.
98 idxHello := strings.Index(out, "hello")
99 idxOpen := strings.Index(out, openSGR)
100 if idxOpen < idxHello {
101 t.Errorf("opener appeared before hello: open=%d hello=%d", idxOpen, idxHello)
102 }
103}
104
105func TestHighlightPreservesANSI(t *testing.T) {
106 c := newTestChecker(t, "good", "abcdefghijklmnopqrstuvwxyz")
107 // Pretend the line was rendered with a colour style around the whole
108 // content: ESC[31m...ESC[0m. Misspelled token "bd" inside.
109 in := "\x1b[31mgood bd\x1b[0m"
110 out := Highlight(in, c, -1)
111 if !strings.Contains(out, "\x1b[31m") {
112 t.Errorf("original colour ANSI lost: %q", out)
113 }
114 if !strings.Contains(out, openSGR) {
115 t.Errorf("missing underline open: %q", out)
116 }
117}
118
119func TestHighlightNoCheckerIsNoop(t *testing.T) {
120 in := "anything goes"
121 if got := Highlight(in, nil, -1); got != in {
122 t.Errorf("nil checker should be no-op, got %q", got)
123 }
124}
125
126func TestSuggest(t *testing.T) {
127 c := newTestChecker(t, "hello", "help", "world", "word", "ward", "wild")
128 got := c.Suggest("wurld", 5)
129 if len(got) == 0 {
130 t.Fatal("expected at least one suggestion")
131 }
132 // "world" should outrank "ward" / "wild" by edit distance.
133 if got[0] != "world" {
134 t.Errorf("top suggestion = %q, want world (all: %v)", got[0], got)
135 }
136}
137
138func TestSuggestCaseMatch(t *testing.T) {
139 c := newTestChecker(t, "hello", "world")
140 got := c.Suggest("Wurld", 3)
141 if len(got) == 0 || got[0] != "World" {
142 t.Errorf("expected capitalised World, got %v", got)
143 }
144}
145
146func TestCheckSkipsForeignScript(t *testing.T) {
147 c := newTestChecker(t, "hello", "world")
148 // Cyrillic — dict has no cyrillic runes, so we must NOT flag it.
149 if !c.Check("привет") {
150 t.Error("cyrillic word should be skipped against latin dict")
151 }
152 // Accented French not in dict ('é' absent) — must not flag.
153 if !c.Check("café") {
154 t.Error("accented word with foreign rune should be skipped")
155 }
156 // Plain ASCII typo still flagged.
157 if c.Check("helo") {
158 t.Error("ASCII typo should still be flagged")
159 }
160}
161
162func TestCheckRecognisesAccentsWhenDictHasThem(t *testing.T) {
163 // Dictionary that legitimately contains an accented word — its rune
164 // set covers 'é' so accented words can be evaluated normally.
165 c := newTestChecker(t, "café", "hello")
166 if !c.Check("café") {
167 t.Error("café should be recognised when present in dict")
168 }
169 if c.Check("cofé") {
170 t.Error("misspelled accented word should still be flagged")
171 }
172}
173
174func TestIsCheckable(t *testing.T) {
175 cases := map[string]bool{
176 "hello": true,
177 "a": false,
178 "42": false,
179 "hello42": false,
180 "NASA": false,
181 "hi@there": false,
182 "path/to": false,
183 "don't": true,
184 "HelloWorld": true, // mixed case, not an acronym
185 "INTERNATIONAL": true, // > 5 upper letters, treated as a word
186 }
187 for in, want := range cases {
188 if got := IsCheckable(in); got != want {
189 t.Errorf("IsCheckable(%q) = %v, want %v", in, got, want)
190 }
191 }
192}