spellcheck_test.go

  1package spellcheck
  2
  3import (
  4	"os"
  5	"path/filepath"
  6	"strings"
  7	"testing"
  8)
  9
 10func newTestChecker(t *testing.T, words ...string) *Checker {
 11	t.Helper()
 12	dir := t.TempDir()
 13	path := filepath.Join(dir, "test.dic")
 14	content := []byte("# header\n" + strings.Join(words, "\n") + "\n")
 15	if err := os.WriteFile(path, content, 0o600); err != nil {
 16		t.Fatalf("write dic: %v", err)
 17	}
 18	c := NewChecker()
 19	if err := c.Load(path, "test"); err != nil {
 20		t.Fatalf("load: %v", err)
 21	}
 22	return c
 23}
 24
 25func TestCheckerCheck(t *testing.T) {
 26	c := newTestChecker(t, "hello", "world", "go")
 27	if !c.Check("hello") {
 28		t.Error("hello should be known")
 29	}
 30	if !c.Check("Hello") {
 31		t.Error("Hello should match case-insensitively")
 32	}
 33	if c.Check("helo") {
 34		t.Error("helo should be unknown")
 35	}
 36	// Short / numeric / uppercase tokens are skipped.
 37	if !c.Check("Z") {
 38		t.Error("single rune skipped")
 39	}
 40	if !c.Check("ABC") {
 41		t.Error("short uppercase acronym skipped")
 42	}
 43	if !c.Check("42") {
 44		t.Error("numeric skipped")
 45	}
 46}
 47
 48func TestTokenize(t *testing.T) {
 49	got := Tokenize("hello, world! it's nice")
 50	want := []struct {
 51		w          string
 52		start, end int
 53	}{
 54		{"hello", 0, 5},
 55		{"world", 7, 12},
 56		{"it's", 14, 18},
 57		{"nice", 19, 23},
 58	}
 59	if len(got) != len(want) {
 60		t.Fatalf("tokens = %d, want %d (%+v)", len(got), len(want), got)
 61	}
 62	for i, w := range want {
 63		if got[i].Word != w.w || got[i].Start != w.start || got[i].End != w.end {
 64			t.Errorf("token %d = %+v, want %s [%d:%d]", i, got[i], w.w, w.start, w.end)
 65		}
 66	}
 67}
 68
 69func TestParseHunspellDicSkipsCountLine(t *testing.T) {
 70	dir := t.TempDir()
 71	p := filepath.Join(dir, "x.dic")
 72	// First line is a count, words follow, with hunspell-style flags.
 73	body := "3\nfoo/AB\nbar\nbaz\n"
 74	if err := os.WriteFile(p, []byte(body), 0o600); err != nil {
 75		t.Fatal(err)
 76	}
 77	w, _, err := parseHunspellDic(p)
 78	if err != nil {
 79		t.Fatal(err)
 80	}
 81	for _, k := range []string{"foo", "bar", "baz"} {
 82		if _, ok := w[k]; !ok {
 83			t.Errorf("missing %q", k)
 84		}
 85	}
 86}
 87
 88func TestHighlightWrapsMisspelled(t *testing.T) {
 89	c := newTestChecker(t, "hello", "world", "abcdefghijklmnopqrstuvwxyz")
 90	out := Highlight("hello wurld", c, -1)
 91	if !strings.Contains(out, "wurld") {
 92		t.Fatalf("output missing word: %q", out)
 93	}
 94	if !strings.Contains(out, openSGR) || !strings.Contains(out, closeSGR) {
 95		t.Errorf("expected SGR markers, got %q", out)
 96	}
 97	// "hello" is correct and must not be wrapped.
 98	idxHello := strings.Index(out, "hello")
 99	idxOpen := strings.Index(out, openSGR)
100	if idxOpen < idxHello {
101		t.Errorf("opener appeared before hello: open=%d hello=%d", idxOpen, idxHello)
102	}
103}
104
105func TestHighlightPreservesANSI(t *testing.T) {
106	c := newTestChecker(t, "good", "abcdefghijklmnopqrstuvwxyz")
107	// Pretend the line was rendered with a colour style around the whole
108	// content: ESC[31m...ESC[0m. Misspelled token "bd" inside.
109	in := "\x1b[31mgood bd\x1b[0m"
110	out := Highlight(in, c, -1)
111	if !strings.Contains(out, "\x1b[31m") {
112		t.Errorf("original colour ANSI lost: %q", out)
113	}
114	if !strings.Contains(out, openSGR) {
115		t.Errorf("missing underline open: %q", out)
116	}
117}
118
119func TestHighlightNoCheckerIsNoop(t *testing.T) {
120	in := "anything goes"
121	if got := Highlight(in, nil, -1); got != in {
122		t.Errorf("nil checker should be no-op, got %q", got)
123	}
124}
125
126func TestSuggest(t *testing.T) {
127	c := newTestChecker(t, "hello", "help", "world", "word", "ward", "wild")
128	got := c.Suggest("wurld", 5)
129	if len(got) == 0 {
130		t.Fatal("expected at least one suggestion")
131	}
132	// "world" should outrank "ward" / "wild" by edit distance.
133	if got[0] != "world" {
134		t.Errorf("top suggestion = %q, want world (all: %v)", got[0], got)
135	}
136}
137
138func TestSuggestCaseMatch(t *testing.T) {
139	c := newTestChecker(t, "hello", "world")
140	got := c.Suggest("Wurld", 3)
141	if len(got) == 0 || got[0] != "World" {
142		t.Errorf("expected capitalised World, got %v", got)
143	}
144}
145
146func TestCheckSkipsForeignScript(t *testing.T) {
147	c := newTestChecker(t, "hello", "world")
148	// Cyrillic — dict has no cyrillic runes, so we must NOT flag it.
149	if !c.Check("привет") {
150		t.Error("cyrillic word should be skipped against latin dict")
151	}
152	// Accented French not in dict ('é' absent) — must not flag.
153	if !c.Check("café") {
154		t.Error("accented word with foreign rune should be skipped")
155	}
156	// Plain ASCII typo still flagged.
157	if c.Check("helo") {
158		t.Error("ASCII typo should still be flagged")
159	}
160}
161
162func TestCheckRecognisesAccentsWhenDictHasThem(t *testing.T) {
163	// Dictionary that legitimately contains an accented word — its rune
164	// set covers 'é' so accented words can be evaluated normally.
165	c := newTestChecker(t, "café", "hello")
166	if !c.Check("café") {
167		t.Error("café should be recognised when present in dict")
168	}
169	if c.Check("cofé") {
170		t.Error("misspelled accented word should still be flagged")
171	}
172}
173
174func TestIsCheckable(t *testing.T) {
175	cases := map[string]bool{
176		"hello":         true,
177		"a":             false,
178		"42":            false,
179		"hello42":       false,
180		"NASA":          false,
181		"hi@there":      false,
182		"path/to":       false,
183		"don't":         true,
184		"HelloWorld":    true, // mixed case, not an acronym
185		"INTERNATIONAL": true, // > 5 upper letters, treated as a word
186	}
187	for in, want := range cases {
188		if got := IsCheckable(in); got != want {
189			t.Errorf("IsCheckable(%q) = %v, want %v", in, got, want)
190		}
191	}
192}