1// Package spellcheck provides dictionary-backed spell checking for the composer.
2//
3// Dictionaries follow the Hunspell .dic format (word list, optional /flags
4// per line). Affix rules are ignored: each base form is added to a flat
5// word set. Dictionaries are downloaded from the wooorm/dictionaries
6// GitHub repository on demand.
7package spellcheck
8
9import (
10 "bufio"
11 "fmt"
12 "os"
13 "path/filepath"
14 "strings"
15 "unicode"
16)
17
// DictsDir reports the directory that holds dictionary files, which is
// <home>/.config/matcha/dicts. The directory and any missing parents are
// created with mode 0o750 when they do not exist yet.
//
// An error is returned when the home directory cannot be determined or
// the directory cannot be created.
func DictsDir() (string, error) {
	homeDir, homeErr := os.UserHomeDir()
	if homeErr != nil {
		return "", fmt.Errorf("cannot find home directory: %w", homeErr)
	}

	dictsDir := filepath.Join(homeDir, ".config", "matcha", "dicts")

	// MkdirAll is a no-op when the directory is already present.
	mkErr := os.MkdirAll(dictsDir, 0o750)
	if mkErr != nil {
		return "", fmt.Errorf("cannot create dicts directory: %w", mkErr)
	}
	return dictsDir, nil
}
30
31// DictPath returns the on-disk path for a given language code.
32func DictPath(lang string) (string, error) {
33 dir, err := DictsDir()
34 if err != nil {
35 return "", err
36 }
37 return filepath.Join(dir, lang+".dic"), nil
38}
39
40// DictInstalled reports whether the dictionary for lang exists on disk.
41func DictInstalled(lang string) bool {
42 path, err := DictPath(lang)
43 if err != nil {
44 return false
45 }
46 info, err := os.Stat(path)
47 return err == nil && !info.IsDir() && info.Size() > 0
48}
49
// parseHunspellDic reads the Hunspell .dic file at path and returns the
// set of lower-cased base words together with the set of letter runes
// that occur in those words.
//
// Parsing rules:
//   - A UTF-8 byte-order mark at the very start of the file is dropped.
//   - Blank lines and lines starting with '#' are skipped.
//   - The first remaining line is treated as the entry count and skipped,
//     but only when it consists solely of ASCII digits, so a leading
//     entry such as "1st" is kept as a word.
//   - Everything from the first tab on, and everything from the first
//     unescaped '/' on (the affix flags), is stripped. A literal slash
//     written as "\/" stays part of the word.
//
// Affix rules are not expanded, so the checker recognises base forms
// only. On failure both maps are nil and the error wraps the underlying
// open or scan error.
func parseHunspellDic(path string) (map[string]struct{}, map[rune]struct{}, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, nil, fmt.Errorf("open dict: %w", err)
	}
	defer f.Close() //nolint:errcheck // read-only handle; a close error is not actionable

	words := make(map[string]struct{}, 50000)
	runes := make(map[rune]struct{}, 64)
	scanner := bufio.NewScanner(f)
	// Start with a 64 KiB buffer and let single lines grow up to 1 MiB.
	scanner.Buffer(make([]byte, 64*1024), 1024*1024)

	fileStart := true   // the next line is the first physical line of the file
	expectCount := true // the next entry line may be the count header
	for scanner.Scan() {
		raw := scanner.Text()
		if fileStart {
			fileStart = false
			// TrimSpace does not remove a BOM, and a BOM in front of the
			// count would otherwise turn the header into a bogus word.
			raw = strings.TrimPrefix(raw, "\ufeff")
		}
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if expectCount {
			expectCount = false
			if isDicCountLine(line) {
				continue
			}
		}
		word := strings.ToLower(dicEntryWord(line))
		if word == "" {
			continue
		}
		words[word] = struct{}{}
		for _, r := range word {
			if isDictLetter(r) {
				runes[r] = struct{}{}
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, nil, fmt.Errorf("scan dict: %w", err)
	}
	return words, runes, nil
}

// isDicCountLine reports whether line is a .dic count header, i.e. it is
// non-empty and made up of ASCII digits only.
func isDicCountLine(line string) bool {
	if line == "" {
		return false
	}
	for i := 0; i < len(line); i++ {
		if line[i] < '0' || line[i] > '9' {
			return false
		}
	}
	return true
}

// dicEntryWord extracts the word from a single .dic entry: it drops
// everything from the first tab on, cuts at the first unescaped '/',
// rewrites the escape "\/" to a literal '/', and trims surrounding
// whitespace. The result may be empty.
func dicEntryWord(entry string) string {
	if tab := strings.IndexByte(entry, '\t'); tab >= 0 {
		entry = entry[:tab]
	}
	if !strings.Contains(entry, `\/`) {
		// Common case: no escapes, so the first slash starts the flags.
		if slash := strings.IndexByte(entry, '/'); slash >= 0 {
			entry = entry[:slash]
		}
		return strings.TrimSpace(entry)
	}

	var b strings.Builder
	b.Grow(len(entry))
	for i := 0; i < len(entry); i++ {
		c := entry[i]
		if c == '\\' && i+1 < len(entry) && entry[i+1] == '/' {
			b.WriteByte('/')
			i++ // consume the escaped slash as well
			continue
		}
		if c == '/' {
			break
		}
		b.WriteByte(c)
	}
	return strings.TrimSpace(b.String())
}

// isDictLetter reports whether r counts as a letter for the dictionary's
// rune set. ASCII runes are decided by a direct range check on a-z and
// A-Z; every other rune defers to unicode.IsLetter.
func isDictLetter(r rune) bool {
	if r >= 0x80 {
		return unicode.IsLetter(r)
	}
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}