dict.go

  1// Package spellcheck provides dictionary-backed spell checking for the composer.
  2//
  3// Dictionaries follow the Hunspell .dic format (word list, optional /flags
  4// per line). Affix rules are ignored: each base form is added to a flat
  5// word set. Dictionaries are downloaded from the wooorm/dictionaries
  6// GitHub repository on demand.
  7package spellcheck
  8
  9import (
 10	"bufio"
 11	"fmt"
 12	"os"
 13	"path/filepath"
 14	"strings"
 15	"unicode"
 16)
 17
 18// DictsDir returns the directory where dictionaries are stored.
 19func DictsDir() (string, error) {
 20	home, err := os.UserHomeDir()
 21	if err != nil {
 22		return "", fmt.Errorf("cannot find home directory: %w", err)
 23	}
 24	dir := filepath.Join(home, ".config", "matcha", "dicts")
 25	if err := os.MkdirAll(dir, 0o750); err != nil {
 26		return "", fmt.Errorf("cannot create dicts directory: %w", err)
 27	}
 28	return dir, nil
 29}
 30
 31// DictPath returns the on-disk path for a given language code.
 32func DictPath(lang string) (string, error) {
 33	dir, err := DictsDir()
 34	if err != nil {
 35		return "", err
 36	}
 37	return filepath.Join(dir, lang+".dic"), nil
 38}
 39
 40// DictInstalled reports whether the dictionary for lang exists on disk.
 41func DictInstalled(lang string) bool {
 42	path, err := DictPath(lang)
 43	if err != nil {
 44		return false
 45	}
 46	info, err := os.Stat(path)
 47	return err == nil && !info.IsDir() && info.Size() > 0
 48}
 49
 50// parseHunspellDic reads a Hunspell .dic file and returns the set of base
 51// words plus the set of letter runes that appear in those words. The
 52// first line (when numeric) is treated as a count and skipped. Each entry
 53// may carry "/FLAGS" affix metadata which we strip — we don't expand
 54// affix rules, so the checker recognises base forms only.
 55func parseHunspellDic(path string) (map[string]struct{}, map[rune]struct{}, error) {
 56	f, err := os.Open(path)
 57	if err != nil {
 58		return nil, nil, fmt.Errorf("open dict: %w", err)
 59	}
 60	defer f.Close() //nolint:errcheck
 61
 62	words := make(map[string]struct{}, 50000)
 63	runes := make(map[rune]struct{}, 64)
 64	scanner := bufio.NewScanner(f)
 65	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
 66
 67	first := true
 68	for scanner.Scan() {
 69		line := strings.TrimSpace(scanner.Text())
 70		if line == "" || strings.HasPrefix(line, "#") {
 71			continue
 72		}
 73		if first {
 74			first = false
 75			if _, err := fmt.Sscanf(line, "%d", new(int)); err == nil && !strings.ContainsAny(line, " \t") {
 76				continue
 77			}
 78		}
 79		if idx := strings.IndexByte(line, '/'); idx >= 0 {
 80			line = line[:idx]
 81		}
 82		if idx := strings.IndexByte(line, '\t'); idx >= 0 {
 83			line = line[:idx]
 84		}
 85		line = strings.TrimSpace(line)
 86		if line == "" {
 87			continue
 88		}
 89		lower := strings.ToLower(line)
 90		words[lower] = struct{}{}
 91		for _, r := range lower {
 92			if isDictLetter(r) {
 93				runes[r] = struct{}{}
 94			}
 95		}
 96	}
 97	if err := scanner.Err(); err != nil {
 98		return nil, nil, fmt.Errorf("scan dict: %w", err)
 99	}
100	return words, runes, nil
101}
102
// isDictLetter reports whether r counts as a letter for dictionary purposes.
// ASCII runes are decided by a direct range check against a-z and A-Z; every
// other rune is delegated to unicode.IsLetter.
func isDictLetter(r rune) bool {
	switch {
	case r >= 0x80:
		return unicode.IsLetter(r)
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
		return true
	default:
		return false
	}
}