highlight.go

  1package spellcheck
  2
  3import (
  4	"strings"
  5	"unicode"
  6	"unicode/utf8"
  7)
  8
// SGR pair that draws a red dotted underline, written in the extended
// colon sub-parameter form (supported by kitty, WezTerm, iTerm2, Ghostty,
// foot, modern xterm). Terminals that ignore the sub-parameters render a
// plain underline instead.
//
// openSGR:  4:4 selects the dotted underline style and 58:2::255:0:0 sets
// the underline colour to RGB 255,0,0.
// closeSGR: 4:0 switches the underline off and 59 restores the default
// underline colour.
const (
	openSGR  = "\x1b[4:4;58:2::255:0:0m"
	closeSGR = "\x1b[4:0;59m"
)
 16
 17// Highlight walks rendered text and wraps misspelled words in a red dotted
 18// underline. ANSI sequences already present in the input are preserved.
 19//
 20// The text is processed line by line. The line at index skipLine is left
 21// untouched — pass -1 to highlight every line.
 22func Highlight(text string, c *Checker, skipLine int) string {
 23	if c == nil || !c.Loaded() || text == "" {
 24		return text
 25	}
 26	lines := strings.Split(text, "\n")
 27	for i, line := range lines {
 28		if i == skipLine {
 29			continue
 30		}
 31		lines[i] = highlightLine(line, c)
 32	}
 33	return strings.Join(lines, "\n")
 34}
 35
 36type wordSpan struct {
 37	start, end int
 38	word       string
 39}
 40
 41func highlightLine(line string, c *Checker) string {
 42	if line == "" {
 43		return line
 44	}
 45
 46	spans := scanWords(line)
 47	if len(spans) == 0 {
 48		return line
 49	}
 50
 51	// Splice from end to start so earlier offsets stay valid.
 52	out := line
 53	wrapped := false
 54	for i := len(spans) - 1; i >= 0; i-- {
 55		s := spans[i]
 56		if !IsCheckable(s.word) {
 57			continue
 58		}
 59		if c.Check(s.word) {
 60			continue
 61		}
 62		out = out[:s.start] + openSGR + out[s.start:s.end] + closeSGR + out[s.end:]
 63		wrapped = true
 64	}
 65	if !wrapped {
 66		return line
 67	}
 68	return out
 69}
 70
 71// scanWords walks the raw line and returns word runs by byte offset.
 72// ANSI CSI/OSC escape sequences are skipped so they don't fragment words.
 73func scanWords(line string) []wordSpan {
 74	var spans []wordSpan
 75	var b strings.Builder
 76	start := -1
 77
 78	flush := func() {
 79		if b.Len() == 0 {
 80			return
 81		}
 82		w := strings.TrimRight(b.String(), "'’-")
 83		if w != "" {
 84			spans = append(spans, wordSpan{start: start, end: start + len(w), word: w})
 85		}
 86		b.Reset()
 87		start = -1
 88	}
 89
 90	i := 0
 91	for i < len(line) {
 92		if line[i] == 0x1b {
 93			flush()
 94			i += ansiSkip(line, i)
 95			continue
 96		}
 97		r, size := utf8.DecodeRuneInString(line[i:])
 98		if unicode.IsLetter(r) {
 99			if start < 0 {
100				start = i
101			}
102			b.WriteRune(r)
103			i += size
104			continue
105		}
106		if b.Len() > 0 && (r == '\'' || r == '’' || r == '-') {
107			b.WriteRune(r)
108			i += size
109			continue
110		}
111		flush()
112		i += size
113	}
114	flush()
115	return spans
116}
117
// ansiSkip returns the byte length of the escape sequence beginning at
// line[i] (which must be ESC). The result is always at least 1 and never
// reaches past the end of line.
//
// Recognised forms:
//   - CSI (ESC [): parameters up to and including a final byte 0x40..0x7e.
//   - OSC (ESC ]): payload up to and including BEL or ST (ESC \).
//   - DCS, SOS, PM, APC (ESC P, ESC X, ESC ^, ESC _): payload up to and
//     including ST.
//   - nF escapes (ESC, intermediates 0x20..0x2f, final 0x30..0x7e), such as
//     the charset designation ESC ( B.
//   - Any other ESC + ASCII byte: a two-byte escape.
//
// A truncated sequence consumes the remainder of the line. An ESC followed
// by another ESC or by a non-ASCII byte consumes only itself, so the next
// sequence or UTF-8 rune is still decoded from its first byte.
func ansiSkip(line string, i int) int {
	if i+1 >= len(line) {
		return 1
	}
	rest := len(line) - i
	next := line[i+1]

	switch {
	case next == '[':
		// CSI: ESC [ params final (0x40..0x7e)
		for j := i + 2; j < len(line); j++ {
			if c := line[j]; c >= 0x40 && c <= 0x7e {
				return j - i + 1
			}
		}
		return rest

	case next == ']' || next == 'P' || next == 'X' || next == '^' || next == '_':
		// String sequences. All of them end at ST (ESC \); only OSC also
		// accepts BEL as a terminator.
		for j := i + 2; j < len(line); j++ {
			if line[j] == 0x07 && next == ']' {
				return j - i + 1
			}
			if line[j] == 0x1b && j+1 < len(line) && line[j+1] == '\\' {
				return j - i + 2
			}
		}
		return rest

	case next >= 0x20 && next <= 0x2f:
		// nF escape: skip every intermediate byte, then the final byte, so
		// that a final such as the 'B' of ESC ( B is not read as text.
		j := i + 2
		for j < len(line) && line[j] >= 0x20 && line[j] <= 0x2f {
			j++
		}
		if j >= len(line) {
			return rest
		}
		if line[j] >= 0x30 && line[j] <= 0x7e {
			return j - i + 1
		}
		// Malformed: stop in front of the unexpected byte so the caller
		// handles it on its own (it may be the start of another escape).
		return j - i

	case next == 0x1b || next >= 0x80:
		// Not a valid escape. Swallowing the second byte would hide the ESC
		// of a following sequence or cut a multi-byte rune in half.
		return 1

	default:
		return 2
	}
}