1package util
2
3import (
4 "testing"
5
6 powernap "github.com/charmbracelet/x/powernap/pkg/lsp"
7 "github.com/charmbracelet/x/powernap/pkg/lsp/protocol"
8 "github.com/stretchr/testify/require"
9)
10
11func TestPositionToByteOffset(t *testing.T) {
12 tests := []struct {
13 name string
14 lineText string
15 utf16Char uint32
16 expected int
17 }{
18 {
19 name: "ASCII only",
20 lineText: "hello world",
21 utf16Char: 6,
22 expected: 6,
23 },
24 {
25 name: "CJK characters (3 bytes each in UTF-8, 1 UTF-16 unit)",
26 lineText: "你好world",
27 utf16Char: 2,
28 expected: 6,
29 },
30 {
31 name: "CJK - position after CJK",
32 lineText: "var x = \"你好world\"",
33 utf16Char: 11,
34 expected: 15,
35 },
36 {
37 name: "Emoji (4 bytes in UTF-8, 2 UTF-16 units)",
38 lineText: "👋hello",
39 utf16Char: 2,
40 expected: 4,
41 },
42 {
43 name: "Multiple emoji",
44 lineText: "👋👋world",
45 utf16Char: 4,
46 expected: 8,
47 },
48 {
49 name: "Mixed content",
50 lineText: "Hello👋你好",
51 utf16Char: 8,
52 expected: 12,
53 },
54 {
55 name: "Position 0",
56 lineText: "hello",
57 utf16Char: 0,
58 expected: 0,
59 },
60 {
61 name: "Position beyond end",
62 lineText: "hi",
63 utf16Char: 100,
64 expected: 2,
65 },
66 {
67 name: "Empty string",
68 lineText: "",
69 utf16Char: 0,
70 expected: 0,
71 },
72 {
73 name: "Surrogate pair at start",
74 lineText: "𐐷hello",
75 utf16Char: 2,
76 expected: 4,
77 },
78 {
79 name: "ZWJ family emoji (1 grapheme, 7 runes, 11 UTF-16 units)",
80 lineText: "hello👨\u200d👩\u200d👧\u200d👦world",
81 utf16Char: 16,
82 expected: 30,
83 },
84 {
85 name: "ZWJ family emoji - offset into middle of grapheme cluster",
86 lineText: "hello👨\u200d👩\u200d👧\u200d👦world",
87 utf16Char: 8,
88 expected: 12,
89 },
90 {
91 name: "Flag emoji (1 grapheme, 2 runes, 4 UTF-16 units)",
92 lineText: "hello🇺🇸world",
93 utf16Char: 9,
94 expected: 13,
95 },
96 {
97 name: "Combining character (1 grapheme, 2 runes, 2 UTF-16 units)",
98 lineText: "caf\u0065\u0301!",
99 utf16Char: 5,
100 expected: 6,
101 },
102 {
103 name: "Skin tone modifier (1 grapheme, 2 runes, 4 UTF-16 units)",
104 lineText: "hi👋🏽bye",
105 utf16Char: 6,
106 expected: 10,
107 },
108 }
109
110 for _, tt := range tests {
111 t.Run(tt.name, func(t *testing.T) {
112 result := powernap.PositionToByteOffset(tt.lineText, tt.utf16Char)
113 if result != tt.expected {
114 t.Errorf("PositionToByteOffset(%q, %d) = %d, want %d",
115 tt.lineText, tt.utf16Char, result, tt.expected)
116 }
117 })
118 }
119}
120
121func TestApplyTextEdit_UTF16(t *testing.T) {
122 // Test that UTF-16 offsets are correctly converted to byte offsets
123 tests := []struct {
124 name string
125 lines []string
126 edit protocol.TextEdit
127 expected []string
128 }{
129 {
130 name: "ASCII only - no conversion needed",
131 lines: []string{"hello world"},
132 edit: protocol.TextEdit{
133 Range: protocol.Range{
134 Start: protocol.Position{Line: 0, Character: 6},
135 End: protocol.Position{Line: 0, Character: 11},
136 },
137 NewText: "universe",
138 },
139 expected: []string{"hello universe"},
140 },
141 {
142 name: "CJK characters - edit after Chinese characters",
143 lines: []string{`var x = "你好world"`},
144 edit: protocol.TextEdit{
145 Range: protocol.Range{
146 // "你好" = 2 UTF-16 units, but 6 bytes in UTF-8
147 // Position 11 is where "world" starts in UTF-16
148 Start: protocol.Position{Line: 0, Character: 11},
149 End: protocol.Position{Line: 0, Character: 16},
150 },
151 NewText: "universe",
152 },
153 expected: []string{`var x = "你好universe"`},
154 },
155 {
156 name: "Emoji - edit after emoji (2 UTF-16 units)",
157 lines: []string{`fmt.Println("👋hello")`},
158 edit: protocol.TextEdit{
159 Range: protocol.Range{
160 // 👋 = 2 UTF-16 units, 4 bytes in UTF-8
161 // Position 15 is where "hello" starts in UTF-16
162 Start: protocol.Position{Line: 0, Character: 15},
163 End: protocol.Position{Line: 0, Character: 20},
164 },
165 NewText: "world",
166 },
167 expected: []string{`fmt.Println("👋world")`},
168 },
169 {
170 name: "ZWJ family emoji - edit after grapheme cluster",
171 // "hello👨👩👧👦world" — family is 1 grapheme but 11 UTF-16 units
172 lines: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466world"},
173 edit: protocol.TextEdit{
174 Range: protocol.Range{
175 // "hello" = 5 UTF-16 units, family = 11 UTF-16 units
176 // "world" starts at UTF-16 offset 16
177 Start: protocol.Position{Line: 0, Character: 16},
178 End: protocol.Position{Line: 0, Character: 21},
179 },
180 NewText: "earth",
181 },
182 expected: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466earth"},
183 },
184 {
185 name: "ZWJ family emoji - edit splits grapheme cluster in half",
186 // LSP servers can position into the middle of a grapheme cluster.
187 // After "hello" (5 UTF-16 units), the ZWJ family emoji starts.
188 // UTF-16 offset 7 lands between 👨 (2 units) and ZWJ, inside
189 // the grapheme cluster. The byte offset for position 7 is 9
190 // (5 bytes for "hello" + 4 bytes for 👨).
191 lines: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466world"},
192 edit: protocol.TextEdit{
193 Range: protocol.Range{
194 Start: protocol.Position{Line: 0, Character: 7},
195 End: protocol.Position{Line: 0, Character: 16},
196 },
197 NewText: "",
198 },
199 // Keeps "hello" + 👨 (first rune of cluster) then removes
200 // the rest of the cluster, leaving "hello👨world".
201 expected: []string{"hello\U0001F468world"},
202 },
203 {
204 name: "Flag emoji - edit after flag",
205 // 🇺🇸 = 2 regional indicator runes, 4 UTF-16 units, 8 bytes
206 lines: []string{"hello🇺🇸world"},
207 edit: protocol.TextEdit{
208 Range: protocol.Range{
209 Start: protocol.Position{Line: 0, Character: 9},
210 End: protocol.Position{Line: 0, Character: 14},
211 },
212 NewText: "earth",
213 },
214 expected: []string{"hello🇺🇸earth"},
215 },
216 {
217 name: "Combining accent - edit after composed character",
218 // "café!" where é = e + U+0301 (2 code points, 2 UTF-16 units)
219 lines: []string{"caf\u0065\u0301!"},
220 edit: protocol.TextEdit{
221 Range: protocol.Range{
222 // "caf" = 3, "e" = 1, U+0301 = 1, total = 5 UTF-16 units
223 Start: protocol.Position{Line: 0, Character: 5},
224 End: protocol.Position{Line: 0, Character: 6},
225 },
226 NewText: "?",
227 },
228 expected: []string{"caf\u0065\u0301?"},
229 },
230 {
231 name: "Skin tone modifier - edit after modified emoji",
232 // 👋🏽 = U+1F44B U+1F3FD = 2 runes, 4 UTF-16 units, 8 bytes
233 lines: []string{"hi👋🏽bye"},
234 edit: protocol.TextEdit{
235 Range: protocol.Range{
236 // "hi" = 2, 👋🏽 = 4, total = 6 UTF-16 units
237 Start: protocol.Position{Line: 0, Character: 6},
238 End: protocol.Position{Line: 0, Character: 9},
239 },
240 NewText: "later",
241 },
242 expected: []string{"hi👋🏽later"},
243 },
244 }
245
246 for _, tt := range tests {
247 t.Run(tt.name, func(t *testing.T) {
248 result, err := applyTextEdit(tt.lines, tt.edit, powernap.UTF16)
249 if err != nil {
250 t.Fatalf("applyTextEdit failed: %v", err)
251 }
252 if len(result) != len(tt.expected) {
253 t.Errorf("expected %d lines, got %d: %v", len(tt.expected), len(result), result)
254 return
255 }
256 for i := range result {
257 if result[i] != tt.expected[i] {
258 t.Errorf("line %d: expected %q, got %q", i, tt.expected[i], result[i])
259 }
260 }
261 })
262 }
263}
264
265func TestApplyTextEdit_UTF8(t *testing.T) {
266 // Test that UTF-8 offsets are used directly without conversion
267 tests := []struct {
268 name string
269 lines []string
270 edit protocol.TextEdit
271 expected []string
272 }{
273 {
274 name: "ASCII only - direct byte offset",
275 lines: []string{"hello world"},
276 edit: protocol.TextEdit{
277 Range: protocol.Range{
278 Start: protocol.Position{Line: 0, Character: 6},
279 End: protocol.Position{Line: 0, Character: 11},
280 },
281 NewText: "universe",
282 },
283 expected: []string{"hello universe"},
284 },
285 {
286 name: "CJK characters - byte offset used directly",
287 lines: []string{`var x = "你好world"`},
288 edit: protocol.TextEdit{
289 Range: protocol.Range{
290 // With UTF-8 encoding, position 15 is the byte offset
291 Start: protocol.Position{Line: 0, Character: 15},
292 End: protocol.Position{Line: 0, Character: 20},
293 },
294 NewText: "universe",
295 },
296 expected: []string{`var x = "你好universe"`},
297 },
298 }
299
300 for _, tt := range tests {
301 t.Run(tt.name, func(t *testing.T) {
302 result, err := applyTextEdit(tt.lines, tt.edit, powernap.UTF8)
303 if err != nil {
304 t.Fatalf("applyTextEdit failed: %v", err)
305 }
306 if len(result) != len(tt.expected) {
307 t.Errorf("expected %d lines, got %d: %v", len(tt.expected), len(result), result)
308 return
309 }
310 for i := range result {
311 if result[i] != tt.expected[i] {
312 t.Errorf("line %d: expected %q, got %q", i, tt.expected[i], result[i])
313 }
314 }
315 })
316 }
317}
318
319func TestRangesOverlap(t *testing.T) {
320 t.Parallel()
321
322 tests := []struct {
323 name string
324 r1 protocol.Range
325 r2 protocol.Range
326 want bool
327 }{
328 {
329 name: "adjacent ranges do not overlap",
330 r1: protocol.Range{
331 Start: protocol.Position{Line: 0, Character: 0},
332 End: protocol.Position{Line: 0, Character: 5},
333 },
334 r2: protocol.Range{
335 Start: protocol.Position{Line: 0, Character: 5},
336 End: protocol.Position{Line: 0, Character: 10},
337 },
338 want: false,
339 },
340 {
341 name: "overlapping ranges",
342 r1: protocol.Range{
343 Start: protocol.Position{Line: 0, Character: 0},
344 End: protocol.Position{Line: 0, Character: 8},
345 },
346 r2: protocol.Range{
347 Start: protocol.Position{Line: 0, Character: 5},
348 End: protocol.Position{Line: 0, Character: 10},
349 },
350 want: true,
351 },
352 {
353 name: "non-overlapping with gap",
354 r1: protocol.Range{
355 Start: protocol.Position{Line: 0, Character: 0},
356 End: protocol.Position{Line: 0, Character: 3},
357 },
358 r2: protocol.Range{
359 Start: protocol.Position{Line: 0, Character: 7},
360 End: protocol.Position{Line: 0, Character: 10},
361 },
362 want: false,
363 },
364 }
365
366 for _, tt := range tests {
367 t.Run(tt.name, func(t *testing.T) {
368 t.Parallel()
369 got := rangesOverlap(tt.r1, tt.r2)
370 require.Equal(t, tt.want, got, "rangesOverlap(r1, r2)")
371 // Overlap should be symmetric
372 got2 := rangesOverlap(tt.r2, tt.r1)
373 require.Equal(t, tt.want, got2, "rangesOverlap(r2, r1) symmetry")
374 })
375 }
376}