edit_test.go

  1package util
  2
  3import (
  4	"testing"
  5
  6	powernap "github.com/charmbracelet/x/powernap/pkg/lsp"
  7	"github.com/charmbracelet/x/powernap/pkg/lsp/protocol"
  8	"github.com/stretchr/testify/require"
  9)
 10
 11func TestPositionToByteOffset(t *testing.T) {
 12	tests := []struct {
 13		name      string
 14		lineText  string
 15		utf16Char uint32
 16		expected  int
 17	}{
 18		{
 19			name:      "ASCII only",
 20			lineText:  "hello world",
 21			utf16Char: 6,
 22			expected:  6,
 23		},
 24		{
 25			name:      "CJK characters (3 bytes each in UTF-8, 1 UTF-16 unit)",
 26			lineText:  "你好world",
 27			utf16Char: 2,
 28			expected:  6,
 29		},
 30		{
 31			name:      "CJK - position after CJK",
 32			lineText:  "var x = \"你好world\"",
 33			utf16Char: 11,
 34			expected:  15,
 35		},
 36		{
 37			name:      "Emoji (4 bytes in UTF-8, 2 UTF-16 units)",
 38			lineText:  "👋hello",
 39			utf16Char: 2,
 40			expected:  4,
 41		},
 42		{
 43			name:      "Multiple emoji",
 44			lineText:  "👋👋world",
 45			utf16Char: 4,
 46			expected:  8,
 47		},
 48		{
 49			name:      "Mixed content",
 50			lineText:  "Hello👋你好",
 51			utf16Char: 8,
 52			expected:  12,
 53		},
 54		{
 55			name:      "Position 0",
 56			lineText:  "hello",
 57			utf16Char: 0,
 58			expected:  0,
 59		},
 60		{
 61			name:      "Position beyond end",
 62			lineText:  "hi",
 63			utf16Char: 100,
 64			expected:  2,
 65		},
 66		{
 67			name:      "Empty string",
 68			lineText:  "",
 69			utf16Char: 0,
 70			expected:  0,
 71		},
 72		{
 73			name:      "Surrogate pair at start",
 74			lineText:  "𐐷hello",
 75			utf16Char: 2,
 76			expected:  4,
 77		},
 78		{
 79			name:      "ZWJ family emoji (1 grapheme, 7 runes, 11 UTF-16 units)",
 80			lineText:  "hello👨\u200d👩\u200d👧\u200d👦world",
 81			utf16Char: 16,
 82			expected:  30,
 83		},
 84		{
 85			name:      "ZWJ family emoji - offset into middle of grapheme cluster",
 86			lineText:  "hello👨\u200d👩\u200d👧\u200d👦world",
 87			utf16Char: 8,
 88			expected:  12,
 89		},
 90		{
 91			name:      "Flag emoji (1 grapheme, 2 runes, 4 UTF-16 units)",
 92			lineText:  "hello🇺🇸world",
 93			utf16Char: 9,
 94			expected:  13,
 95		},
 96		{
 97			name:      "Combining character (1 grapheme, 2 runes, 2 UTF-16 units)",
 98			lineText:  "caf\u0065\u0301!",
 99			utf16Char: 5,
100			expected:  6,
101		},
102		{
103			name:      "Skin tone modifier (1 grapheme, 2 runes, 4 UTF-16 units)",
104			lineText:  "hi👋🏽bye",
105			utf16Char: 6,
106			expected:  10,
107		},
108	}
109
110	for _, tt := range tests {
111		t.Run(tt.name, func(t *testing.T) {
112			result := powernap.PositionToByteOffset(tt.lineText, tt.utf16Char)
113			if result != tt.expected {
114				t.Errorf("PositionToByteOffset(%q, %d) = %d, want %d",
115					tt.lineText, tt.utf16Char, result, tt.expected)
116			}
117		})
118	}
119}
120
121func TestApplyTextEdit_UTF16(t *testing.T) {
122	// Test that UTF-16 offsets are correctly converted to byte offsets
123	tests := []struct {
124		name     string
125		lines    []string
126		edit     protocol.TextEdit
127		expected []string
128	}{
129		{
130			name:  "ASCII only - no conversion needed",
131			lines: []string{"hello world"},
132			edit: protocol.TextEdit{
133				Range: protocol.Range{
134					Start: protocol.Position{Line: 0, Character: 6},
135					End:   protocol.Position{Line: 0, Character: 11},
136				},
137				NewText: "universe",
138			},
139			expected: []string{"hello universe"},
140		},
141		{
142			name:  "CJK characters - edit after Chinese characters",
143			lines: []string{`var x = "你好world"`},
144			edit: protocol.TextEdit{
145				Range: protocol.Range{
146					// "你好" = 2 UTF-16 units, but 6 bytes in UTF-8
147					// Position 11 is where "world" starts in UTF-16
148					Start: protocol.Position{Line: 0, Character: 11},
149					End:   protocol.Position{Line: 0, Character: 16},
150				},
151				NewText: "universe",
152			},
153			expected: []string{`var x = "你好universe"`},
154		},
155		{
156			name:  "Emoji - edit after emoji (2 UTF-16 units)",
157			lines: []string{`fmt.Println("👋hello")`},
158			edit: protocol.TextEdit{
159				Range: protocol.Range{
160					// 👋 = 2 UTF-16 units, 4 bytes in UTF-8
161					// Position 15 is where "hello" starts in UTF-16
162					Start: protocol.Position{Line: 0, Character: 15},
163					End:   protocol.Position{Line: 0, Character: 20},
164				},
165				NewText: "world",
166			},
167			expected: []string{`fmt.Println("👋world")`},
168		},
169		{
170			name: "ZWJ family emoji - edit after grapheme cluster",
171			// "hello👨‍👩‍👧‍👦world" — family is 1 grapheme but 11 UTF-16 units
172			lines: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466world"},
173			edit: protocol.TextEdit{
174				Range: protocol.Range{
175					// "hello" = 5 UTF-16 units, family = 11 UTF-16 units
176					// "world" starts at UTF-16 offset 16
177					Start: protocol.Position{Line: 0, Character: 16},
178					End:   protocol.Position{Line: 0, Character: 21},
179				},
180				NewText: "earth",
181			},
182			expected: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466earth"},
183		},
184		{
185			name: "ZWJ family emoji - edit splits grapheme cluster in half",
186			// LSP servers can position into the middle of a grapheme cluster.
187			// After "hello" (5 UTF-16 units), the ZWJ family emoji starts.
188			// UTF-16 offset 7 lands between 👨 (2 units) and ZWJ, inside
189			// the grapheme cluster. The byte offset for position 7 is 9
190			// (5 bytes for "hello" + 4 bytes for 👨).
191			lines: []string{"hello\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466world"},
192			edit: protocol.TextEdit{
193				Range: protocol.Range{
194					Start: protocol.Position{Line: 0, Character: 7},
195					End:   protocol.Position{Line: 0, Character: 16},
196				},
197				NewText: "",
198			},
199			// Keeps "hello" + 👨 (first rune of cluster) then removes
200			// the rest of the cluster, leaving "hello👨world".
201			expected: []string{"hello\U0001F468world"},
202		},
203		{
204			name: "Flag emoji - edit after flag",
205			// 🇺🇸 = 2 regional indicator runes, 4 UTF-16 units, 8 bytes
206			lines: []string{"hello🇺🇸world"},
207			edit: protocol.TextEdit{
208				Range: protocol.Range{
209					Start: protocol.Position{Line: 0, Character: 9},
210					End:   protocol.Position{Line: 0, Character: 14},
211				},
212				NewText: "earth",
213			},
214			expected: []string{"hello🇺🇸earth"},
215		},
216		{
217			name: "Combining accent - edit after composed character",
218			// "café!" where é = e + U+0301 (2 code points, 2 UTF-16 units)
219			lines: []string{"caf\u0065\u0301!"},
220			edit: protocol.TextEdit{
221				Range: protocol.Range{
222					// "caf" = 3, "e" = 1, U+0301 = 1, total = 5 UTF-16 units
223					Start: protocol.Position{Line: 0, Character: 5},
224					End:   protocol.Position{Line: 0, Character: 6},
225				},
226				NewText: "?",
227			},
228			expected: []string{"caf\u0065\u0301?"},
229		},
230		{
231			name: "Skin tone modifier - edit after modified emoji",
232			// 👋🏽 = U+1F44B U+1F3FD = 2 runes, 4 UTF-16 units, 8 bytes
233			lines: []string{"hi👋🏽bye"},
234			edit: protocol.TextEdit{
235				Range: protocol.Range{
236					// "hi" = 2, 👋🏽 = 4, total = 6 UTF-16 units
237					Start: protocol.Position{Line: 0, Character: 6},
238					End:   protocol.Position{Line: 0, Character: 9},
239				},
240				NewText: "later",
241			},
242			expected: []string{"hi👋🏽later"},
243		},
244	}
245
246	for _, tt := range tests {
247		t.Run(tt.name, func(t *testing.T) {
248			result, err := applyTextEdit(tt.lines, tt.edit, powernap.UTF16)
249			if err != nil {
250				t.Fatalf("applyTextEdit failed: %v", err)
251			}
252			if len(result) != len(tt.expected) {
253				t.Errorf("expected %d lines, got %d: %v", len(tt.expected), len(result), result)
254				return
255			}
256			for i := range result {
257				if result[i] != tt.expected[i] {
258					t.Errorf("line %d: expected %q, got %q", i, tt.expected[i], result[i])
259				}
260			}
261		})
262	}
263}
264
265func TestApplyTextEdit_UTF8(t *testing.T) {
266	// Test that UTF-8 offsets are used directly without conversion
267	tests := []struct {
268		name     string
269		lines    []string
270		edit     protocol.TextEdit
271		expected []string
272	}{
273		{
274			name:  "ASCII only - direct byte offset",
275			lines: []string{"hello world"},
276			edit: protocol.TextEdit{
277				Range: protocol.Range{
278					Start: protocol.Position{Line: 0, Character: 6},
279					End:   protocol.Position{Line: 0, Character: 11},
280				},
281				NewText: "universe",
282			},
283			expected: []string{"hello universe"},
284		},
285		{
286			name:  "CJK characters - byte offset used directly",
287			lines: []string{`var x = "你好world"`},
288			edit: protocol.TextEdit{
289				Range: protocol.Range{
290					// With UTF-8 encoding, position 15 is the byte offset
291					Start: protocol.Position{Line: 0, Character: 15},
292					End:   protocol.Position{Line: 0, Character: 20},
293				},
294				NewText: "universe",
295			},
296			expected: []string{`var x = "你好universe"`},
297		},
298	}
299
300	for _, tt := range tests {
301		t.Run(tt.name, func(t *testing.T) {
302			result, err := applyTextEdit(tt.lines, tt.edit, powernap.UTF8)
303			if err != nil {
304				t.Fatalf("applyTextEdit failed: %v", err)
305			}
306			if len(result) != len(tt.expected) {
307				t.Errorf("expected %d lines, got %d: %v", len(tt.expected), len(result), result)
308				return
309			}
310			for i := range result {
311				if result[i] != tt.expected[i] {
312					t.Errorf("line %d: expected %q, got %q", i, tt.expected[i], result[i])
313				}
314			}
315		})
316	}
317}
318
319func TestRangesOverlap(t *testing.T) {
320	t.Parallel()
321
322	tests := []struct {
323		name string
324		r1   protocol.Range
325		r2   protocol.Range
326		want bool
327	}{
328		{
329			name: "adjacent ranges do not overlap",
330			r1: protocol.Range{
331				Start: protocol.Position{Line: 0, Character: 0},
332				End:   protocol.Position{Line: 0, Character: 5},
333			},
334			r2: protocol.Range{
335				Start: protocol.Position{Line: 0, Character: 5},
336				End:   protocol.Position{Line: 0, Character: 10},
337			},
338			want: false,
339		},
340		{
341			name: "overlapping ranges",
342			r1: protocol.Range{
343				Start: protocol.Position{Line: 0, Character: 0},
344				End:   protocol.Position{Line: 0, Character: 8},
345			},
346			r2: protocol.Range{
347				Start: protocol.Position{Line: 0, Character: 5},
348				End:   protocol.Position{Line: 0, Character: 10},
349			},
350			want: true,
351		},
352		{
353			name: "non-overlapping with gap",
354			r1: protocol.Range{
355				Start: protocol.Position{Line: 0, Character: 0},
356				End:   protocol.Position{Line: 0, Character: 3},
357			},
358			r2: protocol.Range{
359				Start: protocol.Position{Line: 0, Character: 7},
360				End:   protocol.Position{Line: 0, Character: 10},
361			},
362			want: false,
363		},
364	}
365
366	for _, tt := range tests {
367		t.Run(tt.name, func(t *testing.T) {
368			t.Parallel()
369			got := rangesOverlap(tt.r1, tt.r2)
370			require.Equal(t, tt.want, got, "rangesOverlap(r1, r2)")
371			// Overlap should be symmetric
372			got2 := rangesOverlap(tt.r2, tt.r1)
373			require.Equal(t, tt.want, got2, "rangesOverlap(r2, r1) symmetry")
374		})
375	}
376}