1use super::{
2 Highlights,
3 fold_map::{self, Chunk, FoldChunks, FoldEdit, FoldPoint, FoldSnapshot},
4};
5
6use language::Point;
7use multi_buffer::MultiBufferSnapshot;
8use std::{cmp, mem, num::NonZeroU32, ops::Range};
9use sum_tree::Bias;
10
11const MAX_EXPANSION_COLUMN: u32 = 256;
12
13// Handles a tab width <= 128
14const SPACES: &[u8; rope::Chunk::MASK_BITS] = &[b' '; _];
15const MAX_TABS: NonZeroU32 = NonZeroU32::new(SPACES.len() as u32).unwrap();
16
17/// Keeps track of hard tabs in a text buffer.
18///
19/// See the [`display_map` module documentation](crate::display_map) for more information.
20pub struct TabMap(TabSnapshot);
21
22impl TabMap {
23 pub fn new(fold_snapshot: FoldSnapshot, tab_size: NonZeroU32) -> (Self, TabSnapshot) {
24 let snapshot = TabSnapshot {
25 fold_snapshot,
26 tab_size: tab_size.min(MAX_TABS),
27 max_expansion_column: MAX_EXPANSION_COLUMN,
28 version: 0,
29 };
30 (Self(snapshot.clone()), snapshot)
31 }
32
33 #[cfg(test)]
34 pub fn set_max_expansion_column(&mut self, column: u32) -> TabSnapshot {
35 self.0.max_expansion_column = column;
36 self.0.clone()
37 }
38
39 pub fn sync(
40 &mut self,
41 fold_snapshot: FoldSnapshot,
42 mut fold_edits: Vec<FoldEdit>,
43 tab_size: NonZeroU32,
44 ) -> (TabSnapshot, Vec<TabEdit>) {
45 let old_snapshot = &mut self.0;
46 let mut new_snapshot = TabSnapshot {
47 fold_snapshot,
48 tab_size: tab_size.min(MAX_TABS),
49 max_expansion_column: old_snapshot.max_expansion_column,
50 version: old_snapshot.version,
51 };
52
53 if old_snapshot.fold_snapshot.version != new_snapshot.fold_snapshot.version {
54 new_snapshot.version += 1;
55 }
56
57 let tab_edits = if old_snapshot.tab_size == new_snapshot.tab_size {
58 // Expand each edit to include the next tab on the same line as the edit,
59 // and any subsequent tabs on that line that moved across the tab expansion
60 // boundary.
61 for fold_edit in &mut fold_edits {
62 let old_end = fold_edit.old.end.to_point(&old_snapshot.fold_snapshot);
63 let old_end_row_successor_offset = cmp::min(
64 FoldPoint::new(old_end.row() + 1, 0),
65 old_snapshot.fold_snapshot.max_point(),
66 )
67 .to_offset(&old_snapshot.fold_snapshot);
68 let new_end = fold_edit.new.end.to_point(&new_snapshot.fold_snapshot);
69
70 let mut offset_from_edit = 0;
71 let mut first_tab_offset = None;
72 let mut last_tab_with_changed_expansion_offset = None;
73 'outer: for chunk in old_snapshot.fold_snapshot.chunks(
74 fold_edit.old.end..old_end_row_successor_offset,
75 false,
76 Highlights::default(),
77 ) {
78 // todo(performance use tabs bitmask)
79 for (ix, _) in chunk.text.match_indices('\t') {
80 let offset_from_edit = offset_from_edit + (ix as u32);
81 if first_tab_offset.is_none() {
82 first_tab_offset = Some(offset_from_edit);
83 }
84
85 let old_column = old_end.column() + offset_from_edit;
86 let new_column = new_end.column() + offset_from_edit;
87 let was_expanded = old_column < old_snapshot.max_expansion_column;
88 let is_expanded = new_column < new_snapshot.max_expansion_column;
89 if was_expanded != is_expanded {
90 last_tab_with_changed_expansion_offset = Some(offset_from_edit);
91 } else if !was_expanded && !is_expanded {
92 break 'outer;
93 }
94 }
95
96 offset_from_edit += chunk.text.len() as u32;
97 if old_end.column() + offset_from_edit >= old_snapshot.max_expansion_column
98 && new_end.column() + offset_from_edit >= new_snapshot.max_expansion_column
99 {
100 break;
101 }
102 }
103
104 if let Some(offset) = last_tab_with_changed_expansion_offset.or(first_tab_offset) {
105 fold_edit.old.end.0 += offset as usize + 1;
106 fold_edit.new.end.0 += offset as usize + 1;
107 }
108 }
109
110 let _old_alloc_ptr = fold_edits.as_ptr();
111 // Combine any edits that overlap due to the expansion.
112 let mut fold_edits = fold_edits.into_iter();
113 if let Some(mut first_edit) = fold_edits.next() {
114 // This code relies on reusing allocations from the Vec<_> - at the time of writing .flatten() prevents them.
115 #[allow(clippy::filter_map_identity)]
116 let mut v: Vec<_> = fold_edits
117 .scan(&mut first_edit, |state, edit| {
118 if state.old.end >= edit.old.start {
119 state.old.end = edit.old.end;
120 state.new.end = edit.new.end;
121 Some(None) // Skip this edit, it's merged
122 } else {
123 let new_state = edit;
124 let result = Some(Some(state.clone())); // Yield the previous edit
125 **state = new_state;
126 result
127 }
128 })
129 .filter_map(|x| x)
130 .collect();
131 v.push(first_edit);
132 debug_assert_eq!(v.as_ptr(), _old_alloc_ptr, "Fold edits were reallocated");
133 v.into_iter()
134 .map(|fold_edit| {
135 let old_start = fold_edit.old.start.to_point(&old_snapshot.fold_snapshot);
136 let old_end = fold_edit.old.end.to_point(&old_snapshot.fold_snapshot);
137 let new_start = fold_edit.new.start.to_point(&new_snapshot.fold_snapshot);
138 let new_end = fold_edit.new.end.to_point(&new_snapshot.fold_snapshot);
139 TabEdit {
140 old: old_snapshot.to_tab_point(old_start)
141 ..old_snapshot.to_tab_point(old_end),
142 new: new_snapshot.to_tab_point(new_start)
143 ..new_snapshot.to_tab_point(new_end),
144 }
145 })
146 .collect()
147 } else {
148 vec![]
149 }
150 } else {
151 new_snapshot.version += 1;
152 vec![TabEdit {
153 old: TabPoint::zero()..old_snapshot.max_point(),
154 new: TabPoint::zero()..new_snapshot.max_point(),
155 }]
156 };
157 *old_snapshot = new_snapshot;
158 (old_snapshot.clone(), tab_edits)
159 }
160}
161
162#[derive(Clone)]
163pub struct TabSnapshot {
164 pub fold_snapshot: FoldSnapshot,
165 pub tab_size: NonZeroU32,
166 pub max_expansion_column: u32,
167 pub version: usize,
168}
169
170impl TabSnapshot {
171 pub fn buffer_snapshot(&self) -> &MultiBufferSnapshot {
172 &self.fold_snapshot.inlay_snapshot.buffer
173 }
174
175 pub fn line_len(&self, row: u32) -> u32 {
176 let max_point = self.max_point();
177 if row < max_point.row() {
178 self.to_tab_point(FoldPoint::new(row, self.fold_snapshot.line_len(row)))
179 .0
180 .column
181 } else {
182 max_point.column()
183 }
184 }
185
186 pub fn text_summary(&self) -> TextSummary {
187 self.text_summary_for_range(TabPoint::zero()..self.max_point())
188 }
189
190 pub fn text_summary_for_range(&self, range: Range<TabPoint>) -> TextSummary {
191 let input_start = self.to_fold_point(range.start, Bias::Left).0;
192 let input_end = self.to_fold_point(range.end, Bias::Right).0;
193 let input_summary = self
194 .fold_snapshot
195 .text_summary_for_range(input_start..input_end);
196
197 let line_end = if range.start.row() == range.end.row() {
198 range.end
199 } else {
200 self.max_point()
201 };
202 let first_line_chars = self
203 .chunks(range.start..line_end, false, Highlights::default())
204 .flat_map(|chunk| chunk.text.chars())
205 .take_while(|&c| c != '\n')
206 .count() as u32;
207
208 let last_line_chars = if range.start.row() == range.end.row() {
209 first_line_chars
210 } else {
211 self.chunks(
212 TabPoint::new(range.end.row(), 0)..range.end,
213 false,
214 Highlights::default(),
215 )
216 .flat_map(|chunk| chunk.text.chars())
217 .count() as u32
218 };
219
220 TextSummary {
221 lines: range.end.0 - range.start.0,
222 first_line_chars,
223 last_line_chars,
224 longest_row: input_summary.longest_row,
225 longest_row_chars: input_summary.longest_row_chars,
226 }
227 }
228
229 pub(crate) fn chunks<'a>(
230 &'a self,
231 range: Range<TabPoint>,
232 language_aware: bool,
233 highlights: Highlights<'a>,
234 ) -> TabChunks<'a> {
235 let (input_start, expanded_char_column, to_next_stop) =
236 self.to_fold_point(range.start, Bias::Left);
237 let input_column = input_start.column();
238 let input_start = input_start.to_offset(&self.fold_snapshot);
239 let input_end = self
240 .to_fold_point(range.end, Bias::Right)
241 .0
242 .to_offset(&self.fold_snapshot);
243 let to_next_stop = if range.start.0 + Point::new(0, to_next_stop) > range.end.0 {
244 range.end.column() - range.start.column()
245 } else {
246 to_next_stop
247 };
248
249 TabChunks {
250 snapshot: self,
251 fold_chunks: self.fold_snapshot.chunks(
252 input_start..input_end,
253 language_aware,
254 highlights,
255 ),
256 input_column,
257 column: expanded_char_column,
258 max_expansion_column: self.max_expansion_column,
259 output_position: range.start.0,
260 max_output_position: range.end.0,
261 tab_size: self.tab_size,
262 chunk: Chunk {
263 text: unsafe { std::str::from_utf8_unchecked(&SPACES[..to_next_stop as usize]) },
264 is_tab: true,
265 ..Default::default()
266 },
267 inside_leading_tab: to_next_stop > 0,
268 }
269 }
270
271 pub fn rows(&self, row: u32) -> fold_map::FoldRows<'_> {
272 self.fold_snapshot.row_infos(row)
273 }
274
275 #[cfg(test)]
276 pub fn text(&self) -> String {
277 self.chunks(
278 TabPoint::zero()..self.max_point(),
279 false,
280 Highlights::default(),
281 )
282 .map(|chunk| chunk.text)
283 .collect()
284 }
285
286 pub fn max_point(&self) -> TabPoint {
287 self.to_tab_point(self.fold_snapshot.max_point())
288 }
289
290 pub fn clip_point(&self, point: TabPoint, bias: Bias) -> TabPoint {
291 self.to_tab_point(
292 self.fold_snapshot
293 .clip_point(self.to_fold_point(point, bias).0, bias),
294 )
295 }
296
297 pub fn to_tab_point(&self, input: FoldPoint) -> TabPoint {
298 let chunks = self.fold_snapshot.chunks_at(FoldPoint::new(input.row(), 0));
299 let tab_cursor = TabStopCursor::new(chunks);
300 let expanded = self.expand_tabs(tab_cursor, input.column());
301 TabPoint::new(input.row(), expanded)
302 }
303
304 pub fn to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) {
305 let chunks = self
306 .fold_snapshot
307 .chunks_at(FoldPoint::new(output.row(), 0));
308
309 let tab_cursor = TabStopCursor::new(chunks);
310 let expanded = output.column();
311 let (collapsed, expanded_char_column, to_next_stop) =
312 self.collapse_tabs(tab_cursor, expanded, bias);
313
314 (
315 FoldPoint::new(output.row(), collapsed),
316 expanded_char_column,
317 to_next_stop,
318 )
319 }
320
321 pub fn make_tab_point(&self, point: Point, bias: Bias) -> TabPoint {
322 let inlay_point = self.fold_snapshot.inlay_snapshot.to_inlay_point(point);
323 let fold_point = self.fold_snapshot.to_fold_point(inlay_point, bias);
324 self.to_tab_point(fold_point)
325 }
326
327 pub fn to_point(&self, point: TabPoint, bias: Bias) -> Point {
328 let fold_point = self.to_fold_point(point, bias).0;
329 let inlay_point = fold_point.to_inlay_point(&self.fold_snapshot);
330 self.fold_snapshot
331 .inlay_snapshot
332 .to_buffer_point(inlay_point)
333 }
334
335 fn expand_tabs<'a, I>(&self, mut cursor: TabStopCursor<'a, I>, column: u32) -> u32
336 where
337 I: Iterator<Item = Chunk<'a>>,
338 {
339 let tab_size = self.tab_size.get();
340
341 let end_column = column.min(self.max_expansion_column);
342 let mut seek_target = end_column;
343 let mut tab_count = 0;
344 let mut expanded_tab_len = 0;
345
346 while let Some(tab_stop) = cursor.seek(seek_target) {
347 let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count;
348 let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size);
349 tab_count += 1;
350 expanded_tab_len += tab_len;
351
352 seek_target = end_column - cursor.byte_offset;
353 }
354
355 let left_over_char_bytes = if !cursor.is_char_boundary() {
356 cursor.bytes_until_next_char().unwrap_or(0) as u32
357 } else {
358 0
359 };
360
361 let collapsed_bytes = cursor.byte_offset() + left_over_char_bytes;
362 let expanded_bytes =
363 cursor.byte_offset() + expanded_tab_len - tab_count + left_over_char_bytes;
364
365 expanded_bytes + column.saturating_sub(collapsed_bytes)
366 }
367
368 fn collapse_tabs<'a, I>(
369 &self,
370 mut cursor: TabStopCursor<'a, I>,
371 column: u32,
372 bias: Bias,
373 ) -> (u32, u32, u32)
374 where
375 I: Iterator<Item = Chunk<'a>>,
376 {
377 let tab_size = self.tab_size.get();
378 let mut collapsed_column = column;
379 let mut seek_target = column.min(self.max_expansion_column);
380 let mut tab_count = 0;
381 let mut expanded_tab_len = 0;
382
383 while let Some(tab_stop) = cursor.seek(seek_target) {
384 // Calculate how much we want to expand this tab stop (into spaces)
385 let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count;
386 let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size);
387 // Increment tab count
388 tab_count += 1;
389 // The count of how many spaces we've added to this line in place of tab bytes
390 expanded_tab_len += tab_len;
391
392 // The count of bytes at this point in the iteration while considering tab_count and previous expansions
393 let expanded_bytes = tab_stop.byte_offset + expanded_tab_len - tab_count;
394
395 // Did we expand past the search target?
396 if expanded_bytes > column {
397 let mut expanded_chars = tab_stop.char_offset + expanded_tab_len - tab_count;
398 // We expanded past the search target, so need to account for the offshoot
399 expanded_chars -= expanded_bytes - column;
400 return match bias {
401 Bias::Left => (
402 cursor.byte_offset() - 1,
403 expanded_chars,
404 expanded_bytes - column,
405 ),
406 Bias::Right => (cursor.byte_offset(), expanded_chars, 0),
407 };
408 } else {
409 // otherwise we only want to move the cursor collapse column forward
410 collapsed_column = collapsed_column - tab_len + 1;
411 seek_target = (collapsed_column - cursor.byte_offset)
412 .min(self.max_expansion_column - cursor.byte_offset);
413 }
414 }
415
416 let collapsed_bytes = cursor.byte_offset();
417 let expanded_bytes = cursor.byte_offset() + expanded_tab_len - tab_count;
418 let expanded_chars = cursor.char_offset() + expanded_tab_len - tab_count;
419 (
420 collapsed_bytes + column.saturating_sub(expanded_bytes),
421 expanded_chars,
422 0,
423 )
424 }
425}
426
427#[derive(Copy, Clone, Debug, Default, Eq, Ord, PartialOrd, PartialEq)]
428pub struct TabPoint(pub Point);
429
430impl TabPoint {
431 pub fn new(row: u32, column: u32) -> Self {
432 Self(Point::new(row, column))
433 }
434
435 pub fn zero() -> Self {
436 Self::new(0, 0)
437 }
438
439 pub fn row(self) -> u32 {
440 self.0.row
441 }
442
443 pub fn column(self) -> u32 {
444 self.0.column
445 }
446}
447
448impl From<Point> for TabPoint {
449 fn from(point: Point) -> Self {
450 Self(point)
451 }
452}
453
454pub type TabEdit = text::Edit<TabPoint>;
455
456#[derive(Clone, Debug, Default, Eq, PartialEq)]
457pub struct TextSummary {
458 pub lines: Point,
459 pub first_line_chars: u32,
460 pub last_line_chars: u32,
461 pub longest_row: u32,
462 pub longest_row_chars: u32,
463}
464
465impl<'a> From<&'a str> for TextSummary {
466 fn from(text: &'a str) -> Self {
467 let sum = text::TextSummary::from(text);
468
469 TextSummary {
470 lines: sum.lines,
471 first_line_chars: sum.first_line_chars,
472 last_line_chars: sum.last_line_chars,
473 longest_row: sum.longest_row,
474 longest_row_chars: sum.longest_row_chars,
475 }
476 }
477}
478
479impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
480 fn add_assign(&mut self, other: &'a Self) {
481 let joined_chars = self.last_line_chars + other.first_line_chars;
482 if joined_chars > self.longest_row_chars {
483 self.longest_row = self.lines.row;
484 self.longest_row_chars = joined_chars;
485 }
486 if other.longest_row_chars > self.longest_row_chars {
487 self.longest_row = self.lines.row + other.longest_row;
488 self.longest_row_chars = other.longest_row_chars;
489 }
490
491 if self.lines.row == 0 {
492 self.first_line_chars += other.first_line_chars;
493 }
494
495 if other.lines.row == 0 {
496 self.last_line_chars += other.first_line_chars;
497 } else {
498 self.last_line_chars = other.last_line_chars;
499 }
500
501 self.lines += &other.lines;
502 }
503}
504
505pub struct TabChunks<'a> {
506 snapshot: &'a TabSnapshot,
507 max_expansion_column: u32,
508 max_output_position: Point,
509 tab_size: NonZeroU32,
510 // region: iteration state
511 fold_chunks: FoldChunks<'a>,
512 chunk: Chunk<'a>,
513 column: u32,
514 output_position: Point,
515 input_column: u32,
516 inside_leading_tab: bool,
517 // endregion: iteration state
518}
519
520impl TabChunks<'_> {
521 pub(crate) fn seek(&mut self, range: Range<TabPoint>) {
522 let (input_start, expanded_char_column, to_next_stop) =
523 self.snapshot.to_fold_point(range.start, Bias::Left);
524 let input_column = input_start.column();
525 let input_start = input_start.to_offset(&self.snapshot.fold_snapshot);
526 let input_end = self
527 .snapshot
528 .to_fold_point(range.end, Bias::Right)
529 .0
530 .to_offset(&self.snapshot.fold_snapshot);
531 let to_next_stop = if range.start.0 + Point::new(0, to_next_stop) > range.end.0 {
532 range.end.column() - range.start.column()
533 } else {
534 to_next_stop
535 };
536
537 self.fold_chunks.seek(input_start..input_end);
538 self.input_column = input_column;
539 self.column = expanded_char_column;
540 self.output_position = range.start.0;
541 self.max_output_position = range.end.0;
542 self.chunk = Chunk {
543 text: unsafe { std::str::from_utf8_unchecked(&SPACES[..to_next_stop as usize]) },
544 is_tab: true,
545 chars: 1u128.unbounded_shl(to_next_stop) - 1,
546 ..Default::default()
547 };
548 self.inside_leading_tab = to_next_stop > 0;
549 }
550}
551
552impl<'a> Iterator for TabChunks<'a> {
553 type Item = Chunk<'a>;
554
555 fn next(&mut self) -> Option<Self::Item> {
556 if self.chunk.text.is_empty() {
557 if let Some(chunk) = self.fold_chunks.next() {
558 self.chunk = chunk;
559 if self.inside_leading_tab {
560 self.chunk.text = &self.chunk.text[1..];
561 self.inside_leading_tab = false;
562 self.input_column += 1;
563 }
564 } else {
565 return None;
566 }
567 }
568
569 //todo(improve performance by using tab cursor)
570 for (ix, c) in self.chunk.text.char_indices() {
571 match c {
572 '\t' if ix > 0 => {
573 let (prefix, suffix) = self.chunk.text.split_at(ix);
574
575 let mask = 1u128.unbounded_shl(ix as u32).wrapping_sub(1);
576 let chars = self.chunk.chars & mask;
577 let tabs = self.chunk.tabs & mask;
578 self.chunk.tabs = self.chunk.tabs.unbounded_shr(ix as u32);
579 self.chunk.chars = self.chunk.chars.unbounded_shr(ix as u32);
580 self.chunk.text = suffix;
581 return Some(Chunk {
582 text: prefix,
583 chars,
584 tabs,
585 ..self.chunk.clone()
586 });
587 }
588 '\t' => {
589 self.chunk.text = &self.chunk.text[1..];
590 self.chunk.tabs >>= 1;
591 self.chunk.chars >>= 1;
592 let tab_size = if self.input_column < self.max_expansion_column {
593 self.tab_size.get()
594 } else {
595 1
596 };
597 let mut len = tab_size - self.column % tab_size;
598 let next_output_position = cmp::min(
599 self.output_position + Point::new(0, len),
600 self.max_output_position,
601 );
602 len = next_output_position.column - self.output_position.column;
603 self.column += len;
604 self.input_column += 1;
605 self.output_position = next_output_position;
606 return Some(Chunk {
607 text: unsafe { std::str::from_utf8_unchecked(&SPACES[..len as usize]) },
608 is_tab: true,
609 chars: 1u128.unbounded_shl(len) - 1,
610 tabs: 0,
611 ..self.chunk.clone()
612 });
613 }
614 '\n' => {
615 self.column = 0;
616 self.input_column = 0;
617 self.output_position += Point::new(1, 0);
618 }
619 _ => {
620 self.column += 1;
621 if !self.inside_leading_tab {
622 self.input_column += c.len_utf8() as u32;
623 }
624 self.output_position.column += c.len_utf8() as u32;
625 }
626 }
627 }
628
629 Some(mem::take(&mut self.chunk))
630 }
631}
632
633#[cfg(test)]
634mod tests {
635 use super::*;
636 use crate::{
637 MultiBuffer,
638 display_map::{
639 fold_map::{FoldMap, FoldOffset},
640 inlay_map::InlayMap,
641 },
642 };
643 use rand::{Rng, prelude::StdRng};
644 use util;
645
646 impl TabSnapshot {
647 fn expected_collapse_tabs(
648 &self,
649 chars: impl Iterator<Item = char>,
650 column: u32,
651 bias: Bias,
652 ) -> (u32, u32, u32) {
653 let tab_size = self.tab_size.get();
654
655 let mut expanded_bytes = 0;
656 let mut expanded_chars = 0;
657 let mut collapsed_bytes = 0;
658 for c in chars {
659 if expanded_bytes >= column {
660 break;
661 }
662 if collapsed_bytes >= self.max_expansion_column {
663 break;
664 }
665
666 if c == '\t' {
667 let tab_len = tab_size - (expanded_chars % tab_size);
668 expanded_chars += tab_len;
669 expanded_bytes += tab_len;
670 if expanded_bytes > column {
671 expanded_chars -= expanded_bytes - column;
672 return match bias {
673 Bias::Left => {
674 (collapsed_bytes, expanded_chars, expanded_bytes - column)
675 }
676 Bias::Right => (collapsed_bytes + 1, expanded_chars, 0),
677 };
678 }
679 } else {
680 expanded_chars += 1;
681 expanded_bytes += c.len_utf8() as u32;
682 }
683
684 if expanded_bytes > column && matches!(bias, Bias::Left) {
685 expanded_chars -= 1;
686 break;
687 }
688
689 collapsed_bytes += c.len_utf8() as u32;
690 }
691
692 (
693 collapsed_bytes + column.saturating_sub(expanded_bytes),
694 expanded_chars,
695 0,
696 )
697 }
698
699 pub fn expected_to_tab_point(&self, input: FoldPoint) -> TabPoint {
700 let chars = self.fold_snapshot.chars_at(FoldPoint::new(input.row(), 0));
701 let expanded = self.expected_expand_tabs(chars, input.column());
702 TabPoint::new(input.row(), expanded)
703 }
704
705 fn expected_expand_tabs(&self, chars: impl Iterator<Item = char>, column: u32) -> u32 {
706 let tab_size = self.tab_size.get();
707
708 let mut expanded_chars = 0;
709 let mut expanded_bytes = 0;
710 let mut collapsed_bytes = 0;
711 let end_column = column.min(self.max_expansion_column);
712 for c in chars {
713 if collapsed_bytes >= end_column {
714 break;
715 }
716 if c == '\t' {
717 let tab_len = tab_size - expanded_chars % tab_size;
718 expanded_bytes += tab_len;
719 expanded_chars += tab_len;
720 } else {
721 expanded_bytes += c.len_utf8() as u32;
722 expanded_chars += 1;
723 }
724 collapsed_bytes += c.len_utf8() as u32;
725 }
726
727 expanded_bytes + column.saturating_sub(collapsed_bytes)
728 }
729
730 fn expected_to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) {
731 let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0));
732 let expanded = output.column();
733 let (collapsed, expanded_char_column, to_next_stop) =
734 self.expected_collapse_tabs(chars, expanded, bias);
735 (
736 FoldPoint::new(output.row(), collapsed),
737 expanded_char_column,
738 to_next_stop,
739 )
740 }
741 }
742
743 #[gpui::test]
744 fn test_expand_tabs(cx: &mut gpui::App) {
745 let test_values = [
746 ("κg🏀 f\nwo🏀❌by🍐❎β🍗c\tβ❎ \ncλ🎉", 17),
747 (" \twςe", 4),
748 ("fε", 1),
749 ("i❎\t", 3),
750 ];
751 let buffer = MultiBuffer::build_simple("", cx);
752 let buffer_snapshot = buffer.read(cx).snapshot(cx);
753 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
754 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
755 let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
756
757 for (text, column) in test_values {
758 let mut tabs = 0u128;
759 let mut chars = 0u128;
760 for (idx, c) in text.char_indices() {
761 if c == '\t' {
762 tabs |= 1 << idx;
763 }
764 chars |= 1 << idx;
765 }
766
767 let chunks = [Chunk {
768 text,
769 tabs,
770 chars,
771 ..Default::default()
772 }];
773
774 let cursor = TabStopCursor::new(chunks);
775
776 assert_eq!(
777 tab_snapshot.expected_expand_tabs(text.chars(), column),
778 tab_snapshot.expand_tabs(cursor, column)
779 );
780 }
781 }
782
783 #[gpui::test]
784 fn test_collapse_tabs(cx: &mut gpui::App) {
785 let input = "A\tBC\tDEF\tG\tHI\tJ\tK\tL\tM";
786
787 let buffer = MultiBuffer::build_simple(input, cx);
788 let buffer_snapshot = buffer.read(cx).snapshot(cx);
789 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
790 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
791 let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
792
793 for (ix, _) in input.char_indices() {
794 let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point();
795
796 assert_eq!(
797 tab_snapshot.expected_to_fold_point(range.start, Bias::Left),
798 tab_snapshot.to_fold_point(range.start, Bias::Left),
799 "Failed with tab_point at column {ix}"
800 );
801 assert_eq!(
802 tab_snapshot.expected_to_fold_point(range.start, Bias::Right),
803 tab_snapshot.to_fold_point(range.start, Bias::Right),
804 "Failed with tab_point at column {ix}"
805 );
806
807 assert_eq!(
808 tab_snapshot.expected_to_fold_point(range.end, Bias::Left),
809 tab_snapshot.to_fold_point(range.end, Bias::Left),
810 "Failed with tab_point at column {ix}"
811 );
812 assert_eq!(
813 tab_snapshot.expected_to_fold_point(range.end, Bias::Right),
814 tab_snapshot.to_fold_point(range.end, Bias::Right),
815 "Failed with tab_point at column {ix}"
816 );
817 }
818 }
819
820 #[gpui::test]
821 fn test_to_fold_point_panic_reproduction(cx: &mut gpui::App) {
822 // This test reproduces a specific panic where to_fold_point returns incorrect results
823 let _text = "use macro_rules_attribute::apply;\nuse serde_json::Value;\nuse smol::{\n io::AsyncReadExt,\n process::{Command, Stdio},\n};\nuse smol_macros::main;\nuse std::io;\n\nfn test_random() {\n // Generate a random value\n let random_value = std::time::SystemTime::now()\n .duration_since(std::time::UNIX_EPOCH)\n .unwrap()\n .as_secs()\n % 100;\n\n // Create some complex nested data structures\n let mut vector = Vec::new();\n for i in 0..random_value {\n vector.push(i);\n }\n ";
824
825 let text = "γ\tw⭐\n🍐🍗 \t";
826 let buffer = MultiBuffer::build_simple(text, cx);
827 let buffer_snapshot = buffer.read(cx).snapshot(cx);
828 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
829 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
830 let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
831
832 // This should panic with the expected vs actual mismatch
833 let tab_point = TabPoint::new(0, 9);
834 let result = tab_snapshot.to_fold_point(tab_point, Bias::Left);
835 let expected = tab_snapshot.expected_to_fold_point(tab_point, Bias::Left);
836
837 assert_eq!(result, expected);
838 }
839
840 #[gpui::test(iterations = 100)]
841 fn test_collapse_tabs_random(cx: &mut gpui::App, mut rng: StdRng) {
842 // Generate random input string with up to 200 characters including tabs
843 // to stay within the MAX_EXPANSION_COLUMN limit of 256
844 let len = rng.random_range(0..=2048);
845 let tab_size = NonZeroU32::new(rng.random_range(1..=4)).unwrap();
846 let mut input = String::with_capacity(len);
847
848 for _ in 0..len {
849 if rng.random_bool(0.1) {
850 // 10% chance of inserting a tab
851 input.push('\t');
852 } else {
853 // 90% chance of inserting a random ASCII character (excluding tab, newline, carriage return)
854 let ch = loop {
855 let ascii_code = rng.random_range(32..=126); // printable ASCII range
856 let ch = ascii_code as u8 as char;
857 if ch != '\t' {
858 break ch;
859 }
860 };
861 input.push(ch);
862 }
863 }
864
865 let buffer = MultiBuffer::build_simple(&input, cx);
866 let buffer_snapshot = buffer.read(cx).snapshot(cx);
867 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
868 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
869 let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
870 tab_snapshot.max_expansion_column = rng.random_range(0..323);
871 tab_snapshot.tab_size = tab_size;
872
873 for (ix, _) in input.char_indices() {
874 let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point();
875
876 assert_eq!(
877 tab_snapshot.expected_to_fold_point(range.start, Bias::Left),
878 tab_snapshot.to_fold_point(range.start, Bias::Left),
879 "Failed with input: {}, with idx: {ix}",
880 input
881 );
882 assert_eq!(
883 tab_snapshot.expected_to_fold_point(range.start, Bias::Right),
884 tab_snapshot.to_fold_point(range.start, Bias::Right),
885 "Failed with input: {}, with idx: {ix}",
886 input
887 );
888
889 assert_eq!(
890 tab_snapshot.expected_to_fold_point(range.end, Bias::Left),
891 tab_snapshot.to_fold_point(range.end, Bias::Left),
892 "Failed with input: {}, with idx: {ix}",
893 input
894 );
895 assert_eq!(
896 tab_snapshot.expected_to_fold_point(range.end, Bias::Right),
897 tab_snapshot.to_fold_point(range.end, Bias::Right),
898 "Failed with input: {}, with idx: {ix}",
899 input
900 );
901 }
902 }
903
904 #[gpui::test]
905 fn test_long_lines(cx: &mut gpui::App) {
906 let max_expansion_column = 12;
907 let input = "A\tBC\tDEF\tG\tHI\tJ\tK\tL\tM";
908 let output = "A BC DEF G HI J K L M";
909
910 let buffer = MultiBuffer::build_simple(input, cx);
911 let buffer_snapshot = buffer.read(cx).snapshot(cx);
912 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
913 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
914 let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
915
916 tab_snapshot.max_expansion_column = max_expansion_column;
917 assert_eq!(tab_snapshot.text(), output);
918
919 for (ix, c) in input.char_indices() {
920 assert_eq!(
921 tab_snapshot
922 .chunks(
923 TabPoint::new(0, ix as u32)..tab_snapshot.max_point(),
924 false,
925 Highlights::default(),
926 )
927 .map(|c| c.text)
928 .collect::<String>(),
929 &output[ix..],
930 "text from index {ix}"
931 );
932
933 if c != '\t' {
934 let input_point = Point::new(0, ix as u32);
935 let output_point = Point::new(0, output.find(c).unwrap() as u32);
936 assert_eq!(
937 tab_snapshot.to_tab_point(FoldPoint(input_point)),
938 TabPoint(output_point),
939 "to_tab_point({input_point:?})"
940 );
941 assert_eq!(
942 tab_snapshot
943 .to_fold_point(TabPoint(output_point), Bias::Left)
944 .0,
945 FoldPoint(input_point),
946 "to_fold_point({output_point:?})"
947 );
948 }
949 }
950 }
951
952 #[gpui::test]
953 fn test_long_lines_with_character_spanning_max_expansion_column(cx: &mut gpui::App) {
954 let max_expansion_column = 8;
955 let input = "abcdefg⋯hij";
956
957 let buffer = MultiBuffer::build_simple(input, cx);
958 let buffer_snapshot = buffer.read(cx).snapshot(cx);
959 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
960 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
961 let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
962
963 tab_snapshot.max_expansion_column = max_expansion_column;
964 assert_eq!(tab_snapshot.text(), input);
965 }
966
967 #[gpui::test]
968 fn test_marking_tabs(cx: &mut gpui::App) {
969 let input = "\t \thello";
970
971 let buffer = MultiBuffer::build_simple(input, cx);
972 let buffer_snapshot = buffer.read(cx).snapshot(cx);
973 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
974 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
975 let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
976
977 assert_eq!(
978 chunks(&tab_snapshot, TabPoint::zero()),
979 vec![
980 (" ".to_string(), true),
981 (" ".to_string(), false),
982 (" ".to_string(), true),
983 ("hello".to_string(), false),
984 ]
985 );
986 assert_eq!(
987 chunks(&tab_snapshot, TabPoint::new(0, 2)),
988 vec![
989 (" ".to_string(), true),
990 (" ".to_string(), false),
991 (" ".to_string(), true),
992 ("hello".to_string(), false),
993 ]
994 );
995
996 fn chunks(snapshot: &TabSnapshot, start: TabPoint) -> Vec<(String, bool)> {
997 let mut chunks = Vec::new();
998 let mut was_tab = false;
999 let mut text = String::new();
1000 for chunk in snapshot.chunks(start..snapshot.max_point(), false, Highlights::default())
1001 {
1002 if chunk.is_tab != was_tab {
1003 if !text.is_empty() {
1004 chunks.push((mem::take(&mut text), was_tab));
1005 }
1006 was_tab = chunk.is_tab;
1007 }
1008 text.push_str(chunk.text);
1009 }
1010
1011 if !text.is_empty() {
1012 chunks.push((text, was_tab));
1013 }
1014 chunks
1015 }
1016 }
1017
1018 #[gpui::test(iterations = 100)]
1019 fn test_random_tabs(cx: &mut gpui::App, mut rng: StdRng) {
1020 let tab_size = NonZeroU32::new(rng.random_range(1..=4)).unwrap();
1021 let len = rng.random_range(0..30);
1022 let buffer = if rng.random() {
1023 let text = util::RandomCharIter::new(&mut rng)
1024 .take(len)
1025 .collect::<String>();
1026 MultiBuffer::build_simple(&text, cx)
1027 } else {
1028 MultiBuffer::build_random(&mut rng, cx)
1029 };
1030 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1031 log::info!("Buffer text: {:?}", buffer_snapshot.text());
1032
1033 let (mut inlay_map, inlay_snapshot) = InlayMap::new(buffer_snapshot);
1034 log::info!("InlayMap text: {:?}", inlay_snapshot.text());
1035 let (mut fold_map, _) = FoldMap::new(inlay_snapshot.clone());
1036 fold_map.randomly_mutate(&mut rng);
1037 let (fold_snapshot, _) = fold_map.read(inlay_snapshot, vec![]);
1038 log::info!("FoldMap text: {:?}", fold_snapshot.text());
1039 let (inlay_snapshot, _) = inlay_map.randomly_mutate(&mut 0, &mut rng);
1040 log::info!("InlayMap text: {:?}", inlay_snapshot.text());
1041
1042 let (mut tab_map, _) = TabMap::new(fold_snapshot, tab_size);
1043 let tabs_snapshot = tab_map.set_max_expansion_column(32);
1044
1045 let text = text::Rope::from(tabs_snapshot.text().as_str());
1046 log::info!(
1047 "TabMap text (tab size: {}): {:?}",
1048 tab_size,
1049 tabs_snapshot.text(),
1050 );
1051
1052 for _ in 0..5 {
1053 let end_row = rng.random_range(0..=text.max_point().row);
1054 let end_column = rng.random_range(0..=text.line_len(end_row));
1055 let mut end = TabPoint(text.clip_point(Point::new(end_row, end_column), Bias::Right));
1056 let start_row = rng.random_range(0..=text.max_point().row);
1057 let start_column = rng.random_range(0..=text.line_len(start_row));
1058 let mut start =
1059 TabPoint(text.clip_point(Point::new(start_row, start_column), Bias::Left));
1060 if start > end {
1061 mem::swap(&mut start, &mut end);
1062 }
1063
1064 let expected_text = text
1065 .chunks_in_range(text.point_to_offset(start.0)..text.point_to_offset(end.0))
1066 .collect::<String>();
1067 let expected_summary = TextSummary::from(expected_text.as_str());
1068 assert_eq!(
1069 tabs_snapshot
1070 .chunks(start..end, false, Highlights::default())
1071 .map(|c| c.text)
1072 .collect::<String>(),
1073 expected_text,
1074 "chunks({:?}..{:?})",
1075 start,
1076 end
1077 );
1078
1079 let mut actual_summary = tabs_snapshot.text_summary_for_range(start..end);
1080 if tab_size.get() > 1 && inlay_snapshot.text().contains('\t') {
1081 actual_summary.longest_row = expected_summary.longest_row;
1082 actual_summary.longest_row_chars = expected_summary.longest_row_chars;
1083 }
1084 assert_eq!(actual_summary, expected_summary);
1085 }
1086
1087 for row in 0..=text.max_point().row {
1088 assert_eq!(
1089 tabs_snapshot.line_len(row),
1090 text.line_len(row),
1091 "line_len({row})"
1092 );
1093 }
1094 }
1095
1096 #[gpui::test(iterations = 100)]
1097 fn test_to_tab_point_random(cx: &mut gpui::App, mut rng: StdRng) {
1098 let tab_size = NonZeroU32::new(rng.random_range(1..=16)).unwrap();
1099 let len = rng.random_range(0..=2000);
1100
1101 // Generate random text using RandomCharIter
1102 let text = util::RandomCharIter::new(&mut rng)
1103 .take(len)
1104 .collect::<String>();
1105
1106 // Create buffer and tab map
1107 let buffer = MultiBuffer::build_simple(&text, cx);
1108 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1109 let (mut inlay_map, inlay_snapshot) = InlayMap::new(buffer_snapshot);
1110 let (mut fold_map, fold_snapshot) = FoldMap::new(inlay_snapshot);
1111 let (mut tab_map, _) = TabMap::new(fold_snapshot, tab_size);
1112
1113 let mut next_inlay_id = 0;
1114 let (inlay_snapshot, inlay_edits) = inlay_map.randomly_mutate(&mut next_inlay_id, &mut rng);
1115 let (fold_snapshot, fold_edits) = fold_map.read(inlay_snapshot, inlay_edits);
1116 let max_fold_point = fold_snapshot.max_point();
1117 let (mut tab_snapshot, _) = tab_map.sync(fold_snapshot.clone(), fold_edits, tab_size);
1118
1119 // Test random fold points
1120 for _ in 0..50 {
1121 tab_snapshot.max_expansion_column = rng.random_range(0..=256);
1122 // Generate random fold point
1123 let row = rng.random_range(0..=max_fold_point.row());
1124 let max_column = if row < max_fold_point.row() {
1125 fold_snapshot.line_len(row)
1126 } else {
1127 max_fold_point.column()
1128 };
1129 let column = rng.random_range(0..=max_column + 10);
1130 let fold_point = FoldPoint::new(row, column);
1131
1132 let actual = tab_snapshot.to_tab_point(fold_point);
1133 let expected = tab_snapshot.expected_to_tab_point(fold_point);
1134
1135 assert_eq!(
1136 actual, expected,
1137 "to_tab_point mismatch for fold_point {:?} in text {:?}",
1138 fold_point, text
1139 );
1140 }
1141 }
1142
1143 #[gpui::test]
1144 fn test_tab_stop_cursor_utf8(cx: &mut gpui::App) {
1145 let text = "\tfoo\tbarbarbar\t\tbaz\n";
1146 let buffer = MultiBuffer::build_simple(text, cx);
1147 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1148 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
1149 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
1150 let chunks = fold_snapshot.chunks(
1151 FoldOffset(0)..fold_snapshot.len(),
1152 false,
1153 Default::default(),
1154 );
1155 let mut cursor = TabStopCursor::new(chunks);
1156 assert!(cursor.seek(0).is_none());
1157 let mut tab_stops = Vec::new();
1158
1159 let mut all_tab_stops = Vec::new();
1160 let mut byte_offset = 0;
1161 for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() {
1162 byte_offset += ch.len_utf8() as u32;
1163
1164 if ch == '\t' {
1165 all_tab_stops.push(TabStop {
1166 byte_offset,
1167 char_offset: offset as u32 + 1,
1168 });
1169 }
1170 }
1171
1172 while let Some(tab_stop) = cursor.seek(u32::MAX) {
1173 tab_stops.push(tab_stop);
1174 }
1175 pretty_assertions::assert_eq!(tab_stops.as_slice(), all_tab_stops.as_slice(),);
1176
1177 assert_eq!(cursor.byte_offset(), byte_offset);
1178 }
1179
1180 #[gpui::test]
1181 fn test_tab_stop_with_end_range_utf8(cx: &mut gpui::App) {
1182 let input = "A\tBC\t"; // DEF\tG\tHI\tJ\tK\tL\tM
1183
1184 let buffer = MultiBuffer::build_simple(input, cx);
1185 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1186 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
1187 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
1188
1189 let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
1190 let mut cursor = TabStopCursor::new(chunks);
1191
1192 let mut actual_tab_stops = Vec::new();
1193
1194 let mut expected_tab_stops = Vec::new();
1195 let mut byte_offset = 0;
1196 for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() {
1197 byte_offset += ch.len_utf8() as u32;
1198
1199 if ch == '\t' {
1200 expected_tab_stops.push(TabStop {
1201 byte_offset,
1202 char_offset: offset as u32 + 1,
1203 });
1204 }
1205 }
1206
1207 while let Some(tab_stop) = cursor.seek(u32::MAX) {
1208 actual_tab_stops.push(tab_stop);
1209 }
1210 pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),);
1211
1212 assert_eq!(cursor.byte_offset(), byte_offset);
1213 }
1214
1215 #[gpui::test(iterations = 100)]
1216 fn test_tab_stop_cursor_random_utf8(cx: &mut gpui::App, mut rng: StdRng) {
1217 // Generate random input string with up to 512 characters including tabs
1218 let len = rng.random_range(0..=2048);
1219 let mut input = String::with_capacity(len);
1220
1221 let mut skip_tabs = rng.random_bool(0.10);
1222 for idx in 0..len {
1223 if idx % 128 == 0 {
1224 skip_tabs = rng.random_bool(0.10);
1225 }
1226
1227 if rng.random_bool(0.15) && !skip_tabs {
1228 input.push('\t');
1229 } else {
1230 let ch = loop {
1231 let ascii_code = rng.random_range(32..=126); // printable ASCII range
1232 let ch = ascii_code as u8 as char;
1233 if ch != '\t' {
1234 break ch;
1235 }
1236 };
1237 input.push(ch);
1238 }
1239 }
1240
1241 // Build the buffer and create cursor
1242 let buffer = MultiBuffer::build_simple(&input, cx);
1243 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1244 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
1245 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
1246
1247 // First, collect all expected tab positions
1248 let mut all_tab_stops = Vec::new();
1249 let mut byte_offset = 1;
1250 let mut char_offset = 1;
1251 for ch in buffer_snapshot.text().chars() {
1252 if ch == '\t' {
1253 all_tab_stops.push(TabStop {
1254 byte_offset,
1255 char_offset,
1256 });
1257 }
1258 byte_offset += ch.len_utf8() as u32;
1259 char_offset += 1;
1260 }
1261
1262 // Test with various distances
1263 let distances = vec![1, 5, 10, 50, 100, u32::MAX];
1264 // let distances = vec![150];
1265
1266 for distance in distances {
1267 let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
1268 let mut cursor = TabStopCursor::new(chunks);
1269
1270 let mut found_tab_stops = Vec::new();
1271 let mut position = distance;
1272 while let Some(tab_stop) = cursor.seek(position) {
1273 found_tab_stops.push(tab_stop);
1274 position = distance - tab_stop.byte_offset;
1275 }
1276
1277 let expected_found_tab_stops: Vec<_> = all_tab_stops
1278 .iter()
1279 .take_while(|tab_stop| tab_stop.byte_offset <= distance)
1280 .cloned()
1281 .collect();
1282
1283 pretty_assertions::assert_eq!(
1284 found_tab_stops,
1285 expected_found_tab_stops,
1286 "TabStopCursor output mismatch for distance {}. Input: {:?}",
1287 distance,
1288 input
1289 );
1290
1291 let final_position = cursor.byte_offset();
1292 if !found_tab_stops.is_empty() {
1293 let last_tab_stop = found_tab_stops.last().unwrap();
1294 assert!(
1295 final_position >= last_tab_stop.byte_offset,
1296 "Cursor final position {} is before last tab stop {}. Input: {:?}",
1297 final_position,
1298 last_tab_stop.byte_offset,
1299 input
1300 );
1301 }
1302 }
1303 }
1304
1305 #[gpui::test]
1306 fn test_tab_stop_cursor_utf16(cx: &mut gpui::App) {
1307 let text = "\r\t😁foo\tb😀arbar🤯bar\t\tbaz\n";
1308 let buffer = MultiBuffer::build_simple(text, cx);
1309 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1310 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot);
1311 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
1312 let chunks = fold_snapshot.chunks(
1313 FoldOffset(0)..fold_snapshot.len(),
1314 false,
1315 Default::default(),
1316 );
1317 let mut cursor = TabStopCursor::new(chunks);
1318 assert!(cursor.seek(0).is_none());
1319
1320 let mut expected_tab_stops = Vec::new();
1321 let mut byte_offset = 0;
1322 for (i, ch) in fold_snapshot.chars_at(FoldPoint::new(0, 0)).enumerate() {
1323 byte_offset += ch.len_utf8() as u32;
1324
1325 if ch == '\t' {
1326 expected_tab_stops.push(TabStop {
1327 byte_offset,
1328 char_offset: i as u32 + 1,
1329 });
1330 }
1331 }
1332
1333 let mut actual_tab_stops = Vec::new();
1334 while let Some(tab_stop) = cursor.seek(u32::MAX) {
1335 actual_tab_stops.push(tab_stop);
1336 }
1337
1338 pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),);
1339
1340 assert_eq!(cursor.byte_offset(), byte_offset);
1341 }
1342
1343 #[gpui::test(iterations = 100)]
1344 fn test_tab_stop_cursor_random_utf16(cx: &mut gpui::App, mut rng: StdRng) {
1345 // Generate random input string with up to 512 characters including tabs
1346 let len = rng.random_range(0..=2048);
1347 let input = util::RandomCharIter::new(&mut rng)
1348 .take(len)
1349 .collect::<String>();
1350
1351 // Build the buffer and create cursor
1352 let buffer = MultiBuffer::build_simple(&input, cx);
1353 let buffer_snapshot = buffer.read(cx).snapshot(cx);
1354 let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
1355 let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
1356
1357 // First, collect all expected tab positions
1358 let mut all_tab_stops = Vec::new();
1359 let mut byte_offset = 0;
1360 for (i, ch) in buffer_snapshot.text().chars().enumerate() {
1361 byte_offset += ch.len_utf8() as u32;
1362 if ch == '\t' {
1363 all_tab_stops.push(TabStop {
1364 byte_offset,
1365 char_offset: i as u32 + 1,
1366 });
1367 }
1368 }
1369
1370 // Test with various distances
1371 // let distances = vec![1, 5, 10, 50, 100, u32::MAX];
1372 let distances = vec![150];
1373
1374 for distance in distances {
1375 let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
1376 let mut cursor = TabStopCursor::new(chunks);
1377
1378 let mut found_tab_stops = Vec::new();
1379 let mut position = distance;
1380 while let Some(tab_stop) = cursor.seek(position) {
1381 found_tab_stops.push(tab_stop);
1382 position = distance - tab_stop.byte_offset;
1383 }
1384
1385 let expected_found_tab_stops: Vec<_> = all_tab_stops
1386 .iter()
1387 .take_while(|tab_stop| tab_stop.byte_offset <= distance)
1388 .cloned()
1389 .collect();
1390
1391 pretty_assertions::assert_eq!(
1392 found_tab_stops,
1393 expected_found_tab_stops,
1394 "TabStopCursor output mismatch for distance {}. Input: {:?}",
1395 distance,
1396 input
1397 );
1398
1399 let final_position = cursor.byte_offset();
1400 if !found_tab_stops.is_empty() {
1401 let last_tab_stop = found_tab_stops.last().unwrap();
1402 assert!(
1403 final_position >= last_tab_stop.byte_offset,
1404 "Cursor final position {} is before last tab stop {}. Input: {:?}",
1405 final_position,
1406 last_tab_stop.byte_offset,
1407 input
1408 );
1409 }
1410 }
1411 }
1412}
1413
1414struct TabStopCursor<'a, I>
1415where
1416 I: Iterator<Item = Chunk<'a>>,
1417{
1418 chunks: I,
1419 byte_offset: u32,
1420 char_offset: u32,
1421 /// Chunk
1422 /// last tab position iterated through
1423 current_chunk: Option<(Chunk<'a>, u32)>,
1424}
1425
1426impl<'a, I> TabStopCursor<'a, I>
1427where
1428 I: Iterator<Item = Chunk<'a>>,
1429{
1430 fn new(chunks: impl IntoIterator<Item = Chunk<'a>, IntoIter = I>) -> Self {
1431 Self {
1432 chunks: chunks.into_iter(),
1433 byte_offset: 0,
1434 char_offset: 0,
1435 current_chunk: None,
1436 }
1437 }
1438
1439 fn bytes_until_next_char(&self) -> Option<usize> {
1440 self.current_chunk.as_ref().and_then(|(chunk, idx)| {
1441 let mut idx = *idx;
1442 let mut diff = 0;
1443 while idx > 0 && chunk.chars & (1 << idx) == 0 {
1444 idx -= 1;
1445 diff += 1;
1446 }
1447
1448 if chunk.chars & (1 << idx) != 0 {
1449 Some(
1450 (chunk.text[idx as usize..].chars().next()?)
1451 .len_utf8()
1452 .saturating_sub(diff),
1453 )
1454 } else {
1455 None
1456 }
1457 })
1458 }
1459
1460 fn is_char_boundary(&self) -> bool {
1461 self.current_chunk
1462 .as_ref()
1463 .is_some_and(|(chunk, idx)| (chunk.chars & (1 << *idx.min(&127))) != 0)
1464 }
1465
1466 /// distance: length to move forward while searching for the next tab stop
1467 fn seek(&mut self, distance: u32) -> Option<TabStop> {
1468 if distance == 0 {
1469 return None;
1470 }
1471
1472 let mut distance_traversed = 0;
1473
1474 while let Some((mut chunk, chunk_position)) = self
1475 .current_chunk
1476 .take()
1477 .or_else(|| self.chunks.next().zip(Some(0)))
1478 {
1479 if chunk.tabs == 0 {
1480 let chunk_distance = chunk.text.len() as u32 - chunk_position;
1481 if chunk_distance + distance_traversed >= distance {
1482 let overshoot = distance_traversed.abs_diff(distance);
1483
1484 self.byte_offset += overshoot;
1485 self.char_offset += get_char_offset(
1486 chunk_position..(chunk_position + overshoot).saturating_sub(1).min(127),
1487 chunk.chars,
1488 );
1489
1490 self.current_chunk = Some((chunk, chunk_position + overshoot));
1491
1492 return None;
1493 }
1494
1495 self.byte_offset += chunk_distance;
1496 self.char_offset += get_char_offset(
1497 chunk_position..(chunk_position + chunk_distance).saturating_sub(1).min(127),
1498 chunk.chars,
1499 );
1500 distance_traversed += chunk_distance;
1501 continue;
1502 }
1503 let tab_position = chunk.tabs.trailing_zeros() + 1;
1504
1505 if distance_traversed + tab_position - chunk_position > distance {
1506 let cursor_position = distance_traversed.abs_diff(distance);
1507
1508 self.char_offset += get_char_offset(
1509 chunk_position..(chunk_position + cursor_position - 1),
1510 chunk.chars,
1511 );
1512 self.current_chunk = Some((chunk, cursor_position + chunk_position));
1513 self.byte_offset += cursor_position;
1514
1515 return None;
1516 }
1517
1518 self.byte_offset += tab_position - chunk_position;
1519 self.char_offset += get_char_offset(chunk_position..(tab_position - 1), chunk.chars);
1520
1521 let tabstop = TabStop {
1522 char_offset: self.char_offset,
1523 byte_offset: self.byte_offset,
1524 };
1525
1526 chunk.tabs = (chunk.tabs - 1) & chunk.tabs;
1527
1528 if tab_position as usize != chunk.text.len() {
1529 self.current_chunk = Some((chunk, tab_position));
1530 }
1531
1532 return Some(tabstop);
1533 }
1534
1535 None
1536 }
1537
1538 fn byte_offset(&self) -> u32 {
1539 self.byte_offset
1540 }
1541
1542 fn char_offset(&self) -> u32 {
1543 self.char_offset
1544 }
1545}
1546
1547#[inline(always)]
1548fn get_char_offset(range: Range<u32>, bit_map: u128) -> u32 {
1549 // This edge case can happen when we're at chunk position 128
1550
1551 if range.start == range.end {
1552 return if (1u128 << range.start) & bit_map == 0 {
1553 0
1554 } else {
1555 1
1556 };
1557 }
1558 let end_shift: u128 = 127u128 - range.end.min(127) as u128;
1559 let mut bit_mask = (u128::MAX >> range.start) << range.start;
1560 bit_mask = (bit_mask << end_shift) >> end_shift;
1561 let bit_map = bit_map & bit_mask;
1562
1563 bit_map.count_ones()
1564}
1565
1566#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1567struct TabStop {
1568 char_offset: u32,
1569 byte_offset: u32,
1570}