From b8c30f448fc56a6edff5c35f9e84ab53da968058 Mon Sep 17 00:00:00 2001 From: Anthony Eid <56899983+Anthony-Eid@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:13:41 -0400 Subject: [PATCH] Improve Tab Map performance (#32243) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context While looking into #32051 and #16120 with instruments, I noticed that `TabSnapshot::to_tab_point` and `TabSnapshot::to_fold_point` are a bottleneck common to both issues. This PR takes the first steps toward closing the stated issues by improving the performance of both of those functions. ### Method `to_tab_point` and `to_fold_point` iterate through each character in their rows to find tab characters and translate those characters into their respective transformations. This PR changes this iteration to take advantage of the tab character bitmap in the `Rope` data structure and go directly to each tab character when iterating. The tab bitmap is now passed through each layer between the `Rope` and the `TabMap`. ### Testing I added several randomized tests to ensure that the new `to_tab_point` and `to_fold_point` functions have the same behavior as the old methods they're replacing. I also added `test_random_chunk_bitmaps` to each layer through which the tab bitmap is passed up to the `TabMap`, to make sure that the bitmap is transformed correctly between the layers of `DisplayMap`. `test_random_chunk_bitmaps` was added to these layers: - buffer - multi buffer - custom_highlights - inlay_map - fold_map ## Benchmarking I set up benchmarks with criterion that are runnable via `cargo bench -p editor --profile=release-fast`. When benchmarking I had my laptop plugged in and did so from the terminal with a minimal number of processes running.
I'm also on an M4 Max. ### Results #### To Tab Point Went from completing 6.8M iterations in 5s with an average time of `736.13 ns` to `683.38 ns`, which is a `-7.1875%` improvement #### To Fold Point Went from completing 6.8M iterations in 5s with an average time of `736.55 ns` to `682.40 ns`, which is a `-7.1659%` improvement #### Editor render Went from having an average render time of `62.561 µs` to `57.216 µs`, which is a `-8.8248%` improvement #### Build Buffer with one long line Went from having an average buffer build time of `3.2549 ms` to `3.2635 ms`, which is a `+0.2151%` regression, within the margin of error #### Editor with 1000 multi cursor input Went from having an average edit time of `133.05 ms` to `122.96 ms`, which is a `-7.5776%` improvement Release Notes: - N/A --------- Co-authored-by: Remco Smits Co-authored-by: Cole Miller Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> --- Cargo.lock | 1 + crates/editor/Cargo.toml | 10 + crates/editor/benches/display_map.rs | 102 ++ crates/editor/benches/editor_render.rs | 172 ++++ crates/editor/src/display_map.rs | 8 +- crates/editor/src/display_map/block_map.rs | 14 +- .../src/display_map/custom_highlights.rs | 161 +++- crates/editor/src/display_map/fold_map.rs | 124 ++- crates/editor/src/display_map/inlay_map.rs | 143 ++- crates/editor/src/display_map/tab_map.rs | 900 ++++++++++++++++-- crates/editor/src/display_map/wrap_map.rs | 16 + crates/editor/src/editor.rs | 7 +- crates/language/src/buffer.rs | 24 +- crates/language/src/buffer_tests.rs | 77 ++ crates/multi_buffer/src/multi_buffer.rs | 16 +- crates/multi_buffer/src/multi_buffer_tests.rs | 233 +++++ crates/rope/src/chunk.rs | 7 +- crates/rope/src/rope.rs | 93 ++ 18 files changed, 2017 insertions(+), 91 deletions(-) create mode 100644 crates/editor/benches/display_map.rs create mode 100644 crates/editor/benches/editor_render.rs diff --git a/Cargo.lock b/Cargo.lock index
39b8885eae2ae70c72a4705a9bd486dd0a1d37dc..f3927c6c95b7431301401e12d5da803cc2a7d455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5017,6 +5017,7 @@ dependencies = [ "clock", "collections", "convert_case 0.8.0", + "criterion", "ctor", "dap", "db", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index 339f98ae8bd88263f1fea12c535569864faae294..be06cc04dfc7ee3f080e8d995783abb819e95842 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -94,6 +94,7 @@ zed_actions.workspace = true workspace-hack.workspace = true [dev-dependencies] +criterion.workspace = true ctor.workspace = true gpui = { workspace = true, features = ["test-support"] } language = { workspace = true, features = ["test-support"] } @@ -119,3 +120,12 @@ util = { workspace = true, features = ["test-support"] } workspace = { workspace = true, features = ["test-support"] } http_client = { workspace = true, features = ["test-support"] } zlog.workspace = true + + +[[bench]] +name = "editor_render" +harness = false + +[[bench]] +name = "display_map" +harness = false diff --git a/crates/editor/benches/display_map.rs b/crates/editor/benches/display_map.rs new file mode 100644 index 0000000000000000000000000000000000000000..919249ad01b87fe5fbabe1b5fe6e563179b41d10 --- /dev/null +++ b/crates/editor/benches/display_map.rs @@ -0,0 +1,102 @@ +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use editor::MultiBuffer; +use gpui::TestDispatcher; +use itertools::Itertools; +use rand::{Rng, SeedableRng, rngs::StdRng}; +use std::num::NonZeroU32; +use text::Bias; +use util::RandomCharIter; + +fn to_tab_point_benchmark(c: &mut Criterion) { + let rng = StdRng::seed_from_u64(1); + let dispatcher = TestDispatcher::new(rng); + let cx = gpui::TestAppContext::build(dispatcher, None); + + let create_tab_map = |length: usize| { + let mut rng = StdRng::seed_from_u64(1); + let text = RandomCharIter::new(&mut rng) + .take(length) + .collect::(); + let buffer = cx.update(|cx| 
MultiBuffer::build_simple(&text, cx)); + + let buffer_snapshot = cx.read(|cx| buffer.read(cx).snapshot(cx)); + use editor::display_map::*; + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot.clone()); + let fold_point = fold_snapshot.to_fold_point( + inlay_snapshot.to_point(InlayOffset(rng.random_range(0..length))), + Bias::Left, + ); + let (_, snapshot) = TabMap::new(fold_snapshot, NonZeroU32::new(4).unwrap()); + + (length, snapshot, fold_point) + }; + + let inputs = [1024].into_iter().map(create_tab_map).collect_vec(); + + let mut group = c.benchmark_group("To tab point"); + + for (batch_size, snapshot, fold_point) in inputs { + group.bench_with_input( + BenchmarkId::new("to_tab_point", batch_size), + &snapshot, + |bench, snapshot| { + bench.iter(|| { + snapshot.to_tab_point(fold_point); + }); + }, + ); + } + + group.finish(); +} + +fn to_fold_point_benchmark(c: &mut Criterion) { + let rng = StdRng::seed_from_u64(1); + let dispatcher = TestDispatcher::new(rng); + let cx = gpui::TestAppContext::build(dispatcher, None); + + let create_tab_map = |length: usize| { + let mut rng = StdRng::seed_from_u64(1); + let text = RandomCharIter::new(&mut rng) + .take(length) + .collect::(); + let buffer = cx.update(|cx| MultiBuffer::build_simple(&text, cx)); + + let buffer_snapshot = cx.read(|cx| buffer.read(cx).snapshot(cx)); + use editor::display_map::*; + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot.clone()); + + let fold_point = fold_snapshot.to_fold_point( + inlay_snapshot.to_point(InlayOffset(rng.random_range(0..length))), + Bias::Left, + ); + + let (_, snapshot) = TabMap::new(fold_snapshot, NonZeroU32::new(4).unwrap()); + let tab_point = snapshot.to_tab_point(fold_point); + + (length, snapshot, tab_point) + }; + + let inputs = [1024].into_iter().map(create_tab_map).collect_vec(); + + let mut group = c.benchmark_group("To fold point"); + + for 
(batch_size, snapshot, tab_point) in inputs { + group.bench_with_input( + BenchmarkId::new("to_fold_point", batch_size), + &snapshot, + |bench, snapshot| { + bench.iter(|| { + snapshot.to_fold_point(tab_point, Bias::Left); + }); + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, to_tab_point_benchmark, to_fold_point_benchmark); +criterion_main!(benches); diff --git a/crates/editor/benches/editor_render.rs b/crates/editor/benches/editor_render.rs new file mode 100644 index 0000000000000000000000000000000000000000..0ae1af5537fb62a7658ccd306545503b818c28ae --- /dev/null +++ b/crates/editor/benches/editor_render.rs @@ -0,0 +1,172 @@ +use criterion::{Bencher, BenchmarkId}; +use editor::{ + Editor, EditorMode, MultiBuffer, + actions::{DeleteToPreviousWordStart, SelectAll, SplitSelectionIntoLines}, +}; +use gpui::{AppContext, Focusable as _, TestAppContext, TestDispatcher}; +use project::Project; +use rand::{Rng as _, SeedableRng as _, rngs::StdRng}; +use settings::SettingsStore; +use ui::IntoElement; +use util::RandomCharIter; + +fn editor_input_with_1000_cursors(bencher: &mut Bencher<'_>, cx: &TestAppContext) { + let mut cx = cx.clone(); + let text = String::from_iter(["line:\n"; 1000]); + let buffer = cx.update(|cx| MultiBuffer::build_simple(&text, cx)); + + let cx = cx.add_empty_window(); + let editor = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + editor.set_style(editor::EditorStyle::default(), window, cx); + editor.select_all(&SelectAll, window, cx); + editor.split_selection_into_lines( + &SplitSelectionIntoLines { + keep_selections: true, + }, + window, + cx, + ); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + + bencher.iter(|| { + cx.update(|window, cx| { + editor.update(cx, |editor, cx| { + editor.handle_input("hello world", window, cx); + editor.delete_to_previous_word_start( + &DeleteToPreviousWordStart { + ignore_newlines: false, + 
ignore_brackets: false, + }, + window, + cx, + ); + editor.delete_to_previous_word_start( + &DeleteToPreviousWordStart { + ignore_newlines: false, + ignore_brackets: false, + }, + window, + cx, + ); + }); + }) + }); +} + +fn open_editor_with_one_long_line(bencher: &mut Bencher<'_>, args: &(String, TestAppContext)) { + let (text, cx) = args; + let mut cx = cx.clone(); + + bencher.iter(|| { + let buffer = cx.update(|cx| MultiBuffer::build_simple(&text, cx)); + + let cx = cx.add_empty_window(); + let _ = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + editor.set_style(editor::EditorStyle::default(), window, cx); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + }); +} + +fn editor_render(bencher: &mut Bencher<'_>, cx: &TestAppContext) { + let mut cx = cx.clone(); + let buffer = cx.update(|cx| { + let mut rng = StdRng::seed_from_u64(1); + let text_len = rng.random_range(10000..90000); + if rng.random() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + } + }); + + let cx = cx.add_empty_window(); + let editor = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + editor.set_style(editor::EditorStyle::default(), window, cx); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + + bencher.iter(|| { + cx.update(|window, cx| { + // editor.update(cx, |editor, cx| editor.move_down(&MoveDown, window, cx)); + let mut view = editor.clone().into_any_element(); + let _ = view.request_layout(window, cx); + let _ = view.prepaint(window, cx); + view.paint(window, cx); + }); + }) +} + +pub fn benches() { + let dispatcher = TestDispatcher::new(StdRng::seed_from_u64(1)); + let cx = gpui::TestAppContext::build(dispatcher, None); + cx.update(|cx| { + let store = 
SettingsStore::test(cx); + cx.set_global(store); + assets::Assets.load_test_fonts(cx); + theme::init(theme::LoadThemes::JustBase, cx); + // release_channel::init(SemanticVersion::default(), cx); + client::init_settings(cx); + language::init(cx); + workspace::init_settings(cx); + Project::init_settings(cx); + editor::init(cx); + }); + + let mut criterion: criterion::Criterion<_> = + (criterion::Criterion::default()).configure_from_args(); + + // setup app context + let mut group = criterion.benchmark_group("Time to render"); + group.bench_with_input( + BenchmarkId::new("editor_render", "TestAppContext"), + &cx, + editor_render, + ); + + group.finish(); + + let text = String::from_iter(["char"; 1000]); + let mut group = criterion.benchmark_group("Build buffer with one long line"); + group.bench_with_input( + BenchmarkId::new("editor_with_one_long_line", "(String, TestAppContext )"), + &(text, cx.clone()), + open_editor_with_one_long_line, + ); + + group.finish(); + + let mut group = criterion.benchmark_group("multi cursor edits"); + group.bench_with_input( + BenchmarkId::new("editor_input_with_1000_cursors", "TestAppContext"), + &cx, + editor_input_with_1000_cursors, + ); + group.finish(); +} + +fn main() { + benches(); + criterion::Criterion::default() + .configure_from_args() + .final_summary(); +} diff --git a/crates/editor/src/display_map.rs b/crates/editor/src/display_map.rs index 7b2bd2eb419b1436b3f55f88e1904028e25f33d6..1acbdab7a6646fe46b9ad9d9cb09c1549d64bb1a 100644 --- a/crates/editor/src/display_map.rs +++ b/crates/editor/src/display_map.rs @@ -37,13 +37,13 @@ pub use block_map::{ use block_map::{BlockRow, BlockSnapshot}; use collections::{HashMap, HashSet}; pub use crease_map::*; +use fold_map::FoldSnapshot; pub use fold_map::{ ChunkRenderer, ChunkRendererContext, ChunkRendererId, Fold, FoldId, FoldPlaceholder, FoldPoint, }; -use fold_map::{FoldMap, FoldSnapshot}; use gpui::{App, Context, Entity, Font, HighlightStyle, LineLayout, Pixels, UnderlineStyle}; 
pub use inlay_map::Inlay; -use inlay_map::{InlayMap, InlaySnapshot}; +use inlay_map::InlaySnapshot; pub use inlay_map::{InlayOffset, InlayPoint}; pub use invisibles::{is_invisible, replacement}; use language::{ @@ -66,12 +66,14 @@ use std::{ sync::Arc, }; use sum_tree::{Bias, TreeMap}; -use tab_map::{TabMap, TabSnapshot}; +use tab_map::TabSnapshot; use text::{BufferId, LineIndent}; use ui::{SharedString, px}; use unicode_segmentation::UnicodeSegmentation; use wrap_map::{WrapMap, WrapSnapshot}; +pub use crate::display_map::{fold_map::FoldMap, inlay_map::InlayMap, tab_map::TabMap}; + #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum FoldStatus { Folded, diff --git a/crates/editor/src/display_map/block_map.rs b/crates/editor/src/display_map/block_map.rs index de734e5ea62f23d2396fb03393c32e55d0e1fc7b..03d04e7010248293604d10c2f3e553430e74c9c6 100644 --- a/crates/editor/src/display_map/block_map.rs +++ b/crates/editor/src/display_map/block_map.rs @@ -1737,6 +1737,7 @@ impl<'a> Iterator for BlockChunks<'a> { return Some(Chunk { text: unsafe { std::str::from_utf8_unchecked(&NEWLINES[..line_count as usize]) }, + chars: (1 << line_count) - 1, ..Default::default() }); } @@ -1766,17 +1767,26 @@ impl<'a> Iterator for BlockChunks<'a> { let (mut prefix, suffix) = self.input_chunk.text.split_at(prefix_bytes); self.input_chunk.text = suffix; + self.input_chunk.tabs >>= prefix_bytes.saturating_sub(1); + self.input_chunk.chars >>= prefix_bytes.saturating_sub(1); + + let mut tabs = self.input_chunk.tabs; + let mut chars = self.input_chunk.chars; if self.masked { // Not great for multibyte text because to keep cursor math correct we // need to have the same number of bytes in the input as output. 
- let chars = prefix.chars().count(); - let bullet_len = chars; + let chars_count = prefix.chars().count(); + let bullet_len = chars_count; prefix = &BULLETS[..bullet_len]; + chars = (1 << bullet_len) - 1; + tabs = 0; } let chunk = Chunk { text: prefix, + tabs, + chars, ..self.input_chunk.clone() }; diff --git a/crates/editor/src/display_map/custom_highlights.rs b/crates/editor/src/display_map/custom_highlights.rs index e4af453f894f5d4afeeed990f2233f1ec1b5dc76..b7518af59c28dbc95a36d24b36a7eae2862916b6 100644 --- a/crates/editor/src/display_map/custom_highlights.rs +++ b/crates/editor/src/display_map/custom_highlights.rs @@ -134,19 +134,33 @@ impl<'a> Iterator for CustomHighlightsChunks<'a> { let chunk = self .buffer_chunk - .get_or_insert_with(|| self.buffer_chunks.next().unwrap()); + .get_or_insert_with(|| self.buffer_chunks.next().unwrap_or_default()); if chunk.text.is_empty() { - *chunk = self.buffer_chunks.next().unwrap(); + *chunk = self.buffer_chunks.next()?; } - let (prefix, suffix) = chunk - .text - .split_at(chunk.text.len().min(next_highlight_endpoint - self.offset)); + let split_idx = chunk.text.len().min(next_highlight_endpoint - self.offset); + let (prefix, suffix) = chunk.text.split_at(split_idx); + + let (chars, tabs) = if split_idx == 128 { + let output = (chunk.chars, chunk.tabs); + chunk.chars = 0; + chunk.tabs = 0; + output + } else { + let mask = (1 << split_idx) - 1; + let output = (chunk.chars & mask, chunk.tabs & mask); + chunk.chars = chunk.chars >> split_idx; + chunk.tabs = chunk.tabs >> split_idx; + output + }; chunk.text = suffix; self.offset += prefix.len(); let mut prefix = Chunk { text: prefix, + chars, + tabs, ..chunk.clone() }; if !self.active_highlights.is_empty() { @@ -173,3 +187,140 @@ impl Ord for HighlightEndpoint { .then_with(|| self.style.is_some().cmp(&other.style.is_some())) } } + +#[cfg(test)] +mod tests { + use std::{any::TypeId, sync::Arc}; + + use super::*; + use crate::MultiBuffer; + use gpui::App; + use 
rand::prelude::*; + use util::RandomCharIter; + + #[gpui::test(iterations = 100)] + fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) { + // Generate random buffer using existing test infrastructure + let len = rng.random_range(10..10000); + let buffer = if rng.random() { + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let buffer_snapshot = buffer.read(cx).snapshot(cx); + + // Create random highlights + let mut highlights = sum_tree::TreeMap::default(); + let highlight_count = rng.random_range(1..10); + + for _i in 0..highlight_count { + let style = HighlightStyle { + color: Some(gpui::Hsla { + h: rng.random::(), + s: rng.random::(), + l: rng.random::(), + a: 1.0, + }), + ..Default::default() + }; + + let mut ranges = Vec::new(); + let range_count = rng.random_range(1..10); + let text = buffer_snapshot.text(); + for _ in 0..range_count { + if buffer_snapshot.len() == 0 { + continue; + } + + let mut start = rng.random_range(0..=buffer_snapshot.len().saturating_sub(10)); + + while !text.is_char_boundary(start) { + start = start.saturating_sub(1); + } + + let end_end = buffer_snapshot.len().min(start + 100); + let mut end = rng.random_range(start..=end_end); + while !text.is_char_boundary(end) { + end = end.saturating_sub(1); + } + + if start < end { + start = end; + } + let start_anchor = buffer_snapshot.anchor_before(start); + let end_anchor = buffer_snapshot.anchor_after(end); + ranges.push(start_anchor..end_anchor); + } + + let type_id = TypeId::of::<()>(); // Simple type ID for testing + highlights.insert(HighlightKey::Type(type_id), Arc::new((style, ranges))); + } + + // Get all chunks and verify their bitmaps + let chunks = + CustomHighlightsChunks::new(0..buffer_snapshot.len(), false, None, &buffer_snapshot); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + 
// Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } + } +} diff --git a/crates/editor/src/display_map/fold_map.rs b/crates/editor/src/display_map/fold_map.rs index 6d160d0d6d58dbeeac89749aeabcedef6010c1c3..405f25219fa6d7bcef03c745aa34fec351d7abd3 100644 --- a/crates/editor/src/display_map/fold_map.rs +++ b/crates/editor/src/display_map/fold_map.rs @@ -320,13 +320,13 @@ impl FoldMapWriter<'_> { /// Decides where the fold indicators should be; also tracks parts of a source file that are currently folded. /// /// See the [`display_map` module documentation](crate::display_map) for more information. -pub(crate) struct FoldMap { +pub struct FoldMap { snapshot: FoldSnapshot, next_fold_id: FoldId, } impl FoldMap { - pub(crate) fn new(inlay_snapshot: InlaySnapshot) -> (Self, FoldSnapshot) { + pub fn new(inlay_snapshot: InlaySnapshot) -> (Self, FoldSnapshot) { let this = Self { snapshot: FoldSnapshot { folds: SumTree::new(&inlay_snapshot.buffer), @@ -360,7 +360,7 @@ impl FoldMap { (self.snapshot.clone(), edits) } - pub fn write( + pub(crate) fn write( &mut self, inlay_snapshot: InlaySnapshot, edits: Vec, @@ -529,6 +529,7 @@ impl FoldMap { }, placeholder: Some(TransformPlaceholder { text: ELLIPSIS, + chars: 1, renderer: ChunkRenderer { id: ChunkRendererId::Fold(fold.id), render: Arc::new(move |cx| { @@ -872,6 +873,14 @@ impl FoldSnapshot { .flat_map(|chunk| chunk.text.chars()) } + pub fn chunks_at(&self, start: FoldPoint) -> FoldChunks<'_> { + self.chunks( + start.to_offset(self)..self.len(), + false, + Highlights::default(), + ) + } + #[cfg(test)] pub fn clip_offset(&self, offset: FoldOffset, bias: Bias) -> FoldOffset { if offset > self.len() { @@ -1034,6 +1043,7 @@ struct Transform { #[derive(Clone, Debug)] struct TransformPlaceholder { text: &'static str, + chars: u128, renderer: ChunkRenderer, } @@ -1274,6 +1284,10 @@ pub struct Chunk<'a> { pub is_inlay: bool, /// An optional recipe for how the chunk should 
be presented. pub renderer: Option, + /// Bitmap of tab character locations in chunk + pub tabs: u128, + /// Bitmap of character locations in chunk + pub chars: u128, } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -1391,6 +1405,7 @@ impl<'a> Iterator for FoldChunks<'a> { self.output_offset.0 += placeholder.text.len(); return Some(Chunk { text: placeholder.text, + chars: placeholder.chars, renderer: Some(placeholder.renderer.clone()), ..Default::default() }); @@ -1429,6 +1444,16 @@ impl<'a> Iterator for FoldChunks<'a> { chunk.text = &chunk.text [(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0]; + let bit_end = (chunk_end - buffer_chunk_start).0; + let mask = if bit_end >= 128 { + u128::MAX + } else { + (1u128 << bit_end) - 1 + }; + + chunk.tabs = (chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0) & mask; + chunk.chars = (chunk.chars >> (self.inlay_offset - buffer_chunk_start).0) & mask; + if chunk_end == transform_end { self.transform_cursor.next(); } else if chunk_end == buffer_chunk_end { @@ -1439,6 +1464,8 @@ impl<'a> Iterator for FoldChunks<'a> { self.output_offset.0 += chunk.text.len(); return Some(Chunk { text: chunk.text, + tabs: chunk.tabs, + chars: chunk.chars, syntax_highlight_id: chunk.syntax_highlight_id, highlight_style: chunk.highlight_style, diagnostic_severity: chunk.diagnostic_severity, @@ -2072,6 +2099,97 @@ mod tests { ); } + #[gpui::test(iterations = 100)] + fn test_random_chunk_bitmaps(cx: &mut gpui::App, mut rng: StdRng) { + init_test(cx); + + // Generate random buffer using existing test infrastructure + let text_len = rng.random_range(0..10000); + let buffer = if rng.random() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (mut fold_map, _) = 
FoldMap::new(inlay_snapshot.clone()); + + // Perform random mutations + let mutation_count = rng.random_range(1..10); + for _ in 0..mutation_count { + fold_map.randomly_mutate(&mut rng); + } + + let (snapshot, _) = fold_map.read(inlay_snapshot, vec![]); + + // Get all chunks and verify their bitmaps + let chunks = snapshot.chunks( + FoldOffset(0)..FoldOffset(snapshot.len().0), + false, + Highlights::default(), + ); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } + fn init_test(cx: &mut gpui::App) { let store = SettingsStore::test(cx); cx.set_global(store); diff --git a/crates/editor/src/display_map/inlay_map.rs b/crates/editor/src/display_map/inlay_map.rs index 62c16f5a1e453d5e9cc60e63804cbe611781aa16..9ceb0897d242f710353c2f7a90992b2a39f40958 100644 --- a/crates/editor/src/display_map/inlay_map.rs +++ b/crates/editor/src/display_map/inlay_map.rs @@ -11,7 +11,7 @@ use std::{ sync::Arc, }; use sum_tree::{Bias, Cursor, Dimensions, SumTree}; -use text::{Patch, Rope}; +use text::{ChunkBitmaps, Patch, Rope}; use ui::{ActiveTheme, IntoElement as _, ParentElement as _, Styled as _, div}; use super::{Highlights, custom_highlights::CustomHighlightsChunks, fold_map::ChunkRendererId}; @@ -245,8 +245,9 @@ pub struct InlayChunks<'a> { transforms: Cursor<'a, Transform, Dimensions>, buffer_chunks: CustomHighlightsChunks<'a>, buffer_chunk: Option>, - inlay_chunks: Option>, - inlay_chunk: Option<&'a str>, + inlay_chunks: Option>, + /// text, char bitmap, tabs bitmap + inlay_chunk: Option>, output_offset: InlayOffset, max_output_offset: InlayOffset, highlight_styles: HighlightStyles, @@ -316,11 +317,25 @@ impl<'a> Iterator for InlayChunks<'a> { let (prefix, suffix) = chunk.text.split_at(split_index); + let (chars, tabs) = if split_index == 128 { + let output = (chunk.chars, chunk.tabs); + chunk.chars = 0; + chunk.tabs = 0; + output + } else { + let mask = (1 << split_index) - 1; + let output = (chunk.chars & mask, chunk.tabs & mask); + chunk.chars = chunk.chars >> split_index; + chunk.tabs = chunk.tabs >> split_index; + output + }; chunk.text = suffix; self.output_offset.0 += prefix.len(); InlayChunk { chunk: Chunk { text: prefix, + chars, + tabs, ..chunk.clone() }, renderer: None, @@ -397,9 +412,14 @@ impl<'a> Iterator for InlayChunks<'a> { let start = offset_in_inlay; let end = cmp::min(self.max_output_offset, 
self.transforms.end().0) - self.transforms.start().0; - inlay.text.chunks_in_range(start.0..end.0) + let chunks = inlay.text.chunks_in_range(start.0..end.0); + text::ChunkWithBitmaps(chunks) }); - let inlay_chunk = self + let ChunkBitmaps { + text: inlay_chunk, + chars, + tabs, + } = self .inlay_chunk .get_or_insert_with(|| inlay_chunks.next().unwrap()); @@ -421,6 +441,20 @@ impl<'a> Iterator for InlayChunks<'a> { let (chunk, remainder) = inlay_chunk.split_at(split_index); *inlay_chunk = remainder; + + let (chars, tabs) = if split_index == 128 { + let output = (*chars, *tabs); + *chars = 0; + *tabs = 0; + output + } else { + let mask = (1 << split_index as u32) - 1; + let output = (*chars & mask, *tabs & mask); + *chars = *chars >> split_index; + *tabs = *tabs >> split_index; + output + }; + if inlay_chunk.is_empty() { self.inlay_chunk = None; } @@ -430,6 +464,8 @@ impl<'a> Iterator for InlayChunks<'a> { InlayChunk { chunk: Chunk { text: chunk, + chars, + tabs, highlight_style, is_inlay: true, ..Chunk::default() @@ -1224,6 +1260,7 @@ mod tests { use std::{any::TypeId, cmp::Reverse, env, sync::Arc}; use sum_tree::TreeMap; use text::Patch; + use util::RandomCharIter; use util::post_inc; #[test] @@ -1966,6 +2003,102 @@ mod tests { } } + #[gpui::test(iterations = 100)] + fn test_random_chunk_bitmaps(cx: &mut gpui::App, mut rng: StdRng) { + init_test(cx); + + // Generate random buffer using existing test infrastructure + let text_len = rng.random_range(0..10000); + let buffer = if rng.random() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (mut inlay_map, _) = InlayMap::new(buffer_snapshot.clone()); + + // Perform random mutations to add inlays + let mut next_inlay_id = 0; + let mutation_count = rng.random_range(1..10); + for _ in 0..mutation_count { + inlay_map.randomly_mutate(&mut 
next_inlay_id, &mut rng); + } + + let (snapshot, _) = inlay_map.sync(buffer_snapshot, vec![]); + + // Get all chunks and verify their bitmaps + let chunks = snapshot.chunks( + InlayOffset(0)..InlayOffset(snapshot.len().0), + false, + Highlights::default(), + ); + + for chunk in chunks.into_iter().map(|inlay_chunk| inlay_chunk.chunk) { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } + } + fn init_test(cx: &mut App) { let store = SettingsStore::test(cx); cx.set_global(store); diff --git a/crates/editor/src/display_map/tab_map.rs b/crates/editor/src/display_map/tab_map.rs index 523e777d9113b203dafbb5e151ba22a01394c956..e42d17123dfce9d0ca8c4faa84eabbaabf5707f4 100644 --- a/crates/editor/src/display_map/tab_map.rs +++ b/crates/editor/src/display_map/tab_map.rs @@ -2,6 +2,7 @@ use super::{ Highlights, fold_map::{self, Chunk, FoldChunks, FoldEdit, FoldPoint, FoldSnapshot}, }; + use language::Point; use multi_buffer::MultiBufferSnapshot; use std::{cmp, mem, num::NonZeroU32, ops::Range}; @@ -72,6 +73,7 @@ impl TabMap { false, Highlights::default(), ) { + // todo(performance use tabs bitmask) for (ix, _) in chunk.text.match_indices('\t') { let offset_from_edit = offset_from_edit + (ix as u32); if first_tab_offset.is_none() { @@ -230,7 +232,7 @@ impl TabSnapshot { } } - pub fn chunks<'a>( + pub(crate) fn chunks<'a>( &'a self, range: Range, language_aware: bool, @@ -299,21 +301,29 @@ impl TabSnapshot { } pub fn to_tab_point(&self, input: FoldPoint) -> TabPoint { - let chars = self.fold_snapshot.chars_at(FoldPoint::new(input.row(), 0)); - let expanded = self.expand_tabs(chars, input.column()); + let chunks = self.fold_snapshot.chunks_at(FoldPoint::new(input.row(), 0)); + let tab_cursor = TabStopCursor::new(chunks); + let expanded = self.expand_tabs(tab_cursor, input.column()); TabPoint::new(input.row(), expanded) } pub fn to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) { - let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0)); + let chunks = self + .fold_snapshot + .chunks_at(FoldPoint::new(output.row(), 0)); + + let tab_cursor = TabStopCursor::new(chunks); let expanded = output.column(); let (collapsed, expanded_char_column, to_next_stop) = - self.collapse_tabs(chars, expanded, bias); - ( + 
self.collapse_tabs(tab_cursor, expanded, bias); + + let result = ( FoldPoint::new(output.row(), collapsed), expanded_char_column, to_next_stop, - ) + ); + + result } pub fn make_tab_point(&self, point: Point, bias: Bias) -> TabPoint { @@ -330,72 +340,90 @@ impl TabSnapshot { .to_buffer_point(inlay_point) } - fn expand_tabs(&self, chars: impl Iterator, column: u32) -> u32 { + fn expand_tabs<'a, I>(&self, mut cursor: TabStopCursor<'a, I>, column: u32) -> u32 + where + I: Iterator>, + { let tab_size = self.tab_size.get(); - let mut expanded_chars = 0; - let mut expanded_bytes = 0; - let mut collapsed_bytes = 0; let end_column = column.min(self.max_expansion_column); - for c in chars { - if collapsed_bytes >= end_column { - break; - } - if c == '\t' { - let tab_len = tab_size - expanded_chars % tab_size; - expanded_bytes += tab_len; - expanded_chars += tab_len; - } else { - expanded_bytes += c.len_utf8() as u32; - expanded_chars += 1; - } - collapsed_bytes += c.len_utf8() as u32; + let mut seek_target = end_column; + let mut tab_count = 0; + let mut expanded_tab_len = 0; + + while let Some(tab_stop) = cursor.seek(seek_target) { + let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count; + let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size); + tab_count += 1; + expanded_tab_len += tab_len; + + seek_target = end_column - cursor.byte_offset; } + + let left_over_char_bytes = if !cursor.is_char_boundary() { + cursor.bytes_until_next_char().unwrap_or(0) as u32 + } else { + 0 + }; + + let collapsed_bytes = cursor.byte_offset() + left_over_char_bytes; + let expanded_bytes = + cursor.byte_offset() + expanded_tab_len - tab_count + left_over_char_bytes; + expanded_bytes + column.saturating_sub(collapsed_bytes) } - fn collapse_tabs( + fn collapse_tabs<'a, I>( &self, - chars: impl Iterator, + mut cursor: TabStopCursor<'a, I>, column: u32, bias: Bias, - ) -> (u32, u32, u32) { + ) -> (u32, u32, u32) + where + I: Iterator>, + { let tab_size = 
self.tab_size.get(); - - let mut expanded_bytes = 0; - let mut expanded_chars = 0; - let mut collapsed_bytes = 0; - for c in chars { - if expanded_bytes >= column { - break; - } - if collapsed_bytes >= self.max_expansion_column { - break; - } - - if c == '\t' { - let tab_len = tab_size - (expanded_chars % tab_size); - expanded_chars += tab_len; - expanded_bytes += tab_len; - if expanded_bytes > column { - expanded_chars -= expanded_bytes - column; - return match bias { - Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column), - Bias::Right => (collapsed_bytes + 1, expanded_chars, 0), - }; - } + let mut collapsed_column = column; + let mut seek_target = column.min(self.max_expansion_column); + let mut tab_count = 0; + let mut expanded_tab_len = 0; + + while let Some(tab_stop) = cursor.seek(seek_target) { + // Calculate how much we want to expand this tab stop (into spaces) + let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count; + let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size); + // Increment tab count + tab_count += 1; + // The count of how many spaces we've added to this line in place of tab bytes + expanded_tab_len += tab_len; + + // The count of bytes at this point in the iteration while considering tab_count and previous expansions + let expanded_bytes = tab_stop.byte_offset + expanded_tab_len - tab_count; + + // Did we expand past the search target? 
+ if expanded_bytes > column { + let mut expanded_chars = tab_stop.char_offset + expanded_tab_len - tab_count; + // We expanded past the search target, so need to account for the offshoot + expanded_chars -= expanded_bytes - column; + return match bias { + Bias::Left => ( + cursor.byte_offset() - 1, + expanded_chars, + expanded_bytes - column, + ), + Bias::Right => (cursor.byte_offset(), expanded_chars, 0), + }; } else { - expanded_chars += 1; - expanded_bytes += c.len_utf8() as u32; - } - - if expanded_bytes > column && matches!(bias, Bias::Left) { - expanded_chars -= 1; - break; + // otherwise we only want to move the cursor collapse column forward + collapsed_column = collapsed_column - tab_len + 1; + seek_target = (collapsed_column - cursor.byte_offset) + .min(self.max_expansion_column - cursor.byte_offset); } - - collapsed_bytes += c.len_utf8() as u32; } + + let collapsed_bytes = cursor.byte_offset(); + let expanded_bytes = cursor.byte_offset() + expanded_tab_len - tab_count; + let expanded_chars = cursor.char_offset() + expanded_tab_len - tab_count; ( collapsed_bytes + column.saturating_sub(expanded_bytes), expanded_chars, @@ -523,6 +551,7 @@ impl TabChunks<'_> { self.chunk = Chunk { text: &SPACES[0..(to_next_stop as usize)], is_tab: true, + chars: (1u128 << to_next_stop) - 1, ..Default::default() }; self.inside_leading_tab = to_next_stop > 0; @@ -546,18 +575,37 @@ impl<'a> Iterator for TabChunks<'a> { } } + //todo(improve performance by using tab cursor) for (ix, c) in self.chunk.text.char_indices() { match c { '\t' => { if ix > 0 { let (prefix, suffix) = self.chunk.text.split_at(ix); + + let (chars, tabs) = if ix == 128 { + let output = (self.chunk.chars, self.chunk.tabs); + self.chunk.chars = 0; + self.chunk.tabs = 0; + output + } else { + let mask = (1 << ix) - 1; + let output = (self.chunk.chars & mask, self.chunk.tabs & mask); + self.chunk.chars = self.chunk.chars >> ix; + self.chunk.tabs = self.chunk.tabs >> ix; + output + }; + self.chunk.text = 
suffix; return Some(Chunk { text: prefix, + chars, + tabs, ..self.chunk.clone() }); } else { self.chunk.text = &self.chunk.text[1..]; + self.chunk.tabs >>= 1; + self.chunk.chars >>= 1; let tab_size = if self.input_column < self.max_expansion_column { self.tab_size.get() } else { @@ -575,6 +623,8 @@ impl<'a> Iterator for TabChunks<'a> { return Some(Chunk { text: &SPACES[..len as usize], is_tab: true, + chars: (1 << len) - 1, + tabs: 0, ..self.chunk.clone() }); } @@ -603,21 +653,270 @@ mod tests { use super::*; use crate::{ MultiBuffer, - display_map::{fold_map::FoldMap, inlay_map::InlayMap}, + display_map::{ + fold_map::{FoldMap, FoldOffset}, + inlay_map::InlayMap, + }, }; use rand::{Rng, prelude::StdRng}; + use util; + + impl TabSnapshot { + fn expected_collapse_tabs( + &self, + chars: impl Iterator, + column: u32, + bias: Bias, + ) -> (u32, u32, u32) { + let tab_size = self.tab_size.get(); + + let mut expanded_bytes = 0; + let mut expanded_chars = 0; + let mut collapsed_bytes = 0; + for c in chars { + if expanded_bytes >= column { + break; + } + if collapsed_bytes >= self.max_expansion_column { + break; + } + + if c == '\t' { + let tab_len = tab_size - (expanded_chars % tab_size); + expanded_chars += tab_len; + expanded_bytes += tab_len; + if expanded_bytes > column { + expanded_chars -= expanded_bytes - column; + return match bias { + Bias::Left => { + (collapsed_bytes, expanded_chars, expanded_bytes - column) + } + Bias::Right => (collapsed_bytes + 1, expanded_chars, 0), + }; + } + } else { + expanded_chars += 1; + expanded_bytes += c.len_utf8() as u32; + } + + if expanded_bytes > column && matches!(bias, Bias::Left) { + expanded_chars -= 1; + break; + } + + collapsed_bytes += c.len_utf8() as u32; + } + + ( + collapsed_bytes + column.saturating_sub(expanded_bytes), + expanded_chars, + 0, + ) + } + + pub fn expected_to_tab_point(&self, input: FoldPoint) -> TabPoint { + let chars = self.fold_snapshot.chars_at(FoldPoint::new(input.row(), 0)); + let expanded = 
self.expected_expand_tabs(chars, input.column()); + TabPoint::new(input.row(), expanded) + } + + fn expected_expand_tabs(&self, chars: impl Iterator, column: u32) -> u32 { + let tab_size = self.tab_size.get(); + + let mut expanded_chars = 0; + let mut expanded_bytes = 0; + let mut collapsed_bytes = 0; + let end_column = column.min(self.max_expansion_column); + for c in chars { + if collapsed_bytes >= end_column { + break; + } + if c == '\t' { + let tab_len = tab_size - expanded_chars % tab_size; + expanded_bytes += tab_len; + expanded_chars += tab_len; + } else { + expanded_bytes += c.len_utf8() as u32; + expanded_chars += 1; + } + collapsed_bytes += c.len_utf8() as u32; + } + + expanded_bytes + column.saturating_sub(collapsed_bytes) + } + + fn expected_to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) { + let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0)); + let expanded = output.column(); + let (collapsed, expanded_char_column, to_next_stop) = + self.expected_collapse_tabs(chars, expanded, bias); + ( + FoldPoint::new(output.row(), collapsed), + expanded_char_column, + to_next_stop, + ) + } + } #[gpui::test] fn test_expand_tabs(cx: &mut gpui::App) { + let test_values = [ + ("κg🏀 f\nwo🏀❌by🍐❎β🍗c\tβ❎ \ncλ🎉", 17), + (" \twςe", 4), + ("fε", 1), + ("i❎\t", 3), + ]; let buffer = MultiBuffer::build_simple("", cx); let buffer_snapshot = buffer.read(cx).snapshot(cx); let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); - assert_eq!(tab_snapshot.expand_tabs("\t".chars(), 0), 0); - assert_eq!(tab_snapshot.expand_tabs("\t".chars(), 1), 4); - assert_eq!(tab_snapshot.expand_tabs("\ta".chars(), 2), 5); + for (text, column) in test_values { + let mut tabs = 0u128; + let mut chars = 0u128; + for (idx, c) in text.char_indices() { + if c == '\t' { + tabs |= 1 << idx; + } + chars |= 1 << idx; + } + + let 
chunks = [Chunk { + text, + tabs, + chars, + ..Default::default() + }]; + + let cursor = TabStopCursor::new(chunks); + + assert_eq!( + tab_snapshot.expected_expand_tabs(text.chars(), column), + tab_snapshot.expand_tabs(cursor, column) + ); + } + } + + #[gpui::test] + fn test_collapse_tabs(cx: &mut gpui::App) { + let input = "A\tBC\tDEF\tG\tHI\tJ\tK\tL\tM"; + + let buffer = MultiBuffer::build_simple(input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); + + for (ix, _) in input.char_indices() { + let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point(); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Left), + tab_snapshot.to_fold_point(range.start, Bias::Left), + "Failed with tab_point at column {ix}" + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Right), + tab_snapshot.to_fold_point(range.start, Bias::Right), + "Failed with tab_point at column {ix}" + ); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Left), + tab_snapshot.to_fold_point(range.end, Bias::Left), + "Failed with tab_point at column {ix}" + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Right), + tab_snapshot.to_fold_point(range.end, Bias::Right), + "Failed with tab_point at column {ix}" + ); + } + } + + #[gpui::test] + fn test_to_fold_point_panic_reproduction(cx: &mut gpui::App) { + // This test reproduces a specific panic where to_fold_point returns incorrect results + let _text = "use macro_rules_attribute::apply;\nuse serde_json::Value;\nuse smol::{\n io::AsyncReadExt,\n process::{Command, Stdio},\n};\nuse smol_macros::main;\nuse std::io;\n\nfn test_random() {\n // Generate a random value\n let random_value = std::time::SystemTime::now()\n .duration_since(std::time::UNIX_EPOCH)\n 
.unwrap()\n .as_secs()\n % 100;\n\n // Create some complex nested data structures\n let mut vector = Vec::new();\n for i in 0..random_value {\n vector.push(i);\n }\n "; + + let text = "γ\tw⭐\n🍐🍗 \t"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); + + // This should panic with the expected vs actual mismatch + let tab_point = TabPoint::new(0, 9); + let result = tab_snapshot.to_fold_point(tab_point, Bias::Left); + let expected = tab_snapshot.expected_to_fold_point(tab_point, Bias::Left); + + assert_eq!(result, expected); + } + + #[gpui::test(iterations = 100)] + fn test_collapse_tabs_random(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 200 characters including tabs + // to stay within the MAX_EXPANSION_COLUMN limit of 256 + let len = rng.random_range(0..=2048); + let tab_size = NonZeroU32::new(rng.random_range(1..=4)).unwrap(); + let mut input = String::with_capacity(len); + + for _ in 0..len { + if rng.random_bool(0.1) { + // 10% chance of inserting a tab + input.push('\t'); + } else { + // 90% chance of inserting a random ASCII character (excluding tab, newline, carriage return) + let ch = loop { + let ascii_code = rng.random_range(32..=126); // printable ASCII range + let ch = ascii_code as u8 as char; + if ch != '\t' { + break ch; + } + }; + input.push(ch); + } + } + + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); + tab_snapshot.max_expansion_column = rng.random_range(0..323); + tab_snapshot.tab_size = tab_size; + + for (ix, 
_) in input.char_indices() { + let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point(); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Left), + tab_snapshot.to_fold_point(range.start, Bias::Left), + "Failed with input: {}, with idx: {ix}", + input + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Right), + tab_snapshot.to_fold_point(range.start, Bias::Right), + "Failed with input: {}, with idx: {ix}", + input + ); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Left), + tab_snapshot.to_fold_point(range.end, Bias::Left), + "Failed with input: {}, with idx: {ix}", + input + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Right), + tab_snapshot.to_fold_point(range.end, Bias::Right), + "Failed with input: {}, with idx: {ix}", + input + ); + } } #[gpui::test] @@ -811,4 +1110,479 @@ mod tests { ); } } + + #[gpui::test(iterations = 100)] + fn test_to_tab_point_random(cx: &mut gpui::App, mut rng: StdRng) { + let tab_size = NonZeroU32::new(rng.random_range(1..=16)).unwrap(); + let len = rng.random_range(0..=2000); + + // Generate random text using RandomCharIter + let text = util::RandomCharIter::new(&mut rng) + .take(len) + .collect::(); + + // Create buffer and tab map + let buffer = MultiBuffer::build_simple(&text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (mut inlay_map, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (mut fold_map, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (mut tab_map, _) = TabMap::new(fold_snapshot, tab_size); + + let mut next_inlay_id = 0; + let (inlay_snapshot, inlay_edits) = inlay_map.randomly_mutate(&mut next_inlay_id, &mut rng); + let (fold_snapshot, fold_edits) = fold_map.read(inlay_snapshot, inlay_edits); + let max_fold_point = fold_snapshot.max_point(); + let (mut tab_snapshot, _) = tab_map.sync(fold_snapshot.clone(), fold_edits, tab_size); + + // Test random fold points + for _ in 0..50 { + 
tab_snapshot.max_expansion_column = rng.random_range(0..=256); + // Generate random fold point + let row = rng.random_range(0..=max_fold_point.row()); + let max_column = if row < max_fold_point.row() { + fold_snapshot.line_len(row) + } else { + max_fold_point.column() + }; + let column = rng.random_range(0..=max_column + 10); + let fold_point = FoldPoint::new(row, column); + + let actual = tab_snapshot.to_tab_point(fold_point); + let expected = tab_snapshot.expected_to_tab_point(fold_point); + + assert_eq!( + actual, expected, + "to_tab_point mismatch for fold_point {:?} in text {:?}", + fold_point, text + ); + } + } + + #[gpui::test] + fn test_tab_stop_cursor_utf8(cx: &mut gpui::App) { + let text = "\tfoo\tbarbarbar\t\tbaz\n"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let chunks = fold_snapshot.chunks( + FoldOffset(0)..fold_snapshot.len(), + false, + Default::default(), + ); + let mut cursor = TabStopCursor::new(chunks); + assert!(cursor.seek(0).is_none()); + let mut tab_stops = Vec::new(); + + let mut all_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + char_offset: offset as u32 + 1, + }); + } + } + + while let Some(tab_stop) = cursor.seek(u32::MAX) { + tab_stops.push(tab_stop); + } + pretty_assertions::assert_eq!(tab_stops.as_slice(), all_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test] + fn test_tab_stop_with_end_range_utf8(cx: &mut gpui::App) { + let input = "A\tBC\t"; // DEF\tG\tHI\tJ\tK\tL\tM + + let buffer = MultiBuffer::build_simple(input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = 
InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut actual_tab_stops = Vec::new(); + + let mut expected_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + expected_tab_stops.push(TabStop { + byte_offset, + char_offset: offset as u32 + 1, + }); + } + } + + while let Some(tab_stop) = cursor.seek(u32::MAX) { + actual_tab_stops.push(tab_stop); + } + pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test(iterations = 100)] + fn test_tab_stop_cursor_random_utf8(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 512 characters including tabs + let len = rng.random_range(0..=2048); + let mut input = String::with_capacity(len); + + let mut skip_tabs = rng.random_bool(0.10); + for idx in 0..len { + if idx % 128 == 0 { + skip_tabs = rng.random_bool(0.10); + } + + if rng.random_bool(0.15) && !skip_tabs { + input.push('\t'); + } else { + let ch = loop { + let ascii_code = rng.random_range(32..=126); // printable ASCII range + let ch = ascii_code as u8 as char; + if ch != '\t' { + break ch; + } + }; + input.push(ch); + } + } + + // Build the buffer and create cursor + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + // First, collect all expected tab positions + let mut all_tab_stops = Vec::new(); + let mut byte_offset = 1; + let mut char_offset = 1; + for ch in buffer_snapshot.text().chars() { + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + 
char_offset, + }); + } + byte_offset += ch.len_utf8() as u32; + char_offset += 1; + } + + // Test with various distances + let distances = vec![1, 5, 10, 50, 100, u32::MAX]; + // let distances = vec![150]; + + for distance in distances { + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut found_tab_stops = Vec::new(); + let mut position = distance; + while let Some(tab_stop) = cursor.seek(position) { + found_tab_stops.push(tab_stop); + position = distance - tab_stop.byte_offset; + } + + let expected_found_tab_stops: Vec<_> = all_tab_stops + .iter() + .take_while(|tab_stop| tab_stop.byte_offset <= distance) + .cloned() + .collect(); + + pretty_assertions::assert_eq!( + found_tab_stops, + expected_found_tab_stops, + "TabStopCursor output mismatch for distance {}. Input: {:?}", + distance, + input + ); + + let final_position = cursor.byte_offset(); + if !found_tab_stops.is_empty() { + let last_tab_stop = found_tab_stops.last().unwrap(); + assert!( + final_position >= last_tab_stop.byte_offset, + "Cursor final position {} is before last tab stop {}. 
Input: {:?}", + final_position, + last_tab_stop.byte_offset, + input + ); + } + } + } + + #[gpui::test] + fn test_tab_stop_cursor_utf16(cx: &mut gpui::App) { + let text = "\r\t😁foo\tb😀arbar🤯bar\t\tbaz\n"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let chunks = fold_snapshot.chunks( + FoldOffset(0)..fold_snapshot.len(), + false, + Default::default(), + ); + let mut cursor = TabStopCursor::new(chunks); + assert!(cursor.seek(0).is_none()); + + let mut expected_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (i, ch) in fold_snapshot.chars_at(FoldPoint::new(0, 0)).enumerate() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + expected_tab_stops.push(TabStop { + byte_offset, + char_offset: i as u32 + 1, + }); + } + } + + let mut actual_tab_stops = Vec::new(); + while let Some(tab_stop) = cursor.seek(u32::MAX) { + actual_tab_stops.push(tab_stop); + } + + pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test(iterations = 100)] + fn test_tab_stop_cursor_random_utf16(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 512 characters including tabs + let len = rng.random_range(0..=2048); + let input = util::RandomCharIter::new(&mut rng) + .take(len) + .collect::(); + + // Build the buffer and create cursor + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + // First, collect all expected tab positions + let mut all_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (i, ch) in buffer_snapshot.text().chars().enumerate() { + byte_offset += ch.len_utf8() 
as u32; + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + char_offset: i as u32 + 1, + }); + } + } + + // Test with various distances + // let distances = vec![1, 5, 10, 50, 100, u32::MAX]; + let distances = vec![150]; + + for distance in distances { + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut found_tab_stops = Vec::new(); + let mut position = distance; + while let Some(tab_stop) = cursor.seek(position) { + found_tab_stops.push(tab_stop); + position = distance - tab_stop.byte_offset; + } + + let expected_found_tab_stops: Vec<_> = all_tab_stops + .iter() + .take_while(|tab_stop| tab_stop.byte_offset <= distance) + .cloned() + .collect(); + + pretty_assertions::assert_eq!( + found_tab_stops, + expected_found_tab_stops, + "TabStopCursor output mismatch for distance {}. Input: {:?}", + distance, + input + ); + + let final_position = cursor.byte_offset(); + if !found_tab_stops.is_empty() { + let last_tab_stop = found_tab_stops.last().unwrap(); + assert!( + final_position >= last_tab_stop.byte_offset, + "Cursor final position {} is before last tab stop {}. 
Input: {:?}", + final_position, + last_tab_stop.byte_offset, + input + ); + } + } + } +} + +struct TabStopCursor<'a, I> +where + I: Iterator>, +{ + chunks: I, + byte_offset: u32, + char_offset: u32, + /// Chunk + /// last tab position iterated through + current_chunk: Option<(Chunk<'a>, u32)>, +} + +impl<'a, I> TabStopCursor<'a, I> +where + I: Iterator>, +{ + fn new(chunks: impl IntoIterator, IntoIter = I>) -> Self { + Self { + chunks: chunks.into_iter(), + byte_offset: 0, + char_offset: 0, + current_chunk: None, + } + } + + fn bytes_until_next_char(&self) -> Option { + self.current_chunk.as_ref().and_then(|(chunk, idx)| { + let mut idx = *idx; + let mut diff = 0; + while idx > 0 && chunk.chars & (1 << idx) == 0 { + idx -= 1; + diff += 1; + } + + if chunk.chars & (1 << idx) != 0 { + Some( + (chunk.text[idx as usize..].chars().next()?) + .len_utf8() + .saturating_sub(diff), + ) + } else { + None + } + }) + } + + fn is_char_boundary(&self) -> bool { + self.current_chunk + .as_ref() + .is_some_and(|(chunk, idx)| (chunk.chars & (1 << *idx.min(&127))) != 0) + } + + /// distance: length to move forward while searching for the next tab stop + fn seek(&mut self, distance: u32) -> Option { + if distance == 0 { + return None; + } + + let mut distance_traversed = 0; + + while let Some((mut chunk, chunk_position)) = self + .current_chunk + .take() + .or_else(|| self.chunks.next().zip(Some(0))) + { + if chunk.tabs == 0 { + let chunk_distance = chunk.text.len() as u32 - chunk_position; + if chunk_distance + distance_traversed >= distance { + let overshoot = distance_traversed.abs_diff(distance); + + self.byte_offset += overshoot; + self.char_offset += get_char_offset( + chunk_position..(chunk_position + overshoot).saturating_sub(1).min(127), + chunk.chars, + ); + + self.current_chunk = Some((chunk, chunk_position + overshoot)); + + return None; + } + + self.byte_offset += chunk_distance; + self.char_offset += get_char_offset( + chunk_position..(chunk_position + 
chunk_distance).saturating_sub(1).min(127), + chunk.chars, + ); + distance_traversed += chunk_distance; + continue; + } + let tab_position = chunk.tabs.trailing_zeros() + 1; + + if distance_traversed + tab_position - chunk_position > distance { + let cursor_position = distance_traversed.abs_diff(distance); + + self.char_offset += get_char_offset( + chunk_position..(chunk_position + cursor_position - 1), + chunk.chars, + ); + self.current_chunk = Some((chunk, cursor_position + chunk_position)); + self.byte_offset += cursor_position; + + return None; + } + + self.byte_offset += tab_position - chunk_position; + self.char_offset += get_char_offset(chunk_position..(tab_position - 1), chunk.chars); + + let tabstop = TabStop { + char_offset: self.char_offset, + byte_offset: self.byte_offset, + }; + + chunk.tabs = (chunk.tabs - 1) & chunk.tabs; + + if tab_position as usize != chunk.text.len() { + self.current_chunk = Some((chunk, tab_position)); + } + + return Some(tabstop); + } + + None + } + + fn byte_offset(&self) -> u32 { + self.byte_offset + } + + fn char_offset(&self) -> u32 { + self.char_offset + } +} + +#[inline(always)] +fn get_char_offset(range: Range, bit_map: u128) -> u32 { + // This edge case can happen when we're at chunk position 128 + + if range.start == range.end { + return if (1u128 << range.start) & bit_map == 0 { + 0 + } else { + 1 + }; + } + let end_shift: u128 = 127u128 - range.end.min(127) as u128; + let mut bit_mask = (u128::MAX >> range.start) << range.start; + bit_mask = (bit_mask << end_shift) >> end_shift; + let bit_map = bit_map & bit_mask; + + bit_map.count_ones() +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct TabStop { + char_offset: u32, + byte_offset: u32, } diff --git a/crates/editor/src/display_map/wrap_map.rs b/crates/editor/src/display_map/wrap_map.rs index 127293726a59d1945e8f9dcbfcd2eb3da0cc2290..cd354d8229634956651ab74dd384332db0eb219e 100644 --- a/crates/editor/src/display_map/wrap_map.rs +++ 
b/crates/editor/src/display_map/wrap_map.rs @@ -970,9 +970,25 @@ impl<'a> Iterator for WrapChunks<'a> { } let (prefix, suffix) = self.input_chunk.text.split_at(input_len); + + let (chars, tabs) = if input_len == 128 { + let output = (self.input_chunk.chars, self.input_chunk.tabs); + self.input_chunk.chars = 0; + self.input_chunk.tabs = 0; + output + } else { + let mask = (1 << input_len) - 1; + let output = (self.input_chunk.chars & mask, self.input_chunk.tabs & mask); + self.input_chunk.chars = self.input_chunk.chars >> input_len; + self.input_chunk.tabs = self.input_chunk.tabs >> input_len; + output + }; + self.input_chunk.text = suffix; Some(Chunk { text: prefix, + chars, + tabs, ..self.input_chunk.clone() }) } diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index aacf6d4f282ca12b273949a08704782839864a66..b731006a62990b5b9de75223ca38fbebb684c91c 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -18906,12 +18906,7 @@ impl Editor { } /// called by the Element so we know what style we were most recently rendered with. - pub(crate) fn set_style( - &mut self, - style: EditorStyle, - window: &mut Window, - cx: &mut Context, - ) { + pub fn set_style(&mut self, style: EditorStyle, window: &mut Window, cx: &mut Context) { // We intentionally do not inform the display map about the minimap style // so that wrapping is not recalculated and stays consistent for the editor // and its linked minimap. 
diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 0a8d62845aa26a5504a8c86e897cafa21c57f0f8..51e6b6d1e032aa5e786e9117b96aff8adaba638f 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -27,6 +27,7 @@ use gpui::{ App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText, Task, TaskLabel, TextStyle, }; + use lsp::{LanguageServerId, NumberOrString}; use parking_lot::Mutex; use schemars::JsonSchema; @@ -500,6 +501,10 @@ pub struct Chunk<'a> { pub is_unnecessary: bool, /// Whether this chunk of text was originally a tab character. pub is_tab: bool, + /// A bitset of which characters are tabs in this string. + pub tabs: u128, + /// Bitmap of character indices in this chunk + pub chars: u128, /// Whether this chunk of text was originally a tab character. pub is_inlay: bool, /// Whether to underline the corresponding text range in the editor. @@ -4919,7 +4924,12 @@ impl<'a> Iterator for BufferChunks<'a> { } self.diagnostic_endpoints = diagnostic_endpoints; - if let Some(chunk) = self.chunks.peek() { + if let Some(ChunkBitmaps { + text: chunk, + chars: chars_map, + tabs, + }) = self.chunks.peek_tabs() + { let chunk_start = self.range.start; let mut chunk_end = (self.chunks.offset() + chunk.len()) .min(next_capture_start) @@ -4934,6 +4944,16 @@ impl<'a> Iterator for BufferChunks<'a> { let slice = &chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()]; + let bit_end = chunk_end - self.chunks.offset(); + + let mask = if bit_end >= 128 { + u128::MAX + } else { + (1u128 << bit_end) - 1 + }; + let tabs = (tabs >> (chunk_start - self.chunks.offset())) & mask; + let chars_map = (chars_map >> (chunk_start - self.chunks.offset())) & mask; + self.range.start = chunk_end; if self.range.start == self.chunks.offset() + chunk.len() { self.chunks.next().unwrap(); @@ -4945,6 +4965,8 @@ impl<'a> Iterator for BufferChunks<'a> { underline: self.underline, diagnostic_severity: 
self.current_diagnostic_severity(), is_unnecessary: self.current_code_is_unnecessary(), + tabs, + chars: chars_map, ..Chunk::default() }) } else { diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index 8f3f15cacd4be19e375d46d480b3e41f4edd779e..fcd93390c891f1d65b2f424a5bc70cd7f23c7912 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -3852,3 +3852,80 @@ fn init_settings(cx: &mut App, f: fn(&mut AllLanguageSettingsContent)) { settings.update_user_settings::(cx, f); }); } + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) { + use util::RandomCharIter; + + // Generate random text + let len = rng.random_range(0..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + + let buffer = cx.new(|cx| Buffer::local(text, cx)); + let snapshot = buffer.read(cx).snapshot(); + + // Get all chunks and verify their bitmaps + let chunks = snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: 
{:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index 0bcfa2f4d3eb605f0beefb0f2a53817059597ce2..8fa8c2c08c25aa6f003365556594ed0719f9861e 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -7740,12 +7740,21 @@ impl<'a> Iterator for MultiBufferChunks<'a> { let diff_transform_end = diff_transform_end.min(self.range.end); if diff_transform_end < chunk_end { - let (before, after) = - chunk.text.split_at(diff_transform_end - self.range.start); + let split_idx = diff_transform_end - self.range.start; + let (before, after) = chunk.text.split_at(split_idx); self.range.start = diff_transform_end; + let mask = (1 << split_idx) - 1; + let chars = chunk.chars & mask; + let tabs = chunk.tabs & mask; + chunk.text = after; + chunk.chars = chunk.chars >> split_idx; + chunk.tabs = chunk.tabs >> split_idx; + Some(Chunk { text: before, + chars, + tabs, ..chunk.clone() }) } else { @@ -7789,6 +7798,7 @@ impl<'a> Iterator for MultiBufferChunks<'a> { self.range.start += "\n".len(); Chunk { text: "\n", + chars: 1u128, ..Default::default() } }; @@ -7885,9 +7895,11 @@ impl<'a> Iterator for ExcerptChunks<'a> { if self.footer_height > 0 { let text = unsafe { 
str::from_utf8_unchecked(&NEWLINES[..self.footer_height]) }; + let chars = (1 << self.footer_height) - 1; self.footer_height = 0; return Some(Chunk { text, + chars, ..Default::default() }); } diff --git a/crates/multi_buffer/src/multi_buffer_tests.rs b/crates/multi_buffer/src/multi_buffer_tests.rs index efc622b0172a13ae9a6ad3bf366904706a36580f..1be82500786b36fc014c2acf4fb49d4e8abc4d6b 100644 --- a/crates/multi_buffer/src/multi_buffer_tests.rs +++ b/crates/multi_buffer/src/multi_buffer_tests.rs @@ -7,6 +7,7 @@ use parking_lot::RwLock; use rand::prelude::*; use settings::SettingsStore; use std::env; +use util::RandomCharIter; use util::test::sample_text; #[ctor::ctor] @@ -3716,3 +3717,235 @@ fn test_new_empty_buffers_title_can_be_set(cx: &mut App) { }); assert_eq!(multibuffer.read(cx).title(cx), "Hey"); } + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) { + let multibuffer = if rng.random() { + let len = rng.random_range(0..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + let buffer = cx.new(|cx| Buffer::local(text, cx)); + cx.new(|cx| MultiBuffer::singleton(buffer, cx)) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let snapshot = multibuffer.read(cx).snapshot(cx); + + let chunks = snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = 
chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps_with_diffs(cx: &mut App, mut rng: StdRng) { + use buffer_diff::BufferDiff; + use util::RandomCharIter; + + let multibuffer = if rng.random() { + let len = rng.random_range(100..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + let buffer = cx.new(|cx| Buffer::local(text, cx)); + cx.new(|cx| MultiBuffer::singleton(buffer, cx)) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let _diff_count = rng.random_range(1..5); + let mut diffs = Vec::new(); + + multibuffer.update(cx, |multibuffer, cx| { + for buffer_id in multibuffer.excerpt_buffer_ids() { + if rng.random_bool(0.7) { + if let Some(buffer_handle) = multibuffer.buffer(buffer_id) { + let buffer_text = buffer_handle.read(cx).text(); + let mut base_text = String::new(); + + for line in buffer_text.lines() { + if rng.random_bool(0.3) { + continue; + } else if rng.random_bool(0.3) { + let line_len = rng.random_range(0..50); + let modified_line = RandomCharIter::new(&mut rng) + .take(line_len) + .collect::(); + 
base_text.push_str(&modified_line); + base_text.push('\n'); + } else { + base_text.push_str(line); + base_text.push('\n'); + } + } + + if rng.random_bool(0.5) { + let extra_lines = rng.random_range(1..5); + for _ in 0..extra_lines { + let line_len = rng.random_range(0..50); + let extra_line = RandomCharIter::new(&mut rng) + .take(line_len) + .collect::(); + base_text.push_str(&extra_line); + base_text.push('\n'); + } + } + + let diff = + cx.new(|cx| BufferDiff::new_with_base_text(&base_text, &buffer_handle, cx)); + diffs.push(diff.clone()); + multibuffer.add_diff(diff, cx); + } + } + } + }); + + multibuffer.update(cx, |multibuffer, cx| { + if rng.random_bool(0.5) { + multibuffer.set_all_diff_hunks_expanded(cx); + } else { + let snapshot = multibuffer.snapshot(cx); + let text = snapshot.text(); + + let mut ranges = Vec::new(); + for _ in 0..rng.random_range(1..5) { + if snapshot.len() == 0 { + break; + } + + let diff_size = rng.random_range(5..1000); + let mut start = rng.random_range(0..snapshot.len()); + + while !text.is_char_boundary(start) { + start = start.saturating_sub(1); + } + + let mut end = rng.random_range(start..snapshot.len().min(start + diff_size)); + + while !text.is_char_boundary(end) { + end = end.saturating_add(1); + } + let start_anchor = snapshot.anchor_after(start); + let end_anchor = snapshot.anchor_before(end); + ranges.push(start_anchor..end_anchor); + } + multibuffer.expand_diff_hunks(ranges, cx); + } + }); + + let snapshot = multibuffer.read(cx).snapshot(cx); + + let chunks = snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + 
chunk_text.len() + ); + + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index 689875274a460abafb808ab7db7db3f5e0487a03..6b1f290852bb7918d562a9c9110433ffeaf2fed9 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -13,7 +13,7 @@ pub struct Chunk { chars: u128, chars_utf16: u128, newlines: u128, - tabs: u128, + pub tabs: u128, pub text: ArrayString, } @@ -67,6 +67,11 @@ impl Chunk { pub fn slice(&self, range: Range) -> ChunkSlice<'_> { self.as_slice().slice(range) } + + #[inline(always)] + pub fn chars(&self) -> u128 { + self.chars + } } #[derive(Clone, Copy, Debug)] diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 9185b5baa300af93ec7ceb3e951ae6ba71772721..8bcaef20ca3bd5c79413791764a313fd1e6b75ac 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -594,6 +594,15 @@ impl<'a> Cursor<'a> { } } +pub struct ChunkBitmaps<'a> { + /// A slice of text up to 128 bytes in size + pub text: &'a str, + /// Bitmap of character locations in text. LSB ordered + pub chars: u128, + /// Bitmap of tab locations in text. 
LSB ordered + pub tabs: u128, +} + #[derive(Clone)] pub struct Chunks<'a> { chunks: sum_tree::Cursor<'a, Chunk, usize>, @@ -757,6 +766,36 @@ impl<'a> Chunks<'a> { self.offset < initial_offset && self.offset == 0 } + /// Returns bitmaps that represent character positions and tab positions + pub fn peak_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> { + if !self.offset_is_valid() { + return None; + } + + let chunk = self.chunks.item()?; + let chunk_start = *self.chunks.start(); + let slice_range = if self.reversed { + let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start; + let slice_end = self.offset - chunk_start; + slice_start..slice_end + } else { + let slice_start = self.offset - chunk_start; + let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start; + slice_start..slice_end + }; + // A plain `1u128 << 128` overflows when the slice ends on the last byte of a full 128-byte chunk, so use a checked shift and fall back to an all-ones mask. + let bitmask = 1u128.checked_shl(slice_range.end as u32).map_or(u128::MAX, |bit| bit - 1); + + let chars = (chunk.chars() & bitmask) >> slice_range.start; + let tabs = (chunk.tabs & bitmask) >> slice_range.start; + + Some(ChunkBitmaps { + text: &chunk.text[slice_range], + chars, + tabs, + }) + } + pub fn peek(&self) -> Option<&'a str> { if !self.offset_is_valid() { return None; @@ -777,6 +816,36 @@ impl<'a> Chunks<'a> { Some(&chunk.text[slice_range]) } + pub fn peek_tabs(&self) -> Option<ChunkBitmaps<'a>> { + if !self.offset_is_valid() { + return None; + } + + let chunk = self.chunks.item()?; + let chunk_start = *self.chunks.start(); + let slice_range = if self.reversed { + let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start; + let slice_end = self.offset - chunk_start; + slice_start..slice_end + } else { + let slice_start = self.offset - chunk_start; + let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start; + slice_start..slice_end + }; + let chunk_start_offset = slice_range.start; + let slice_text = &chunk.text[slice_range]; + + // Shift the tabs to align with our slice window + let shifted_tabs = chunk.tabs >> chunk_start_offset; + let shifted_chars =
chunk.chars() >> chunk_start_offset; + + Some(ChunkBitmaps { + text: slice_text, + chars: shifted_chars, + tabs: shifted_tabs, + }) + } + pub fn lines(self) -> Lines<'a> { let reversed = self.reversed; Lines { @@ -822,6 +891,30 @@ impl<'a> Chunks<'a> { } } +pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>); + +impl<'a> Iterator for ChunkWithBitmaps<'a> { + /// text, chars bitmap, tabs bitmap + type Item = ChunkBitmaps<'a>; + + fn next(&mut self) -> Option { + let chunk_bitmaps = self.0.peak_with_bitmaps()?; + if self.0.reversed { + self.0.offset -= chunk_bitmaps.text.len(); + if self.0.offset <= *self.0.chunks.start() { + self.0.chunks.prev(); + } + } else { + self.0.offset += chunk_bitmaps.text.len(); + if self.0.offset >= self.0.chunks.end() { + self.0.chunks.next(); + } + } + + Some(chunk_bitmaps) + } +} + impl<'a> Iterator for Chunks<'a> { type Item = &'a str;