multi_buffer: Fix up some anchor checks (#43454)

Lukas Wirth created

Release Notes:

- N/A *or* Added/Fixed/Improved ...

Change summary

Cargo.lock                                                      |  1 
crates/agent_ui/src/inline_assistant.rs                         |  1 
crates/assistant_text_thread/Cargo.toml                         |  1 
crates/assistant_text_thread/src/assistant_text_thread_tests.rs |  7 
crates/assistant_text_thread/src/text_thread.rs                 | 12 
crates/buffer_diff/src/buffer_diff.rs                           |  1 
crates/editor/src/selections_collection.rs                      | 23 +
crates/multi_buffer/src/anchor.rs                               | 44 ++
crates/multi_buffer/src/multi_buffer.rs                         | 43 ++
crates/multi_buffer/src/multi_buffer_tests.rs                   |  2 
crates/multi_buffer/src/path_key.rs                             | 13 
crates/remote/src/transport/wsl.rs                              | 25 
crates/rope/src/rope.rs                                         |  6 
crates/text/src/anchor.rs                                       | 20 +
crates/text/src/text.rs                                         | 13 
15 files changed, 154 insertions(+), 58 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -884,6 +884,7 @@ dependencies = [
  "fuzzy",
  "gpui",
  "indoc",
+ "itertools 0.14.0",
  "language",
  "language_model",
  "log",

crates/agent_ui/src/inline_assistant.rs 🔗

@@ -1445,6 +1445,7 @@ impl InlineAssistant {
                     multi_buffer.update(cx, |multi_buffer, cx| {
                         multi_buffer.push_excerpts(
                             old_buffer.clone(),
+                            // todo(lw): buffer_start and buffer_end might come from different snapshots!
                             Some(ExcerptRange::new(buffer_start..buffer_end)),
                             cx,
                         );

crates/assistant_text_thread/Cargo.toml 🔗

@@ -29,6 +29,7 @@ fs.workspace = true
 futures.workspace = true
 fuzzy.workspace = true
 gpui.workspace = true
+itertools.workspace = true
 language.workspace = true
 language_model.workspace = true
 log.workspace = true

crates/assistant_text_thread/src/assistant_text_thread_tests.rs 🔗

@@ -880,10 +880,9 @@ async fn test_random_context_collaboration(cx: &mut TestAppContext, mut rng: Std
                     let num_sections = rng.random_range(0..=3);
                     let mut section_start = 0;
                     for _ in 0..num_sections {
-                        let mut section_end = rng.random_range(section_start..=output_text.len());
-                        while !output_text.is_char_boundary(section_end) {
-                            section_end += 1;
-                        }
+                        let section_end = output_text.floor_char_boundary(
+                            rng.random_range(section_start..=output_text.len()),
+                        );
                         events.push(Ok(SlashCommandEvent::StartSection {
                             icon: IconName::Ai,
                             label: "section".into(),

crates/assistant_text_thread/src/text_thread.rs 🔗

@@ -16,6 +16,7 @@ use gpui::{
     App, AppContext as _, Context, Entity, EventEmitter, RenderImage, SharedString, Subscription,
     Task,
 };
+use itertools::Itertools as _;
 use language::{AnchorRangeExt, Bias, Buffer, LanguageRegistry, OffsetRangeExt, Point, ToOffset};
 use language_model::{
     LanguageModel, LanguageModelCacheConfiguration, LanguageModelCompletionEvent,
@@ -1853,14 +1854,17 @@ impl TextThread {
                         }
 
                         if ensure_trailing_newline
-                            && buffer.contains_str_at(command_range_end, "\n")
+                            && buffer
+                                .chars_at(command_range_end)
+                                .next()
+                                .is_some_and(|c| c == '\n')
                         {
-                            let newline_offset = insert_position.saturating_sub(1);
-                            if buffer.contains_str_at(newline_offset, "\n")
+                            if let Some((prev_char, '\n')) =
+                                buffer.reversed_chars_at(insert_position).next_tuple()
                                 && last_section_range.is_none_or(|last_section_range| {
                                     !last_section_range
                                         .to_offset(buffer)
-                                        .contains(&newline_offset)
+                                        .contains(&(insert_position - prev_char.len_utf8()))
                                 })
                             {
                                 deletions.push((command_range_end..command_range_end + 1, ""));

crates/buffer_diff/src/buffer_diff.rs 🔗

@@ -147,6 +147,7 @@ impl std::fmt::Debug for BufferDiffInner {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("BufferDiffSnapshot")
             .field("hunks", &self.hunks)
+            .field("remote_id", &self.base_text.remote_id())
             .finish()
     }
 }

crates/editor/src/selections_collection.rs 🔗

@@ -415,6 +415,29 @@ impl SelectionsCollection {
             !mutable_collection.disjoint.is_empty() || mutable_collection.pending.is_some(),
             "There must be at least one selection"
         );
+        if cfg!(debug_assertions) {
+            mutable_collection.disjoint.iter().for_each(|selection| {
+                assert!(
+                    snapshot.can_resolve(&selection.start),
+                    "disjoint selection start is not resolvable for the given snapshot:\n{selection:?}",
+                );
+                assert!(
+                    snapshot.can_resolve(&selection.end),
+                    "disjoint selection end is not resolvable for the given snapshot: {selection:?}",
+                );
+            });
+            if let Some(pending) = &mutable_collection.pending {
+                let selection = &pending.selection;
+                assert!(
+                    snapshot.can_resolve(&selection.start),
+                    "pending selection start is not resolvable for the given snapshot: {pending:?}",
+                );
+                assert!(
+                    snapshot.can_resolve(&selection.end),
+                    "pending selection end is not resolvable for the given snapshot: {pending:?}",
+                );
+            }
+        }
         (mutable_collection.selections_changed, result)
     }
 

crates/multi_buffer/src/anchor.rs 🔗

@@ -9,14 +9,33 @@ use std::{
 use sum_tree::Bias;
 use text::BufferId;
 
-#[derive(Clone, Copy, Eq, PartialEq, Debug, Hash)]
+#[derive(Clone, Copy, Eq, PartialEq, Hash)]
 pub struct Anchor {
+    /// Invariant: If buffer id is `None`, excerpt id must be `ExcerptId::min()` or `ExcerptId::max()`.
     pub buffer_id: Option<BufferId>,
     pub excerpt_id: ExcerptId,
     pub text_anchor: text::Anchor,
     pub diff_base_anchor: Option<text::Anchor>,
 }
 
+impl std::fmt::Debug for Anchor {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if *self == Self::min() {
+            return f.write_str("Anchor::MIN");
+        }
+        if *self == Self::max() {
+            return f.write_str("Anchor::MAX");
+        }
+
+        f.debug_struct("Anchor")
+            .field("buffer_id", &self.buffer_id)
+            .field("excerpt_id", &self.excerpt_id)
+            .field("text_anchor", &self.text_anchor)
+            .field("diff_base_anchor", &self.diff_base_anchor)
+            .finish()
+    }
+}
+
 impl Anchor {
     pub fn with_diff_base_anchor(self, diff_base_anchor: text::Anchor) -> Self {
         Self {
@@ -30,6 +49,10 @@ impl Anchor {
         buffer_id: BufferId,
         text_anchor: text::Anchor,
     ) -> Self {
+        debug_assert!(
+            text_anchor.buffer_id.is_none_or(|id| id == buffer_id),
+            "buffer id does not match the one in the text anchor: {buffer_id:?} {text_anchor:?}",
+        );
         Self {
             buffer_id: Some(buffer_id),
             excerpt_id,
@@ -77,7 +100,12 @@ impl Anchor {
         if excerpt_id_cmp.is_ne() {
             return excerpt_id_cmp;
         }
-        if self_excerpt_id == ExcerptId::min() || self_excerpt_id == ExcerptId::max() {
+        if self_excerpt_id == ExcerptId::max()
+            && self.text_anchor == text::Anchor::MAX
+            && self.text_anchor == text::Anchor::MAX
+            && self.diff_base_anchor.is_none()
+            && other.diff_base_anchor.is_none()
+        {
             return Ordering::Equal;
         }
         if let Some(excerpt) = snapshot.excerpt(self_excerpt_id) {
@@ -119,8 +147,8 @@ impl Anchor {
             && let Some(excerpt) = snapshot.excerpt(self.excerpt_id)
         {
             return Self {
-                buffer_id: self.buffer_id,
-                excerpt_id: self.excerpt_id,
+                buffer_id: Some(excerpt.buffer_id),
+                excerpt_id: excerpt.id,
                 text_anchor: self.text_anchor.bias_left(&excerpt.buffer),
                 diff_base_anchor: self.diff_base_anchor.map(|a| {
                     if let Some(base_text) = snapshot
@@ -143,8 +171,8 @@ impl Anchor {
             && let Some(excerpt) = snapshot.excerpt(self.excerpt_id)
         {
             return Self {
-                buffer_id: self.buffer_id,
-                excerpt_id: self.excerpt_id,
+                buffer_id: Some(excerpt.buffer_id),
+                excerpt_id: excerpt.id,
                 text_anchor: self.text_anchor.bias_right(&excerpt.buffer),
                 diff_base_anchor: self.diff_base_anchor.map(|a| {
                     if let Some(base_text) = snapshot
@@ -174,8 +202,8 @@ impl Anchor {
     }
 
     pub fn is_valid(&self, snapshot: &MultiBufferSnapshot) -> bool {
-        if *self == Anchor::min() || *self == Anchor::max() {
-            true
+        if *self == Anchor::min() || self.excerpt_id == ExcerptId::max() {
+            !snapshot.is_empty()
         } else if let Some(excerpt) = snapshot.excerpt(self.excerpt_id) {
             (self.text_anchor == excerpt.range.context.start
                 || self.text_anchor == excerpt.range.context.end

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -5076,8 +5076,7 @@ impl MultiBufferSnapshot {
         excerpt_id: ExcerptId,
         text_anchor: Range<text::Anchor>,
     ) -> Option<Range<Anchor>> {
-        let excerpt_id = self.latest_excerpt_id(excerpt_id);
-        let excerpt = self.excerpt(excerpt_id)?;
+        let excerpt = self.excerpt(self.latest_excerpt_id(excerpt_id))?;
 
         Some(
             self.anchor_in_excerpt_(excerpt, text_anchor.start)?
@@ -5092,8 +5091,7 @@ impl MultiBufferSnapshot {
         excerpt_id: ExcerptId,
         text_anchor: text::Anchor,
     ) -> Option<Anchor> {
-        let excerpt_id = self.latest_excerpt_id(excerpt_id);
-        let excerpt = self.excerpt(excerpt_id)?;
+        let excerpt = self.excerpt(self.latest_excerpt_id(excerpt_id))?;
         self.anchor_in_excerpt_(excerpt, text_anchor)
     }
 
@@ -5130,7 +5128,8 @@ impl MultiBufferSnapshot {
     }
 
     pub fn can_resolve(&self, anchor: &Anchor) -> bool {
-        if anchor.excerpt_id == ExcerptId::min() || anchor.excerpt_id == ExcerptId::max() {
+        if *anchor == Anchor::min() || anchor.excerpt_id == ExcerptId::max() {
+            // todo(lw): should be `!self.is_empty()`
             true
         } else if let Some(excerpt) = self.excerpt(anchor.excerpt_id) {
             excerpt.buffer.can_resolve(&anchor.text_anchor)
@@ -5791,8 +5790,8 @@ impl MultiBufferSnapshot {
             .and_then(|(buffer, _)| buffer.file())
     }
 
-    pub fn language_at<T: ToOffset>(&self, point: T) -> Option<&Arc<Language>> {
-        self.point_to_buffer_offset(point)
+    pub fn language_at<T: ToOffset>(&self, offset: T) -> Option<&Arc<Language>> {
+        self.point_to_buffer_offset(offset)
             .and_then(|(buffer, offset)| buffer.language_at(offset))
     }
 
@@ -5992,13 +5991,27 @@ impl MultiBufferSnapshot {
         theme: Option<&SyntaxTheme>,
     ) -> Option<(BufferId, Vec<OutlineItem<Anchor>>)> {
         let anchor = self.anchor_before(offset);
-        let excerpt_id = anchor.excerpt_id;
-        let excerpt = self.excerpt(excerpt_id)?;
-        let buffer_id = excerpt.buffer_id;
+        let excerpt @ &Excerpt {
+            id: excerpt_id,
+            buffer_id,
+            ref buffer,
+            ..
+        } = self.excerpt(anchor.excerpt_id)?;
+        if cfg!(debug_assertions) {
+            match anchor.buffer_id {
+                // we clearly are hitting this according to sentry, but in what situations can this occur?
+                Some(anchor_buffer_id) => {
+                    assert_eq!(
+                        anchor_buffer_id, buffer_id,
+                        "anchor {anchor:?} does not match with resolved excerpt {excerpt:?}"
+                    )
+                }
+                None => assert_eq!(anchor, Anchor::max()),
+            }
+        };
         Some((
             buffer_id,
-            excerpt
-                .buffer
+            buffer
                 .symbols_containing(anchor.text_anchor, theme)
                 .into_iter()
                 .flat_map(|item| {
@@ -6114,6 +6127,12 @@ impl MultiBufferSnapshot {
         }
     }
 
+    /// Returns the excerpt for the given id. The returned excerpt is guaranteed
+    /// to have the same excerpt id as the one passed in, with the exception of
+    /// `ExcerptId::max()`.
+    ///
+    /// Callers of this function should generally use the resulting excerpt's `id` field
+    /// afterwards.
     fn excerpt(&self, excerpt_id: ExcerptId) -> Option<&Excerpt> {
         let mut cursor = self.excerpts.cursor::<Option<&Locator>>(());
         let locator = self.excerpt_locator_for_id(excerpt_id);

crates/multi_buffer/src/multi_buffer_tests.rs 🔗

@@ -3050,7 +3050,9 @@ async fn test_random_multibuffer(cx: &mut TestAppContext, mut rng: StdRng) {
 
         for _ in 0..10 {
             let end_ix = rng.random_range(0..=text_rope.len());
+            let end_ix = text_rope.floor_char_boundary(end_ix);
             let start_ix = rng.random_range(0..=end_ix);
+            let start_ix = text_rope.floor_char_boundary(start_ix);
             assert_eq!(
                 snapshot
                     .bytes_in_range(MultiBufferOffset(start_ix)..MultiBufferOffset(end_ix))

crates/multi_buffer/src/path_key.rs 🔗

@@ -57,7 +57,7 @@ impl MultiBuffer {
         let snapshot = self.read(cx);
         let excerpt = snapshot.excerpt(*excerpt_id)?;
         Some(Anchor::in_buffer(
-            *excerpt_id,

+            excerpt.id,

             excerpt.buffer_id,
             excerpt.range.context.start,
         ))
@@ -182,11 +182,16 @@ impl MultiBuffer {
             };
 
             let ids_to_expand = HashSet::from_iter(ids);
+            let mut excerpt_id_ = None;

             let expanded_ranges = excerpt_ids.iter().filter_map(|excerpt_id| {
                 let excerpt = snapshot.excerpt(*excerpt_id)?;
+                let excerpt_id = excerpt.id;

+                if excerpt_id_.is_none() {

+                    excerpt_id_ = Some(excerpt_id);

+                }

 
                 let mut context = excerpt.range.context.to_point(&excerpt.buffer);
-                if ids_to_expand.contains(excerpt_id) {

+                if ids_to_expand.contains(&excerpt_id) {

                     match direction {
                         ExpandExcerptDirection::Up => {
                             context.start.row = context.start.row.saturating_sub(line_count);
@@ -222,10 +227,10 @@ impl MultiBuffer {
                 }
                 merged_ranges.push(range)
             }
-            let Some(excerpt_id) = excerpt_ids.first() else {

+            let Some(excerpt_id) = excerpt_id_ else {

                 continue;
             };
-            let Some(buffer_id) = &snapshot.buffer_id_for_excerpt(*excerpt_id) else {

+            let Some(buffer_id) = &snapshot.buffer_id_for_excerpt(excerpt_id) else {

                 continue;
             };
 

crates/remote/src/transport/wsl.rs 🔗

@@ -141,10 +141,6 @@ impl WslRemoteConnection {
         windows_path_to_wsl_path_impl(&self.connection_options, source, self.can_exec).await
     }
 
-    fn wsl_command(&self, program: &str, args: &[impl AsRef<OsStr>]) -> process::Command {
-        wsl_command_impl(&self.connection_options, program, args, self.can_exec)
-    }
-
     async fn run_wsl_command(&self, program: &str, args: &[&str]) -> Result<String> {
         run_wsl_command_impl(&self.connection_options, program, args, self.can_exec).await
     }
@@ -345,16 +341,17 @@ impl RemoteConnection for WslRemoteConnection {
         if reconnect {
             proxy_args.push("--reconnect".to_owned());
         }
-        let proxy_process = match self
-            .wsl_command("env", &proxy_args)
-            .kill_on_drop(true)
-            .spawn()
-        {
-            Ok(process) => process,
-            Err(error) => {
-                return Task::ready(Err(anyhow!("failed to spawn remote server: {}", error)));
-            }
-        };
+
+        let proxy_process =
+            match wsl_command_impl(&self.connection_options, "env", &proxy_args, self.can_exec)
+                .kill_on_drop(true)
+                .spawn()
+            {
+                Ok(process) => process,
+                Err(error) => {
+                    return Task::ready(Err(anyhow!("failed to spawn remote server: {}", error)));
+                }
+            };
 
         super::handle_rpc_messages_over_child_process_stdio(
             proxy_process,

crates/rope/src/rope.rs 🔗

@@ -715,10 +715,8 @@ impl<'a> Chunks<'a> {
             range.start
         };
         let chunk_offset = offset - chunks.start();
-        if let Some(chunk) = chunks.item()
-            && !chunk.text.is_char_boundary(chunk_offset)
-        {
-            panic!("byte index {} is not a char boundary", offset);
+        if let Some(chunk) = chunks.item() {
+            chunk.assert_char_boundary(chunk_offset);
         }
         Self {
             chunks,

crates/text/src/anchor.rs 🔗

@@ -6,7 +6,7 @@ use std::{cmp::Ordering, fmt::Debug, ops::Range};
 use sum_tree::{Bias, Dimensions};
 
 /// A timestamped position in a buffer
-#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
+#[derive(Copy, Clone, Eq, PartialEq, Hash)]
 pub struct Anchor {
     pub timestamp: clock::Lamport,
     /// The byte offset in the buffer
@@ -16,6 +16,24 @@ pub struct Anchor {
     pub buffer_id: Option<BufferId>,
 }
 
+impl Debug for Anchor {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if *self == Self::MIN {
+            return f.write_str("Anchor::MIN");
+        }
+        if *self == Self::MAX {
+            return f.write_str("Anchor::MAX");
+        }
+
+        f.debug_struct("Anchor")
+            .field("timestamp", &self.timestamp)
+            .field("offset", &self.offset)
+            .field("bias", &self.bias)
+            .field("buffer_id", &self.buffer_id)
+            .finish()
+    }
+}
+
 impl Anchor {
     pub const MIN: Self = Self {
         timestamp: clock::Lamport::MIN,

crates/text/src/text.rs 🔗

@@ -2444,7 +2444,9 @@ impl BufferSnapshot {
         } else if bias == Bias::Right && offset == self.len() {
             Anchor::MAX
         } else {
-            if offset > self.visible_text.len() {
+            if cfg!(debug_assertions) {
+                self.visible_text.assert_char_boundary(offset);
+            } else if offset > self.visible_text.len() {
                 panic!("offset {} is out of bounds", offset)
             }
             let (start, _, item) = self.fragments.find::<usize, _>(&None, &offset, bias);
@@ -3137,12 +3139,9 @@ impl ToOffset for Point {
 
 impl ToOffset for usize {
     fn to_offset(&self, snapshot: &BufferSnapshot) -> usize {
-        assert!(
-            *self <= snapshot.len(),
-            "offset {} is out of range, snapshot length is {}",
-            self,
-            snapshot.len()
-        );
+        if cfg!(debug_assertions) {
+            snapshot.as_rope().assert_char_boundary(*self);
+        }
         *self
     }
 }