Fix panic on inlay split (#33676)

Richard Feldman created

Closes #33641

Release Notes:

- Fixed panic when trying to split on multibyte UTF-8 sequences.

Change summary

crates/editor/src/display_map/inlay_map.rs | 278 +++++++++++++++++++++++
1 file changed, 269 insertions(+), 9 deletions(-)

Detailed changes

crates/editor/src/display_map/inlay_map.rs đź”—

@@ -296,12 +296,25 @@ impl<'a> Iterator for InlayChunks<'a> {
                     *chunk = self.buffer_chunks.next().unwrap();
                 }
 
-                let (prefix, suffix) = chunk.text.split_at(
-                    chunk
-                        .text
-                        .len()
-                        .min(self.transforms.end(&()).0.0 - self.output_offset.0),
-                );
+                let desired_bytes = self.transforms.end(&()).0.0 - self.output_offset.0;
+
+                // If we're already at the transform boundary, skip to the next transform
+                if desired_bytes == 0 {
+                    self.inlay_chunks = None;
+                    self.transforms.next(&());
+                    return self.next();
+                }
+
+                // Determine split index handling edge cases
+                let split_index = if desired_bytes >= chunk.text.len() {
+                    chunk.text.len()
+                } else if chunk.text.is_char_boundary(desired_bytes) {
+                    desired_bytes
+                } else {
+                    find_next_utf8_boundary(chunk.text, desired_bytes)
+                };
+
+                let (prefix, suffix) = chunk.text.split_at(split_index);
 
                 chunk.text = suffix;
                 self.output_offset.0 += prefix.len();
@@ -391,8 +404,24 @@ impl<'a> Iterator for InlayChunks<'a> {
                 let inlay_chunk = self
                     .inlay_chunk
                     .get_or_insert_with(|| inlay_chunks.next().unwrap());
-                let (chunk, remainder) =
-                    inlay_chunk.split_at(inlay_chunk.len().min(next_inlay_highlight_endpoint));
+
+                // Determine split index handling edge cases
+                let split_index = if next_inlay_highlight_endpoint >= inlay_chunk.len() {
+                    inlay_chunk.len()
+                } else if next_inlay_highlight_endpoint == 0 {
+                    // Need to take at least one character to make progress
+                    inlay_chunk
+                        .chars()
+                        .next()
+                        .map(|c| c.len_utf8())
+                        .unwrap_or(1)
+                } else if inlay_chunk.is_char_boundary(next_inlay_highlight_endpoint) {
+                    next_inlay_highlight_endpoint
+                } else {
+                    find_next_utf8_boundary(inlay_chunk, next_inlay_highlight_endpoint)
+                };
+
+                let (chunk, remainder) = inlay_chunk.split_at(split_index);
                 *inlay_chunk = remainder;
                 if inlay_chunk.is_empty() {
                     self.inlay_chunk = None;
@@ -412,7 +441,7 @@ impl<'a> Iterator for InlayChunks<'a> {
             }
         };
 
-        if self.output_offset == self.transforms.end(&()).0 {
+        if self.output_offset >= self.transforms.end(&()).0 {
             self.inlay_chunks = None;
             self.transforms.next(&());
         }
@@ -1143,6 +1172,31 @@ fn push_isomorphic(sum_tree: &mut SumTree<Transform>, summary: TextSummary) {
     }
 }
 
+/// Given a byte index that is NOT a UTF-8 boundary, find the next one.
+/// Assumes: 0 < byte_index < text.len() and !text.is_char_boundary(byte_index)
+#[inline(always)]
+fn find_next_utf8_boundary(text: &str, byte_index: usize) -> usize {
+    let bytes = text.as_bytes();
+    let mut idx = byte_index + 1;
+
+    // Scan forward until we find a boundary
+    while idx < text.len() {
+        if is_utf8_char_boundary(bytes[idx]) {
+            return idx;
+        }
+        idx += 1;
+    }
+
+    // Hit the end, return the full length
+    text.len()
+}
+
+// Private helper function taken from Rust's core::num module (which is both Apache2 and MIT licensed)
+const fn is_utf8_char_boundary(byte: u8) -> bool {
+    // This is bit magic equivalent to: b < 128 || b >= 192
+    (byte as i8) >= -0x40
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1882,4 +1936,210 @@ mod tests {
         cx.set_global(store);
         theme::init(theme::LoadThemes::JustBase, cx);
     }
+
+    /// Helper to create test highlights for an inlay
+    fn create_inlay_highlights(
+        inlay_id: InlayId,
+        highlight_range: Range<usize>,
+        position: Anchor,
+    ) -> TreeMap<TypeId, TreeMap<InlayId, (HighlightStyle, InlayHighlight)>> {
+        let mut inlay_highlights = TreeMap::default();
+        let mut type_highlights = TreeMap::default();
+        type_highlights.insert(
+            inlay_id,
+            (
+                HighlightStyle::default(),
+                InlayHighlight {
+                    inlay: inlay_id,
+                    range: highlight_range,
+                    inlay_position: position,
+                },
+            ),
+        );
+        inlay_highlights.insert(TypeId::of::<()>(), type_highlights);
+        inlay_highlights
+    }
+
+    #[gpui::test]
+    fn test_inlay_utf8_boundary_panic_fix(cx: &mut App) {
+        init_test(cx);
+
+        // This test verifies that we handle UTF-8 character boundaries correctly
+        // when splitting inlay text for highlighting. Previously, this would panic
+        // when trying to split at byte 13, which is in the middle of the '…' character.
+        //
+        // See https://github.com/zed-industries/zed/issues/33641
+        let buffer = MultiBuffer::build_simple("fn main() {}\n", cx);
+        let (mut inlay_map, _) = InlayMap::new(buffer.read(cx).snapshot(cx));
+
+        // Create an inlay with text that contains a multi-byte character
+        // The string "SortingDirec…" contains an ellipsis character '…' which is 3 bytes (E2 80 A6)
+        let inlay_text = "SortingDirec…";
+        let position = buffer.read(cx).snapshot(cx).anchor_before(Point::new(0, 5));
+
+        let inlay = Inlay {
+            id: InlayId::Hint(0),
+            position,
+            text: text::Rope::from(inlay_text),
+            color: None,
+        };
+
+        let (inlay_snapshot, _) = inlay_map.splice(&[], vec![inlay]);
+
+        // Create highlights that request a split at byte 13, which is in the middle
+        // of the '…' character (bytes 12..15). We include the full character.
+        let inlay_highlights = create_inlay_highlights(InlayId::Hint(0), 0..13, position);
+
+        let highlights = crate::display_map::Highlights {
+            text_highlights: None,
+            inlay_highlights: Some(&inlay_highlights),
+            styles: crate::display_map::HighlightStyles::default(),
+        };
+
+        // Collect chunks - this previously would panic
+        let chunks: Vec<_> = inlay_snapshot
+            .chunks(
+                InlayOffset(0)..InlayOffset(inlay_snapshot.len().0),
+                false,
+                highlights,
+            )
+            .collect();
+
+        // Verify the chunks are correct
+        let full_text: String = chunks.iter().map(|c| c.chunk.text).collect();
+        assert_eq!(full_text, "fn maSortingDirec…in() {}\n");
+
+        // Verify the highlighted portion includes the complete ellipsis character
+        let highlighted_chunks: Vec<_> = chunks
+            .iter()
+            .filter(|c| c.chunk.highlight_style.is_some() && c.chunk.is_inlay)
+            .collect();
+
+        assert_eq!(highlighted_chunks.len(), 1);
+        assert_eq!(highlighted_chunks[0].chunk.text, "SortingDirec…");
+    }
+
+    #[gpui::test]
+    fn test_inlay_utf8_boundaries(cx: &mut App) {
+        init_test(cx);
+
+        struct TestCase {
+            inlay_text: &'static str,
+            highlight_range: Range<usize>,
+            expected_highlighted: &'static str,
+            description: &'static str,
+        }
+
+        let test_cases = vec![
+            TestCase {
+                inlay_text: "Hellođź‘‹World",
+                highlight_range: 0..7,
+                expected_highlighted: "Hellođź‘‹",
+                description: "Emoji boundary - rounds up to include full emoji",
+            },
+            TestCase {
+                inlay_text: "Test→End",
+                highlight_range: 0..5,
+                expected_highlighted: "Test→",
+                description: "Arrow boundary - rounds up to include full arrow",
+            },
+            TestCase {
+                inlay_text: "café",
+                highlight_range: 0..4,
+                expected_highlighted: "café",
+                description: "Accented char boundary - rounds up to include full Ă©",
+            },
+            TestCase {
+                inlay_text: "🎨🎭🎪",
+                highlight_range: 0..5,
+                expected_highlighted: "🎨🎭",
+                description: "Multiple emojis - partial highlight",
+            },
+            TestCase {
+                inlay_text: "普通话",
+                highlight_range: 0..4,
+                expected_highlighted: "普通",
+                description: "Chinese characters - partial highlight",
+            },
+            TestCase {
+                inlay_text: "Hello",
+                highlight_range: 0..2,
+                expected_highlighted: "He",
+                description: "ASCII only - no adjustment needed",
+            },
+            TestCase {
+                inlay_text: "đź‘‹",
+                highlight_range: 0..1,
+                expected_highlighted: "đź‘‹",
+                description: "Single emoji - partial byte range includes whole char",
+            },
+            TestCase {
+                inlay_text: "Test",
+                highlight_range: 0..0,
+                expected_highlighted: "",
+                description: "Empty range",
+            },
+            TestCase {
+                inlay_text: "🎨ABC",
+                highlight_range: 2..5,
+                expected_highlighted: "A",
+                description: "Range starting mid-emoji skips the emoji",
+            },
+        ];
+
+        for test_case in test_cases {
+            let buffer = MultiBuffer::build_simple("test", cx);
+            let (mut inlay_map, _) = InlayMap::new(buffer.read(cx).snapshot(cx));
+            let position = buffer.read(cx).snapshot(cx).anchor_before(Point::new(0, 2));
+
+            let inlay = Inlay {
+                id: InlayId::Hint(0),
+                position,
+                text: text::Rope::from(test_case.inlay_text),
+                color: None,
+            };
+
+            let (inlay_snapshot, _) = inlay_map.splice(&[], vec![inlay]);
+            let inlay_highlights = create_inlay_highlights(
+                InlayId::Hint(0),
+                test_case.highlight_range.clone(),
+                position,
+            );
+
+            let highlights = crate::display_map::Highlights {
+                text_highlights: None,
+                inlay_highlights: Some(&inlay_highlights),
+                styles: crate::display_map::HighlightStyles::default(),
+            };
+
+            let chunks: Vec<_> = inlay_snapshot
+                .chunks(
+                    InlayOffset(0)..InlayOffset(inlay_snapshot.len().0),
+                    false,
+                    highlights,
+                )
+                .collect();
+
+            // Verify we got chunks and they total to the expected text
+            let full_text: String = chunks.iter().map(|c| c.chunk.text).collect();
+            assert_eq!(
+                full_text,
+                format!("te{}st", test_case.inlay_text),
+                "Full text mismatch for case: {}",
+                test_case.description
+            );
+
+            // Verify that the highlighted portion matches expectations
+            let highlighted_text: String = chunks
+                .iter()
+                .filter(|c| c.chunk.highlight_style.is_some() && c.chunk.is_inlay)
+                .map(|c| c.chunk.text)
+                .collect();
+            assert_eq!(
+                highlighted_text, test_case.expected_highlighted,
+                "Highlighted text mismatch for case: {} (text: '{}', range: {:?})",
+                test_case.description, test_case.inlay_text, test_case.highlight_range
+            );
+        }
+    }
 }