From f0620c6f8f3c7bc9369143583f26fa6e07719303 Mon Sep 17 00:00:00 2001
From: Marco Mihai Condrache <52580954+marcocondrache@users.noreply.github.com>
Date: Mon, 2 Mar 2026 22:33:05 +0100
Subject: [PATCH] editor: Preserve compound emojis (#50082)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #45466

Emojis like 🧑‍✈️ are grapheme clusters formed using zero-width joiners and
variation selectors. Iterating over the string by individual chars in
`highlight_invisibles` breaks these clusters, even though they represent
a single visible character.

Before you mark this PR as ready for review, make sure that you have:
- [ ] Added a solid test coverage and/or screenshots from doing manual testing
- [x] Done a self-review taking into account security and performance aspects
- [ ] Aligned any UI changes with the [UI checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)

Release Notes:

- Zed now supports compound emojis
---
 crates/editor/src/display_map.rs | 47 +++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/crates/editor/src/display_map.rs b/crates/editor/src/display_map.rs
index 10c17871709e7f6ac237cb3ecb000724b0095c01..610e30f4e1538fa1eb91768a91bd816b3cbd00dd 100644
--- a/crates/editor/src/display_map.rs
+++ b/crates/editor/src/display_map.rs
@@ -1498,7 +1498,7 @@ impl<'a> HighlightedChunk<'a> {
         self,
         editor_style: &'a EditorStyle,
     ) -> impl Iterator + 'a {
-        let mut chars = self.text.chars().peekable();
+        let mut chunks = self.text.graphemes(true).peekable();
         let mut text = self.text;
         let style = self.style;
         let is_tab = self.is_tab;
@@ -1506,10 +1506,12 @@ impl<'a> HighlightedChunk<'a> {
         let is_inlay = self.is_inlay;
         iter::from_fn(move || {
             let mut prefix_len = 0;
-            while let Some(&ch) = chars.peek() {
-                if !is_invisible(ch) {
-                    prefix_len += ch.len_utf8();
-                    chars.next();
+            while let Some(&chunk) = chunks.peek() {
+                let mut chars = chunk.chars();
+                let Some(ch) = chars.next() else { break };
+                if chunk.len() != ch.len_utf8() || !is_invisible(ch) {
+                    prefix_len += chunk.len();
+                    chunks.next();
                     continue;
                 }
                 if prefix_len > 0 {
@@ -1523,8 +1525,8 @@ impl<'a> HighlightedChunk<'a> {
                         replacement: renderer.clone(),
                     });
                 }
-                chars.next();
-                let (prefix, suffix) = text.split_at(ch.len_utf8());
+                chunks.next();
+                let (prefix, suffix) = text.split_at(chunk.len());
                 text = suffix;
                 if let Some(replacement) = replacement(ch) {
                     let invisible_highlight = HighlightStyle {
@@ -4124,4 +4126,35 @@ pub mod tests {
         assert_eq!(ranges[0].start, DisplayPoint::new(DisplayRow(0), 10));
         assert_eq!(ranges[0].end, DisplayPoint::new(DisplayRow(0), 14));
     }
+
+    #[test]
+    fn test_highlight_invisibles_preserves_compound_emojis() {
+        let editor_style = EditorStyle::default();
+
+        let pilot_emoji = "🧑\u{200d}✈\u{fe0f}";
+        let chunk = HighlightedChunk {
+            text: pilot_emoji,
+            style: None,
+            is_tab: false,
+            is_inlay: false,
+            replacement: None,
+        };
+
+        let chunks: Vec<_> = chunk
+            .highlight_invisibles(&editor_style)
+            .map(|chunk| chunk.text.to_string())
+            .collect();
+
+        assert_eq!(
+            chunks.concat(),
+            pilot_emoji,
+            "all text bytes must be preserved"
+        );
+        assert_eq!(
+            chunks.len(),
+            1,
+            "compound emoji should not be split into multiple chunks, got: {:?}",
+            chunks,
+        );
+    }
 }