gpui: Do not render ligatures between different styled text runs (#39928)

Lukas Wirth created

This relands https://github.com/zed-industries/zed/pull/37175 as
https://github.com/zed-industries/zed/pull/39886 fixed the jiggling
issue.

Currently when we render text with differing styles adjacently we might
form a ligature between the text, causing the ligature forming
characters to take on one of the two styles. This can especially become
confusing when a ligature is formed between actual text and inlay hints.

Annoyingly, the only ways to prevent this with core text is to either
render each run separately, or to insert a zero-width non-joiner to
force core text to break the ligatures apart, as it otherwise will merge
subsequent font runs of the same fonts.

We currently do layouting on a per line basis and it is unlikely we want
to change that as it would incur a lot of complexity and annoyances to
merge things back into a line, so this goes with the other approach of
inserting ZWNJ characters instead.

Note that neither linux nor windows seem to currently render ligatures,
so this only concerns macOS rendering at the moment.

Closes https://github.com/zed-industries/zed/issues/23194

Release Notes:

- Fixed ligatures forming between real text and inlay hints on macOS

Change summary

crates/gpui/src/platform/mac/text_system.rs | 169 +++++++++++++++++++---
crates/gpui/src/text_system.rs              | 102 ++++++++-----
crates/gpui/src/text_system/line_layout.rs  |  15 -
3 files changed, 205 insertions(+), 81 deletions(-)

Detailed changes

crates/gpui/src/platform/mac/text_system.rs 🔗

@@ -43,7 +43,7 @@ use pathfinder_geometry::{
     vector::{Vector2F, Vector2I},
 };
 use smallvec::SmallVec;
-use std::{borrow::Cow, char, cmp, convert::TryFrom, sync::Arc};
+use std::{borrow::Cow, char, convert::TryFrom, sync::Arc};
 
 use super::open_type::apply_features_and_fallbacks;
 
@@ -67,6 +67,7 @@ struct MacTextSystemState {
     font_ids_by_postscript_name: HashMap<String, FontId>,
     font_ids_by_font_key: HashMap<FontKey, SmallVec<[FontId; 4]>>,
     postscript_names_by_font_id: HashMap<FontId, String>,
+    zwnjs_scratch_space: Vec<(usize, usize)>,
 }
 
 impl MacTextSystem {
@@ -79,6 +80,7 @@ impl MacTextSystem {
             font_ids_by_postscript_name: HashMap::default(),
             font_ids_by_font_key: HashMap::default(),
             postscript_names_by_font_id: HashMap::default(),
+            zwnjs_scratch_space: Vec::new(),
         }))
     }
 }
@@ -428,30 +430,43 @@ impl MacTextSystemState {
     }
 
     fn layout_line(&mut self, text: &str, font_size: Pixels, font_runs: &[FontRun]) -> LineLayout {
+        const ZWNJ: char = '\u{200C}';
+        const ZWNJ_STR: &str = "\u{200C}";
+        const ZWNJ_SIZE_16: usize = ZWNJ.len_utf16();
+
+        self.zwnjs_scratch_space.clear();
         // Construct the attributed string, converting UTF8 ranges to UTF16 ranges.
         let mut string = CFMutableAttributedString::new();
         let mut max_ascent = 0.0f32;
         let mut max_descent = 0.0f32;
-        {
-            string.replace_str(&CFString::new(text), CFRange::init(0, 0));
-            let utf16_line_len = string.char_len() as usize;
 
-            let mut ix_converter = StringIndexConverter::new(text);
+        {
+            let mut ix_converter = StringIndexConverter::new(&text);
+            let mut last_font_run = None;
             for run in font_runs {
-                let utf8_end = ix_converter.utf8_ix + run.len;
-                let utf16_start = ix_converter.utf16_ix;
-
-                if utf16_start >= utf16_line_len {
-                    break;
+                let text = &text[ix_converter.utf8_ix..][..run.len];
+                // if the fonts are the same, we need to disconnect the text with a ZWNJ
+                // to prevent core text from forming ligatures between them
+                let needs_zwnj = last_font_run.replace(run.font_id) == Some(run.font_id);
+
+                let n_zwnjs = self.zwnjs_scratch_space.len();
+                let utf16_start = ix_converter.utf16_ix + n_zwnjs * ZWNJ_SIZE_16;
+                ix_converter.advance_to_utf8_ix(ix_converter.utf8_ix + run.len);
+
+                string.replace_str(&CFString::new(text), CFRange::init(utf16_start as isize, 0));
+                if needs_zwnj {
+                    let zwnjs_pos = string.char_len();
+                    self.zwnjs_scratch_space.push((n_zwnjs, zwnjs_pos as usize));
+                    string.replace_str(
+                        &CFString::from_static_string(ZWNJ_STR),
+                        CFRange::init(zwnjs_pos, 0),
+                    );
                 }
-
-                ix_converter.advance_to_utf8_ix(utf8_end);
-                let utf16_end = cmp::min(ix_converter.utf16_ix, utf16_line_len);
+                let utf16_end = string.char_len() as usize;
 
                 let cf_range =
                     CFRange::init(utf16_start as isize, (utf16_end - utf16_start) as isize);
-
-                let font: &FontKitFont = &self.fonts[run.font_id.0];
+                let font = &self.fonts[run.font_id.0];
 
                 let font_metrics = font.metrics();
                 let font_scale = font_size.0 / font_metrics.units_per_em as f32;
@@ -465,17 +480,12 @@ impl MacTextSystemState {
                         &font.native_font().clone_with_font_size(font_size.into()),
                     );
                 }
-
-                if utf16_end == utf16_line_len {
-                    break;
-                }
             }
         }
-
         // Retrieve the glyphs from the shaped line, converting UTF16 offsets to UTF8 offsets.
         let line = CTLine::new_with_attributed_string(string.as_concrete_TypeRef());
         let glyph_runs = line.glyph_runs();
-        let mut runs = Vec::with_capacity(glyph_runs.len() as usize);
+        let mut runs = <Vec<ShapedRun>>::with_capacity(glyph_runs.len() as usize);
         let mut ix_converter = StringIndexConverter::new(text);
         for run in glyph_runs.into_iter() {
             let attributes = run.attributes().unwrap();
@@ -487,28 +497,44 @@ impl MacTextSystemState {
             };
             let font_id = self.id_for_native_font(font);
 
-            let mut glyphs = Vec::with_capacity(run.glyph_count().try_into().unwrap_or(0));
-            for ((glyph_id, position), glyph_utf16_ix) in run
+            let mut glyphs = match runs.last_mut() {
+                Some(run) if run.font_id == font_id => &mut run.glyphs,
+                _ => {
+                    runs.push(ShapedRun {
+                        font_id,
+                        glyphs: Vec::with_capacity(run.glyph_count().try_into().unwrap_or(0)),
+                    });
+                    &mut runs.last_mut().unwrap().glyphs
+                }
+            };
+            for ((&glyph_id, position), &glyph_utf16_ix) in run
                 .glyphs()
                 .iter()
                 .zip(run.positions().iter())
                 .zip(run.string_indices().iter())
             {
-                let glyph_utf16_ix = usize::try_from(*glyph_utf16_ix).unwrap();
+                let mut glyph_utf16_ix = usize::try_from(glyph_utf16_ix).unwrap();
+                let r = self
+                    .zwnjs_scratch_space
+                    .binary_search_by(|&(_, it)| it.cmp(&glyph_utf16_ix));
+                match r {
+                    // this glyph is a ZWNJ, skip it
+                    Ok(_) => continue,
+                    // adjust the index to account for the ZWNJs we've inserted
+                    Err(idx) => glyph_utf16_ix -= idx * ZWNJ_SIZE_16,
+                }
                 if ix_converter.utf16_ix > glyph_utf16_ix {
                     // We cannot reuse current index converter, as it can only seek forward. Restart the search.
                     ix_converter = StringIndexConverter::new(text);
                 }
                 ix_converter.advance_to_utf16_ix(glyph_utf16_ix);
                 glyphs.push(ShapedGlyph {
-                    id: GlyphId(*glyph_id as u32),
+                    id: GlyphId(glyph_id as u32),
                     position: point(position.x as f32, position.y as f32).map(px),
                     index: ix_converter.utf8_ix,
                     is_emoji: self.is_emoji(font_id),
                 });
             }
-
-            runs.push(ShapedRun { font_id, glyphs });
         }
         let typographic_bounds = line.get_typographic_bounds();
         LineLayout {
@@ -707,4 +733,93 @@ mod tests {
         // There's no glyph for \u{feff}
         assert_eq!(layout.runs[0].glyphs[1].id, GlyphId(69u32)); // b
     }
+
+    #[test]
+    fn test_layout_line_zwnj_insertion() {
+        let fonts = MacTextSystem::new();
+        let font_id = fonts.font_id(&font("Helvetica")).unwrap();
+
+        let text = "hello world";
+        let font_runs = &[
+            FontRun { font_id, len: 5 }, // "hello"
+            FontRun { font_id, len: 6 }, // " world"
+        ];
+
+        let layout = fonts.layout_line(text, px(16.), font_runs);
+        assert_eq!(layout.len, text.len());
+
+        for run in &layout.runs {
+            for glyph in &run.glyphs {
+                assert!(
+                    glyph.index < text.len(),
+                    "Glyph index {} is out of bounds for text length {}",
+                    glyph.index,
+                    text.len()
+                );
+            }
+        }
+
+        // Test with different font runs - should not insert ZWNJ
+        let font_id2 = fonts.font_id(&font("Times")).unwrap_or(font_id);
+        let font_runs_different = &[
+            FontRun { font_id, len: 5 }, // "hello"
+            // " world"
+            FontRun {
+                font_id: font_id2,
+                len: 6,
+            },
+        ];
+
+        let layout2 = fonts.layout_line(text, px(16.), font_runs_different);
+        assert_eq!(layout2.len, text.len());
+
+        for run in &layout2.runs {
+            for glyph in &run.glyphs {
+                assert!(
+                    glyph.index < text.len(),
+                    "Glyph index {} is out of bounds for text length {}",
+                    glyph.index,
+                    text.len()
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_layout_line_zwnj_edge_cases() {
+        let fonts = MacTextSystem::new();
+        let font_id = fonts.font_id(&font("Helvetica")).unwrap();
+
+        let text = "hello";
+        let font_runs = &[FontRun { font_id, len: 5 }];
+        let layout = fonts.layout_line(text, px(16.), font_runs);
+        assert_eq!(layout.len, text.len());
+
+        let text = "abc";
+        let font_runs = &[
+            FontRun { font_id, len: 1 }, // "a"
+            FontRun { font_id, len: 1 }, // "b"
+            FontRun { font_id, len: 1 }, // "c"
+        ];
+        let layout = fonts.layout_line(text, px(16.), font_runs);
+        assert_eq!(layout.len, text.len());
+
+        for run in &layout.runs {
+            for glyph in &run.glyphs {
+                assert!(
+                    glyph.index < text.len(),
+                    "Glyph index {} is out of bounds for text length {}",
+                    glyph.index,
+                    text.len()
+                );
+            }
+        }
+
+        // Test with empty text
+        let text = "";
+        let font_runs = &[];
+        let layout = fonts.layout_line(text, px(16.), font_runs);
+        assert_eq!(layout.len, 0);
+        assert!(layout.runs.is_empty());
+    }
 }

crates/gpui/src/text_system.rs 🔗

@@ -420,9 +420,10 @@ impl WindowTextSystem {
         let mut wrapped_lines = 0;
 
         let mut process_line = |line_text: SharedString| {
+            font_runs.clear();
             let line_end = line_start + line_text.len();
 
-            let mut last_font: Option<Font> = None;
+            let mut last_font: Option<FontId> = None;
             let mut decoration_runs = SmallVec::<[DecorationRun; 32]>::new();
             let mut run_start = line_start;
             while run_start < line_end {
@@ -432,23 +433,14 @@ impl WindowTextSystem {
 
                 let run_len_within_line = cmp::min(line_end, run_start + run.len) - run_start;
 
-                if last_font == Some(run.font.clone()) {
-                    font_runs.last_mut().unwrap().len += run_len_within_line;
-                } else {
-                    last_font = Some(run.font.clone());
-                    font_runs.push(FontRun {
-                        len: run_len_within_line,
-                        font_id: self.resolve_font(&run.font),
-                    });
-                }
-
-                if decoration_runs.last().is_some_and(|last_run| {
-                    last_run.color == run.color
-                        && last_run.underline == run.underline
-                        && last_run.strikethrough == run.strikethrough
-                        && last_run.background_color == run.background_color
-                }) {
-                    decoration_runs.last_mut().unwrap().len += run_len_within_line as u32;
+                let decoration_changed = if let Some(last_run) = decoration_runs.last_mut()
+                    && last_run.color == run.color
+                    && last_run.underline == run.underline
+                    && last_run.strikethrough == run.strikethrough
+                    && last_run.background_color == run.background_color
+                {
+                    last_run.len += run_len_within_line as u32;
+                    false
                 } else {
                     decoration_runs.push(DecorationRun {
                         len: run_len_within_line as u32,
@@ -457,6 +449,21 @@ impl WindowTextSystem {
                         underline: run.underline,
                         strikethrough: run.strikethrough,
                     });
+                    true
+                };
+
+                if let Some(font_run) = font_runs.last_mut()
+                    && Some(font_run.font_id) == last_font
+                    && !decoration_changed
+                {
+                    font_run.len += run_len_within_line;
+                } else {
+                    let font_id = self.resolve_font(&run.font);
+                    last_font = Some(font_id);
+                    font_runs.push(FontRun {
+                        len: run_len_within_line,
+                        font_id,
+                    });
                 }
 
                 if run_len_within_line == run.len {
@@ -491,8 +498,6 @@ impl WindowTextSystem {
                     runs.next();
                 }
             }
-
-            font_runs.clear();
         };
 
         let mut split_lines = text.split('\n');
@@ -526,37 +531,54 @@ impl WindowTextSystem {
     /// Subsets of the line can be styled independently with the `runs` parameter.
     /// Generally, you should prefer to use [`Self::shape_line`] instead, which
     /// can be painted directly.
-    pub fn layout_line<Text>(
+    pub fn layout_line(
         &self,
-        text: Text,
+        text: &str,
         font_size: Pixels,
         runs: &[TextRun],
         force_width: Option<Pixels>,
-    ) -> Arc<LineLayout>
-    where
-        Text: AsRef<str>,
-        SharedString: From<Text>,
-    {
+    ) -> Arc<LineLayout> {
+        let mut last_run = None::<&TextRun>;
+        let mut last_font: Option<FontId> = None;
         let mut font_runs = self.font_runs_pool.lock().pop().unwrap_or_default();
+        font_runs.clear();
+
         for run in runs.iter() {
-            let font_id = self.resolve_font(&run.font);
-            if let Some(last_run) = font_runs.last_mut()
-                && last_run.font_id == font_id
+            let decoration_changed = if let Some(last_run) = last_run
+                && last_run.color == run.color
+                && last_run.underline == run.underline
+                && last_run.strikethrough == run.strikethrough
+            // we do not consider differing background color relevant, as it does not affect glyphs
+            // && last_run.background_color == run.background_color
             {
-                last_run.len += run.len;
-                continue;
+                false
+            } else {
+                last_run = Some(run);
+                true
+            };
+
+            if let Some(font_run) = font_runs.last_mut()
+                && Some(font_run.font_id) == last_font
+                && !decoration_changed
+            {
+                font_run.len += run.len;
+            } else {
+                let font_id = self.resolve_font(&run.font);
+                last_font = Some(font_id);
+                font_runs.push(FontRun {
+                    len: run.len,
+                    font_id,
+                });
             }
-            font_runs.push(FontRun {
-                len: run.len,
-                font_id,
-            });
         }
 
-        let layout =
-            self.line_layout_cache
-                .layout_line_internal(text, font_size, &font_runs, force_width);
+        let layout = self.line_layout_cache.layout_line(
+            &SharedString::new(text),
+            font_size,
+            &font_runs,
+            force_width,
+        );
 
-        font_runs.clear();
         self.font_runs_pool.lock().push(font_runs);
 
         layout

crates/gpui/src/text_system/line_layout.rs 🔗

@@ -501,7 +501,7 @@ impl LineLayoutCache {
         } else {
             drop(current_frame);
             let text = SharedString::from(text);
-            let unwrapped_layout = self.layout_line::<&SharedString>(&text, font_size, runs);
+            let unwrapped_layout = self.layout_line::<&SharedString>(&text, font_size, runs, None);
             let wrap_boundaries = if let Some(wrap_width) = wrap_width {
                 unwrapped_layout.compute_wrap_boundaries(text.as_ref(), wrap_width, max_lines)
             } else {
@@ -535,19 +535,6 @@ impl LineLayoutCache {
         text: Text,
         font_size: Pixels,
         runs: &[FontRun],
-    ) -> Arc<LineLayout>
-    where
-        Text: AsRef<str>,
-        SharedString: From<Text>,
-    {
-        self.layout_line_internal(text, font_size, runs, None)
-    }
-
-    pub fn layout_line_internal<Text>(
-        &self,
-        text: Text,
-        font_size: Pixels,
-        runs: &[FontRun],
         force_width: Option<Pixels>,
     ) -> Arc<LineLayout>
     where