From 62083fe7963dd5bed4579bb12abac1b7800cdbaa Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 1 Sep 2025 09:49:52 +0200 Subject: [PATCH] gpui: Do not render ligatures between different styled text runs (#37175) Currently when we render text with differing styles adjacently we might form a ligature between the text, causing the ligature forming characters to take on one of the two styles. This can especially become confusing when a ligature is formed between actual text and inlay hints. Annoyingly, the only ways to prevent this with core text is to either render each run separately, or to insert a zero-width non-joiner to force core text to break the ligatures apart, as it otherwise will merge subsequent font runs of the same fonts. We currently do layouting on a per line basis and it is unlikely we want to change that as it would incur a lot of complexity and annoyances to merge things back into a line, so this goes with the other approach of inserting ZWNJ characters instead. Note that neither linux nor windows seem to currently render ligatures, so this only concerns macOS rendering at the moment. Release Notes: - Fixed ligatures forming between real text and inlay hints on macOS --- crates/gpui/src/platform/mac/text_system.rs | 170 ++++++++++++++++---- crates/gpui/src/text_system.rs | 102 +++++++----- crates/gpui/src/text_system/line_layout.rs | 15 +- 3 files changed, 205 insertions(+), 82 deletions(-) diff --git a/crates/gpui/src/platform/mac/text_system.rs b/crates/gpui/src/platform/mac/text_system.rs index 72a0f2e565d9937e3aaf4082b663c3e2ae6ac91d..ba7017b58f76f028a1c5c80959e9359bc379c0cb 100644 --- a/crates/gpui/src/platform/mac/text_system.rs +++ b/crates/gpui/src/platform/mac/text_system.rs @@ -43,7 +43,7 @@ use pathfinder_geometry::{ vector::{Vector2F, Vector2I}, }; use smallvec::SmallVec; -use std::{borrow::Cow, char, cmp, convert::TryFrom, sync::Arc}; +use std::{borrow::Cow, char, convert::TryFrom, sync::Arc}; use super::open_type::apply_features_and_fallbacks; @@ -67,6 +67,7 @@ struct MacTextSystemState { font_ids_by_postscript_name: HashMap, font_ids_by_font_key: HashMap>, postscript_names_by_font_id: HashMap, + zwnjs_scratch_space: Vec<(usize, usize)>, } impl MacTextSystem { @@ -79,6 +80,7 @@ impl MacTextSystem { font_ids_by_postscript_name: HashMap::default(), font_ids_by_font_key: HashMap::default(), postscript_names_by_font_id: HashMap::default(), + zwnjs_scratch_space: Vec::new(), })) } } @@ -424,29 +426,41 @@ impl MacTextSystemState { } fn layout_line(&mut self, text: &str, font_size: Pixels, font_runs: &[FontRun]) -> LineLayout { + const ZWNJ: char = '\u{200C}'; + const ZWNJ_STR: &str = "\u{200C}"; + const ZWNJ_SIZE_16: usize = ZWNJ.len_utf16(); + + self.zwnjs_scratch_space.clear(); // Construct the attributed string, converting UTF8 ranges to UTF16 ranges. let mut string = CFMutableAttributedString::new(); - { - string.replace_str(&CFString::new(text), CFRange::init(0, 0)); - let utf16_line_len = string.char_len() as usize; - let mut ix_converter = StringIndexConverter::new(text); + { + let mut ix_converter = StringIndexConverter::new(&text); + let mut last_font_run = None; for run in font_runs { - let utf8_end = ix_converter.utf8_ix + run.len; - let utf16_start = ix_converter.utf16_ix; - - if utf16_start >= utf16_line_len { - break; + let text = &text[ix_converter.utf8_ix..][..run.len]; + // if the fonts are the same, we need to disconnect the text with a ZWNJ + // to prevent core text from forming ligatures between them + let needs_zwnj = last_font_run.replace(run.font_id) == Some(run.font_id); + + let n_zwnjs = self.zwnjs_scratch_space.len(); + let utf16_start = ix_converter.utf16_ix + n_zwnjs * ZWNJ_SIZE_16; + ix_converter.advance_to_utf8_ix(ix_converter.utf8_ix + run.len); + + string.replace_str(&CFString::new(text), CFRange::init(utf16_start as isize, 0)); + if needs_zwnj { + let zwnjs_pos = string.char_len(); + self.zwnjs_scratch_space.push((n_zwnjs, zwnjs_pos as usize)); + string.replace_str( + &CFString::from_static_string(ZWNJ_STR), + CFRange::init(zwnjs_pos, 0), + ); } - - ix_converter.advance_to_utf8_ix(utf8_end); - let utf16_end = cmp::min(ix_converter.utf16_ix, utf16_line_len); + let utf16_end = string.char_len() as usize; let cf_range = CFRange::init(utf16_start as isize, (utf16_end - utf16_start) as isize); - - let font: &FontKitFont = &self.fonts[run.font_id.0]; - + let font = &self.fonts[run.font_id.0]; unsafe { string.set_attribute( cf_range, @@ -454,17 +468,12 @@ impl MacTextSystemState { &font.native_font().clone_with_font_size(font_size.into()), ); } - - if utf16_end == utf16_line_len { - break; - } } } - // Retrieve the glyphs from the shaped line, converting UTF16 offsets to UTF8 offsets. let line = CTLine::new_with_attributed_string(string.as_concrete_TypeRef()); let glyph_runs = line.glyph_runs(); - let mut runs = Vec::with_capacity(glyph_runs.len() as usize); + let mut runs = >::with_capacity(glyph_runs.len() as usize); let mut ix_converter = StringIndexConverter::new(text); for run in glyph_runs.into_iter() { let attributes = run.attributes().unwrap(); @@ -476,28 +485,44 @@ impl MacTextSystemState { }; let font_id = self.id_for_native_font(font); - let mut glyphs = Vec::with_capacity(run.glyph_count().try_into().unwrap_or(0)); - for ((glyph_id, position), glyph_utf16_ix) in run + let mut glyphs = match runs.last_mut() { + Some(run) if run.font_id == font_id => &mut run.glyphs, + _ => { + runs.push(ShapedRun { + font_id, + glyphs: Vec::with_capacity(run.glyph_count().try_into().unwrap_or(0)), + }); + &mut runs.last_mut().unwrap().glyphs + } + }; + for ((&glyph_id, position), &glyph_utf16_ix) in run .glyphs() .iter() .zip(run.positions().iter()) .zip(run.string_indices().iter()) { - let glyph_utf16_ix = usize::try_from(*glyph_utf16_ix).unwrap(); + let mut glyph_utf16_ix = usize::try_from(glyph_utf16_ix).unwrap(); + let r = self + .zwnjs_scratch_space + .binary_search_by(|&(_, it)| it.cmp(&glyph_utf16_ix)); + match r { + // this glyph is a ZWNJ, skip it + Ok(_) => continue, + // adjust the index to account for the ZWNJs we've inserted + Err(idx) => glyph_utf16_ix -= idx * ZWNJ_SIZE_16, + } if ix_converter.utf16_ix > glyph_utf16_ix { // We cannot reuse current index converter, as it can only seek forward. Restart the search. ix_converter = StringIndexConverter::new(text); } ix_converter.advance_to_utf16_ix(glyph_utf16_ix); glyphs.push(ShapedGlyph { - id: GlyphId(*glyph_id as u32), + id: GlyphId(glyph_id as u32), position: point(position.x as f32, position.y as f32).map(px), index: ix_converter.utf8_ix, is_emoji: self.is_emoji(font_id), }); } - - runs.push(ShapedRun { font_id, glyphs }); } let typographic_bounds = line.get_typographic_bounds(); LineLayout { @@ -696,4 +721,93 @@ mod tests { // There's no glyph for \u{feff} assert_eq!(layout.runs[0].glyphs[1].id, GlyphId(69u32)); // b } + + #[test] + fn test_layout_line_zwnj_insertion() { + let fonts = MacTextSystem::new(); + let font_id = fonts.font_id(&font("Helvetica")).unwrap(); + + let text = "hello world"; + let font_runs = &[ + FontRun { font_id, len: 5 }, // "hello" + FontRun { font_id, len: 6 }, // " world" + ]; + + let layout = fonts.layout_line(text, px(16.), font_runs); + assert_eq!(layout.len, text.len()); + + for run in &layout.runs { + for glyph in &run.glyphs { + assert!( + glyph.index < text.len(), + "Glyph index {} is out of bounds for text length {}", + glyph.index, + text.len() + ); + } + } + + // Test with different font runs - should not insert ZWNJ + let font_id2 = fonts.font_id(&font("Times")).unwrap_or(font_id); + let font_runs_different = &[ + FontRun { font_id, len: 5 }, // "hello" + // " world" + FontRun { + font_id: font_id2, + len: 6, + }, + ]; + + let layout2 = fonts.layout_line(text, px(16.), font_runs_different); + assert_eq!(layout2.len, text.len()); + + for run in &layout2.runs { + for glyph in &run.glyphs { + assert!( + glyph.index < text.len(), + "Glyph index {} is out of bounds for text length {}", + glyph.index, + text.len() + ); + } + } + } + + #[test] + fn test_layout_line_zwnj_edge_cases() { + let fonts = MacTextSystem::new(); + let font_id = fonts.font_id(&font("Helvetica")).unwrap(); + + let text = "hello"; + let font_runs = &[FontRun { font_id, len: 5 }]; + let layout = fonts.layout_line(text, px(16.), font_runs); + assert_eq!(layout.len, text.len()); + + let text = "abc"; + let font_runs = &[ + FontRun { font_id, len: 1 }, // "a" + FontRun { font_id, len: 1 }, // "b" + FontRun { font_id, len: 1 }, // "c" + ]; + let layout = fonts.layout_line(text, px(16.), font_runs); + assert_eq!(layout.len, text.len()); + + for run in &layout.runs { + for glyph in &run.glyphs { + assert!( + glyph.index < text.len(), + "Glyph index {} is out of bounds for text length {}", + glyph.index, + text.len() + ); + } + } + + // Test with empty text + let text = ""; + let font_runs = &[]; + let layout = fonts.layout_line(text, px(16.), font_runs); + assert_eq!(layout.len, 0); + assert!(layout.runs.is_empty()); + } } diff --git a/crates/gpui/src/text_system.rs b/crates/gpui/src/text_system.rs index 53991089da94c58d0035bff0d607ad3ab57a69bd..be34b9e2aac055bd9f17c2f69b3c72d24e392593 100644 --- a/crates/gpui/src/text_system.rs +++ b/crates/gpui/src/text_system.rs @@ -413,9 +413,10 @@ impl WindowTextSystem { let mut wrapped_lines = 0; let mut process_line = |line_text: SharedString| { + font_runs.clear(); let line_end = line_start + line_text.len(); - let mut last_font: Option = None; + let mut last_font: Option = None; let mut decoration_runs = SmallVec::<[DecorationRun; 32]>::new(); let mut run_start = line_start; while run_start < line_end { @@ -425,23 +426,14 @@ impl WindowTextSystem { let run_len_within_line = cmp::min(line_end, run_start + run.len) - run_start; - if last_font == Some(run.font.clone()) { - font_runs.last_mut().unwrap().len += run_len_within_line; - } else { - last_font = Some(run.font.clone()); - font_runs.push(FontRun { - len: run_len_within_line, - font_id: self.resolve_font(&run.font), - }); - } - - if decoration_runs.last().is_some_and(|last_run| { - last_run.color == run.color - && last_run.underline == run.underline - && last_run.strikethrough == run.strikethrough - && last_run.background_color == run.background_color - }) { - decoration_runs.last_mut().unwrap().len += run_len_within_line as u32; + let decoration_changed = if let Some(last_run) = decoration_runs.last_mut() + && last_run.color == run.color + && last_run.underline == run.underline + && last_run.strikethrough == run.strikethrough + && last_run.background_color == run.background_color + { + last_run.len += run_len_within_line as u32; + false } else { decoration_runs.push(DecorationRun { len: run_len_within_line as u32, @@ -450,6 +442,21 @@ impl WindowTextSystem { underline: run.underline, strikethrough: run.strikethrough, }); + true + }; + + if let Some(font_run) = font_runs.last_mut() + && Some(font_run.font_id) == last_font + && !decoration_changed + { + font_run.len += run_len_within_line; + } else { + let font_id = self.resolve_font(&run.font); + last_font = Some(font_id); + font_runs.push(FontRun { + len: run_len_within_line, + font_id, + }); } if run_len_within_line == run.len { @@ -484,8 +491,6 @@ impl WindowTextSystem { runs.next(); } } - - font_runs.clear(); }; let mut split_lines = text.split('\n'); @@ -519,37 +524,54 @@ impl WindowTextSystem { /// Subsets of the line can be styled independently with the `runs` parameter. /// Generally, you should prefer to use `TextLayout::shape_line` instead, which /// can be painted directly. - pub fn layout_line( + pub fn layout_line( &self, - text: Text, + text: &str, font_size: Pixels, runs: &[TextRun], force_width: Option, - ) -> Arc - where - Text: AsRef, - SharedString: From, - { + ) -> Arc { + let mut last_run = None::<&TextRun>; + let mut last_font: Option = None; let mut font_runs = self.font_runs_pool.lock().pop().unwrap_or_default(); + font_runs.clear(); + for run in runs.iter() { - let font_id = self.resolve_font(&run.font); - if let Some(last_run) = font_runs.last_mut() - && last_run.font_id == font_id + let decoration_changed = if let Some(last_run) = last_run + && last_run.color == run.color + && last_run.underline == run.underline + && last_run.strikethrough == run.strikethrough + // we do not consider differing background color relevant, as it does not affect glyphs + // && last_run.background_color == run.background_color { - last_run.len += run.len; - continue; + false + } else { + last_run = Some(run); + true + }; + + if let Some(font_run) = font_runs.last_mut() + && Some(font_run.font_id) == last_font + && !decoration_changed + { + font_run.len += run.len; + } else { + let font_id = self.resolve_font(&run.font); + last_font = Some(font_id); + font_runs.push(FontRun { + len: run.len, + font_id, + }); } - font_runs.push(FontRun { - len: run.len, - font_id, - }); } - let layout = - self.line_layout_cache - .layout_line_internal(text, font_size, &font_runs, force_width); + let layout = self.line_layout_cache.layout_line( + &SharedString::new(text), + font_size, + &font_runs, + force_width, + ); - font_runs.clear(); self.font_runs_pool.lock().push(font_runs); layout diff --git a/crates/gpui/src/text_system/line_layout.rs b/crates/gpui/src/text_system/line_layout.rs index 43694702a82566b8f84199dcfc4ff996da93588e..4ac1d258970802ed1c4fe86bd98f2971b78fbc04 100644 --- a/crates/gpui/src/text_system/line_layout.rs +++ b/crates/gpui/src/text_system/line_layout.rs @@ -501,7 +501,7 @@ impl LineLayoutCache { } else { drop(current_frame); let text = SharedString::from(text); - let unwrapped_layout = self.layout_line::<&SharedString>(&text, font_size, runs); + let unwrapped_layout = self.layout_line::<&SharedString>(&text, font_size, runs, None); let wrap_boundaries = if let Some(wrap_width) = wrap_width { unwrapped_layout.compute_wrap_boundaries(text.as_ref(), wrap_width, max_lines) } else { @@ -535,19 +535,6 @@ impl LineLayoutCache { text: Text, font_size: Pixels, runs: &[FontRun], - ) -> Arc - where - Text: AsRef, - SharedString: From, - { - self.layout_line_internal(text, font_size, runs, None) - } - - pub fn layout_line_internal( - &self, - text: Text, - font_size: Pixels, - runs: &[FontRun], force_width: Option, ) -> Arc where