Fix crash when opening files with a BOM on macOS (#40419)

Cole Miller and HactarCE created

Closes #40359

We were segfaulting when opening a UTF-8 file starting with a byte order
mark (BOM) due to a mismatch in our UTF-16 indexing calculations caused by
Core Foundation's `replace_str` stripping the BOM internally. This PR
fixes the crash by replacing one of our manual calculations with a call to
the Core Foundation API that returns the length of a string.

Release Notes:

- Fixed a crash on macOS when opening a file that starts with a UTF-8
byte order mark (BOM).

Co-authored-by: HactarCE <6060305+HactarCE@users.noreply.github.com>

Change summary

crates/gpui/src/platform/mac/text_system.rs | 35 ++++++++++++++++++----
1 file changed, 28 insertions(+), 7 deletions(-)

Detailed changes

crates/gpui/src/platform/mac/text_system.rs 🔗

@@ -449,11 +449,12 @@ impl MacTextSystemState {
                 // to prevent core text from forming ligatures between them
                 let needs_zwnj = last_font_run.replace(run.font_id) == Some(run.font_id);
 
-                let n_zwnjs = self.zwnjs_scratch_space.len();
-                let utf16_start = ix_converter.utf16_ix + n_zwnjs * ZWNJ_SIZE_16;
+                let n_zwnjs = self.zwnjs_scratch_space.len(); // from previous loop
+                let utf16_start = string.char_len(); // insert at end of string
                 ix_converter.advance_to_utf8_ix(ix_converter.utf8_ix + run.len);
 
-                string.replace_str(&CFString::new(text), CFRange::init(utf16_start as isize, 0));
+                // note: replace_str may silently ignore codepoints it dislikes (e.g., BOM at start of string)
+                string.replace_str(&CFString::new(text), CFRange::init(utf16_start, 0));
                 if needs_zwnj {
                     let zwnjs_pos = string.char_len();
                     self.zwnjs_scratch_space.push((n_zwnjs, zwnjs_pos as usize));
@@ -462,10 +463,9 @@ impl MacTextSystemState {
                         CFRange::init(zwnjs_pos, 0),
                     );
                 }
-                let utf16_end = string.char_len() as usize;
+                let utf16_end = string.char_len();
 
-                let cf_range =
-                    CFRange::init(utf16_start as isize, (utf16_end - utf16_start) as isize);
+                let cf_range = CFRange::init(utf16_start, utf16_end - utf16_start);
                 let font = &self.fonts[run.font_id.0];
 
                 let font_metrics = font.metrics();
@@ -548,10 +548,12 @@ impl MacTextSystemState {
     }
 }
 
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 struct StringIndexConverter<'a> {
     text: &'a str,
+    /// Index in UTF-8 bytes
     utf8_ix: usize,
+    /// Index in UTF-16 code units
     utf16_ix: usize,
 }
 
@@ -732,6 +734,25 @@ mod tests {
         assert_eq!(layout.runs[0].glyphs[0].id, GlyphId(68u32)); // a
         // There's no glyph for \u{feff}
         assert_eq!(layout.runs[0].glyphs[1].id, GlyphId(69u32)); // b
+
+        let line = "\u{feff}ab";
+        let font_runs = &[
+            FontRun {
+                len: "\u{feff}".len(),
+                font_id,
+            },
+            FontRun {
+                len: "ab".len(),
+                font_id,
+            },
+        ];
+        let layout = fonts.layout_line(line, px(16.), font_runs);
+        assert_eq!(layout.len, line.len());
+        assert_eq!(layout.runs.len(), 1);
+        assert_eq!(layout.runs[0].glyphs.len(), 2);
+        // There's no glyph for \u{feff}
+        assert_eq!(layout.runs[0].glyphs[0].id, GlyphId(68u32)); // a
+        assert_eq!(layout.runs[0].glyphs[1].id, GlyphId(69u32)); // b
     }
 
     #[test]