Don't slice midway through multi-byte char when detecting line ending

Antonio Scandurra created

Change summary

crates/text/src/tests.rs | 11 +++++++++++
crates/text/src/text.rs  | 10 +++++++---
2 files changed, 18 insertions(+), 3 deletions(-)

Detailed changes

crates/text/src/tests.rs 🔗

@@ -154,6 +154,17 @@ fn test_random_edits(mut rng: StdRng) {
 
 #[test]
 fn test_line_endings() {
+    assert_eq!(LineEnding::detect(&"🍐✅\n".repeat(1000)), LineEnding::Unix);
+    assert_eq!(LineEnding::detect(&"abcd\n".repeat(1000)), LineEnding::Unix);
+    assert_eq!(
+        LineEnding::detect(&"🍐✅\r\n".repeat(1000)),
+        LineEnding::Windows
+    );
+    assert_eq!(
+        LineEnding::detect(&"abcd\r\n".repeat(1000)),
+        LineEnding::Windows
+    );
+
     let mut buffer = Buffer::new(0, 0, "one\r\ntwo".into());
     assert_eq!(buffer.text(), "one\ntwo");
     assert_eq!(buffer.line_ending(), LineEnding::Windows);

crates/text/src/text.rs 🔗

@@ -2353,9 +2353,13 @@ impl LineEnding {
     }
 
     pub fn detect(text: &str) -> Self {
-        if let Some(ix) = text[..cmp::min(text.len(), 1000)].find(&['\n']) {
-            let text = text.as_bytes();
-            if ix > 0 && text[ix - 1] == b'\r' {
+        let mut max_ix = cmp::min(text.len(), 1000);
+        while !text.is_char_boundary(max_ix) {
+            max_ix -= 1;
+        }
+
+        if let Some(ix) = text[..max_ix].find(&['\n']) {
+            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
                 Self::Windows
             } else {
                 Self::Unix