Fix common_prefix_at panic when needle contains multibyte chars

Max Brunsfeld created 3 years ago

Also, make the prefix matching case-insensitive, since this is the
typical behavior with autocomplete.

Change summary

crates/text/src/tests.rs | 50 ++++++++++++++++++++++++++++++++++++-----
crates/text/src/text.rs  | 34 +++++++++++----------------
2 files changed, 57 insertions(+), 27 deletions(-)

Detailed changes

crates/text/src/tests.rs 🔗

@@ -7,7 +7,6 @@ use std::{
     iter::Iterator,
     time::{Duration, Instant},
 };
-use util::test::marked_text_ranges;
 
 #[cfg(test)]
 #[ctor::ctor]
@@ -167,14 +166,51 @@ fn test_line_len() {
 
 #[test]
 fn test_common_prefix_at_positionn() {
-    let (text, ranges) = marked_text_ranges("a = [bcd]");
+    let text = "a = str; b = δα";
     let buffer = Buffer::new(0, 0, History::new(text.into()));
-    let snapshot = &buffer.snapshot();
-    let expected_range = ranges[0].to_offset(&snapshot);
+
+    let offset1 = offset_after(text, "str");
+    let offset2 = offset_after(text, "δα");
+
+    // the preceding word is a prefix of the suggestion
+    assert_eq!(
+        buffer.common_prefix_at(offset1, "string"),
+        range_of(text, "str"),
+    );
+    // a suffix of the preceding word is a prefix of the suggestion
+    assert_eq!(
+        buffer.common_prefix_at(offset1, "tree"),
+        range_of(text, "tr"),
+    );
+    // the preceding word is a substring of the suggestion, but not a prefix
+    assert_eq!(
+        buffer.common_prefix_at(offset1, "astro"),
+        empty_range_after(text, "str"),
+    );
+
+    // prefix matching is case insenstive.
     assert_eq!(
-        buffer.common_prefix_at(expected_range.end, "bcdef"),
-        expected_range
-    )
+        buffer.common_prefix_at(offset1, "Strαngε"),
+        range_of(text, "str"),
+    );
+    assert_eq!(
+        buffer.common_prefix_at(offset2, "ΔΑΜΝ"),
+        range_of(text, "δα"),
+    );
+
+    fn offset_after(text: &str, part: &str) -> usize {
+        text.find(part).unwrap() + part.len()
+    }
+
+    fn empty_range_after(text: &str, part: &str) -> Range<usize> {
+        let offset = offset_after(text, part);
+        offset..offset
+    }
+
+    fn range_of(text: &str, part: &str) -> Range<usize> {
+        let start = text.find(part).unwrap();
+        start..start + part.len()
+    }
 }
 
 #[test]

crates/text/src/text.rs 🔗

@@ -1510,31 +1510,25 @@ impl BufferSnapshot {
 
     pub fn common_prefix_at<T>(&self, position: T, needle: &str) -> Range<T>
     where
-        T: Clone + ToOffset + FromAnchor,
+        T: ToOffset + TextDimension,
     {
-        let position_offset = position.to_offset(self);
-        // Get byte indices and char counts for every character in needle in reverse order
-        let char_indices = needle
+        let offset = position.to_offset(self);
+        let common_prefix_len = needle
             .char_indices()
             .map(|(index, _)| index)
-            .chain(std::iter::once(needle.len()))
-            .enumerate()
-            // Don't test any prefixes that are bigger than the requested position
-            .take_while(|(_, prefix_length)| *prefix_length <= position_offset);
-
-        let start = char_indices
-            // Compute the prefix string and prefix start location
-            .map(move |(byte_position, char_length)| {
-                (position_offset - char_length, &needle[..byte_position])
+            .chain([needle.len()])
+            .take_while(|&len| len <= offset)
+            .filter(|&len| {
+                let left = self
+                    .chars_for_range(offset - len..offset)
+                    .flat_map(|c| char::to_lowercase(c));
+                let right = needle[..len].chars().flat_map(|c| char::to_lowercase(c));
+                left.eq(right)
             })
-            // Only take strings when the prefix is contained at the expected prefix position
-            .filter(|(prefix_offset, prefix)| self.contains_str_at(prefix_offset, prefix))
-            // Convert offset to T
-            .map(|(prefix_offset, _)| T::from_anchor(&self.anchor_before(prefix_offset), self))
             .last()
-            // If no prefix matches, return the passed in position to create an empty range
-            .unwrap_or(position.clone());
-
+            .unwrap_or(0);
+        let start_offset = offset - common_prefix_len;
+        let start = self.text_summary_for_range(0..start_offset);
         start..position
     }