Fix fuzzy string match invariant check (#22032)

Michael Sloan created

The version in #21983 only handled out-of-range indices, not indices that
fall off a UTF-8 character boundary (thanks to @s3bba for pointing this out).

Release Notes:

- N/A

Change summary

crates/fuzzy/src/strings.rs | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)

Detailed changes

crates/fuzzy/src/strings.rs 🔗

@@ -61,24 +61,24 @@ impl StringMatch {
         let mut positions = self.positions.iter().peekable();
         iter::from_fn(move || {
             if let Some(start) = positions.next().copied() {
-                if start >= self.string.len() {
+                let Some(char_len) = self.char_len_at_index(start) else {
                     log::error!(
-                        "Invariant violation: Index {start} out of range in string {:?}",
+                        "Invariant violation: Index {start} out of range or not on a utf-8 boundary in string {:?}",
                         self.string
                     );
                     return None;
-                }
-                let mut end = start + self.char_len_at_index(start);
+                };
+                let mut end = start + char_len;
                 while let Some(next_start) = positions.peek() {
                     if end == **next_start {
-                        if end >= self.string.len() {
+                        let Some(char_len) = self.char_len_at_index(end) else {
                             log::error!(
-                                "Invariant violation: Index {end} out of range in string {:?}",
+                                "Invariant violation: Index {end} out of range or not on a utf-8 boundary in string {:?}",
                                 self.string
                             );
                             return None;
-                        }
-                        end += self.char_len_at_index(end);
+                        };
+                        end += char_len;
                         positions.next();
                     } else {
                         break;
@@ -91,8 +91,12 @@ impl StringMatch {
         })
     }
 
-    fn char_len_at_index(&self, ix: usize) -> usize {
-        self.string[ix..].chars().next().unwrap().len_utf8()
+    /// Gets the byte length of the utf-8 character at a byte offset. If the index is out of range
+    /// or not on a utf-8 boundary then None is returned.
+    fn char_len_at_index(&self, ix: usize) -> Option<usize> {
+        self.string
+            .get(ix..)
+            .and_then(|slice| slice.chars().next().map(|char| char.len_utf8()))
     }
 }