Fix panic in fuzzy-finder for unicode characters (#7080)

Thorsten Ball and bennetbo created

This fixes a panic in the fuzzy finder that someone ran into when
typing a query containing a Unicode character whose lower-case version
has more chars than its upper-case version (for example, U+0130).

It also fixes a second problem: no match was found when both the
candidate and the query contained upper-case characters whose lower-case
version has more chars, because candidates were lowercased with
`to_ascii_lowercase`, which leaves non-ASCII characters unchanged.


Release Notes:

- Fixed a panic in the fuzzy finder that could occur when matching
queries containing upper-case Unicode characters whose lower-case
version has more chars.

Co-authored-by: bennetbo <bennetbo@gmx.de>

Change summary

crates/fuzzy/src/matcher.rs | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)

Detailed changes

crates/fuzzy/src/matcher.rs 🔗

@@ -45,7 +45,7 @@ impl<'a> Matcher<'a> {
             lowercase_query,
             query_char_bag,
             min_score: 0.0,
-            last_positions: vec![0; query.len()],
+            last_positions: vec![0; lowercase_query.len()],
             match_positions: vec![0; query.len()],
             score_matrix: Vec::new(),
             best_position_matrix: Vec::new(),
@@ -82,7 +82,7 @@ impl<'a> Matcher<'a> {
             lowercase_candidate_chars.clear();
             for c in candidate.to_string().chars() {
                 candidate_chars.push(c);
-                lowercase_candidate_chars.push(c.to_ascii_lowercase());
+                lowercase_candidate_chars.append(&mut c.to_lowercase().collect::<Vec<_>>());
             }
 
             if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) {
@@ -383,6 +383,25 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_lowercase_longer_than_uppercase() {
+        // This character has more chars in lower-case than in upper-case.
+        let paths = vec!["\u{0130}"];
+        let query = "\u{0130}";
+        assert_eq!(
+            match_single_path_query(query, false, &paths),
+            vec![("\u{0130}", vec![0])]
+        );
+
+        // Path is the lower-case version of the query
+        let paths = vec!["i\u{307}"];
+        let query = "\u{0130}";
+        assert_eq!(
+            match_single_path_query(query, false, &paths),
+            vec![("i\u{307}", vec![0])]
+        );
+    }
+
     #[test]
     fn test_match_multibyte_path_entries() {
         let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"];