From 12cd712b53a3cfd825d826c8962382c5c7ff3f9b Mon Sep 17 00:00:00 2001
From: Julia <floc@unpromptedtirade.com>
Date: Fri, 6 Jan 2023 22:46:32 -0500
Subject: [PATCH 1/3] Require the first byte of the autocomplete query to
 match the first byte of a word in the completion

---
 crates/editor/src/editor.rs | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index d8ee49866b404d4d1d12efbd540b467096485d81..354a2be97d52a151f8c7b3ce9737994f88950918 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -827,6 +827,40 @@ impl CompletionsMenu {
                 })
                 .collect()
         };
+
+        //Remove all candidates where the query's start does not match the start of any word in the candidate
+        if let Some(query) = query {
+            if let Some(&start) = query.as_bytes().get(0) {
+                let start = start.to_ascii_lowercase();
+                matches.retain(|m| {
+                    let bytes = m.string.as_bytes();
+                    let mut index = 0;
+
+                    std::iter::from_fn(move || {
+                        let start_index = index;
+                        while index < bytes.len() {
+                            let current_upper = bytes[index].is_ascii_uppercase();
+                            let has_more = index + 1 < bytes.len();
+                            let next_upper = has_more && bytes[index + 1].is_ascii_uppercase();
+
+                            index += 1;
+                            if !current_upper && next_upper {
+                                return Some(&m.string[start_index..index]);
+                            }
+                        }
+
+                        index = bytes.len();
+                        if start_index < bytes.len() {
+                            return Some(&m.string[start_index..]);
+                        }
+                        None
+                    })
+                    .flat_map(|w| w.split_inclusive('_'))
+                    .any(|w| w.as_bytes().first().map(|&b| b.to_ascii_lowercase()) == Some(start))
+                });
+            }
+        }
+
         matches.sort_unstable_by_key(|mat| {
             let completion = &self.completions[mat.candidate_id];
             (

From a46ca323567aada6960467c447d789a606db3d6a Mon Sep 17 00:00:00 2001
From: Julia <floc@unpromptedtirade.com>
Date: Sat, 7 Jan 2023 15:34:28 -0500
Subject: [PATCH 2/3] Make completion word-start filtering codepoint-aware

---
 crates/editor/src/editor.rs | 38 ++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index 354a2be97d52a151f8c7b3ce9737994f88950918..4405fc0b338279423a3d04655f75f12250453432 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -830,33 +830,41 @@ impl CompletionsMenu {
 
         //Remove all candidates where the query's start does not match the start of any word in the candidate
         if let Some(query) = query {
-            if let Some(&start) = query.as_bytes().get(0) {
-                let start = start.to_ascii_lowercase();
-                matches.retain(|m| {
-                    let bytes = m.string.as_bytes();
+            if let Some(query_start) = query.chars().next() {
+                matches.retain(|string_match| {
+                    let text = &string_match.string;
+
                     let mut index = 0;
+                    let mut codepoints = text.char_indices().peekable();
 
                     std::iter::from_fn(move || {
                         let start_index = index;
-                        while index < bytes.len() {
-                            let current_upper = bytes[index].is_ascii_uppercase();
-                            let has_more = index + 1 < bytes.len();
-                            let next_upper = has_more && bytes[index + 1].is_ascii_uppercase();
+                        while let Some((new_index, codepoint)) = codepoints.next() {
+                            index = new_index + codepoint.len_utf8();
+                            let current_upper = codepoint.is_uppercase();
+                            let next_upper = codepoints
+                                .peek()
+                                .map(|(_, c)| c.is_uppercase())
+                                .unwrap_or(false);
 
-                            index += 1;
                             if !current_upper && next_upper {
-                                return Some(&m.string[start_index..index]);
+                                return Some(&text[start_index..index]);
                             }
                         }
 
-                        index = bytes.len();
-                        if start_index < bytes.len() {
-                            return Some(&m.string[start_index..]);
+                        index = text.len();
+                        if start_index < text.len() {
+                            return Some(&text[start_index..]);
                         }
                         None
                     })
-                    .flat_map(|w| w.split_inclusive('_'))
-                    .any(|w| w.as_bytes().first().map(|&b| b.to_ascii_lowercase()) == Some(start))
+                    .flat_map(|word| word.split_inclusive('_'))
+                    .any(|word| {
+                        word.chars()
+                            .flat_map(|codepoint| codepoint.to_lowercase())
+                            .zip(query_start.to_lowercase())
+                            .all(|(word_cp, query_cp)| word_cp == query_cp)
+                    })
                 });
             }
         }

From 97ed89a797fb11c9bfa3a6900dc62f817b628d1d Mon Sep 17 00:00:00 2001
From: Julia <floc@unpromptedtirade.com>
Date: Mon, 9 Jan 2023 13:02:44 -0500
Subject: [PATCH 3/3] Test that completion word splitting does reasonable
 things

---
 crates/editor/src/editor.rs       | 59 ++++++++++++++++---------------
 crates/editor/src/editor_tests.rs | 14 ++++++++
 2 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index 4405fc0b338279423a3d04655f75f12250453432..356fd0ca4fa58421bbc7b708fbbd4e062775fc9c 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -832,34 +832,9 @@ impl CompletionsMenu {
         if let Some(query) = query {
             if let Some(query_start) = query.chars().next() {
                 matches.retain(|string_match| {
-                    let text = &string_match.string;
-
-                    let mut index = 0;
-                    let mut codepoints = text.char_indices().peekable();
-
-                    std::iter::from_fn(move || {
-                        let start_index = index;
-                        while let Some((new_index, codepoint)) = codepoints.next() {
-                            index = new_index + codepoint.len_utf8();
-                            let current_upper = codepoint.is_uppercase();
-                            let next_upper = codepoints
-                                .peek()
-                                .map(|(_, c)| c.is_uppercase())
-                                .unwrap_or(false);
-
-                            if !current_upper && next_upper {
-                                return Some(&text[start_index..index]);
-                            }
-                        }
-
-                        index = text.len();
-                        if start_index < text.len() {
-                            return Some(&text[start_index..]);
-                        }
-                        None
-                    })
-                    .flat_map(|word| word.split_inclusive('_'))
-                    .any(|word| {
+                    split_words(&string_match.string).any(|word| {
+                        //Case-insensitive check: the lowercased start of the word must match the
+                        //lowercased first codepoint of the query
                         word.chars()
                             .flat_map(|codepoint| codepoint.to_lowercase())
                             .zip(query_start.to_lowercase())
@@ -6841,6 +6816,34 @@ pub fn styled_runs_for_code_label<'a>(
         })
 }
 
+pub fn split_words<'a>(text: &'a str) -> impl std::iter::Iterator<Item = &'a str> + 'a {
+    let mut index = 0; //Byte offset just past the last codepoint consumed so far
+    let mut codepoints = text.char_indices().peekable();
+    //Cuts `text` before each uppercase codepoint that directly follows a non-uppercase one
+    std::iter::from_fn(move || {
+        let start_index = index; //This piece starts where the previous one ended
+        while let Some((new_index, codepoint)) = codepoints.next() {
+            index = new_index + codepoint.len_utf8();
+            let current_upper = codepoint.is_uppercase();
+            let next_upper = codepoints
+                .peek()
+                .map(|(_, c)| c.is_uppercase())
+                .unwrap_or(false);
+            //A non-uppercase codepoint followed by an uppercase one ends the current piece
+            if !current_upper && next_upper {
+                return Some(&text[start_index..index]);
+            }
+        }
+        //Nothing left to scan: yield any unreturned tail once; later calls return None
+        index = text.len();
+        if start_index < text.len() {
+            return Some(&text[start_index..]);
+        }
+        None
+    })
+    .flat_map(|word| word.split_inclusive('_')) //Also cut after every '_', keeping the '_'
+}
+
 trait RangeExt<T> {
     fn sorted(&self) -> Range<T>;
     fn to_inclusive(&self) -> RangeInclusive<T>;
diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs
index 2fcc5f0014ef942a9d12762a6ae424de1aabfcc1..798636c0a1c40f5ef8db981a2da2f410e72f5dd8 100644
--- a/crates/editor/src/editor_tests.rs
+++ b/crates/editor/src/editor_tests.rs
@@ -5439,6 +5439,20 @@ async fn go_to_hunk(deterministic: Arc<Deterministic>, cx: &mut gpui::TestAppCon
     );
 }
 
+#[test]
+fn test_split_words() {
+    fn split<'a>(text: &'a str) -> Vec<&'a str> {
+        split_words(text).collect() //Collect the pieces so they can be compared in order
+    }
+    //Each case lists the pieces `split_words` is expected to yield, in order
+    assert_eq!(split("HelloWorld"), &["Hello", "World"]); //Cut where lowercase meets uppercase
+    assert_eq!(split("hello_world"), &["hello_", "world"]); //'_' stays on the piece it ends
+    assert_eq!(split("_hello_world_"), &["_", "hello_", "world_"]); //Leading and trailing '_'
+    assert_eq!(split("Hello_World"), &["Hello_", "World"]); //'_' then uppercase: a single cut
+    assert_eq!(split("helloWOrld"), &["hello", "WOrld"]); //Adjacent uppercase is not separated
+    assert_eq!(split("helloworld"), &["helloworld"]); //No boundary: input comes back whole
+}
+
 fn empty_range(row: usize, column: usize) -> Range<DisplayPoint> {
     let point = DisplayPoint::new(row as u32, column as u32);
     point..point