From b2981f4baa1450fb533117ac81a3b6e4f1a5afee Mon Sep 17 00:00:00 2001
From: Thorsten Ball <mrnugget@gmail.com>
Date: Mon, 11 Mar 2024 20:38:37 +0100
Subject: [PATCH] Fix completion-filtering by checking actual word boundaries &
 reduce Ruby word chars (#9170)

This fixes https://github.com/zed-industries/zed/issues/9069 by

1. reverting https://github.com/zed-industries/zed/issues/7819
2. fixing completion filtering with regard to word boundaries

For (2) see explanation in commit message:

> Previously, this would only split words on upper-lower boundaries or
> on `_`/`-`.
>
> The result was that we would filter out completions too aggressively.
> The filter works by taking a suggested completion, say `foo_bar_lol`,
> splitting it up into words - `foo`, `bar`, `lol` - and checking whether
> any of the words start with the same characters as what the user
> already typed: `fo`, or `bar`, ...
>
> In the case of Ruby, though, `:` wasn't considered a word boundary. If
> the LSP would return `:foobar` when the user typed `:foo`, we'd check
> if there are any completions that match `foo` (because that's the
> current word) but we'd compare against `foobar`, not `:` or `:foobar`.
>
> With this change, we get more match candidates and thus more
> completions in Ruby.

With that we can do (1) because we don't need these characters as word
characters anymore to trigger completions.

Release Notes:

- Fixed word boundaries in Ruby by restoring old behavior (`@`, `:`, ...
are no longer considered word characters)
([#9069](https://github.com/zed-industries/zed/issues/9069))
- Fixed completions being filtered out when they happened at word
boundaries on special characters (e.g. `:`)

---------

Co-authored-by: Max <max@zed.dev>
---
 crates/editor/src/editor.rs             | 41 ++++++++++---------------
 crates/editor/src/editor_tests.rs       |  2 ++
 crates/languages/src/ruby/config.toml   |  1 -
 crates/languages/src/ruby/overrides.scm |  1 +
 4 files changed, 19 insertions(+), 26 deletions(-)
diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index bd4fccf4a9b29afdca42230eacdc38bd1efb7771..33335132905c7f1f9b0337d3a1a4cbb97b9662a0 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -10700,32 +10700,23 @@ pub fn styled_runs_for_code_label<'a>(
 }
 
 pub(crate) fn split_words(text: &str) -> impl std::iter::Iterator<Item = &str> + '_ {
-    let mut index = 0;
-    let mut codepoints = text.char_indices().peekable();
-
-    std::iter::from_fn(move || {
-        let start_index = index;
-        while let Some((new_index, codepoint)) = codepoints.next() {
-            index = new_index + codepoint.len_utf8();
-            let current_upper = codepoint.is_uppercase();
-            let next_upper = codepoints
-                .peek()
-                .map(|(_, c)| c.is_uppercase())
-                .unwrap_or(false);
-
-            if !current_upper && next_upper {
-                return Some(&text[start_index..index]);
+    let mut prev_index = 0;
+    let mut prev_codepoint: Option<char> = None;
+    text.char_indices()
+        .chain([(text.len(), '\0')])
+        .filter_map(move |(index, codepoint)| {
+            let prev_codepoint = prev_codepoint.replace(codepoint)?;
+            let is_boundary = index == text.len()
+                || !prev_codepoint.is_uppercase() && codepoint.is_uppercase()
+                || !prev_codepoint.is_alphanumeric() && codepoint.is_alphanumeric();
+            if is_boundary {
+                let chunk = &text[prev_index..index];
+                prev_index = index;
+                Some(chunk)
+            } else {
+                None
             }
-        }
-
-        index = text.len();
-        if start_index < text.len() {
-            return Some(&text[start_index..]);
-        }
-        None
-    })
-    .flat_map(|word| word.split_inclusive('_'))
-    .flat_map(|word| word.split_inclusive('-'))
+        })
 }
 
 trait RangeToAnchorExt {
diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs
index 21a24817ea9a75eb0286fd40f1d66048ac283487..c1db51fef8ce3143932db7a2896aca4d0fa75331 100644
--- a/crates/editor/src/editor_tests.rs
+++ b/crates/editor/src/editor_tests.rs
@@ -7442,6 +7442,8 @@ fn test_split_words() {
     assert_eq!(split("Hello_World"), &["Hello_", "World"]);
     assert_eq!(split("helloWOrld"), &["hello", "WOrld"]);
     assert_eq!(split("helloworld"), &["helloworld"]);
+
+    assert_eq!(split(":do_the_thing"), &[":", "do_", "the_", "thing"]);
 }
 
 #[gpui::test]
diff --git a/crates/languages/src/ruby/config.toml b/crates/languages/src/ruby/config.toml
index 3784952f6c7d693e2ff5b61bf0483e3365c423d8..54072d801bd22de9bc8c2d0d63ae3b2eb1abdfc7 100644
--- a/crates/languages/src/ruby/config.toml
+++ b/crates/languages/src/ruby/config.toml
@@ -37,4 +37,3 @@ brackets = [
     ] },
 ]
 collapsed_placeholder = "# ..."
-word_characters = ["_", "$", "=", "@", "!", ":", "?"]
diff --git a/crates/languages/src/ruby/overrides.scm b/crates/languages/src/ruby/overrides.scm
index 8a58e304e5c5185166a09bc78eb835527a246301..7ff82bd3457cc67f421789fb4255df3362fab00a 100644
--- a/crates/languages/src/ruby/overrides.scm
+++ b/crates/languages/src/ruby/overrides.scm
@@ -1,2 +1,3 @@
 (comment) @comment
 (string) @string
+[(simple_symbol) (delimited_symbol)] @simple_symbol