Add `word_characters` to language overrides & use for more things

Julia and Max Brunsfeld created

Use `word_characters` to feed completion trigger characters as well, and
also recognize the hyphen (kebab-case) as a potential subword splitter.
This is fine for non-kebab-case languages, because we would only ever
attempt to split a word containing a hyphen in language scopes that are
kebab-cased.

Co-Authored-By: Max Brunsfeld <max@zed.dev>

Change summary

crates/editor/src/editor.rs                           |  2 
crates/editor/src/editor_tests.rs                     | 96 ++++++++++++
crates/editor/src/movement.rs                         | 22 +-
crates/editor/src/multi_buffer.rs                     | 10 
crates/editor/src/test/editor_lsp_test_context.rs     |  2 
crates/language/src/buffer.rs                         | 16 +
crates/language/src/language.rs                       |  9 +
crates/live_kit_client/LiveKitBridge/Package.resolved |  4 
crates/project/src/search.rs                          |  8 
crates/vim/src/motion.rs                              | 22 +-
crates/vim/src/normal/change.rs                       | 10 
crates/vim/src/object.rs                              | 30 ++-
crates/zed/src/languages/javascript/config.toml       |  3 
13 files changed, 178 insertions(+), 56 deletions(-)

Detailed changes

crates/editor/src/editor.rs 🔗

@@ -2654,6 +2654,7 @@ impl Editor {
             false
         });
     }
+
     fn completion_query(buffer: &MultiBufferSnapshot, position: impl ToOffset) -> Option<String> {
         let offset = position.to_offset(buffer);
         let (word_range, kind) = buffer.surrounding_word(offset);
@@ -8878,6 +8879,7 @@ pub fn split_words<'a>(text: &'a str) -> impl std::iter::Iterator<Item = &'a str
         None
     })
     .flat_map(|word| word.split_inclusive('_'))
+    .flat_map(|word| word.split_inclusive('-'))
 }
 
 trait RangeToAnchorExt {

crates/editor/src/editor_tests.rs 🔗

@@ -19,7 +19,8 @@ use gpui::{
 use indoc::indoc;
 use language::{
     language_settings::{AllLanguageSettings, AllLanguageSettingsContent, LanguageSettingsContent},
-    BracketPairConfig, FakeLspAdapter, LanguageConfig, LanguageRegistry, Point,
+    BracketPairConfig, FakeLspAdapter, LanguageConfig, LanguageConfigOverride, LanguageRegistry,
+    Override, Point,
 };
 use parking_lot::Mutex;
 use project::project_settings::{LspSettings, ProjectSettings};
@@ -7611,6 +7612,99 @@ async fn test_completions_with_additional_edits(cx: &mut gpui::TestAppContext) {
     cx.assert_editor_state(indoc! {"fn main() { let a = Some(2)ˇ; }"});
 }
 
+#[gpui::test]
+async fn test_completions_in_languages_with_extra_word_characters(cx: &mut gpui::TestAppContext) {
+    init_test(cx, |_| {});
+    // Fixture: TSX grammar whose "element" override scope adds '-' as a word character.
+    let mut cx = EditorLspTestContext::new(
+        Language::new(
+            LanguageConfig {
+                path_suffixes: vec!["jsx".into()],
+                overrides: [(
+                    "element".into(),
+                    LanguageConfigOverride {
+                        word_characters: Override::Set(['-'].into_iter().collect()),
+                        ..Default::default()
+                    },
+                )]
+                .into_iter()
+                .collect(),
+                ..Default::default()
+            },
+            Some(tree_sitter_typescript::language_tsx()),
+        )
+        .with_override_query("(jsx_self_closing_element) @element")
+        .unwrap(),
+        Default::default(),
+        cx,
+    )
+    .await;
+    // The fake language server answers every completion request with the same three labels.
+    cx.lsp
+        .handle_request::<lsp::request::Completion, _, _>(move |_, _| async move {
+            Ok(Some(lsp::CompletionResponse::Array(vec![
+                lsp::CompletionItem {
+                    label: "bg-blue".into(),
+                    ..Default::default()
+                },
+                lsp::CompletionItem {
+                    label: "bg-red".into(),
+                    ..Default::default()
+                },
+                lsp::CompletionItem {
+                    label: "bg-yellow".into(),
+                    ..Default::default()
+                },
+            ])))
+        });
+
+    cx.set_state(r#"<p class="bgˇ" />"#);
+
+    // Trigger completion when typing a dash, because the dash is an extra
+    // word character in the 'element' scope, which contains the cursor.
+    cx.simulate_keystroke("-");
+    cx.foreground().run_until_parked();
+    cx.update_editor(|editor, _| {
+        if let Some(ContextMenu::Completions(menu)) = &editor.context_menu {
+            assert_eq!(
+                menu.matches.iter().map(|m| &m.string).collect::<Vec<_>>(),
+                &["bg-red", "bg-blue", "bg-yellow"]
+            );
+        } else {
+            panic!("expected completion menu to be open");
+        }
+    });
+    // Typing "l" extends the query to "bg-l"; "bg-red" contains no 'l', so it drops out.
+    cx.simulate_keystroke("l");
+    cx.foreground().run_until_parked();
+    cx.update_editor(|editor, _| {
+        if let Some(ContextMenu::Completions(menu)) = &editor.context_menu {
+            assert_eq!(
+                menu.matches.iter().map(|m| &m.string).collect::<Vec<_>>(),
+                &["bg-blue", "bg-yellow"]
+            );
+        } else {
+            panic!("expected completion menu to be open");
+        }
+    });
+
+    // When filtering completions, consider the character after the '-' to be the start of a
+    // subword: the query "yell" matches only "bg-yellow" (no other label contains a 'y').
+    cx.set_state(r#"<p class="yelˇ" />"#);
+    cx.simulate_keystroke("l");
+    cx.foreground().run_until_parked();
+    cx.update_editor(|editor, _| {
+        if let Some(ContextMenu::Completions(menu)) = &editor.context_menu {
+            assert_eq!(
+                menu.matches.iter().map(|m| &m.string).collect::<Vec<_>>(),
+                &["bg-yellow"]
+            );
+        } else {
+            panic!("expected completion menu to be open");
+        }
+    });
+}
+
 fn empty_range(row: usize, column: usize) -> Range<DisplayPoint> {
     let point = DisplayPoint::new(row as u32, column as u32);
     point..point

crates/editor/src/movement.rs 🔗

@@ -177,20 +177,20 @@ pub fn line_end(
 
 pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let language = map.buffer_snapshot.language_at(raw_point);
+    let scope = map.buffer_snapshot.language_scope_at(raw_point);
 
     find_preceding_boundary(map, point, |left, right| {
-        (char_kind(language, left) != char_kind(language, right) && !right.is_whitespace())
+        (char_kind(&scope, left) != char_kind(&scope, right) && !right.is_whitespace())
             || left == '\n'
     })
 }
 
 pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let language = map.buffer_snapshot.language_at(raw_point);
+    let scope = map.buffer_snapshot.language_scope_at(raw_point);
     find_preceding_boundary(map, point, |left, right| {
         let is_word_start =
-            char_kind(language, left) != char_kind(language, right) && !right.is_whitespace();
+            char_kind(&scope, left) != char_kind(&scope, right) && !right.is_whitespace();
         let is_subword_start =
             left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase();
         is_word_start || is_subword_start || left == '\n'
@@ -199,19 +199,19 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis
 
 pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let language = map.buffer_snapshot.language_at(raw_point);
+    let scope = map.buffer_snapshot.language_scope_at(raw_point);
     find_boundary(map, point, |left, right| {
-        (char_kind(language, left) != char_kind(language, right) && !left.is_whitespace())
+        (char_kind(&scope, left) != char_kind(&scope, right) && !left.is_whitespace())
             || right == '\n'
     })
 }
 
 pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let language = map.buffer_snapshot.language_at(raw_point);
+    let scope = map.buffer_snapshot.language_scope_at(raw_point);
     find_boundary(map, point, |left, right| {
         let is_word_end =
-            (char_kind(language, left) != char_kind(language, right)) && !left.is_whitespace();
+            (char_kind(&scope, left) != char_kind(&scope, right)) && !left.is_whitespace();
         let is_subword_end =
             left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
         is_word_end || is_subword_end || right == '\n'
@@ -399,14 +399,14 @@ pub fn find_boundary_in_line(
 
 pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
     let raw_point = point.to_point(map);
-    let language = map.buffer_snapshot.language_at(raw_point);
+    let scope = map.buffer_snapshot.language_scope_at(raw_point);
     let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left);
     let text = &map.buffer_snapshot;
-    let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(language, c));
+    let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(&scope, c));
     let prev_char_kind = text
         .reversed_chars_at(ix)
         .next()
-        .map(|c| char_kind(language, c));
+        .map(|c| char_kind(&scope, c));
     prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word))
 }
 

crates/editor/src/multi_buffer.rs 🔗

@@ -1360,11 +1360,13 @@ impl MultiBuffer {
             return false;
         }
 
-        if char.is_alphanumeric() || char == '_' {
+        let snapshot = self.snapshot(cx);
+        let position = position.to_offset(&snapshot);
+        let scope = snapshot.language_scope_at(position);
+        if char_kind(&scope, char) == CharKind::Word {
             return true;
         }
 
-        let snapshot = self.snapshot(cx);
         let anchor = snapshot.anchor_before(position);
         anchor
             .buffer_id
@@ -1866,8 +1868,8 @@ impl MultiBufferSnapshot {
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
 
-        let language = self.language_at(start);
-        let kind = |c| char_kind(language, c);
+        let scope = self.language_scope_at(start);
+        let kind = |c| char_kind(&scope, c);
         let word_kind = cmp::max(
             prev_chars.peek().copied().map(kind),
             next_chars.peek().copied().map(kind),

crates/language/src/buffer.rs 🔗

@@ -2175,8 +2175,8 @@ impl BufferSnapshot {
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
 
-        let language = self.language_at(start);
-        let kind = |c| char_kind(language, c);
+        let scope = self.language_scope_at(start);
+        let kind = |c| char_kind(&scope, c);
         let word_kind = cmp::max(
             prev_chars.peek().copied().map(kind),
             next_chars.peek().copied().map(kind),
@@ -2988,17 +2988,21 @@ pub fn contiguous_ranges(
     })
 }
 
-pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind {
+pub fn char_kind(scope: &Option<LanguageScope>, c: char) -> CharKind {
     if c.is_whitespace() {
         return CharKind::Whitespace;
     } else if c.is_alphanumeric() || c == '_' {
         return CharKind::Word;
     }
-    if let Some(language) = language {
-        if language.config.word_characters.contains(&c) {
-            return CharKind::Word;
+
+    if let Some(scope) = scope {
+        if let Some(characters) = scope.word_characters() {
+            if characters.contains(&c) {
+                return CharKind::Word;
+            }
         }
     }
+
     CharKind::Punctuation
 }
 

crates/language/src/language.rs 🔗

@@ -370,6 +370,8 @@ pub struct LanguageConfigOverride {
     pub block_comment: Override<(Arc<str>, Arc<str>)>,
     #[serde(skip_deserializing)]
     pub disabled_bracket_ixs: Vec<u16>,
+    #[serde(default)]
+    pub word_characters: Override<HashSet<char>>,
 }
 
 #[derive(Clone, Deserialize, Debug)]
@@ -1557,6 +1559,13 @@ impl LanguageScope {
         .map(|e| (&e.0, &e.1))
     }
 
+    pub fn word_characters(&self) -> Option<&HashSet<char>> {
+        Override::as_option(
+            self.config_override().map(|o| &o.word_characters),
+            Some(&self.language.config.word_characters),
+        )
+    }
+
     pub fn brackets(&self) -> impl Iterator<Item = (&BracketPair, bool)> {
         let mut disabled_ids = self
             .config_override()

crates/live_kit_client/LiveKitBridge/Package.resolved 🔗

@@ -42,8 +42,8 @@
         "repositoryURL": "https://github.com/apple/swift-protobuf.git",
         "state": {
           "branch": null,
-          "revision": "0af9125c4eae12a4973fb66574c53a54962a9e1e",
-          "version": "1.21.0"
+          "revision": "ce20dc083ee485524b802669890291c0d8090170",
+          "version": "1.22.1"
         }
       }
     ]

crates/project/src/search.rs 🔗

@@ -204,15 +204,14 @@ impl SearchQuery {
         if self.as_str().is_empty() {
             return Default::default();
         }
-        let language = buffer.language_at(0);
+
+        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
         let rope = if let Some(range) = subrange {
             buffer.as_rope().slice(range)
         } else {
             buffer.as_rope().clone()
         };
 
-        let kind = |c| char_kind(language, c);
-
         let mut matches = Vec::new();
         match self {
             Self::Text {
@@ -228,6 +227,9 @@ impl SearchQuery {
 
                     let mat = mat.unwrap();
                     if *whole_word {
+                        let scope = buffer.language_scope_at(range_offset + mat.start());
+                        let kind = |c| char_kind(&scope, c);
+
                         let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
                         let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
                         let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());

crates/vim/src/motion.rs 🔗

@@ -439,12 +439,12 @@ pub(crate) fn next_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let language = map.buffer_snapshot.language_at(point.to_point(map));
+    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
     for _ in 0..times {
         let mut crossed_newline = false;
         point = movement::find_boundary(map, point, |left, right| {
-            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(&scope, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(&scope, right).coerce_punctuation(ignore_punctuation);
             let at_newline = right == '\n';
 
             let found = (left_kind != right_kind && right_kind != CharKind::Whitespace)
@@ -464,12 +464,12 @@ fn next_word_end(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let language = map.buffer_snapshot.language_at(point.to_point(map));
+    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
     for _ in 0..times {
         *point.column_mut() += 1;
         point = movement::find_boundary(map, point, |left, right| {
-            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(&scope, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(&scope, right).coerce_punctuation(ignore_punctuation);
 
             left_kind != right_kind && left_kind != CharKind::Whitespace
         });
@@ -495,13 +495,13 @@ fn previous_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let language = map.buffer_snapshot.language_at(point.to_point(map));
+    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
     for _ in 0..times {
         // This works even though find_preceding_boundary is called for every character in the line containing
         // cursor because the newline is checked only once.
         point = movement::find_preceding_boundary(map, point, |left, right| {
-            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(&scope, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(&scope, right).coerce_punctuation(ignore_punctuation);
 
             (left_kind != right_kind && !right.is_whitespace()) || left == '\n'
         });
@@ -511,7 +511,7 @@ fn previous_word_start(
 
 fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint {
     let mut last_point = DisplayPoint::new(from.row(), 0);
-    let language = map.buffer_snapshot.language_at(from.to_point(map));
+    let scope = map.buffer_snapshot.language_scope_at(from.to_point(map));
     for (ch, point) in map.chars_at(last_point) {
         if ch == '\n' {
             return from;
@@ -519,7 +519,7 @@ fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoi
 
         last_point = point;
 
-        if char_kind(language, ch) != CharKind::Whitespace {
+        if char_kind(&scope, ch) != CharKind::Whitespace {
             break;
         }
     }

crates/vim/src/normal/change.rs 🔗

@@ -82,19 +82,19 @@ fn expand_changed_word_selection(
     ignore_punctuation: bool,
 ) -> bool {
     if times.is_none() || times.unwrap() == 1 {
-        let language = map
+        let scope = map
             .buffer_snapshot
-            .language_at(selection.start.to_point(map));
+            .language_scope_at(selection.start.to_point(map));
         let in_word = map
             .chars_at(selection.head())
             .next()
-            .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
+            .map(|(c, _)| char_kind(&scope, c) != CharKind::Whitespace)
             .unwrap_or_default();
 
         if in_word {
             selection.end = movement::find_boundary(map, selection.end, |left, right| {
-                let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
-                let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
+                let left_kind = char_kind(&scope, left).coerce_punctuation(ignore_punctuation);
+                let right_kind = char_kind(&scope, right).coerce_punctuation(ignore_punctuation);
 
                 left_kind != right_kind && left_kind != CharKind::Whitespace
             });

crates/vim/src/object.rs 🔗

@@ -122,18 +122,20 @@ fn in_word(
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
     // Use motion::right so that we consider the character under the cursor when looking for the start
-    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
+    let scope = map
+        .buffer_snapshot
+        .language_scope_at(relative_to.to_point(map));
     let start = movement::find_preceding_boundary_in_line(
         map,
         right(map, relative_to, 1),
         |left, right| {
-            char_kind(language, left).coerce_punctuation(ignore_punctuation)
-                != char_kind(language, right).coerce_punctuation(ignore_punctuation)
+            char_kind(&scope, left).coerce_punctuation(ignore_punctuation)
+                != char_kind(&scope, right).coerce_punctuation(ignore_punctuation)
         },
     );
     let end = movement::find_boundary_in_line(map, relative_to, |left, right| {
-        char_kind(language, left).coerce_punctuation(ignore_punctuation)
-            != char_kind(language, right).coerce_punctuation(ignore_punctuation)
+        char_kind(&scope, left).coerce_punctuation(ignore_punctuation)
+            != char_kind(&scope, right).coerce_punctuation(ignore_punctuation)
     });
 
     Some(start..end)
@@ -156,11 +158,13 @@ fn around_word(
     relative_to: DisplayPoint,
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
-    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
+    let scope = map
+        .buffer_snapshot
+        .language_scope_at(relative_to.to_point(map));
     let in_word = map
         .chars_at(relative_to)
         .next()
-        .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
+        .map(|(c, _)| char_kind(&scope, c) != CharKind::Whitespace)
         .unwrap_or(false);
 
     if in_word {
@@ -184,21 +188,23 @@ fn around_next_word(
     relative_to: DisplayPoint,
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
-    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
+    let scope = map
+        .buffer_snapshot
+        .language_scope_at(relative_to.to_point(map));
     // Get the start of the word
     let start = movement::find_preceding_boundary_in_line(
         map,
         right(map, relative_to, 1),
         |left, right| {
-            char_kind(language, left).coerce_punctuation(ignore_punctuation)
-                != char_kind(language, right).coerce_punctuation(ignore_punctuation)
+            char_kind(&scope, left).coerce_punctuation(ignore_punctuation)
+                != char_kind(&scope, right).coerce_punctuation(ignore_punctuation)
         },
     );
 
     let mut word_found = false;
     let end = movement::find_boundary(map, relative_to, |left, right| {
-        let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
-        let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
+        let left_kind = char_kind(&scope, left).coerce_punctuation(ignore_punctuation);
+        let right_kind = char_kind(&scope, right).coerce_punctuation(ignore_punctuation);
 
         let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n';