Fix `-` being a word character for selections (#17171)

Conrad Irwin , Mikayla , and Nate created 2 years ago

Co-Authored-By: Mikayla <mikayla@zed.dev>
Co-Authored-By: Nate <nate@zed.dev>

Closes #15606
Closes #13515

Release Notes:

- Fixes `-` being considered a word character for selections in some
languages

Co-authored-by: Mikayla <mikayla@zed.dev>
Co-authored-by: Nate <nate@zed.dev>

Change summary

crates/editor/src/editor.rs                       | 24 ++--
crates/editor/src/hover_links.rs                  |  2 
crates/editor/src/items.rs                        |  2 
crates/editor/src/movement.rs                     | 38 +++---
crates/editor/src/test/editor_lsp_test_context.rs |  1 
crates/language/src/buffer.rs                     | 90 ++++++++++++---
crates/multi_buffer/src/multi_buffer.rs           | 36 ++++--
crates/project/src/search.rs                      | 20 ++-
crates/vim/src/motion.rs                          | 92 +++++++++-------
crates/vim/src/normal/change.rs                   | 16 +-
crates/vim/src/object.rs                          | 42 +++----
crates/vim/src/test.rs                            | 19 +++
12 files changed, 239 insertions(+), 143 deletions(-)

Detailed changes

crates/editor/src/editor.rs 🔗

@@ -89,13 +89,12 @@ pub use inline_completion_provider::*;
 pub use items::MAX_TAB_TITLE_LEN;
 use itertools::Itertools;
 use language::{
-    char_kind,
     language_settings::{self, all_language_settings, InlayHintSettings},
     markdown, point_from_lsp, AutoindentMode, BracketPair, Buffer, Capability, CharKind, CodeLabel,
     CursorShape, Diagnostic, Documentation, IndentKind, IndentSize, Language, OffsetRangeExt,
     Point, Selection, SelectionGoal, TransactionId,
 };
-use language::{point_to_lsp, BufferRow, Runnable, RunnableRange};
+use language::{point_to_lsp, BufferRow, CharClassifier, Runnable, RunnableRange};
 use linked_editing_ranges::refresh_linked_ranges;
 use task::{ResolvedTask, TaskTemplate, TaskVariables};
 
@@ -2443,7 +2442,8 @@ impl Editor {
 
             if let Some(completion_menu) = completion_menu {
                 let cursor_position = new_cursor_position.to_offset(buffer);
-                let (word_range, kind) = buffer.surrounding_word(completion_menu.initial_position);
+                let (word_range, kind) =
+                    buffer.surrounding_word(completion_menu.initial_position, true);
                 if kind == Some(CharKind::Word)
                     && word_range.to_inclusive().contains(&cursor_position)
                 {
@@ -3289,10 +3289,8 @@ impl Editor {
                 let start_anchor = snapshot.anchor_before(selection.start);
 
                 let is_word_char = text.chars().next().map_or(true, |char| {
-                    let scope = snapshot.language_scope_at(start_anchor.to_offset(&snapshot));
-                    let kind = char_kind(&scope, char);
-
-                    kind == CharKind::Word
+                    let classifier = snapshot.char_classifier_at(start_anchor.to_offset(&snapshot));
+                    classifier.is_word(char)
                 });
 
                 if is_word_char {
@@ -3923,7 +3921,7 @@ impl Editor {
 
     fn completion_query(buffer: &MultiBufferSnapshot, position: impl ToOffset) -> Option<String> {
         let offset = position.to_offset(buffer);
-        let (word_range, kind) = buffer.surrounding_word(offset);
+        let (word_range, kind) = buffer.surrounding_word(offset, true);
         if offset > word_range.start && kind == Some(CharKind::Word) {
             Some(
                 buffer
@@ -12302,10 +12300,11 @@ fn snippet_completions(
     };
 
     let scope = language.map(|language| language.default_scope());
+    let classifier = CharClassifier::new(scope).for_completion(true);
     let mut last_word = line_at
         .chars()
         .rev()
-        .take_while(|c| char_kind(&scope, *c) == CharKind::Word)
+        .take_while(|c| classifier.is_word(*c))
         .collect::<String>();
     last_word = last_word.chars().rev().collect();
     let as_offset = text::ToOffset::to_offset(&buffer_position, &snapshot);
@@ -12436,8 +12435,11 @@ impl CompletionProvider for Model<Project> {
         }
 
         let buffer = buffer.read(cx);
-        let scope = buffer.snapshot().language_scope_at(position);
-        if trigger_in_words && char_kind(&scope, char) == CharKind::Word {
+        let classifier = buffer
+            .snapshot()
+            .char_classifier_at(position)
+            .for_completion(true);
+        if trigger_in_words && classifier.is_word(char) {
             return true;
         }

crates/editor/src/hover_links.rs 🔗

@@ -613,7 +613,7 @@ pub fn show_link_definition(
                                 TriggerPoint::Text(trigger_anchor) => {
                                     // If no symbol range returned from language server, use the surrounding word.
                                     let (offset_range, _) =
-                                        snapshot.surrounding_word(*trigger_anchor);
+                                        snapshot.surrounding_word(*trigger_anchor, false);
                                     RangeInEditor::Text(
                                         snapshot.anchor_before(offset_range.start)
                                             ..snapshot.anchor_after(offset_range.end),

crates/editor/src/items.rs 🔗

@@ -1225,7 +1225,7 @@ impl SearchableItem for Editor {
             }
             SeedQuerySetting::Selection => String::new(),
             SeedQuerySetting::Always => {
-                let (range, kind) = snapshot.surrounding_word(selection.start);
+                let (range, kind) = snapshot.surrounding_word(selection.start, true);
                 if kind == Some(CharKind::Word) {
                     let text: String = snapshot.text_for_range(range).collect();
                     if !text.trim().is_empty() {

crates/editor/src/movement.rs 🔗

@@ -2,9 +2,7 @@
 //! in editor given a given motion (e.g. it handles converting a "move left" command into coordinates in editor). It is exposed mostly for use by vim crate.
 
 use super::{Bias, DisplayPoint, DisplaySnapshot, SelectionGoal, ToDisplayPoint};
-use crate::{
-    char_kind, scroll::ScrollAnchor, CharKind, DisplayRow, EditorStyle, RowExt, ToOffset, ToPoint,
-};
+use crate::{scroll::ScrollAnchor, CharKind, DisplayRow, EditorStyle, RowExt, ToOffset, ToPoint};
 use gpui::{px, Pixels, WindowTextSystem};
 use language::Point;
 use multi_buffer::{MultiBufferRow, MultiBufferSnapshot};
@@ -264,10 +262,10 @@ pub fn line_end(
 /// uppercase letter, lowercase letter, '_' character or language-specific word character (like '-' in CSS).
 pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let scope = map.buffer_snapshot.language_scope_at(raw_point);
+    let classifier = map.buffer_snapshot.char_classifier_at(raw_point);
 
     find_preceding_boundary_display_point(map, point, FindRange::MultiLine, |left, right| {
-        (char_kind(&scope, left) != char_kind(&scope, right) && !right.is_whitespace())
+        (classifier.kind(left) != classifier.kind(right) && !classifier.is_whitespace(right))
             || left == '\n'
     })
 }
@@ -277,13 +275,14 @@ pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> Displa
 /// lowerspace characters and uppercase characters.
 pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let scope = map.buffer_snapshot.language_scope_at(raw_point);
+    let classifier = map.buffer_snapshot.char_classifier_at(raw_point);
 
     find_preceding_boundary_display_point(map, point, FindRange::MultiLine, |left, right| {
         let is_word_start =
-            char_kind(&scope, left) != char_kind(&scope, right) && !right.is_whitespace();
-        let is_subword_start =
-            left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase();
+            classifier.kind(left) != classifier.kind(right) && !right.is_whitespace();
+        let is_subword_start = classifier.is_word('-') && left == '-' && right != '-'
+            || left == '_' && right != '_'
+            || left.is_lowercase() && right.is_uppercase();
         is_word_start || is_subword_start || left == '\n'
     })
 }
@@ -292,10 +291,10 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis
 /// uppercase letter, lowercase letter, '_' character or language-specific word character (like '-' in CSS).
 pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let scope = map.buffer_snapshot.language_scope_at(raw_point);
+    let classifier = map.buffer_snapshot.char_classifier_at(raw_point);
 
     find_boundary(map, point, FindRange::MultiLine, |left, right| {
-        (char_kind(&scope, left) != char_kind(&scope, right) && !left.is_whitespace())
+        (classifier.kind(left) != classifier.kind(right) && !classifier.is_whitespace(left))
             || right == '\n'
     })
 }
@@ -305,13 +304,14 @@ pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint
 /// lowerspace characters and uppercase characters.
 pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     let raw_point = point.to_point(map);
-    let scope = map.buffer_snapshot.language_scope_at(raw_point);
+    let classifier = map.buffer_snapshot.char_classifier_at(raw_point);
 
     find_boundary(map, point, FindRange::MultiLine, |left, right| {
         let is_word_end =
-            (char_kind(&scope, left) != char_kind(&scope, right)) && !left.is_whitespace();
-        let is_subword_end =
-            left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
+            (classifier.kind(left) != classifier.kind(right)) && !classifier.is_whitespace(left);
+        let is_subword_end = classifier.is_word('-') && left != '-' && right == '-'
+            || left != '_' && right == '_'
+            || left.is_lowercase() && right.is_uppercase();
         is_word_end || is_subword_end || right == '\n'
     })
 }
@@ -509,14 +509,14 @@ pub fn chars_before(
 
 pub(crate) fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
     let raw_point = point.to_point(map);
-    let scope = map.buffer_snapshot.language_scope_at(raw_point);
+    let classifier = map.buffer_snapshot.char_classifier_at(raw_point);
     let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left);
     let text = &map.buffer_snapshot;
-    let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(&scope, c));
+    let next_char_kind = text.chars_at(ix).next().map(|c| classifier.kind(c));
     let prev_char_kind = text
         .reversed_chars_at(ix)
         .next()
-        .map(|c| char_kind(&scope, c));
+        .map(|c| classifier.kind(c));
     prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word))
 }
 
@@ -527,7 +527,7 @@ pub(crate) fn surrounding_word(
     let position = map
         .clip_point(position, Bias::Left)
         .to_offset(map, Bias::Left);
-    let (range, _) = map.buffer_snapshot.surrounding_word(position);
+    let (range, _) = map.buffer_snapshot.surrounding_word(position, false);
     let start = range
         .start
         .to_point(&map.buffer_snapshot)

crates/editor/src/test/editor_lsp_test_context.rs 🔗

@@ -226,6 +226,7 @@ impl EditorLspTestContext {
                     ..Default::default()
                 },
                 block_comment: Some(("<!-- ".into(), " -->".into())),
+                word_characters: ['-'].into_iter().collect(),
                 ..Default::default()
             },
             Some(tree_sitter_html::language()),

crates/language/src/buffer.rs 🔗

@@ -2659,6 +2659,10 @@ impl BufferSnapshot {
         language_settings(self.language_at(position), self.file.as_ref(), cx)
     }
 
+    pub fn char_classifier_at<T: ToOffset>(&self, point: T) -> CharClassifier {
+        CharClassifier::new(self.language_scope_at(point))
+    }
+
     /// Returns the [LanguageScope] at the given location.
     pub fn language_scope_at<D: ToOffset>(&self, position: D) -> Option<LanguageScope> {
         let offset = position.to_offset(self);
@@ -2715,15 +2719,14 @@ impl BufferSnapshot {
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
 
-        let scope = self.language_scope_at(start);
-        let kind = |c| char_kind(&scope, c);
+        let classifier = self.char_classifier_at(start);
         let word_kind = cmp::max(
-            prev_chars.peek().copied().map(kind),
-            next_chars.peek().copied().map(kind),
+            prev_chars.peek().copied().map(|c| classifier.kind(c)),
+            next_chars.peek().copied().map(|c| classifier.kind(c)),
         );
 
         for ch in prev_chars {
-            if Some(kind(ch)) == word_kind && ch != '\n' {
+            if Some(classifier.kind(ch)) == word_kind && ch != '\n' {
                 start -= ch.len_utf8();
             } else {
                 break;
@@ -2731,7 +2734,7 @@ impl BufferSnapshot {
         }
 
         for ch in next_chars {
-            if Some(kind(ch)) == word_kind && ch != '\n' {
+            if Some(classifier.kind(ch)) == word_kind && ch != '\n' {
                 end += ch.len_utf8();
             } else {
                 break;
@@ -4215,25 +4218,72 @@ pub(crate) fn contiguous_ranges(
     })
 }
 
-/// Returns the [CharKind] for the given character. When a scope is provided,
-/// the function checks if the character is considered a word character
-/// based on the language scope's word character settings.
-pub fn char_kind(scope: &Option<LanguageScope>, c: char) -> CharKind {
-    if c.is_whitespace() {
-        return CharKind::Whitespace;
-    } else if c.is_alphanumeric() || c == '_' {
-        return CharKind::Word;
+#[derive(Default, Debug)]
+pub struct CharClassifier {
+    scope: Option<LanguageScope>,
+    for_completion: bool,
+    ignore_punctuation: bool,
+}
+
+impl CharClassifier {
+    pub fn new(scope: Option<LanguageScope>) -> Self {
+        Self {
+            scope,
+            for_completion: false,
+            ignore_punctuation: false,
+        }
     }
 
-    if let Some(scope) = scope {
-        if let Some(characters) = scope.word_characters() {
-            if characters.contains(&c) {
-                return CharKind::Word;
-            }
+    pub fn for_completion(self, for_completion: bool) -> Self {
+        Self {
+            for_completion,
+            ..self
         }
     }
 
-    CharKind::Punctuation
+    pub fn ignore_punctuation(self, ignore_punctuation: bool) -> Self {
+        Self {
+            ignore_punctuation,
+            ..self
+        }
+    }
+
+    pub fn is_whitespace(&self, c: char) -> bool {
+        self.kind(c) == CharKind::Whitespace
+    }
+
+    pub fn is_word(&self, c: char) -> bool {
+        self.kind(c) == CharKind::Word
+    }
+
+    pub fn is_punctuation(&self, c: char) -> bool {
+        self.kind(c) == CharKind::Punctuation
+    }
+
+    pub fn kind(&self, c: char) -> CharKind {
+        if c.is_whitespace() {
+            return CharKind::Whitespace;
+        } else if c.is_alphanumeric() || c == '_' {
+            return CharKind::Word;
+        }
+
+        if let Some(scope) = &self.scope {
+            if let Some(characters) = scope.word_characters() {
+                if characters.contains(&c) {
+                    if c == '-' && !self.for_completion && !self.ignore_punctuation {
+                        return CharKind::Punctuation;
+                    }
+                    return CharKind::Word;
+                }
+            }
+        }
+
+        if self.ignore_punctuation {
+            CharKind::Word
+        } else {
+            CharKind::Punctuation
+        }
+    }
 }
 
 /// Find all of the ranges of whitespace that occur at the ends of lines

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -9,12 +9,12 @@ use git::diff::DiffHunk;
 use gpui::{AppContext, EntityId, EventEmitter, Model, ModelContext};
 use itertools::Itertools;
 use language::{
-    char_kind,
     language_settings::{language_settings, LanguageSettings},
-    AutoindentMode, Buffer, BufferChunks, BufferRow, BufferSnapshot, Capability, CharKind, Chunk,
-    CursorShape, DiagnosticEntry, File, IndentGuide, IndentSize, Language, LanguageScope,
-    OffsetRangeExt, OffsetUtf16, Outline, OutlineItem, Point, PointUtf16, Selection, TextDimension,
-    ToOffset as _, ToOffsetUtf16 as _, ToPoint as _, ToPointUtf16 as _, TransactionId, Unclipped,
+    AutoindentMode, Buffer, BufferChunks, BufferRow, BufferSnapshot, Capability, CharClassifier,
+    CharKind, Chunk, CursorShape, DiagnosticEntry, File, IndentGuide, IndentSize, Language,
+    LanguageScope, OffsetRangeExt, OffsetUtf16, Outline, OutlineItem, Point, PointUtf16, Selection,
+    TextDimension, ToOffset as _, ToOffsetUtf16 as _, ToPoint as _, ToPointUtf16 as _,
+    TransactionId, Unclipped,
 };
 use smallvec::SmallVec;
 use std::{
@@ -2295,21 +2295,27 @@ impl MultiBufferSnapshot {
                 .eq(needle.bytes())
     }
 
-    pub fn surrounding_word<T: ToOffset>(&self, start: T) -> (Range<usize>, Option<CharKind>) {
+    pub fn surrounding_word<T: ToOffset>(
+        &self,
+        start: T,
+        for_completion: bool,
+    ) -> (Range<usize>, Option<CharKind>) {
         let mut start = start.to_offset(self);
         let mut end = start;
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
 
-        let scope = self.language_scope_at(start);
-        let kind = |c| char_kind(&scope, c);
+        let classifier = self
+            .char_classifier_at(start)
+            .for_completion(for_completion);
+
         let word_kind = cmp::max(
-            prev_chars.peek().copied().map(kind),
-            next_chars.peek().copied().map(kind),
+            prev_chars.peek().copied().map(|c| classifier.kind(c)),
+            next_chars.peek().copied().map(|c| classifier.kind(c)),
         );
 
         for ch in prev_chars {
-            if Some(kind(ch)) == word_kind && ch != '\n' {
+            if Some(classifier.kind(ch)) == word_kind && ch != '\n' {
                 start -= ch.len_utf8();
             } else {
                 break;
@@ -2317,7 +2323,7 @@ impl MultiBufferSnapshot {
         }
 
         for ch in next_chars {
-            if Some(kind(ch)) == word_kind && ch != '\n' {
+            if Some(classifier.kind(ch)) == word_kind && ch != '\n' {
                 end += ch.len_utf8();
             } else {
                 break;
@@ -3478,6 +3484,12 @@ impl MultiBufferSnapshot {
             .and_then(|(buffer, offset)| buffer.language_scope_at(offset))
     }
 
+    pub fn char_classifier_at<T: ToOffset>(&self, point: T) -> CharClassifier {
+        self.point_to_buffer_offset(point)
+            .map(|(buffer, offset)| buffer.char_classifier_at(offset))
+            .unwrap_or_default()
+    }
+
     pub fn language_indent_size_at<T: ToOffset>(
         &self,
         position: T,

crates/project/src/search.rs 🔗

@@ -2,7 +2,7 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
 use anyhow::Result;
 use client::proto;
 use gpui::Model;
-use language::{char_kind, Buffer, BufferSnapshot};
+use language::{Buffer, BufferSnapshot};
 use regex::{Captures, Regex, RegexBuilder};
 use smol::future::yield_now;
 use std::{
@@ -331,13 +331,17 @@ impl SearchQuery {
 
                     let mat = mat.unwrap();
                     if *whole_word {
-                        let scope = buffer.language_scope_at(range_offset + mat.start());
-                        let kind = |c| char_kind(&scope, c);
-
-                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
-                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
-                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
-                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
+                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
+
+                        let prev_kind = rope
+                            .reversed_chars_at(mat.start())
+                            .next()
+                            .map(|c| classifier.kind(c));
+                        let start_kind =
+                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
+                        let end_kind =
+                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
+                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                         if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                             continue;
                         }

crates/vim/src/motion.rs 🔗

@@ -7,7 +7,7 @@ use editor::{
     Anchor, Bias, DisplayPoint, Editor, RowExt, ToOffset,
 };
 use gpui::{actions, impl_actions, px, ViewContext};
-use language::{char_kind, CharKind, Point, Selection, SelectionGoal};
+use language::{CharKind, Point, Selection, SelectionGoal};
 use multi_buffer::MultiBufferRow;
 use serde::Deserialize;
 use std::ops::Range;
@@ -1131,12 +1131,15 @@ pub(crate) fn next_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         let mut crossed_newline = false;
         let new_point = movement::find_boundary(map, point, FindRange::MultiLine, |left, right| {
-            let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-            let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+            let left_kind = classifier.kind(left);
+            let right_kind = classifier.kind(right);
             let at_newline = right == '\n';
 
             let found = (left_kind != right_kind && right_kind != CharKind::Whitespace)
@@ -1161,7 +1164,10 @@ pub(crate) fn next_word_end(
     times: usize,
     allow_cross_newline: bool,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         let new_point = next_char(map, point, allow_cross_newline);
         let mut need_next_char = false;
@@ -1170,8 +1176,8 @@ pub(crate) fn next_word_end(
             new_point,
             FindRange::MultiLine,
             |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
                 let at_newline = right == '\n';
 
                 if !allow_cross_newline && at_newline {
@@ -1202,7 +1208,10 @@ fn previous_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         // This works even though find_preceding_boundary is called for every character in the line containing
         // cursor because the newline is checked only once.
@@ -1211,8 +1220,8 @@ fn previous_word_start(
             point,
             FindRange::MultiLine,
             |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
 
                 (left_kind != right_kind && !right.is_whitespace()) || left == '\n'
             },
@@ -1231,7 +1240,10 @@ fn previous_word_end(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     let mut point = point.to_point(map);
 
     if point.column < map.buffer_snapshot.line_len(MultiBufferRow(point.row)) {
@@ -1243,8 +1255,8 @@ fn previous_word_end(
             point,
             FindRange::MultiLine,
             |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
                 match (left_kind, right_kind) {
                     (CharKind::Punctuation, CharKind::Whitespace)
                     | (CharKind::Punctuation, CharKind::Word)
@@ -1269,12 +1281,15 @@ fn next_subword_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         let mut crossed_newline = false;
         let new_point = movement::find_boundary(map, point, FindRange::MultiLine, |left, right| {
-            let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-            let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+            let left_kind = classifier.kind(left);
+            let right_kind = classifier.kind(right);
             let at_newline = right == '\n';
 
             let is_word_start = (left_kind != right_kind) && !left.is_alphanumeric();
@@ -1303,7 +1318,10 @@ pub(crate) fn next_subword_end(
     times: usize,
     allow_cross_newline: bool,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         let new_point = next_char(map, point, allow_cross_newline);
 
@@ -1311,8 +1329,8 @@ pub(crate) fn next_subword_end(
         let mut need_backtrack = false;
         let new_point =
             movement::find_boundary(map, new_point, FindRange::MultiLine, |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
                 let at_newline = right == '\n';
 
                 if !allow_cross_newline && at_newline {
@@ -1350,7 +1368,10 @@ fn previous_subword_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     for _ in 0..times {
         let mut crossed_newline = false;
         // This works even though find_preceding_boundary is called for every character in the line containing
@@ -1360,8 +1381,8 @@ fn previous_subword_start(
             point,
             FindRange::MultiLine,
             |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
                 let at_newline = right == '\n';
 
                 let is_word_start = (left_kind != right_kind) && !left.is_alphanumeric();
@@ -1391,7 +1412,10 @@ fn previous_subword_end(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
-    let scope = map.buffer_snapshot.language_scope_at(point.to_point(map));
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(point.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     let mut point = point.to_point(map);
 
     if point.column < map.buffer_snapshot.line_len(MultiBufferRow(point.row)) {
@@ -1403,8 +1427,8 @@ fn previous_subword_end(
             point,
             FindRange::MultiLine,
             |left, right| {
-                let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-                let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+                let left_kind = classifier.kind(left);
+                let right_kind = classifier.kind(right);
 
                 let is_subword_end =
                     left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
@@ -1435,7 +1459,7 @@ pub(crate) fn first_non_whitespace(
     from: DisplayPoint,
 ) -> DisplayPoint {
     let mut start_offset = start_of_line(map, display_lines, from).to_offset(map, Bias::Left);
-    let scope = map.buffer_snapshot.language_scope_at(from.to_point(map));
+    let classifier = map.buffer_snapshot.char_classifier_at(from.to_point(map));
     for (ch, offset) in map.buffer_chars_at(start_offset) {
         if ch == '\n' {
             return from;
@@ -1443,7 +1467,7 @@ pub(crate) fn first_non_whitespace(
 
         start_offset = offset;
 
-        if char_kind(&scope, ch) != CharKind::Whitespace {
+        if classifier.kind(ch) != CharKind::Whitespace {
             break;
         }
     }
@@ -1457,11 +1481,11 @@ pub(crate) fn last_non_whitespace(
     count: usize,
 ) -> DisplayPoint {
     let mut end_of_line = end_of_line(map, false, from, count).to_offset(map, Bias::Left);
-    let scope = map.buffer_snapshot.language_scope_at(from.to_point(map));
+    let classifier = map.buffer_snapshot.char_classifier_at(from.to_point(map));
 
     // NOTE: depending on clip_at_line_end we may already be one char back from the end.
     if let Some((ch, _)) = map.buffer_chars_at(end_of_line).next() {
-        if char_kind(&scope, ch) != CharKind::Whitespace {
+        if classifier.kind(ch) != CharKind::Whitespace {
             return end_of_line.to_display_point(map);
         }
     }
@@ -1471,7 +1495,7 @@ pub(crate) fn last_non_whitespace(
             break;
         }
         end_of_line = offset;
-        if char_kind(&scope, ch) != CharKind::Whitespace || ch == '\n' {
+        if classifier.kind(ch) != CharKind::Whitespace || ch == '\n' {
             break;
         }
     }
@@ -1787,14 +1811,6 @@ fn window_bottom(
     }
 }
 
-pub fn coerce_punctuation(kind: CharKind, treat_punctuation_as_word: bool) -> CharKind {
-    if treat_punctuation_as_word && kind == CharKind::Punctuation {
-        CharKind::Word
-    } else {
-        kind
-    }
-}
-
 #[cfg(test)]
 mod test {

crates/vim/src/normal/change.rs 🔗

@@ -10,7 +10,7 @@ use editor::{
     scroll::Autoscroll,
     Bias, DisplayPoint,
 };
-use language::{char_kind, CharKind, Selection};
+use language::Selection;
 use ui::ViewContext;
 
 impl Vim {
@@ -59,13 +59,11 @@ impl Vim {
                                 if let Motion::CurrentLine = motion {
                                     let mut start_offset =
                                         selection.start.to_offset(map, Bias::Left);
-                                    let scope = map
+                                    let classifier = map
                                         .buffer_snapshot
-                                        .language_scope_at(selection.start.to_point(&map));
+                                        .char_classifier_at(selection.start.to_point(&map));
                                     for (ch, offset) in map.buffer_chars_at(start_offset) {
-                                        if ch == '\n'
-                                            || char_kind(&scope, ch) != CharKind::Whitespace
-                                        {
+                                        if ch == '\n' || !classifier.is_whitespace(ch) {
                                             break;
                                         }
                                         start_offset = offset + ch.len_utf8();
@@ -130,13 +128,13 @@ fn expand_changed_word_selection(
     use_subword: bool,
 ) -> bool {
     let is_in_word = || {
-        let scope = map
+        let classifier = map
             .buffer_snapshot
-            .language_scope_at(selection.start.to_point(map));
+            .char_classifier_at(selection.start.to_point(map));
         let in_word = map
             .buffer_chars_at(selection.head().to_offset(map, Bias::Left))
             .next()
-            .map(|(c, _)| char_kind(&scope, c) != CharKind::Whitespace)
+            .map(|(c, _)| !classifier.is_whitespace(c))
             .unwrap_or_default();
         return in_word;
     };

crates/vim/src/object.rs 🔗

@@ -1,10 +1,6 @@
 use std::ops::Range;
 
-use crate::{
-    motion::{coerce_punctuation, right},
-    state::Mode,
-    Vim,
-};
+use crate::{motion::right, state::Mode, Vim};
 use editor::{
     display_map::{DisplaySnapshot, ToDisplayPoint},
     movement::{self, FindRange},
@@ -14,7 +10,7 @@ use editor::{
 use itertools::Itertools;
 
 use gpui::{actions, impl_actions, ViewContext};
-use language::{char_kind, BufferSnapshot, CharKind, Point, Selection};
+use language::{BufferSnapshot, CharKind, Point, Selection};
 use multi_buffer::MultiBufferRow;
 use serde::Deserialize;
 
@@ -248,22 +244,19 @@ fn in_word(
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
     // Use motion::right so that we consider the character under the cursor when looking for the start
-    let scope = map
+    let classifier = map
         .buffer_snapshot
-        .language_scope_at(relative_to.to_point(map));
+        .char_classifier_at(relative_to.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     let start = movement::find_preceding_boundary_display_point(
         map,
         right(map, relative_to, 1),
         movement::FindRange::SingleLine,
-        |left, right| {
-            coerce_punctuation(char_kind(&scope, left), ignore_punctuation)
-                != coerce_punctuation(char_kind(&scope, right), ignore_punctuation)
-        },
+        |left, right| classifier.kind(left) != classifier.kind(right),
     );
 
     let end = movement::find_boundary(map, relative_to, FindRange::SingleLine, |left, right| {
-        coerce_punctuation(char_kind(&scope, left), ignore_punctuation)
-            != coerce_punctuation(char_kind(&scope, right), ignore_punctuation)
+        classifier.kind(left) != classifier.kind(right)
     });
 
     Some(start..end)
@@ -362,11 +355,14 @@ fn around_word(
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
     let offset = relative_to.to_offset(map, Bias::Left);
-    let scope = map.buffer_snapshot.language_scope_at(offset);
+    let classifier = map
+        .buffer_snapshot
+        .char_classifier_at(offset)
+        .ignore_punctuation(ignore_punctuation);
     let in_word = map
         .buffer_chars_at(offset)
         .next()
-        .map(|(c, _)| char_kind(&scope, c) != CharKind::Whitespace)
+        .map(|(c, _)| !classifier.is_whitespace(c))
         .unwrap_or(false);
 
     if in_word {
@@ -390,24 +386,22 @@ fn around_next_word(
     relative_to: DisplayPoint,
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
-    let scope = map
+    let classifier = map
         .buffer_snapshot
-        .language_scope_at(relative_to.to_point(map));
+        .char_classifier_at(relative_to.to_point(map))
+        .ignore_punctuation(ignore_punctuation);
     // Get the start of the word
     let start = movement::find_preceding_boundary_display_point(
         map,
         right(map, relative_to, 1),
         FindRange::SingleLine,
-        |left, right| {
-            coerce_punctuation(char_kind(&scope, left), ignore_punctuation)
-                != coerce_punctuation(char_kind(&scope, right), ignore_punctuation)
-        },
+        |left, right| classifier.kind(left) != classifier.kind(right),
     );
 
     let mut word_found = false;
     let end = movement::find_boundary(map, relative_to, FindRange::MultiLine, |left, right| {
-        let left_kind = coerce_punctuation(char_kind(&scope, left), ignore_punctuation);
-        let right_kind = coerce_punctuation(char_kind(&scope, right), ignore_punctuation);
+        let left_kind = classifier.kind(left);
+        let right_kind = classifier.kind(right);
 
         let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n';

crates/vim/src/test.rs 🔗

@@ -305,6 +305,25 @@ async fn test_word_characters(cx: &mut gpui::TestAppContext) {
     )
 }
 
+#[gpui::test]
+async fn test_kebab_case(cx: &mut gpui::TestAppContext) {
+    let mut cx = VimTestContext::new_html(cx).await;
+    cx.set_state(
+        indoc! { r#"
+            <div><a class="bg-rˇed"></a></div>
+            "#},
+        Mode::Normal,
+    );
+    cx.simulate_keystrokes("v i w");
+    cx.assert_state(
+        indoc! { r#"
+        <div><a class="bg-«redˇ»"></a></div>
+        "#
+        },
+        Mode::Visual,
+    )
+}
+
 #[gpui::test]
 async fn test_join_lines(cx: &mut gpui::TestAppContext) {
     let mut cx = NeovimBackedTestContext::new(cx).await;