chat: fix emoji completions when word consists of emojis (#9107)

Bennet Bo Fenner created

https://github.com/zed-industries/zed/assets/53836821/f4b31c47-d306-43f5-b971-0969f64a48f9

Fix for #9096 @JosephTLyons 

Release Notes:
- Fixed emoji completions not showing up when the word consists only of emojis
(#9096)

Change summary

Cargo.lock                                        |  1 
crates/collab_ui/src/chat_panel/message_editor.rs | 17 ++++++++
crates/editor/src/editor.rs                       | 27 ++++++++++++-
crates/editor/src/editor_tests.rs                 | 31 ++++++++++++---
crates/util/Cargo.toml                            |  1 
crates/util/src/util.rs                           | 33 +++++++++++++++++
6 files changed, 101 insertions(+), 9 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -11083,6 +11083,7 @@ dependencies = [
  "log",
  "parking_lot 0.11.2",
  "rand 0.8.5",
+ "regex",
  "rust-embed",
  "serde",
  "serde_json",

crates/collab_ui/src/chat_panel/message_editor.rs 🔗

@@ -407,6 +407,23 @@ impl MessageEditor {
                     if next_char.is_none() || next_char.unwrap().is_whitespace() {
                         return Some(query.chars().rev().collect::<String>());
                     }
+
+                    // If the previous character is not a whitespace, we are in the middle of a word
+                    // and we only want to complete the shortcode if the word is made up of other emojis
+                    let mut containing_word = String::new();
+                    for ch in buffer
+                        .reversed_chars_at(end_offset - query.len() - 1)
+                        .take(100)
+                    {
+                        if ch.is_whitespace() {
+                            break;
+                        }
+                        containing_word.push(ch);
+                    }
+                    let containing_word = containing_word.chars().rev().collect::<String>();
+                    if util::word_consists_of_emojis(containing_word.as_str()) {
+                        return Some(query.chars().rev().collect::<String>());
+                    }
                     break;
                 }
                 if ch.is_whitespace() || !ch.is_ascii() {

crates/editor/src/editor.rs 🔗

@@ -2653,10 +2653,31 @@ impl Editor {
         let mut found_colon = false;
         for char in snapshot.reversed_chars_at(position).take(100) {
             // Found a possible emoji shortcode in the middle of the buffer
-            if found_colon && char.is_whitespace() {
-                chars.reverse();
-                return Some(chars.iter().collect());
+            if found_colon {
+                if char.is_whitespace() {
+                    chars.reverse();
+                    return Some(chars.iter().collect());
+                }
+                // If the previous character is not a whitespace, we are in the middle of a word
+                // and we only want to complete the shortcode if the word is made up of other emojis
+                let mut containing_word = String::new();
+                for ch in snapshot
+                    .reversed_chars_at(position)
+                    .skip(chars.len() + 1)
+                    .take(100)
+                {
+                    if ch.is_whitespace() {
+                        break;
+                    }
+                    containing_word.push(ch);
+                }
+                let containing_word = containing_word.chars().rev().collect::<String>();
+                if util::word_consists_of_emojis(containing_word.as_str()) {
+                    chars.reverse();
+                    return Some(chars.iter().collect());
+                }
             }
+
             if char.is_whitespace() || !char.is_ascii() {
                 return None;
             }

crates/editor/src/editor_tests.rs 🔗

@@ -5206,20 +5206,39 @@ async fn test_auto_replace_emoji_shortcode(cx: &mut gpui::TestAppContext) {
         editor.handle_input(":", cx);
         assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„".unindent());
 
-        editor.handle_input(":1:", cx);
-        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„:1:".unindent());
+        // Ensure shortcode gets replaced when it is part of a word that only consists of emojis
+        editor.handle_input(":wave", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„:wave".unindent());
+
+        editor.handle_input(":", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹".unindent());
+
+        editor.handle_input(":1", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1".unindent());
+
+        editor.handle_input(":", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1:".unindent());
 
         // Ensure shortcode does not get replaced when it is part of a word
-        editor.handle_input(" Test:wave:", cx);
-        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„:1: Test:wave:".unindent());
+        editor.handle_input(" Test:wave", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1: Test:wave".unindent());
+
+        editor.handle_input(":", cx);
+        assert_eq!(editor.text(cx), "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1: Test:wave:".unindent());
 
         editor.set_auto_replace_emoji_shortcode(false);
 
         // Ensure shortcode does not get replaced when auto replace is off
-        editor.handle_input(" :wave:", cx);
+        editor.handle_input(" :wave", cx);
+        assert_eq!(
+            editor.text(cx),
+            "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1: Test:wave: :wave".unindent()
+        );
+
+        editor.handle_input(":", cx);
         assert_eq!(
             editor.text(cx),
-            "Hello ๐Ÿ‘‹ ๐Ÿ˜„:1: Test:wave: :wave:".unindent()
+            "Hello ๐Ÿ‘‹ ๐Ÿ˜„๐Ÿ‘‹:1: Test:wave: :wave:".unindent()
         );
     });
 }

crates/util/Cargo.toml 🔗

@@ -28,6 +28,7 @@ lazy_static.workspace = true
 log.workspace = true
 parking_lot.workspace = true
 rand.workspace = true
+regex.workspace = true
 rust-embed.workspace = true
 serde.workspace = true
 serde_json.workspace = true

crates/util/src/util.rs 🔗

@@ -524,6 +524,22 @@ impl<'a> PartialOrd for NumericPrefixWithSuffix<'a> {
         Some(self.cmp(other))
     }
 }
+lazy_static! {
+    static ref EMOJI_REGEX: regex::Regex = regex::Regex::new("(\\p{Emoji}|\u{200D})").unwrap();
+}
+
+/// Returns true if the given string consists of emojis only.
+/// E.g. "👨‍👩‍👧‍👧👋" will return true, but "👋!" will return false.
+pub fn word_consists_of_emojis(s: &str) -> bool {
+    let mut prev_end = 0;
+    for capture in EMOJI_REGEX.find_iter(s) {
+        if capture.start() != prev_end {
+            return false;
+        }
+        prev_end = capture.end();
+    }
+    prev_end == s.len()
+}
 
 #[cfg(test)]
 mod tests {
@@ -583,4 +599,21 @@ mod tests {
             )
         }
     }
+
+    #[test]
+    fn test_word_consists_of_emojis() {
+        let words_to_test = vec![
+            ("๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง๐Ÿ‘‹๐Ÿฅ’", true),
+            ("๐Ÿ‘‹", true),
+            ("!๐Ÿ‘‹", false),
+            ("๐Ÿ‘‹!", false),
+            ("๐Ÿ‘‹ ", false),
+            (" ๐Ÿ‘‹", false),
+            ("Test", false),
+        ];
+
+        for (text, expected_result) in words_to_test {
+            assert_eq!(word_consists_of_emojis(text), expected_result);
+        }
+    }
 }