Fix hang in replace all when the query contains non-ASCII text and regex-special characters (#54422)

Conrad Irwin created

Closes #54331
Updates #50848

Release Notes:

- Fixed hang in replace all when the query contained non-ASCII text and
regex-special characters

Change summary

crates/project/src/search.rs        | 100 ++++++++++++++++++++++--------
crates/search/src/project_search.rs |  77 +++++++++++++++++++++++
2 files changed, 151 insertions(+), 26 deletions(-)

Detailed changes

crates/project/src/search.rs 🔗

@@ -105,12 +105,11 @@ impl SearchQuery {
             // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
             // Fallback to regex search as recommended by
             // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
-            return Self::regex(
-                regex::escape(&query),
+            return Self::escaped_regex(
+                query,
                 whole_word,
                 case_sensitive,
                 include_ignored,
-                false,
                 files_to_include,
                 files_to_exclude,
                 false,
@@ -145,7 +144,7 @@ impl SearchQuery {
     pub fn regex(
         query: impl ToString,
         whole_word: bool,
-        mut case_sensitive: bool,
+        case_sensitive: bool,
         include_ignored: bool,
         one_match_per_line: bool,
         files_to_include: PathMatcher,
@@ -153,47 +152,96 @@ impl SearchQuery {
         match_full_paths: bool,
         buffers: Option<Vec<Entity<Buffer>>>,
     ) -> Result<Self> {
-        let mut query = query.to_string();
-        let initial_query = Arc::from(query.as_str());
+        let query = query.to_string();
+        let inner = SearchInputs {
+            query: Arc::from(query.as_str()),
+            files_to_include,
+            files_to_exclude,
+            match_full_paths,
+            buffers,
+        };
+        Self::build_regex(
+            query,
+            whole_word,
+            case_sensitive,
+            include_ignored,
+            one_match_per_line,
+            inner,
+        )
+    }
 
-        if let Some((case_sensitive_from_pattern, new_query)) =
-            Self::case_sensitive_from_pattern(&query)
+    /// Create a regex query from a literal string, escaping any regex
+    /// metacharacters so that the resulting query matches the literal text.
+    ///
+    /// Unlike `regex`, the query stored on the resulting `SearchQuery` is the
+    /// original unescaped text, so `as_str` returns what the user typed.
+    pub fn escaped_regex(
+        query: impl ToString,
+        whole_word: bool,
+        case_sensitive: bool,
+        include_ignored: bool,
+        files_to_include: PathMatcher,
+        files_to_exclude: PathMatcher,
+        match_full_paths: bool,
+        buffers: Option<Vec<Entity<Buffer>>>,
+    ) -> Result<Self> {
+        let query = query.to_string();
+        let inner = SearchInputs {
+            query: Arc::from(query.as_str()),
+            files_to_include,
+            files_to_exclude,
+            match_full_paths,
+            buffers,
+        };
+        Self::build_regex(
+            regex::escape(&query),
+            whole_word,
+            case_sensitive,
+            include_ignored,
+            false,
+            inner,
+        )
+    }
+
+    fn build_regex(
+        mut pattern: String,
+        whole_word: bool,
+        mut case_sensitive: bool,
+        include_ignored: bool,
+        one_match_per_line: bool,
+        inner: SearchInputs,
+    ) -> Result<Self> {
+        if let Some((case_sensitive_from_pattern, new_pattern)) =
+            Self::case_sensitive_from_pattern(&pattern)
         {
             case_sensitive = case_sensitive_from_pattern;
-            query = new_query
+            pattern = new_pattern
         }
 
         if whole_word {
-            let mut word_query = String::new();
-            if let Some(first) = query.get(0..1)
+            let mut word_pattern = String::new();
+            if let Some(first) = pattern.get(0..1)
                 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
             {
-                word_query.push_str("\\b");
+                word_pattern.push_str("\\b");
             }
-            word_query.push_str(&query);
-            if let Some(last) = query.get(query.len() - 1..)
+            word_pattern.push_str(&pattern);
+            if let Some(last) = pattern.get(pattern.len() - 1..)
                 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
             {
-                word_query.push_str("\\b");
+                word_pattern.push_str("\\b");
             }
-            query = word_query
+            pattern = word_pattern
         }
 
-        let multiline = query.contains('\n') || query.contains("\\n");
+        let multiline = pattern.contains('\n') || pattern.contains("\\n");
         if multiline {
-            query.insert_str(0, "(?m)");
+            pattern.insert_str(0, "(?m)");
         }
 
-        let regex = RegexBuilder::new(&query)
+        let regex = RegexBuilder::new(&pattern)
             .case_insensitive(!case_sensitive)
             .build()?;
-        let inner = SearchInputs {
-            query: initial_query,
-            files_to_exclude,
-            files_to_include,
-            match_full_paths,
-            buffers,
-        };
         Ok(Self::Regex {
             regex,
             replacement: None,

crates/search/src/project_search.rs 🔗

@@ -5339,6 +5339,83 @@ pub mod tests {
         });
     }
 
+    #[gpui::test]
+    async fn test_replace_all_with_shared_heading_prefix_does_not_loop(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let search_text = "## この日に作成したノート";
+        let replacement_text = "## この日に関連するノート";
+
+        let file_a_before = format!("{search_text}\n- a\n\n{search_text}\n- b\n");
+        let file_b_before = format!("# Daily\n\n{search_text}\n- c\n");
+        let file_a_after = format!("{replacement_text}\n- a\n\n{replacement_text}\n- b\n");
+        let file_b_after = format!("# Daily\n\n{replacement_text}\n- c\n");
+
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree(
+            path!("/dir"),
+            json!({
+                "a.md": file_a_before,
+                "b.md": file_b_before,
+            }),
+        )
+        .await;
+        let project = Project::test(fs.clone(), [path!("/dir").as_ref()], cx).await;
+        let worktree_id = project.update(cx, |project, cx| {
+            project.worktrees(cx).next().unwrap().read(cx).id()
+        });
+        let window =
+            cx.add_window(|window, cx| MultiWorkspace::test_new(project.clone(), window, cx));
+        let workspace = window
+            .read_with(cx, |mw, _| mw.workspace().clone())
+            .unwrap();
+        let search = cx.new(|cx| ProjectSearch::new(project.clone(), cx));
+        let search_view = cx.add_window(|window, cx| {
+            ProjectSearchView::new(workspace.downgrade(), search.clone(), window, cx, None)
+        });
+
+        perform_search(search_view, search_text, cx);
+
+        search_view
+            .update(cx, |search_view, _window, cx| {
+                assert_eq!(search_view.entity.read(cx).match_ranges.len(), 3);
+            })
+            .unwrap();
+
+        search_view
+            .update(cx, |search_view, window, cx| {
+                search_view.replacement_editor.update(cx, |editor, cx| {
+                    editor.set_text(replacement_text, window, cx);
+                });
+                search_view.replace_all(&ReplaceAll, window, cx);
+            })
+            .unwrap();
+
+        cx.run_until_parked();
+
+        let buffer_a = project
+            .update(cx, |project, cx| {
+                project.open_buffer((worktree_id, rel_path("a.md")), cx)
+            })
+            .await
+            .unwrap();
+        let buffer_b = project
+            .update(cx, |project, cx| {
+                project.open_buffer((worktree_id, rel_path("b.md")), cx)
+            })
+            .await
+            .unwrap();
+
+        assert_eq!(
+            buffer_a.read_with(cx, |buffer, _| buffer.text()),
+            file_a_after
+        );
+        assert_eq!(
+            buffer_b.read_with(cx, |buffer, _| buffer.text()),
+            file_b_after
+        );
+    }
+
     #[gpui::test]
     async fn test_smartcase_overrides_explicit_case_sensitive(cx: &mut TestAppContext) {
         init_test(cx);