WIP: Remove ripgrep and start matching query for paths ourselves

Antonio Scandurra created

Change summary

Cargo.lock                    | 119 ------------------------------------
crates/project/Cargo.toml     |   3 
crates/project/src/fs.rs      |  11 +++
crates/project/src/project.rs |  95 ++++++++++++++++++-----------
4 files changed, 72 insertions(+), 156 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -745,9 +745,7 @@ version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d"
 dependencies = [
- "lazy_static",
  "memchr",
- "regex-automata",
 ]
 
 [[package]]
@@ -778,12 +776,6 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
 
-[[package]]
-name = "bytecount"
-version = "0.6.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"
-
 [[package]]
 name = "bytemuck"
 version = "1.5.1"
@@ -1661,15 +1653,6 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
-[[package]]
-name = "encoding_rs_io"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83"
-dependencies = [
- "encoding_rs",
-]
-
 [[package]]
 name = "entities"
 version = "1.0.1"
@@ -2266,90 +2249,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "grep"
-version = "0.2.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51cb840c560b45a2ffd8abf00190382789d3f596663d5ffeb2e05931c20e8657"
-dependencies = [
- "grep-cli",
- "grep-matcher",
- "grep-printer",
- "grep-regex",
- "grep-searcher",
-]
-
-[[package]]
-name = "grep-cli"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dd110c34bb4460d0de5062413b773e385cbf8a85a63fc535590110a09e79e8a"
-dependencies = [
- "atty",
- "bstr",
- "globset",
- "lazy_static",
- "log",
- "regex",
- "same-file",
- "termcolor",
- "winapi-util",
-]
-
-[[package]]
-name = "grep-matcher"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d27563c33062cd33003b166ade2bb4fd82db1fd6a86db764dfdad132d46c1cc"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "grep-printer"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05c271a24daedf5675b61a275a1d0af06e03312ab7856d15433ae6cde044dc72"
-dependencies = [
- "base64 0.13.0",
- "bstr",
- "grep-matcher",
- "grep-searcher",
- "serde",
- "serde_json",
- "termcolor",
-]
-
-[[package]]
-name = "grep-regex"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "121553c9768c363839b92fc2d7cdbbad44a3b70e8d6e7b1b72b05c977527bd06"
-dependencies = [
- "aho-corasick",
- "bstr",
- "grep-matcher",
- "log",
- "regex",
- "regex-syntax",
- "thread_local",
-]
-
-[[package]]
-name = "grep-searcher"
-version = "0.1.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fbdbde90ba52adc240d2deef7b6ad1f99f53142d074b771fe9b7bede6c4c23d"
-dependencies = [
- "bstr",
- "bytecount",
- "encoding_rs",
- "encoding_rs_io",
- "grep-matcher",
- "log",
- "memmap2 0.3.1",
-]
-
 [[package]]
 name = "group"
 version = "0.10.0"
@@ -3013,15 +2912,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "memmap2"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00b6c2ebff6180198788f5db08d7ce3bc1d0b617176678831a7510825973e357"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "memoffset"
 version = "0.6.3"
@@ -3665,6 +3555,7 @@ dependencies = [
 name = "project"
 version = "0.1.0"
 dependencies = [
+ "aho-corasick",
  "anyhow",
  "async-trait",
  "client",
@@ -3674,7 +3565,6 @@ dependencies = [
  "futures",
  "fuzzy",
  "gpui",
- "grep",
  "ignore",
  "language",
  "lazy_static",
@@ -3684,6 +3574,7 @@ dependencies = [
  "parking_lot",
  "postage",
  "rand 0.8.3",
+ "regex",
  "rpc",
  "serde",
  "serde_json",
@@ -3978,12 +3869,6 @@ dependencies = [
  "regex-syntax",
 ]
 
-[[package]]
-name = "regex-automata"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
-
 [[package]]
 name = "regex-syntax"
 version = "0.6.25"

crates/project/Cargo.toml 🔗

@@ -26,10 +26,10 @@ lsp = { path = "../lsp" }
 rpc = { path = "../rpc" }
 sum_tree = { path = "../sum_tree" }
 util = { path = "../util" }
+aho-corasick = "0.7"
 anyhow = "1.0.38"
 async-trait = "0.1"
 futures = "0.3"
-grep = "0.2"
 ignore = "0.4"
 lazy_static = "1.4.0"
 libc = "0.2"
@@ -37,6 +37,7 @@ log = "0.4"
 parking_lot = "0.11.1"
 postage = { version = "0.4.1", features = ["futures-traits"] }
 rand = "0.8.3"
+regex = "1.5"
 serde = { version = "1", features = ["derive"] }
 serde_json = { version = "1.0.64", features = ["preserve_order"] }
 sha2 = "0.10"

crates/project/src/fs.rs 🔗

@@ -18,6 +18,7 @@ pub trait Fs: Send + Sync {
     async fn rename(&self, source: &Path, target: &Path, options: RenameOptions) -> Result<()>;
     async fn remove_dir(&self, path: &Path, options: RemoveOptions) -> Result<()>;
     async fn remove_file(&self, path: &Path, options: RemoveOptions) -> Result<()>;
+    async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>>;
     async fn load(&self, path: &Path) -> Result<String>;
     async fn save(&self, path: &Path, text: &Rope) -> Result<()>;
     async fn canonicalize(&self, path: &Path) -> Result<PathBuf>;
@@ -121,6 +122,10 @@ impl Fs for RealFs {
         }
     }
 
+    async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>> {
+        Ok(Box::new(std::fs::File::open(path)?))
+    }
+
     async fn load(&self, path: &Path) -> Result<String> {
         let mut file = smol::fs::File::open(path).await?;
         let mut text = String::new();
@@ -203,7 +208,6 @@ impl Fs for RealFs {
     fn is_fake(&self) -> bool {
         false
     }
-
     #[cfg(any(test, feature = "test-support"))]
     fn as_fake(&self) -> &FakeFs {
         panic!("called `RealFs::as_fake`")
@@ -535,6 +539,11 @@ impl Fs for FakeFs {
         Ok(())
     }
 
+    async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>> {
+        let text = self.load(path).await?;
+        Ok(Box::new(io::Cursor::new(text)))
+    }
+
     async fn load(&self, path: &Path) -> Result<String> {
         let path = normalize_path(path);
         self.executor.simulate_random_delay().await;

crates/project/src/project.rs 🔗

@@ -3,6 +3,7 @@ mod ignore;
 mod lsp_command;
 pub mod worktree;
 
+use aho_corasick::AhoCorasickBuilder;
 use anyhow::{anyhow, Context, Result};
 use client::{proto, Client, PeerId, TypedEnvelope, User, UserStore};
 use clock::ReplicaId;
@@ -13,7 +14,6 @@ use gpui::{
     AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, MutableAppContext, Task,
     UpgradeModelHandle, WeakModelHandle,
 };
-use grep::{matcher::Matcher, searcher::Searcher};
 use language::{
     range_from_lsp, Anchor, AnchorRangeExt, Bias, Buffer, CodeAction, CodeLabel, Completion,
     Diagnostic, DiagnosticEntry, File as _, Language, LanguageRegistry, Operation, PointUtf16,
@@ -152,6 +152,10 @@ pub struct Symbol {
     pub signature: [u8; 32],
 }
 
+pub enum SearchQuery {
+    Plain(String),
+}
+
 pub struct BufferRequestHandle(Rc<RefCell<ProjectBuffers>>);
 
 #[derive(Default)]
@@ -2043,16 +2047,13 @@ impl Project {
         )
     }
 
-    pub fn search<T>(
+    pub fn search(
         &self,
         query: SearchQuery,
         cx: &mut ModelContext<Self>,
-    ) -> Task<HashMap<ModelHandle<Buffer>, Vec<Range<Anchor>>>>
-    where
-        T: Matcher,
-    {
+    ) -> Task<HashMap<ModelHandle<Buffer>, Vec<Range<Anchor>>>> {
         if self.is_local() {
-            let (queue_tx, queue_rx) = smol::channel::bounded(1024);
+            let (paths_to_search_tx, paths_to_search_rx) = smol::channel::bounded(1024);
 
             // Submit all worktree paths to the queue.
             let snapshots = self
@@ -2063,55 +2064,75 @@ impl Project {
                 })
                 .collect::<Vec<_>>();
             cx.background()
-                .spawn({
-                    let queue_tx = queue_tx.clone();
-                    async move {
-                        for (snapshot_abs_path, snapshot) in snapshots {
-                            for file in snapshot.files(false, 0) {
-                                if queue_tx
-                                    .send((snapshot_abs_path.clone(), file.path.clone()))
-                                    .await
-                                    .is_err()
-                                {
-                                    return;
-                                }
+                .spawn(async move {
+                    for (snapshot_abs_path, snapshot) in snapshots {
+                        for file in snapshot.files(false, 0) {
+                            if paths_to_search_tx
+                                .send((snapshot_abs_path.clone(), file.path.clone()))
+                                .await
+                                .is_err()
+                            {
+                                return;
                             }
                         }
                     }
                 })
                 .detach();
 
-            let matcher = Arc::new(matcher);
+            let SearchQuery::Plain(query) = query;
+            let search = Arc::new(
+                AhoCorasickBuilder::new()
+                    .auto_configure(&[&query])
+                    // .ascii_case_insensitive(!case_sensitive)
+                    .build(&[&query]),
+            );
+            let (matching_paths_tx, matching_paths_rx) = smol::channel::bounded(1024);
             cx.background()
                 .spawn({
+                    let fs = self.fs.clone();
                     let background = cx.background().clone();
                     let workers = background.num_cpus();
-                    let searcher = searcher.clone();
-                    let matcher = matcher.clone();
+                    let search = search.clone();
                     async move {
+                        let fs = &fs;
+                        let search = &search;
+                        let matching_paths_tx = &matching_paths_tx;
                         background
                             .scoped(|scope| {
                                 for _ in 0..workers {
-                                    let mut paths_rx = queue_rx.clone();
+                                    let mut paths_to_search_rx = paths_to_search_rx.clone();
                                     scope.spawn(async move {
                                         let mut path = PathBuf::new();
                                         while let Some((snapshot_abs_path, file_path)) =
-                                            paths_rx.next().await
+                                            paths_to_search_rx.next().await
                                         {
+                                            if matching_paths_tx.is_closed() {
+                                                break;
+                                            }
+
                                             path.clear();
-                                            path.push(snapshot_abs_path);
-                                            path.push(file_path);
-                                            let mut matched = false;
-                                            // searcher.search_path(
-                                            //     matcher.as_ref(),
-                                            //     &path,
-                                            //     grep::searcher::sinks::Bytes(|_, _| {
-                                            //         matched = true;
-                                            //         Ok(false)
-                                            //     }),
-                                            // );
-
-                                            if matched {}
+                                            path.push(&snapshot_abs_path);
+                                            path.push(&file_path);
+                                            let matches = if let Some(file) =
+                                                fs.open_sync(&path).await.log_err()
+                                            {
+                                                search
+                                                    .stream_find_iter(file)
+                                                    .next()
+                                                    .map_or(false, |mat| mat.is_ok())
+                                            } else {
+                                                false
+                                            };
+
+                                            if matches {
+                                                if matching_paths_tx
+                                                    .send((snapshot_abs_path, file_path))
+                                                    .await
+                                                    .is_err()
+                                                {
+                                                    break;
+                                                }
+                                            }
                                         }
                                     });
                                 }