From 58fe19d55ea339270cd01d3398dc8f9e7f4c04bf Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Thu, 20 Nov 2025 18:44:55 +0100 Subject: [PATCH] project search: Skip loading of gitignored paths when their descendants will never match an inclusion/exclusion query (#42968) Co-authored-by: dino Related-to: #38799 Release Notes: - Improved project search performance with "Also search files ignored by configuration" combined with file inclusion/exclusion queries. --------- Co-authored-by: dino --- Cargo.lock | 45 +++++ Cargo.toml | 1 + crates/project/Cargo.toml | 1 + crates/project/src/project_search.rs | 258 +++++++++++++++++++++++++-- 4 files changed, 292 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0dc6a3c99a79ae3b3baad983a4427b710fb22080..a21c80c8b279206a791020231100abe6468ece6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3668,6 +3668,26 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_format" +version = "0.2.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -12752,6 +12772,15 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5da3b0203fd7ee5720aa0b5e790b591aa5d3f41c3ed2c34a3a393382198af2f7" +[[package]] +name = "pori" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a63d338dec139f56dacc692ca63ad35a6be6a797442479b55acd611d79e906" +dependencies = [ + "nom 7.1.3", +] + [[package]] name = "portable-atomic" version = "1.11.1" @@ -13068,6 +13097,7 @@ dependencies = [ "url", "util", "watch", + "wax", "which 6.0.3", "worktree", "zeroize", @@ -19492,6 +19522,21 @@ dependencies = [ "zlog", ] +[[package]] +name = "wax" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d12a78aa0bab22d2f26ed1a96df7ab58e8a93506a3e20adb47c51a93b4e1357" +dependencies = [ + "const_format", + "itertools 0.11.0", + "nom 7.1.3", + "pori", + "regex", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "wayland-backend" version = "0.3.11" diff --git a/Cargo.toml b/Cargo.toml index a4c9caccd9539ffde7d57d36dcfaf4cf162c7e92..fa85879698521a70686e9d96c6a108e8d1cbe28d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -719,6 +719,7 @@ wasmtime = { version = "29", default-features = false, features = [ "parallel-compilation", ] } wasmtime-wasi = "29" +wax = "0.6" which = "6.0.0" windows-core = "0.61" wit-component = "0.221" diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml index 9b67fde1e0bd31856bfa19d01818c1a5c6564218..a33efb9896959cc12fd828986c881f73e84e0ec7 100644 --- a/crates/project/Cargo.toml +++ b/crates/project/Cargo.toml @@ -86,6 +86,7 @@ toml.workspace = true url.workspace = true util.workspace = true watch.workspace = true +wax.workspace = true which.workspace = true worktree.workspace = true zeroize.workspace = true diff --git a/crates/project/src/project_search.rs b/crates/project/src/project_search.rs index e638240ebba2be8fbc35b04a0653ab32519497dd..fa8279e4506c6bbcd44856756b6d6521af809281 100644 --- a/crates/project/src/project_search.rs +++ b/crates/project/src/project_search.rs @@ -1,7 +1,9 @@ use std::{ + cell::LazyCell, + collections::BTreeSet, io::{BufRead, BufReader}, ops::Range, - path::Path, + path::{Path, PathBuf}, pin::pin, sync::Arc, }; @@ -22,7 +24,7 @@ use smol::{ use text::BufferId; use util::{ResultExt, maybe, paths::compare_rel_paths}; -use worktree::{Entry, ProjectEntryId, Snapshot, Worktree}; +use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings}; use crate::{ Project, ProjectItem, ProjectPath, RemotelyCreatedModels, @@ -178,7 +180,7 @@ impl Search { let (find_all_matches_tx, find_all_matches_rx) = bounded(MAX_CONCURRENT_BUFFER_OPENS); - + let query = Arc::new(query); let (candidate_searcher, tasks) = match self.kind { SearchKind::OpenBuffersOnly => { let Ok(open_buffers) = cx.update(|cx| self.all_loaded_buffers(&query, cx)) @@ -207,11 +209,10 @@ impl Search { let (sorted_search_results_tx, sorted_search_results_rx) = unbounded(); let (input_paths_tx, input_paths_rx) = unbounded(); - let tasks = vec![ cx.spawn(Self::provide_search_paths( std::mem::take(worktrees), - query.include_ignored(), + query.clone(), input_paths_tx, sorted_search_results_tx, )) @@ -366,26 +367,30 @@ impl Search { fn provide_search_paths( worktrees: Vec>, - include_ignored: bool, + query: Arc, tx: Sender, results: Sender>, ) -> impl AsyncFnOnce(&mut AsyncApp) { async move |cx| { _ = maybe!(async move { + let gitignored_tracker = PathInclusionMatcher::new(query.clone()); for worktree in worktrees { let (mut snapshot, worktree_settings) = worktree .read_with(cx, |this, _| { Some((this.snapshot(), this.as_local()?.settings())) })? .context("The worktree is not local")?; - if include_ignored { + if query.include_ignored() { // Pre-fetch all of the ignored directories as they're going to be searched. let mut entries_to_refresh = vec![]; - for entry in snapshot.entries(include_ignored, 0) { - if entry.is_ignored && entry.kind.is_unloaded() { - if !worktree_settings.is_path_excluded(&entry.path) { - entries_to_refresh.push(entry.path.clone()); - } + + for entry in snapshot.entries(query.include_ignored(), 0) { + if gitignored_tracker.should_scan_gitignored_dir( + entry, + &snapshot, + &worktree_settings, + ) { + entries_to_refresh.push(entry.path.clone()); } } let barrier = worktree.update(cx, |this, _| { @@ -404,8 +409,9 @@ impl Search { cx.background_executor() .scoped(|scope| { scope.spawn(async { - for entry in snapshot.files(include_ignored, 0) { + for entry in snapshot.files(query.include_ignored(), 0) { let (should_scan_tx, should_scan_rx) = oneshot::channel(); + let Ok(_) = tx .send(InputPath { entry: entry.clone(), @@ -788,3 +794,229 @@ struct MatchingEntry { path: ProjectPath, should_scan_tx: oneshot::Sender, } + +/// This struct encapsulates the logic to decide whether a given gitignored directory should be +/// scanned based on include/exclude patterns of a search query (as include/exclude parameters may match paths inside it). +/// It is kind-of doing an inverse of glob. Given a glob pattern like `src/**/` and a parent path like `src`, we need to decide whether the parent +/// may contain glob hits. +struct PathInclusionMatcher { + included: BTreeSet, + query: Arc, +} + +impl PathInclusionMatcher { + fn new(query: Arc) -> Self { + let mut included = BTreeSet::new(); + // To do an inverse glob match, we split each glob into it's prefix and the glob part. + // For example, `src/**/*.rs` becomes `src/` and `**/*.rs`. The glob part gets dropped. + // Then, when checking whether a given directory should be scanned, we check whether it is a non-empty substring of any glob prefix. + if query.filters_path() { + included.extend( + query + .files_to_include() + .sources() + .iter() + .flat_map(|glob| Some(wax::Glob::new(glob).ok()?.partition().0)), + ); + } + Self { included, query } + } + + fn should_scan_gitignored_dir( + &self, + entry: &Entry, + snapshot: &Snapshot, + worktree_settings: &WorktreeSettings, + ) -> bool { + if !entry.is_ignored || !entry.kind.is_unloaded() { + return false; + } + if !self.query.include_ignored() { + return false; + } + if worktree_settings.is_path_excluded(&entry.path) { + return false; + } + if !self.query.filters_path() { + return true; + } + + let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path)); + let entry_path = entry.path.as_std_path(); + // 3. Check Exclusions (Pruning) + // If the current path is a child of an excluded path, we stop. + let is_excluded = self.path_is_definitely_excluded(entry_path, snapshot); + + if is_excluded { + return false; + } + + // 4. Check Inclusions (Traversal) + if self.included.is_empty() { + return true; + } + + // We scan if the current path is a descendant of an include prefix + // OR if the current path is an ancestor of an include prefix (we need to go deeper to find it). + let is_included = self.included.iter().any(|prefix| { + let (prefix_matches_entry, entry_matches_prefix) = if prefix.is_absolute() { + ( + prefix.starts_with(&**as_abs_path), + as_abs_path.starts_with(prefix), + ) + } else { + ( + prefix.starts_with(entry_path), + entry_path.starts_with(prefix), + ) + }; + + // Logic: + // 1. entry_matches_prefix: We are inside the target zone (e.g. glob: src/, current: src/lib/). Keep scanning. + // 2. prefix_matches_entry: We are above the target zone (e.g. glob: src/foo/, current: src/). Keep scanning to reach foo. + prefix_matches_entry || entry_matches_prefix + }); + + is_included + } + fn path_is_definitely_excluded(&self, path: &Path, snapshot: &Snapshot) -> bool { + if !self.query.files_to_exclude().sources().is_empty() { + let mut path = if self.query.match_full_paths() { + let mut full_path = snapshot.root_name().as_std_path().to_owned(); + full_path.push(path); + full_path + } else { + path.to_owned() + }; + loop { + if self.query.files_to_exclude().is_match(&path) { + return true; + } else if !path.pop() { + return false; + } + } + } else { + false + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use fs::FakeFs; + use serde_json::json; + use settings::Settings; + use util::{ + path, + paths::{PathMatcher, PathStyle}, + rel_path::RelPath, + }; + use worktree::{Entry, EntryKind, WorktreeSettings}; + + use crate::{ + Project, project_search::PathInclusionMatcher, project_tests::init_test, + search::SearchQuery, + }; + + #[gpui::test] + async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) { + init_test(cx); + + let fs = FakeFs::new(cx.background_executor.clone()); + fs.insert_tree( + "/root", + json!({ + ".gitignore": "src/data/\n", + "src": { + "data": { + "main.csv": "field_1,field_2,field_3", + }, + "lib": { + "main.txt": "Are you familiar with fields?", + }, + }, + }), + ) + .await; + + let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; + let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap()); + let (worktree_settings, worktree_snapshot) = worktree.update(cx, |worktree, cx| { + let settings_location = worktree.settings_location(cx); + return ( + WorktreeSettings::get(Some(settings_location), cx).clone(), + worktree.snapshot(), + ); + }); + + // Manually create a test entry for the gitignored directory since it won't + // be loaded by the worktree + let entry = Entry { + id: ProjectEntryId::from_proto(1), + kind: EntryKind::UnloadedDir, + path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()), + inode: 0, + mtime: None, + canonical_path: None, + is_ignored: true, + is_hidden: false, + is_always_included: false, + is_external: false, + is_private: false, + size: 0, + char_bag: Default::default(), + is_fifo: false, + }; + + // 1. Test searching for `field`, including ignored files without any + // inclusion and exclusion filters. + let include_ignored = true; + let files_to_include = PathMatcher::default(); + let files_to_exclude = PathMatcher::default(); + let match_full_paths = false; + let search_query = SearchQuery::text( + "field", + false, + false, + include_ignored, + files_to_include, + files_to_exclude, + match_full_paths, + None, + ) + .unwrap(); + + let path_matcher = PathInclusionMatcher::new(Arc::new(search_query)); + assert!(path_matcher.should_scan_gitignored_dir( + &entry, + &worktree_snapshot, + &worktree_settings + )); + + // 2. Test searching for `field`, including ignored files but updating + // `files_to_include` to only include files under `src/lib`. + let include_ignored = true; + let files_to_include = PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap(); + let files_to_exclude = PathMatcher::default(); + let match_full_paths = false; + let search_query = SearchQuery::text( + "field", + false, + false, + include_ignored, + files_to_include, + files_to_exclude, + match_full_paths, + None, + ) + .unwrap(); + + let path_matcher = PathInclusionMatcher::new(Arc::new(search_query)); + assert!(!path_matcher.should_scan_gitignored_dir( + &entry, + &worktree_snapshot, + &worktree_settings + )); + } +}