use std::{
    cell::LazyCell,
    collections::BTreeSet,
    io::{BufReader, Cursor, Read},
    ops::Range,
    path::{Path, PathBuf},
    pin::pin,
    sync::Arc,
};

use anyhow::Context;
use collections::HashSet;
use fs::Fs;
use futures::{SinkExt, StreamExt, select_biased, stream::FuturesOrdered};
use gpui::{App, AppContext, AsyncApp, Entity, Task};
use language::{Buffer, BufferSnapshot};
use parking_lot::Mutex;
use postage::oneshot;
use rpc::{AnyProtoClient, proto};
use smol::{
    channel::{Receiver, Sender, bounded, unbounded},
    future::FutureExt,
};

use text::BufferId;
use util::{ResultExt, maybe, paths::compare_rel_paths, rel_path::RelPath};
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings};

use crate::{
    Project, ProjectItem, ProjectPath, RemotelyCreatedModels,
    buffer_store::BufferStore,
    search::{SearchQuery, SearchResult},
    worktree_store::WorktreeStore,
};

pub struct Search {
    buffer_store: Entity<BufferStore>,
    worktree_store: Entity<WorktreeStore>,
    limit: usize,
    kind: SearchKind,
}

/// Represents search setup, before it is actually kicked off with [`Search::into_handle`].
enum SearchKind {
    /// Search for candidates by inspecting file contents on the file system, avoiding loading a buffer
    /// unless we know that the file contains a match.
    Local {
        fs: Arc<dyn Fs>,
        worktrees: Vec<Entity<Worktree>>,
    },
    /// Query the remote host for candidates. As of writing, the host runs a local search in
    /// "buffers with matches only" mode.
    Remote {
        client: AnyProtoClient,
        remote_id: u64,
        models: Arc<Mutex<RemotelyCreatedModels>>,
    },
    /// Run the search against a known set of candidates. Even when working with a remote host,
    /// this won't round-trip to the host.
    OpenBuffersOnly,
}

/// Represents the results of a project search, and allows one to obtain either the match positions
/// or just handles to the buffers that may contain matches. Grabbing the handles is cheaper than
/// obtaining full match positions, because in that case we look for at most one match per file.
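///
/// A hypothetical call site (a sketch; the variable names are assumed and `SearchQuery` setup is omitted):
/// ```ignore
/// let handle = Search::local(fs, buffer_store, worktree_store, limit, cx).into_handle(query, cx);
/// // Cheap: at most one match per file is located.
/// let (buffers, task) = handle.matching_buffers(cx);
/// // ...or, instead: full results with all match ranges.
/// // let (results, task) = handle.results(cx);
/// ```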
#[must_use]
pub struct SearchResultsHandle {
    results: Receiver<SearchResult>,
    matching_buffers: Receiver<Entity<Buffer>>,
    trigger_search: Box<dyn FnOnce(&mut App) -> Task<()> + Send + Sync>,
}

impl SearchResultsHandle {
    pub fn results(self, cx: &mut App) -> (Receiver<SearchResult>, Task<()>) {
        let task = (self.trigger_search)(cx);
        (self.results, task)
    }

    pub fn matching_buffers(self, cx: &mut App) -> (Receiver<Entity<Buffer>>, Task<()>) {
        let task = (self.trigger_search)(cx);
        (self.matching_buffers, task)
    }
}

#[derive(Clone)]
enum FindSearchCandidates {
    Local {
        fs: Arc<dyn Fs>,
        /// Start off with all paths in the project and filter them based on:
        /// - include filters,
        /// - exclude filters,
        /// - "only open buffers",
        /// - "scan ignored files".
        ///
        /// Put another way: filter out files that can't match (without looking at file contents).
        input_paths_rx: Receiver<InputPath>,
        /// After that, if the buffer is not yet loaded, figure out whether it contains at least one match
        /// based on its on-disk contents. This step is skipped for buffers we already have in memory.
        confirm_contents_will_match_tx: Sender<MatchingEntry>,
        confirm_contents_will_match_rx: Receiver<MatchingEntry>,
    },
    Remote,
    OpenBuffersOnly,
}
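
// A sketch of the local pipeline that `Search::into_handle` wires up (stage names match the
// channel variables and `handle_*` methods in this file):
//
//   provide_search_paths ──▸ input_paths_rx ──▸ handle_scan_path (path filters)
//       │                         not yet loaded │           │ already open in memory
//       │                                        ▾           │
//       │                  handle_find_first_match (on-disk "contains a match?" check)
//       │                                        │           │
//       ▾                                        ▾           ▾
//   maintain_sorted_search_results (per-file oneshots, in traversal order)
//       ──▸ open_buffers ──▸ grab_buffer_snapshots ──▸ handle_find_all_matches
//       ──▸ ordered SearchResults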

impl Search {
    pub fn local(
        fs: Arc<dyn Fs>,
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        cx: &mut App,
    ) -> Self {
        let worktrees = worktree_store.read(cx).visible_worktrees(cx).collect();
        Self {
            kind: SearchKind::Local { fs, worktrees },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn remote(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        client_state: (AnyProtoClient, u64, Arc<Mutex<RemotelyCreatedModels>>),
    ) -> Self {
        Self {
            kind: SearchKind::Remote {
                client: client_state.0,
                remote_id: client_state.1,
                models: client_state.2,
            },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn open_buffers_only(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
    ) -> Self {
        Self {
            kind: SearchKind::OpenBuffersOnly,
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) const MAX_SEARCH_RESULT_FILES: usize = 5_000;
    pub(crate) const MAX_SEARCH_RESULT_RANGES: usize = 10_000;
    /// Prepares a project search run. The resulting [`SearchResultsHandle`] has to be used to specify
    /// whether you're interested in matching buffers or in full search results.
    pub fn into_handle(mut self, query: SearchQuery, cx: &mut App) -> SearchResultsHandle {
        let mut open_buffers = HashSet::default();
        let mut unnamed_buffers = Vec::new();
        const MAX_CONCURRENT_BUFFER_OPENS: usize = 64;
        let buffers = self.buffer_store.read(cx);
        for handle in buffers.buffers() {
            let buffer = handle.read(cx);
            if !buffers.is_searchable(&buffer.remote_id()) {
                continue;
            } else if let Some(entry_id) = buffer.entry_id(cx) {
                open_buffers.insert(entry_id);
            } else {
                self.limit = self.limit.saturating_sub(1);
                unnamed_buffers.push(handle)
            };
        }
        let executor = cx.background_executor().clone();
        let (tx, rx) = unbounded();
        let (grab_buffer_snapshot_tx, grab_buffer_snapshot_rx) = unbounded();
        let matching_buffers = grab_buffer_snapshot_rx.clone();
        let trigger_search = Box::new(move |cx: &mut App| {
            cx.spawn(async move |cx| {
                for buffer in unnamed_buffers {
                    _ = grab_buffer_snapshot_tx.send(buffer).await;
                }

                let (find_all_matches_tx, find_all_matches_rx) =
                    bounded(MAX_CONCURRENT_BUFFER_OPENS);
                let query = Arc::new(query);
                let (candidate_searcher, tasks) = match self.kind {
                    SearchKind::OpenBuffersOnly => {
                        let Ok(open_buffers) = cx.update(|cx| self.all_loaded_buffers(&query, cx))
                        else {
                            return;
                        };
                        let fill_requests = cx
                            .background_spawn(async move {
                                for buffer in open_buffers {
                                    if grab_buffer_snapshot_tx.send(buffer).await.is_err() {
                                        return;
                                    }
                                }
                            })
                            .boxed_local();
                        (FindSearchCandidates::OpenBuffersOnly, vec![fill_requests])
                    }
                    SearchKind::Local {
                        fs,
                        ref mut worktrees,
                    } => {
                        let (get_buffer_for_full_scan_tx, get_buffer_for_full_scan_rx) =
                            unbounded();
                        let (confirm_contents_will_match_tx, confirm_contents_will_match_rx) =
                            bounded(64);
                        let (sorted_search_results_tx, sorted_search_results_rx) = unbounded();

                        let (input_paths_tx, input_paths_rx) = unbounded();
                        let tasks = vec![
                            cx.spawn(Self::provide_search_paths(
                                std::mem::take(worktrees),
                                query.clone(),
                                input_paths_tx,
                                sorted_search_results_tx,
                            ))
                            .boxed_local(),
                            Self::open_buffers(
                                &self.buffer_store,
                                get_buffer_for_full_scan_rx,
                                grab_buffer_snapshot_tx,
                                cx.clone(),
                            )
                            .boxed_local(),
                            cx.background_spawn(Self::maintain_sorted_search_results(
                                sorted_search_results_rx,
                                get_buffer_for_full_scan_tx,
                                self.limit,
                            ))
                            .boxed_local(),
                        ];
                        (
                            FindSearchCandidates::Local {
                                fs,
                                confirm_contents_will_match_tx,
                                confirm_contents_will_match_rx,
                                input_paths_rx,
                            },
                            tasks,
                        )
                    }
                    SearchKind::Remote {
                        client,
                        remote_id,
                        models,
                    } => {
                        let request = client.request(proto::FindSearchCandidates {
                            project_id: remote_id,
                            query: Some(query.to_proto()),
                            limit: self.limit as _,
                        });
                        let Ok(guard) = cx.update(|cx| {
                            Project::retain_remotely_created_models_impl(
                                &models,
                                &self.buffer_store,
                                &self.worktree_store,
                                cx,
                            )
                        }) else {
                            return;
                        };
                        let buffer_store = self.buffer_store.downgrade();
                        let issue_remote_buffers_request = cx
                            .spawn(async move |cx| {
                                let _ = maybe!(async move {
                                    let response = request.await?;
                                    for buffer_id in response.buffer_ids {
                                        let buffer_id = BufferId::new(buffer_id)?;
                                        let buffer = buffer_store
                                            .update(cx, |buffer_store, cx| {
                                                buffer_store.wait_for_remote_buffer(buffer_id, cx)
                                            })?
                                            .await?;
                                        let _ = grab_buffer_snapshot_tx.send(buffer).await;
                                    }

                                    drop(guard);
                                    anyhow::Ok(())
                                })
                                .await
                                .log_err();
                            })
                            .boxed_local();
                        (
                            FindSearchCandidates::Remote,
                            vec![issue_remote_buffers_request],
                        )
                    }
                };

                let should_find_all_matches = !tx.is_closed();

                let worker_pool = executor.scoped(|scope| {
                    let num_cpus = executor.num_cpus();

                    assert!(num_cpus > 0);
                    for _ in 0..num_cpus - 1 {
                        let worker = Worker {
                            query: &query,
                            open_buffers: &open_buffers,
                            candidates: candidate_searcher.clone(),
                            find_all_matches_rx: find_all_matches_rx.clone(),
                        };
                        scope.spawn(worker.run());
                    }

                    drop(find_all_matches_rx);
                    drop(candidate_searcher);
                });

                let (sorted_matches_tx, sorted_matches_rx) = unbounded();
                // The caller of `into_handle` decides whether they're interested in all matches
                // (matching files + all matching ranges within them) or just in the files.
                // Either way, the caller taps into the same streams that the guts of the project
                // search use, which means we must not grab values off of those streams unless it
                // is strictly needed for making progress in the search itself.
                //
                // Grabbing buffer snapshots is only necessary when we're looking for all matches.
                // If the caller decided that they're not interested in all matches, running that
                // task unconditionally would hinder the caller's ability to observe all matching
                // file paths.
                let buffer_snapshots = if should_find_all_matches {
                    Some(
                        Self::grab_buffer_snapshots(
                            grab_buffer_snapshot_rx,
                            find_all_matches_tx,
                            sorted_matches_tx,
                            cx.clone(),
                        )
                        .boxed_local(),
                    )
                } else {
                    drop(find_all_matches_tx);

                    None
                };
                let ensure_matches_are_reported_in_order = if should_find_all_matches {
                    Some(
                        Self::ensure_matched_ranges_are_reported_in_order(sorted_matches_rx, tx)
                            .boxed_local(),
                    )
                } else {
                    drop(tx);
                    None
                };

                futures::future::join_all(
                    [worker_pool.boxed_local()]
                        .into_iter()
                        .chain(buffer_snapshots)
                        .chain(ensure_matches_are_reported_in_order)
                        .chain(tasks),
                )
                .await;
            })
        });

        SearchResultsHandle {
            results: rx,
            matching_buffers,
            trigger_search,
        }
    }

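    /// Walks every file of every worktree (pre-fetching gitignored directories when the query asks
    /// for them) and feeds each file into the worker pipeline as an [`InputPath`], paired with a
    /// oneshot that the pipeline completes if the file turns out to contain a match. The oneshots
    /// are queued onto `results` in traversal order, which is what keeps reported matches sorted.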
    fn provide_search_paths(
        worktrees: Vec<Entity<Worktree>>,
        query: Arc<SearchQuery>,
        tx: Sender<InputPath>,
        results: Sender<oneshot::Receiver<ProjectPath>>,
    ) -> impl AsyncFnOnce(&mut AsyncApp) {
        async move |cx| {
            _ = maybe!(async move {
                let gitignored_tracker = PathInclusionMatcher::new(query.clone());
                for worktree in worktrees {
                    let (mut snapshot, worktree_settings) = worktree
                        .read_with(cx, |this, _| {
                            Some((this.snapshot(), this.as_local()?.settings()))
                        })?
                        .context("The worktree is not local")?;
                    if query.include_ignored() {
                        // Pre-fetch all of the ignored directories, as they're going to be searched.
                        let mut entries_to_refresh = vec![];

                        for entry in snapshot.entries(query.include_ignored(), 0) {
                            if gitignored_tracker.should_scan_gitignored_dir(
                                entry,
                                &snapshot,
                                &worktree_settings,
                            ) {
                                entries_to_refresh.push(entry.path.clone());
                            }
                        }
                        let barrier = worktree.update(cx, |this, _| {
                            let local = this.as_local_mut()?;
                            let barrier = entries_to_refresh
                                .into_iter()
                                .map(|path| local.add_path_prefix_to_scan(path).into_future())
                                .collect::<Vec<_>>();
                            Some(barrier)
                        })?;
                        if let Some(barriers) = barrier {
                            futures::future::join_all(barriers).await;
                        }
                        snapshot = worktree.read_with(cx, |this, _| this.snapshot())?;
                    }
                    cx.background_executor()
                        .scoped(|scope| {
                            scope.spawn(async {
                                for entry in snapshot.files(query.include_ignored(), 0) {
                                    let (should_scan_tx, should_scan_rx) = oneshot::channel();

                                    let Ok(_) = tx
                                        .send(InputPath {
                                            entry: entry.clone(),
                                            snapshot: snapshot.clone(),
                                            should_scan_tx,
                                        })
                                        .await
                                    else {
                                        return;
                                    };
                                    if results.send(should_scan_rx).await.is_err() {
                                        return;
                                    };
                                }
                            })
                        })
                        .await;
                }
                anyhow::Ok(())
            })
            .await;
        }
    }

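    /// Drains the per-file oneshots in traversal order, forwarding each path that was confirmed to
    /// contain a match on for a full scan, and stops once `limit` files have matched.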
    async fn maintain_sorted_search_results(
        rx: Receiver<oneshot::Receiver<ProjectPath>>,
        paths_for_full_scan: Sender<ProjectPath>,
        limit: usize,
    ) {
        let mut rx = pin!(rx);
        let mut matched = 0;
        while let Some(mut next_path_result) = rx.next().await {
            let Some(successful_path) = next_path_result.next().await else {
                // This file did not produce a match, hence skip it.
                continue;
            };
            if paths_for_full_scan.send(successful_path).await.is_err() {
                return;
            };
            matched += 1;
            if matched >= limit {
                break;
            }
        }
    }

    /// Background workers cannot open buffers by themselves, hence the main thread does it on their behalf.
    async fn open_buffers(
        buffer_store: &Entity<BufferStore>,
        rx: Receiver<ProjectPath>,
        find_all_matches_tx: Sender<Entity<Buffer>>,
        mut cx: AsyncApp,
    ) {
        let mut rx = pin!(rx.ready_chunks(64));
        _ = maybe!(async move {
            while let Some(requested_paths) = rx.next().await {
                let mut buffers = buffer_store.update(&mut cx, |this, cx| {
                    requested_paths
                        .into_iter()
                        .map(|path| this.open_buffer(path, cx))
                        .collect::<FuturesOrdered<_>>()
                })?;

                while let Some(buffer) = buffers.next().await {
                    if let Some(buffer) = buffer.log_err() {
                        find_all_matches_tx.send(buffer).await?;
                    }
                }
            }
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

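    /// For each buffer that made it through candidate selection, grabs a [`BufferSnapshot`] on the
    /// main thread and hands it to the background workers, paired with a oneshot over which the
    /// workers report all matches found in that snapshot.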
    async fn grab_buffer_snapshots(
        rx: Receiver<Entity<Buffer>>,
        find_all_matches_tx: Sender<(
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        )>,
        results: Sender<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        mut cx: AsyncApp,
    ) {
        _ = maybe!(async move {
            while let Ok(buffer) = rx.recv().await {
                let snapshot = buffer.read_with(&mut cx, |this, _| this.snapshot())?;
                let (tx, rx) = oneshot::channel();
                find_all_matches_tx.send((buffer, snapshot, tx)).await?;
                results.send(rx).await?;
            }
            debug_assert!(rx.is_empty());
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

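    /// Reads the per-buffer oneshots in the order they were issued and re-emits them as
    /// [`SearchResult`]s, enforcing [`Search::MAX_SEARCH_RESULT_FILES`] and
    /// [`Search::MAX_SEARCH_RESULT_RANGES`] along the way.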
    async fn ensure_matched_ranges_are_reported_in_order(
        rx: Receiver<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        tx: Sender<SearchResult>,
    ) {
        use postage::stream::Stream;
        _ = maybe!(async move {
            let mut matched_buffers = 0;
            let mut matches = 0;
            while let Ok(mut next_buffer_matches) = rx.recv().await {
                let Some((buffer, ranges)) = next_buffer_matches.recv().await else {
                    continue;
                };

                if matched_buffers > Search::MAX_SEARCH_RESULT_FILES
                    || matches > Search::MAX_SEARCH_RESULT_RANGES
                {
                    _ = tx.send(SearchResult::LimitReached).await;
                    break;
                }
                matched_buffers += 1;
                matches += ranges.len();

                tx.send(SearchResult::Buffer { buffer, ranges }).await?;
            }
            anyhow::Ok(())
        })
        .await;
    }

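    /// Returns the loaded buffers that pass the query's path and ignore filters, sorted with
    /// unnamed buffers first (by remote id) and named buffers after them (in path order).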
    fn all_loaded_buffers(&self, search_query: &SearchQuery, cx: &App) -> Vec<Entity<Buffer>> {
        let worktree_store = self.worktree_store.read(cx);
        let mut buffers = search_query
            .buffers()
            .into_iter()
            .flatten()
            .filter(|buffer| {
                let b = buffer.read(cx);
                if let Some(file) = b.file() {
                    if !search_query.match_path(file.path()) {
                        return false;
                    }
                    if !search_query.include_ignored()
                        && let Some(entry) = b
                            .entry_id(cx)
                            .and_then(|entry_id| worktree_store.entry_for_id(entry_id, cx))
                        && entry.is_ignored
                    {
                        return false;
                    }
                }
                true
            })
            .cloned()
            .collect::<Vec<_>>();
        buffers.sort_by(|a, b| {
            let a = a.read(cx);
            let b = b.read(cx);
            match (a.file(), b.file()) {
                (None, None) => a.remote_id().cmp(&b.remote_id()),
                (None, Some(_)) => std::cmp::Ordering::Less,
                (Some(_), None) => std::cmp::Ordering::Greater,
                (Some(a), Some(b)) => compare_rel_paths((a.path(), true), (b.path(), true)),
            }
        });

        buffers
    }
}

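/// A background worker that serves all three stages of the search pipeline: cheap path filtering,
/// "does this file contain at least one match?" checks against on-disk contents, and full scans of
/// buffer snapshots. [`Search::into_handle`] spawns one worker per CPU core but one.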
struct Worker<'search> {
    query: &'search SearchQuery,
    open_buffers: &'search HashSet<ProjectEntryId>,
    candidates: FindSearchCandidates,
    /// Ok, we're back in the background: run a full scan & find all matches in a given buffer snapshot.
    /// Then, when you're done, share them via the channel you were given.
    find_all_matches_rx: Receiver<(
        Entity<Buffer>,
        BufferSnapshot,
        oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
    )>,
}

impl Worker<'_> {
    async fn run(self) {
        let (
            input_paths_rx,
            confirm_contents_will_match_rx,
            mut confirm_contents_will_match_tx,
            fs,
        ) = match self.candidates {
            FindSearchCandidates::Local {
                fs,
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
            } => (
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
                Some(fs),
            ),
            FindSearchCandidates::Remote | FindSearchCandidates::OpenBuffersOnly => {
                // These variants never scan paths or disk contents, so hand the worker
                // dummy channels that never produce values.
                (unbounded().1, unbounded().1, unbounded().0, None)
            }
        };
        // Workers pull requests off shared queues, so completion order is not deterministic:
        // worker A may grab "find all matches in file/a" (which takes minutes) right before
        // worker B grabs "find all matches in file/b" (which takes seconds) and finishes first.
        // The per-file oneshots queued upstream are what put results back in order.
        let mut find_all_matches = pin!(self.find_all_matches_rx.fuse());
        let mut find_first_match = pin!(confirm_contents_will_match_rx.fuse());
        let mut scan_path = pin!(input_paths_rx.fuse());

        loop {
            let handler = RequestHandler {
                query: self.query,
                open_entries: &self.open_buffers,
                fs: fs.as_deref(),
                confirm_contents_will_match_tx: &confirm_contents_will_match_tx,
            };
            // Whenever we notice that some stage of the pipeline is closed, we don't want to close
            // subsequent stages straight away. Another worker might be about to produce a value
            // that will be pushed there, thus we'll replace the current worker's pipe with a dummy
            // one. That way, we'll only ever close the next-stage channel when ALL workers do so.
            select_biased! {
                find_all_matches = find_all_matches.next() => {
                    let Some(matches) = find_all_matches else {
                        continue;
                    };
                    handler.handle_find_all_matches(matches).await;
                },
                find_first_match = find_first_match.next() => {
                    if let Some(buffer_with_at_least_one_match) = find_first_match {
                        handler.handle_find_first_match(buffer_with_at_least_one_match).await;
                    }
                },
                scan_path = scan_path.next() => {
                    if let Some(path_to_scan) = scan_path {
                        handler.handle_scan_path(path_to_scan).await;
                    } else {
                        // If we're the last worker to notice that this is not producing values,
                        // close the upstream.
                        confirm_contents_will_match_tx = bounded(1).0;
                    }
                }
                complete => {
                    break
                },
            }
        }
    }
}

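/// A per-iteration view of a [`Worker`]'s state, with one `handle_*` method per pipeline stage.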
struct RequestHandler<'worker> {
    query: &'worker SearchQuery,
    fs: Option<&'worker dyn Fs>,
    open_entries: &'worker HashSet<ProjectEntryId>,
    confirm_contents_will_match_tx: &'worker Sender<MatchingEntry>,
}

impl RequestHandler<'_> {
    async fn handle_find_all_matches(
        &self,
        (buffer, snapshot, mut report_matches): (
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        ),
    ) {
        let ranges = self
            .query
            .search(&snapshot, None)
            .await
            .iter()
            .map(|range| snapshot.anchor_before(range.start)..snapshot.anchor_after(range.end))
            .collect::<Vec<_>>();

        _ = report_matches.send((buffer, ranges)).await;
    }

    async fn handle_find_first_match(&self, mut entry: MatchingEntry) {
        _ = (async move || -> anyhow::Result<()> {
            let abs_path = entry.worktree_root.join(entry.path.path.as_std_path());

            // Avoid blocking IO here: cancellation of the search is implemented via task drop, and a
            // synchronous `std::fs::File::open` / `Read::read` can delay task cancellation for a long time.
            let contents = self
                .fs
                .context("Trying to query filesystem in remote project search")?
                .load_bytes(&abs_path)
                .await?;

            // Before attempting to match the file contents, throw away files that contain invalid
            // UTF-8 sequences early on; that way we can still match text files without having to
            // look at "obviously binary" files. We only bail when `error_len()` is `Some` (a
            // definite invalid sequence); a file that merely ends mid-character is still handed
            // to the matcher.
            if let Err(error) = std::str::from_utf8(&contents) {
                if error.error_len().is_some() {
                    log::debug!(
                        "Invalid UTF-8 sequence in file {abs_path:?} at byte position {}",
                        error.valid_up_to()
                    );
                    return Ok(());
                }
            }

            let file: Box<dyn Read + Send + Sync> = Box::new(Cursor::new(contents));
            let file = BufReader::new(file);

            if self.query.detect(file).unwrap_or(false) {
                // Yes, we should scan the whole file.
                entry.should_scan_tx.send(entry.path).await?;
            }

            Ok(())
        })()
        .await
        .log_err();
    }

    async fn handle_scan_path(&self, req: InputPath) {
        _ = maybe!(async move {
            let InputPath {
                entry,
                snapshot,
                mut should_scan_tx,
            } = req;

            if entry.is_fifo || !entry.is_file() {
                return Ok(());
            }

            if self.query.filters_path() {
                let matched_path = if self.query.match_full_paths() {
                    let mut full_path = snapshot.root_name().to_owned();
                    full_path.push(&entry.path);
                    self.query.match_path(&full_path)
                } else {
                    self.query.match_path(&entry.path)
                };
                if !matched_path {
                    return Ok(());
                }
            }

            if self.open_entries.contains(&entry.id) {
                // The buffer is already in memory and that's the version we want to scan;
                // hence skip the dilly-dally and look for all matches straight away.
                should_scan_tx
                    .send(ProjectPath {
                        worktree_id: snapshot.id(),
                        path: entry.path.clone(),
                    })
                    .await?;
            } else {
                self.confirm_contents_will_match_tx
                    .send(MatchingEntry {
                        should_scan_tx,
                        worktree_root: snapshot.abs_path().clone(),
                        path: ProjectPath {
                            worktree_id: snapshot.id(),
                            path: entry.path.clone(),
                        },
                    })
                    .await?;
            }

            anyhow::Ok(())
        })
        .await;
    }
}

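/// A file to be considered for the search, together with the oneshot over which the pipeline
/// reports whether the file warrants a full scan.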
struct InputPath {
    entry: Entry,
    snapshot: Snapshot,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

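/// A file whose on-disk contents still have to be checked for at least one match before it is
/// worth opening as a buffer.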
struct MatchingEntry {
    worktree_root: Arc<Path>,
    path: ProjectPath,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

/// This struct encapsulates the logic to decide whether a given gitignored directory should be
/// scanned based on the include/exclude patterns of a search query (as include/exclude parameters
/// may match paths inside it). It is, in a way, an inverse glob match: given a glob pattern like
/// `src/**/` and a parent path like `src`, we need to decide whether the parent may contain glob hits.
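///
/// An illustrative example (assumed, not taken from a test): with an include glob of
/// `src/**/*.rs` the retained prefix is `src`, so a gitignored directory `src/data` (a descendant
/// of that prefix) should be scanned, while `assets` (which shares no prefix with it) should not.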
struct PathInclusionMatcher {
    included: BTreeSet<PathBuf>,
    query: Arc<SearchQuery>,
}

impl PathInclusionMatcher {
    fn new(query: Arc<SearchQuery>) -> Self {
        let mut included = BTreeSet::new();
        // To do an inverse glob match, we split each glob into its prefix and the glob part.
        // For example, `src/**/*.rs` becomes `src/` and `**/*.rs`; the glob part gets dropped.
        // Then, when checking whether a given directory should be scanned, we check whether it is
        // an ancestor or a descendant of any glob prefix.
        if query.filters_path() {
            included.extend(
                query
                    .files_to_include()
                    .sources()
                    .flat_map(|glob| Some(wax::Glob::new(glob).ok()?.partition().0)),
            );
        }
        Self { included, query }
    }

    fn should_scan_gitignored_dir(
        &self,
        entry: &Entry,
        snapshot: &Snapshot,
        worktree_settings: &WorktreeSettings,
    ) -> bool {
        if !entry.is_ignored || !entry.kind.is_unloaded() {
            return false;
        }
        if !self.query.include_ignored() {
            return false;
        }
        if worktree_settings.is_path_excluded(&entry.path) {
            return false;
        }
        if !self.query.filters_path() {
            return true;
        }

        let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path));
        let entry_path = &entry.path;
        // Check exclusions (pruning): if the current path is a child of an excluded path, we stop.
        let is_excluded = self.path_is_definitely_excluded(entry_path, snapshot);

        if is_excluded {
            return false;
        }

        // Check inclusions (traversal).
        if self.included.is_empty() {
            return true;
        }

        // We scan if the current path is a descendant of an include prefix
        // OR if the current path is an ancestor of an include prefix (we need to go deeper to find it).
        let is_included = self.included.iter().any(|prefix| {
            let (prefix_matches_entry, entry_matches_prefix) = if prefix.is_absolute() {
                (
                    prefix.starts_with(&**as_abs_path),
                    as_abs_path.starts_with(prefix),
                )
            } else {
                RelPath::new(prefix, snapshot.path_style()).map_or((false, false), |prefix| {
                    (
                        prefix.starts_with(entry_path),
                        entry_path.starts_with(&prefix),
                    )
                })
            };

            // Logic:
            // 1. entry_matches_prefix: we are inside the target zone (e.g. glob: src/, current: src/lib/). Keep scanning.
            // 2. prefix_matches_entry: we are above the target zone (e.g. glob: src/foo/, current: src/). Keep scanning to reach foo.
            prefix_matches_entry || entry_matches_prefix
        });

        is_included
    }

    fn path_is_definitely_excluded(&self, path: &RelPath, snapshot: &Snapshot) -> bool {
        if self.query.files_to_exclude().sources().next().is_some() {
            let mut path = if self.query.match_full_paths() {
                let mut full_path = snapshot.root_name().to_owned();
                full_path.push(path);
                full_path
            } else {
                path.to_owned()
            };
            loop {
                if self.query.files_to_exclude().is_match(&path) {
                    return true;
                } else if !path.pop() {
                    return false;
                }
            }
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use fs::FakeFs;
    use serde_json::json;
    use settings::Settings;
    use util::{
        path,
        paths::{PathMatcher, PathStyle},
        rel_path::RelPath,
    };
    use worktree::{Entry, EntryKind, WorktreeSettings};

    use crate::{
        Project, project_search::PathInclusionMatcher, project_tests::init_test,
        search::SearchQuery,
    };

    #[gpui::test]
    async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({
                ".gitignore": "src/data/\n",
                "src": {
                    "data": {
                        "main.csv": "field_1,field_2,field_3",
                    },
                    "lib": {
                        "main.txt": "Are you familiar with fields?",
                    },
                },
            }),
        )
        .await;

        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap());
        let (worktree_settings, worktree_snapshot) = worktree.update(cx, |worktree, cx| {
            let settings_location = worktree.settings_location(cx);
            (
                WorktreeSettings::get(Some(settings_location), cx).clone(),
                worktree.snapshot(),
            )
        });

        // Manually create a test entry for the gitignored directory since it won't
        // be loaded by the worktree.
        let entry = Entry {
            id: ProjectEntryId::from_proto(1),
            kind: EntryKind::UnloadedDir,
            path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()),
            inode: 0,
            mtime: None,
            canonical_path: None,
            is_ignored: true,
            is_hidden: false,
            is_always_included: false,
            is_external: false,
            is_private: false,
            size: 0,
            char_bag: Default::default(),
            is_fifo: false,
        };

        // 1. Test searching for `field`, including ignored files, without any inclusion
        // or exclusion filters.
        let include_ignored = true;
        let files_to_include = PathMatcher::default();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));

        // 2. Test searching for `field`, including ignored files, but with `files_to_include`
        // restricted to files under `src/lib`.
        let include_ignored = true;
        let files_to_include = PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(!path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));
    }
}