use std::{
    cell::LazyCell,
    collections::BTreeSet,
    io::{BufRead, BufReader},
    ops::Range,
    path::{Path, PathBuf},
    pin::pin,
    sync::Arc,
};

use anyhow::Context;
use collections::HashSet;
use fs::Fs;
use futures::{SinkExt, StreamExt, select_biased, stream::FuturesOrdered};
use gpui::{App, AppContext, AsyncApp, Entity, Task};
use language::{Buffer, BufferSnapshot};
use parking_lot::Mutex;
use postage::oneshot;
use rpc::{AnyProtoClient, proto};
use smol::{
    channel::{Receiver, Sender, bounded, unbounded},
    future::FutureExt,
};

use text::BufferId;
use util::{ResultExt, maybe, paths::compare_rel_paths, rel_path::RelPath};
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings};

use crate::{
    Project, ProjectItem, ProjectPath, RemotelyCreatedModels,
    buffer_store::BufferStore,
    search::{SearchQuery, SearchResult},
    worktree_store::WorktreeStore,
};

pub struct Search {
    buffer_store: Entity<BufferStore>,
    worktree_store: Entity<WorktreeStore>,
    limit: usize,
    kind: SearchKind,
}

/// Represents search setup before it is actually kicked off with [`Search::into_handle`].
enum SearchKind {
    /// Search for candidates by inspecting file contents on the file system, avoiding loading a buffer unless we know that the file contains a match.
    Local {
        fs: Arc<dyn Fs>,
        worktrees: Vec<Entity<Worktree>>,
    },
    /// Query the remote host for candidates. As of writing, the host runs a local search in "buffers with matches only" mode.
    Remote {
        client: AnyProtoClient,
        remote_id: u64,
        models: Arc<Mutex<RemotelyCreatedModels>>,
    },
    /// Run the search against a known set of candidates. Even when working with a remote host, this won't round-trip to the host.
    OpenBuffersOnly,
}

/// Represents the results of a project search and allows one to obtain either the match positions
/// or just the handles to buffers that may match the search. Grabbing the handles is cheaper than
/// obtaining full match positions, because in that case we look for at most one match in each file.
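///
/// # Example
///
/// A minimal consumption sketch (not a doctest; `search`, `query`, and `cx` are assumed to be in scope):
///
/// ```ignore
/// let handle = search.into_handle(query, cx);
/// // Pick ONE of the two modes: either the full results...
/// let results = handle.results(cx);
/// while let Ok(result) = results.rx.recv().await {
///     match result {
///         SearchResult::Buffer { buffer, ranges } => { /* use the matched ranges */ }
///         SearchResult::LimitReached => break,
///     }
/// }
/// // ...or, more cheaply, just the buffers that may contain a match:
/// // let buffers = handle.matching_buffers(cx);
/// ```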
#[must_use]
pub struct SearchResultsHandle {
    results: Receiver<SearchResult>,
    matching_buffers: Receiver<Entity<Buffer>>,
    trigger_search: Box<dyn FnOnce(&mut App) -> Task<()> + Send + Sync>,
}

pub struct SearchResults<T> {
    pub _task_handle: Task<()>,
    pub rx: Receiver<T>,
}

impl SearchResultsHandle {
    pub fn results(self, cx: &mut App) -> SearchResults<SearchResult> {
        SearchResults {
            _task_handle: (self.trigger_search)(cx),
            rx: self.results,
        }
    }
    pub fn matching_buffers(self, cx: &mut App) -> SearchResults<Entity<Buffer>> {
        SearchResults {
            _task_handle: (self.trigger_search)(cx),
            rx: self.matching_buffers,
        }
    }
}

#[derive(Clone)]
enum FindSearchCandidates {
    Local {
        fs: Arc<dyn Fs>,
        /// Start off with all paths in the project and filter them based on:
        /// - Include filters
        /// - Exclude filters
        /// - Only open buffers
        /// - Scan ignored files
        ///
        /// Put another way: filter out files that can't match (without looking at file contents).
        input_paths_rx: Receiver<InputPath>,
        /// After that, if the buffer is not yet loaded, we figure out whether it contains at least one match
        /// based on its contents on disk. This step is not performed for buffers we already have in memory.
        confirm_contents_will_match_tx: Sender<MatchingEntry>,
        confirm_contents_will_match_rx: Receiver<MatchingEntry>,
    },
    Remote,
    OpenBuffersOnly,
}
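
// A rough sketch of how a local search flows through the stages below (inferred from
// the channel wiring in `into_handle`):
//
// 1. `provide_search_paths` walks the worktrees and emits one `InputPath` per file,
//    plus a oneshot receiver recording the traversal order.
// 2. Workers filter paths (`handle_scan_path`) and, for files whose buffers are not
//    already in memory, confirm on disk that the file contains at least one match
//    (`handle_find_first_match`).
// 3. `maintain_sorted_search_results` forwards confirmed paths, in traversal order,
//    until `limit` is reached.
// 4. `open_buffers` loads those paths on the main thread; `grab_buffer_snapshots`
//    snapshots them so workers can scan them in full (`handle_find_all_matches`).
// 5. `ensure_matched_ranges_are_reported_in_order` emits `SearchResult`s in the same
//    order the paths were discovered.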

impl Search {
    pub fn local(
        fs: Arc<dyn Fs>,
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        cx: &mut App,
    ) -> Self {
        let worktrees = worktree_store.read(cx).visible_worktrees(cx).collect();
        Self {
            kind: SearchKind::Local { fs, worktrees },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn remote(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        client_state: (AnyProtoClient, u64, Arc<Mutex<RemotelyCreatedModels>>),
    ) -> Self {
        Self {
            kind: SearchKind::Remote {
                client: client_state.0,
                remote_id: client_state.1,
                models: client_state.2,
            },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn open_buffers_only(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
    ) -> Self {
        Self {
            kind: SearchKind::OpenBuffersOnly,
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) const MAX_SEARCH_RESULT_FILES: usize = 5_000;
    pub(crate) const MAX_SEARCH_RESULT_RANGES: usize = 10_000;

    /// Prepares a project search run. The resulting [`SearchResultsHandle`] has to be used to specify whether you're interested in matching buffers
    /// or full search results.
    pub fn into_handle(mut self, query: SearchQuery, cx: &mut App) -> SearchResultsHandle {
        let mut open_buffers = HashSet::default();
        let mut unnamed_buffers = Vec::new();
        const MAX_CONCURRENT_BUFFER_OPENS: usize = 64;
        let buffers = self.buffer_store.read(cx);
        for handle in buffers.buffers() {
            let buffer = handle.read(cx);
            if !buffers.is_searchable(&buffer.remote_id()) {
                continue;
            } else if let Some(entry_id) = buffer.entry_id(cx) {
                open_buffers.insert(entry_id);
            } else {
                self.limit = self.limit.saturating_sub(1);
                unnamed_buffers.push(handle)
            };
        }
        let executor = cx.background_executor().clone();
        let (tx, rx) = unbounded();
        let (grab_buffer_snapshot_tx, grab_buffer_snapshot_rx) = unbounded();
        let matching_buffers = grab_buffer_snapshot_rx.clone();
        let trigger_search = Box::new(move |cx: &mut App| {
            cx.spawn(async move |cx| {
                for buffer in unnamed_buffers {
                    _ = grab_buffer_snapshot_tx.send(buffer).await;
                }

                let (find_all_matches_tx, find_all_matches_rx) =
                    bounded(MAX_CONCURRENT_BUFFER_OPENS);
                let query = Arc::new(query);
                let (candidate_searcher, tasks) = match self.kind {
                    SearchKind::OpenBuffersOnly => {
                        let Ok(open_buffers) = cx.update(|cx| self.all_loaded_buffers(&query, cx))
                        else {
                            return;
                        };
                        let fill_requests = cx
                            .background_spawn(async move {
                                for buffer in open_buffers {
                                    if grab_buffer_snapshot_tx.send(buffer).await.is_err() {
                                        return;
                                    }
                                }
                            })
                            .boxed_local();
                        (FindSearchCandidates::OpenBuffersOnly, vec![fill_requests])
                    }
                    SearchKind::Local {
                        fs,
                        ref mut worktrees,
                    } => {
                        let (get_buffer_for_full_scan_tx, get_buffer_for_full_scan_rx) =
                            unbounded();
                        let (confirm_contents_will_match_tx, confirm_contents_will_match_rx) =
                            bounded(64);
                        let (sorted_search_results_tx, sorted_search_results_rx) = unbounded();

                        let (input_paths_tx, input_paths_rx) = unbounded();
                        let tasks = vec![
                            cx.spawn(Self::provide_search_paths(
                                std::mem::take(worktrees),
                                query.clone(),
                                input_paths_tx,
                                sorted_search_results_tx,
                            ))
                            .boxed_local(),
                            Self::open_buffers(
                                &self.buffer_store,
                                get_buffer_for_full_scan_rx,
                                grab_buffer_snapshot_tx,
                                cx.clone(),
                            )
                            .boxed_local(),
                            cx.background_spawn(Self::maintain_sorted_search_results(
                                sorted_search_results_rx,
                                get_buffer_for_full_scan_tx,
                                self.limit,
                            ))
                            .boxed_local(),
                        ];
                        (
                            FindSearchCandidates::Local {
                                fs,
                                confirm_contents_will_match_tx,
                                confirm_contents_will_match_rx,
                                input_paths_rx,
                            },
                            tasks,
                        )
                    }
                    SearchKind::Remote {
                        client,
                        remote_id,
                        models,
                    } => {
                        let request = client.request(proto::FindSearchCandidates {
                            project_id: remote_id,
                            query: Some(query.to_proto()),
                            limit: self.limit as _,
                        });
                        let Ok(guard) = cx.update(|cx| {
                            Project::retain_remotely_created_models_impl(
                                &models,
                                &self.buffer_store,
                                &self.worktree_store,
                                cx,
                            )
                        }) else {
                            return;
                        };
                        let buffer_store = self.buffer_store.downgrade();
                        let issue_remote_buffers_request = cx
                            .spawn(async move |cx| {
                                let _ = maybe!(async move {
                                    let response = request.await?;
                                    for buffer_id in response.buffer_ids {
                                        let buffer_id = BufferId::new(buffer_id)?;
                                        let buffer = buffer_store
                                            .update(cx, |buffer_store, cx| {
                                                buffer_store.wait_for_remote_buffer(buffer_id, cx)
                                            })?
                                            .await?;
                                        let _ = grab_buffer_snapshot_tx.send(buffer).await;
                                    }

                                    drop(guard);
                                    anyhow::Ok(())
                                })
                                .await
                                .log_err();
                            })
                            .boxed_local();
                        (
                            FindSearchCandidates::Remote,
                            vec![issue_remote_buffers_request],
                        )
                    }
                };

                let should_find_all_matches = !tx.is_closed();

                let worker_pool = executor.scoped(|scope| {
                    let num_cpus = executor.num_cpus();

                    assert!(num_cpus > 0);
                    for _ in 0..num_cpus - 1 {
                        let worker = Worker {
                            query: &query,
                            open_buffers: &open_buffers,
                            candidates: candidate_searcher.clone(),
                            find_all_matches_rx: find_all_matches_rx.clone(),
                        };
                        scope.spawn(worker.run());
                    }

                    drop(find_all_matches_rx);
                    drop(candidate_searcher);
                });

                let (sorted_matches_tx, sorted_matches_rx) = unbounded();
                // The caller of `into_handle` decides whether they're interested in all matches (files that matched + all matching ranges)
                // or just the files. *They consume the same stream as the guts of the project search do.*
                // This means that we cannot grab values off of that stream unless it's strictly needed for making progress in the project search.
                //
                // Grabbing buffer snapshots is only necessary when we're looking for all matches. If the caller decided that they're not interested
                // in all matches, running that task unconditionally would hinder the caller's ability to observe all matching file paths.
                let buffer_snapshots = if should_find_all_matches {
                    Some(
                        Self::grab_buffer_snapshots(
                            grab_buffer_snapshot_rx,
                            find_all_matches_tx,
                            sorted_matches_tx,
                            cx.clone(),
                        )
                        .boxed_local(),
                    )
                } else {
                    drop(find_all_matches_tx);

                    None
                };
                let ensure_matches_are_reported_in_order = if should_find_all_matches {
                    Some(
                        Self::ensure_matched_ranges_are_reported_in_order(sorted_matches_rx, tx)
                            .boxed_local(),
                    )
                } else {
                    drop(tx);
                    None
                };

                futures::future::join_all(
                    [worker_pool.boxed_local()]
                        .into_iter()
                        .chain(buffer_snapshots)
                        .chain(ensure_matches_are_reported_in_order)
                        .chain(tasks),
                )
                .await;
            })
        });

        SearchResultsHandle {
            results: rx,
            matching_buffers,
            trigger_search,
        }
    }

    fn provide_search_paths(
        worktrees: Vec<Entity<Worktree>>,
        query: Arc<SearchQuery>,
        tx: Sender<InputPath>,
        results: Sender<oneshot::Receiver<ProjectPath>>,
    ) -> impl AsyncFnOnce(&mut AsyncApp) {
        async move |cx| {
            _ = maybe!(async move {
                let gitignored_tracker = PathInclusionMatcher::new(query.clone());
                for worktree in worktrees {
                    let (mut snapshot, worktree_settings) = worktree
                        .read_with(cx, |this, _| {
                            Some((this.snapshot(), this.as_local()?.settings()))
                        })?
                        .context("The worktree is not local")?;
                    if query.include_ignored() {
                        // Pre-fetch all of the ignored directories as they're going to be searched.
                        let mut entries_to_refresh = vec![];

                        for entry in snapshot.entries(query.include_ignored(), 0) {
                            if gitignored_tracker.should_scan_gitignored_dir(
                                entry,
                                &snapshot,
                                &worktree_settings,
                            ) {
                                entries_to_refresh.push(entry.path.clone());
                            }
                        }
                        let barrier = worktree.update(cx, |this, _| {
                            let local = this.as_local_mut()?;
                            let barrier = entries_to_refresh
                                .into_iter()
                                .map(|path| local.add_path_prefix_to_scan(path).into_future())
                                .collect::<Vec<_>>();
                            Some(barrier)
                        })?;
                        if let Some(barriers) = barrier {
                            futures::future::join_all(barriers).await;
                        }
                        snapshot = worktree.read_with(cx, |this, _| this.snapshot())?;
                    }
                    cx.background_executor()
                        .scoped(|scope| {
                            scope.spawn(async {
                                for entry in snapshot.files(query.include_ignored(), 0) {
                                    let (should_scan_tx, should_scan_rx) = oneshot::channel();

                                    let Ok(_) = tx
                                        .send(InputPath {
                                            entry: entry.clone(),
                                            snapshot: snapshot.clone(),
                                            should_scan_tx,
                                        })
                                        .await
                                    else {
                                        return;
                                    };
                                    if results.send(should_scan_rx).await.is_err() {
                                        return;
                                    };
                                }
                            })
                        })
                        .await;
                }
                anyhow::Ok(())
            })
            .await;
        }
    }

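    /// Awaits the per-path oneshot receivers in the order the paths were discovered,
    /// forwarding each path that confirmed a match on to the full-scan stage, until
    /// `limit` paths have matched. This sequential draining is what keeps the reported
    /// results in traversal order even though workers complete out of order.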
    async fn maintain_sorted_search_results(
        rx: Receiver<oneshot::Receiver<ProjectPath>>,
        paths_for_full_scan: Sender<ProjectPath>,
        limit: usize,
    ) {
        let mut rx = pin!(rx);
        let mut matched = 0;
        while let Some(mut next_path_result) = rx.next().await {
            let Some(successful_path) = next_path_result.next().await else {
                // This file did not produce a match, hence skip it.
                continue;
            };
            if paths_for_full_scan.send(successful_path).await.is_err() {
                return;
            };
            matched += 1;
            if matched >= limit {
                break;
            }
        }
    }

    /// Background workers cannot open buffers by themselves, hence the main thread does it on their behalf.
    async fn open_buffers(
        buffer_store: &Entity<BufferStore>,
        rx: Receiver<ProjectPath>,
        find_all_matches_tx: Sender<Entity<Buffer>>,
        mut cx: AsyncApp,
    ) {
        let mut rx = pin!(rx.ready_chunks(64));
        _ = maybe!(async move {
            while let Some(requested_paths) = rx.next().await {
                let mut buffers = buffer_store.update(&mut cx, |this, cx| {
                    requested_paths
                        .into_iter()
                        .map(|path| this.open_buffer(path, cx))
                        .collect::<FuturesOrdered<_>>()
                })?;

                while let Some(buffer) = buffers.next().await {
                    if let Some(buffer) = buffer.log_err() {
                        find_all_matches_tx.send(buffer).await?;
                    }
                }
            }
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

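    /// Reads each buffer's snapshot on the main thread, then hands the snapshot to the
    /// workers together with a oneshot sender through which they report that buffer's
    /// matches; the corresponding receiver is queued up for in-order reporting.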
    async fn grab_buffer_snapshots(
        rx: Receiver<Entity<Buffer>>,
        find_all_matches_tx: Sender<(
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        )>,
        results: Sender<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        mut cx: AsyncApp,
    ) {
        _ = maybe!(async move {
            while let Ok(buffer) = rx.recv().await {
                let snapshot = buffer.read_with(&mut cx, |this, _| this.snapshot())?;
                let (tx, rx) = oneshot::channel();
                find_all_matches_tx.send((buffer, snapshot, tx)).await?;
                results.send(rx).await?;
            }
            debug_assert!(rx.is_empty());
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

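    /// Awaits the per-buffer oneshot receivers one by one, in the order the buffers were
    /// snapshotted, so `SearchResult`s come out in a deterministic order and the result
    /// limits are enforced in a single place.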
    async fn ensure_matched_ranges_are_reported_in_order(
        rx: Receiver<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        tx: Sender<SearchResult>,
    ) {
        use postage::stream::Stream;
        _ = maybe!(async move {
            let mut matched_buffers = 0;
            let mut matches = 0;
            while let Ok(mut next_buffer_matches) = rx.recv().await {
                let Some((buffer, ranges)) = next_buffer_matches.recv().await else {
                    continue;
                };

                if matched_buffers > Search::MAX_SEARCH_RESULT_FILES
                    || matches > Search::MAX_SEARCH_RESULT_RANGES
                {
                    _ = tx.send(SearchResult::LimitReached).await;
                    break;
                }
                matched_buffers += 1;
                matches += ranges.len();

                tx.send(SearchResult::Buffer { buffer, ranges }).await?;
            }
            anyhow::Ok(())
        })
        .await;
    }

    fn all_loaded_buffers(&self, search_query: &SearchQuery, cx: &App) -> Vec<Entity<Buffer>> {
        let worktree_store = self.worktree_store.read(cx);
        let mut buffers = search_query
            .buffers()
            .into_iter()
            .flatten()
            .filter(|buffer| {
                let b = buffer.read(cx);
                if let Some(file) = b.file() {
                    if !search_query.match_path(file.path()) {
                        return false;
                    }
                    if !search_query.include_ignored()
                        && let Some(entry) = b
                            .entry_id(cx)
                            .and_then(|entry_id| worktree_store.entry_for_id(entry_id, cx))
                        && entry.is_ignored
                    {
                        return false;
                    }
                }
                true
            })
            .cloned()
            .collect::<Vec<_>>();
        buffers.sort_by(|a, b| {
            let a = a.read(cx);
            let b = b.read(cx);
            match (a.file(), b.file()) {
                (None, None) => a.remote_id().cmp(&b.remote_id()),
                (None, Some(_)) => std::cmp::Ordering::Less,
                (Some(_), None) => std::cmp::Ordering::Greater,
                (Some(a), Some(b)) => compare_rel_paths((a.path(), true), (b.path(), true)),
            }
        });

        buffers
    }
}

struct Worker<'search> {
    query: &'search SearchQuery,
    open_buffers: &'search HashSet<ProjectEntryId>,
    candidates: FindSearchCandidates,
    /// Ok, we're back in the background: run a full scan & find all matches in a given buffer snapshot.
    /// Then, when you're done, share them via the channel you were given.
    find_all_matches_rx: Receiver<(
        Entity<Buffer>,
        BufferSnapshot,
        oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
    )>,
}

impl Worker<'_> {
    async fn run(self) {
        let (
            input_paths_rx,
            confirm_contents_will_match_rx,
            mut confirm_contents_will_match_tx,
            fs,
        ) = match self.candidates {
            FindSearchCandidates::Local {
                fs,
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
            } => (
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
                Some(fs),
            ),
            FindSearchCandidates::Remote | FindSearchCandidates::OpenBuffersOnly => {
                (unbounded().1, unbounded().1, unbounded().0, None)
            }
        };
        // Workers may finish out of order: WorkerA might grab a request for "find all matches
        // in file/a" that takes 5 minutes, right before WorkerB grabs one for "file/b" that
        // takes 5 seconds. The oneshot channels threaded through the pipeline are what lets
        // the results be reported in the original order regardless.
        let mut find_all_matches = pin!(self.find_all_matches_rx.fuse());
        let mut find_first_match = pin!(confirm_contents_will_match_rx.fuse());
        let mut scan_path = pin!(input_paths_rx.fuse());

        loop {
            let handler = RequestHandler {
                query: self.query,
                open_entries: &self.open_buffers,
                fs: fs.as_deref(),
                confirm_contents_will_match_tx: &confirm_contents_will_match_tx,
            };
            // Whenever we notice that some step of the pipeline is closed, we don't want to close subsequent
            // steps straight away. Another worker might be about to produce a value that will
            // be pushed there, thus we'll replace the current worker's pipe with a dummy one.
            // That way, we'll only ever close a next-stage channel when ALL workers do so.
            select_biased! {
                find_all_matches = find_all_matches.next() => {
                    let Some(matches) = find_all_matches else {
                        continue;
                    };
                    handler.handle_find_all_matches(matches).await;
                },
                find_first_match = find_first_match.next() => {
                    if let Some(buffer_with_at_least_one_match) = find_first_match {
                        handler.handle_find_first_match(buffer_with_at_least_one_match).await;
                    }
                },
                scan_path = scan_path.next() => {
                    if let Some(path_to_scan) = scan_path {
                        handler.handle_scan_path(path_to_scan).await;
                    } else {
                        // If we're the last worker to notice that this stage is done, swapping our
                        // sender for a dummy drops the real one and closes the next stage's channel.
                        confirm_contents_will_match_tx = bounded(1).0;
                    }
                }
                complete => {
                    break
                },
            }
        }
    }
}

struct RequestHandler<'worker> {
    query: &'worker SearchQuery,
    fs: Option<&'worker dyn Fs>,
    open_entries: &'worker HashSet<ProjectEntryId>,
    confirm_contents_will_match_tx: &'worker Sender<MatchingEntry>,
}

impl RequestHandler<'_> {
    async fn handle_find_all_matches(
        &self,
        (buffer, snapshot, mut report_matches): (
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        ),
    ) {
        let ranges = self
            .query
            .search(&snapshot, None)
            .await
            .iter()
            .map(|range| snapshot.anchor_before(range.start)..snapshot.anchor_after(range.end))
            .collect::<Vec<_>>();

        _ = report_matches.send((buffer, ranges)).await;
    }

    async fn handle_find_first_match(&self, mut entry: MatchingEntry) {
        _ = maybe!(async move {
            let abs_path = entry.worktree_root.join(entry.path.path.as_std_path());
            let Some(file) = self
                .fs
                .context("Trying to query filesystem in remote project search")?
                .open_sync(&abs_path)
                .await
                .log_err()
            else {
                return anyhow::Ok(());
            };

            let mut file = BufReader::new(file);
            let file_start = file.fill_buf()?;

            if let Err(error) = std::str::from_utf8(file_start)
                && error.error_len().is_some()
            {
                // Before attempting to match the file content, throw away files that contain invalid UTF-8 sequences early on;
                // that way we can still match files in a streaming fashion without having to look at "obviously binary" files.
                // (An `error_len` of `None` means the buffer merely ends mid-sequence, which may still turn out to be valid UTF-8.)
                log::debug!(
                    "Invalid UTF-8 sequence in file {abs_path:?} at byte position {}",
                    error.valid_up_to()
                );
                return Ok(());
            }

            if self.query.detect(file).unwrap_or(false) {
                // Yes, we should scan the whole file.
                entry.should_scan_tx.send(entry.path).await?;
            }
            Ok(())
        })
        .await;
    }

    async fn handle_scan_path(&self, req: InputPath) {
        _ = maybe!(async move {
            let InputPath {
                entry,
                snapshot,
                mut should_scan_tx,
            } = req;

            if entry.is_fifo || !entry.is_file() {
                return Ok(());
            }

            if self.query.filters_path() {
                let matched_path = if self.query.match_full_paths() {
                    let mut full_path = snapshot.root_name().to_owned();
                    full_path.push(&entry.path);
                    self.query.match_path(&full_path)
                } else {
                    self.query.match_path(&entry.path)
                };
                if !matched_path {
                    return Ok(());
                }
            }

            if self.open_entries.contains(&entry.id) {
                // The buffer is already in memory and that's the version we want to scan;
                // hence skip the dilly-dally and look for all matches straight away.
                should_scan_tx
                    .send(ProjectPath {
                        worktree_id: snapshot.id(),
                        path: entry.path.clone(),
                    })
                    .await?;
            } else {
                self.confirm_contents_will_match_tx
                    .send(MatchingEntry {
                        should_scan_tx,
                        worktree_root: snapshot.abs_path().clone(),
                        path: ProjectPath {
                            worktree_id: snapshot.id(),
                            path: entry.path.clone(),
                        },
                    })
                    .await?;
            }

            anyhow::Ok(())
        })
        .await;
    }
}

struct InputPath {
    entry: Entry,
    snapshot: Snapshot,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

struct MatchingEntry {
    worktree_root: Arc<Path>,
    path: ProjectPath,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

/// This struct encapsulates the logic to decide whether a given gitignored directory should be
/// scanned based on the include/exclude patterns of a search query (as include/exclude parameters may match paths inside it).
/// It is, in a sense, doing the inverse of a glob match: given a glob pattern like `src/**/` and a parent path like `src`,
/// we need to decide whether the parent may contain glob hits.
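///
/// # Example
///
/// An illustrative sketch (not a doctest) of the prefix-based check, assuming an include
/// glob of `src/lib/**/*.rs`:
///
/// ```ignore
/// // `wax::Glob::partition` splits a glob into its literal prefix and the glob part:
/// let (prefix, _glob) = wax::Glob::new("src/lib/**/*.rs").unwrap().partition();
/// assert_eq!(prefix, std::path::PathBuf::from("src/lib"));
/// // "src" is an ancestor of the prefix: keep descending to reach "src/lib".
/// // "src/lib/foo" is a descendant of the prefix: it may contain matches, keep scanning.
/// // "tests" is neither: it can be skipped entirely.
/// ```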
struct PathInclusionMatcher {
    included: BTreeSet<PathBuf>,
    query: Arc<SearchQuery>,
}

impl PathInclusionMatcher {
    fn new(query: Arc<SearchQuery>) -> Self {
        let mut included = BTreeSet::new();
        // To do an inverse glob match, we split each glob into its literal prefix and the glob part.
        // For example, `src/**/*.rs` becomes `src/` and `**/*.rs`; the glob part gets dropped.
        // Then, when checking whether a given directory should be scanned, we check whether it is
        // an ancestor or descendant of any glob prefix.
        if query.filters_path() {
            included.extend(
                query
                    .files_to_include()
                    .sources()
                    .flat_map(|glob| Some(wax::Glob::new(glob).ok()?.partition().0)),
            );
        }
        Self { included, query }
    }

    fn should_scan_gitignored_dir(
        &self,
        entry: &Entry,
        snapshot: &Snapshot,
        worktree_settings: &WorktreeSettings,
    ) -> bool {
        if !entry.is_ignored || !entry.kind.is_unloaded() {
            return false;
        }
        if !self.query.include_ignored() {
            return false;
        }
        if worktree_settings.is_path_excluded(&entry.path) {
            return false;
        }
        if !self.query.filters_path() {
            return true;
        }

        let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path));
        let entry_path = &entry.path;
        // Check exclusions (pruning):
        // if the current path is a child of an excluded path, we stop.
        let is_excluded = self.path_is_definitely_excluded(entry_path, snapshot);

        if is_excluded {
            return false;
        }

        // Check inclusions (traversal).
        if self.included.is_empty() {
            return true;
        }

        // We scan if the current path is a descendant of an include prefix
        // OR if the current path is an ancestor of an include prefix (we need to go deeper to find it).
        let is_included = self.included.iter().any(|prefix| {
            let (prefix_matches_entry, entry_matches_prefix) = if prefix.is_absolute() {
                (
                    prefix.starts_with(&**as_abs_path),
                    as_abs_path.starts_with(prefix),
                )
            } else {
                RelPath::new(prefix, snapshot.path_style()).map_or((false, false), |prefix| {
                    (
                        prefix.starts_with(entry_path),
                        entry_path.starts_with(&prefix),
                    )
                })
            };

            // Logic:
            // 1. entry_matches_prefix: we are inside the target zone (e.g. glob: src/, current: src/lib/). Keep scanning.
            // 2. prefix_matches_entry: we are above the target zone (e.g. glob: src/foo/, current: src/). Keep scanning to reach foo.
            prefix_matches_entry || entry_matches_prefix
        });

        is_included
    }

    fn path_is_definitely_excluded(&self, path: &RelPath, snapshot: &Snapshot) -> bool {
        if self.query.files_to_exclude().sources().next().is_some() {
            let mut path = if self.query.match_full_paths() {
                let mut full_path = snapshot.root_name().to_owned();
                full_path.push(path);
                full_path
            } else {
                path.to_owned()
            };
            loop {
                if self.query.files_to_exclude().is_match(&path) {
                    return true;
                } else if !path.pop() {
                    return false;
                }
            }
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use fs::FakeFs;
    use serde_json::json;
    use settings::Settings;
    use util::{
        path,
        paths::{PathMatcher, PathStyle},
        rel_path::RelPath,
    };
    use worktree::{Entry, EntryKind, WorktreeSettings};

    use crate::{
        Project, project_search::PathInclusionMatcher, project_tests::init_test,
        search::SearchQuery,
    };

    #[gpui::test]
    async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({
                ".gitignore": "src/data/\n",
                "src": {
                    "data": {
                        "main.csv": "field_1,field_2,field_3",
                    },
                    "lib": {
                        "main.txt": "Are you familiar with fields?",
                    },
                },
            }),
        )
        .await;

        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap());
        let (worktree_settings, worktree_snapshot) = worktree.update(cx, |worktree, cx| {
            let settings_location = worktree.settings_location(cx);
            (
                WorktreeSettings::get(Some(settings_location), cx).clone(),
                worktree.snapshot(),
            )
        });

        // Manually create a test entry for the gitignored directory, since it won't
        // be loaded by the worktree.
        let entry = Entry {
            id: ProjectEntryId::from_proto(1),
            kind: EntryKind::UnloadedDir,
            path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()),
            inode: 0,
            mtime: None,
            canonical_path: None,
            is_ignored: true,
            is_hidden: false,
            is_always_included: false,
            is_external: false,
            is_private: false,
            size: 0,
            char_bag: Default::default(),
            is_fifo: false,
        };

        // 1. Search for `field`, including ignored files, without any
        // inclusion or exclusion filters.
        let include_ignored = true;
        let files_to_include = PathMatcher::default();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));

        // 2. Search for `field`, including ignored files, but with
        // `files_to_include` restricted to files under `src/lib`.
        let include_ignored = true;
        let files_to_include = PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(!path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));
    }
}