use std::{
    cell::LazyCell,
    collections::BTreeSet,
    io::{BufRead, BufReader},
    ops::Range,
    path::{Path, PathBuf},
    pin::pin,
    sync::Arc,
};

use anyhow::Context;
use collections::HashSet;
use fs::Fs;
use futures::{SinkExt, StreamExt, select_biased, stream::FuturesOrdered};
use gpui::{App, AppContext, AsyncApp, Entity, Task};
use language::{Buffer, BufferSnapshot};
use parking_lot::Mutex;
use postage::oneshot;
use rpc::{AnyProtoClient, proto};
use smol::{
    channel::{Receiver, Sender, bounded, unbounded},
    future::FutureExt,
};

use text::BufferId;
use util::{ResultExt, maybe, paths::compare_rel_paths, rel_path::RelPath};
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeSettings};

use crate::{
    Project, ProjectItem, ProjectPath, RemotelyCreatedModels,
    buffer_store::BufferStore,
    search::{SearchQuery, SearchResult},
    worktree_store::WorktreeStore,
};

pub struct Search {
    buffer_store: Entity<BufferStore>,
    worktree_store: Entity<WorktreeStore>,
    limit: usize,
    kind: SearchKind,
}

/// Represents search setup before it is actually kicked off via [`Search::into_handle`] and the
/// returned [`SearchResultsHandle`].
enum SearchKind {
    /// Search for candidates by inspecting file contents on the file system, avoiding loading a buffer
    /// unless we know that the corresponding file contains a match.
    Local {
        fs: Arc<dyn Fs>,
        worktrees: Vec<Entity<Worktree>>,
    },
    /// Query the remote host for candidates. As of writing, the host runs a local search in
    /// "buffers with matches only" mode.
    Remote {
        client: AnyProtoClient,
        remote_id: u64,
        models: Arc<Mutex<RemotelyCreatedModels>>,
    },
    /// Run the search against a known set of candidates. Even when working with a remote host, this
    /// won't round-trip to the host.
    OpenBuffersOnly,
}

/// Represents the results of a project search and allows one to obtain either the match positions or
/// just the handles to buffers that may match the search. Grabbing the handles is cheaper than
/// obtaining full match positions, because in that case we look for at most one match in each file.
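///
/// The search does not start until [`Self::results`] or [`Self::matching_buffers`] is called;
/// dropping the handle without calling either never spawns the underlying search task.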
#[must_use]
pub struct SearchResultsHandle {
    results: Receiver<SearchResult>,
    matching_buffers: Receiver<Entity<Buffer>>,
    trigger_search: Box<dyn FnOnce(&mut App) -> Task<()> + Send + Sync>,
}

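/// A stream of search output of type `T`, kept alive by the background task that produces it.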
pub struct SearchResults<T> {
    pub _task_handle: Task<()>,
    pub rx: Receiver<T>,
}

impl SearchResultsHandle {
    pub fn results(self, cx: &mut App) -> SearchResults<SearchResult> {
        SearchResults {
            _task_handle: (self.trigger_search)(cx),
            rx: self.results,
        }
    }

    pub fn matching_buffers(self, cx: &mut App) -> SearchResults<Entity<Buffer>> {
        SearchResults {
            _task_handle: (self.trigger_search)(cx),
            rx: self.matching_buffers,
        }
    }
}

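/// Describes, per worker, where search candidates come from.
///
/// For local searches the candidates flow through a pipeline of channels: `input_paths_rx` delivers every
/// project path (pre-filtered by path only), `confirm_contents_will_match_*` carries the cheap "does the
/// on-disk file contain at least one match?" check, and confirmed paths are then loaded as buffers on the
/// main thread so that workers can collect all matches from their snapshots.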
#[derive(Clone)]
enum FindSearchCandidates {
    Local {
        fs: Arc<dyn Fs>,
        /// Start off with all paths in the project and filter them based on:
        /// - Include filters
        /// - Exclude filters
        /// - Only open buffers
        /// - Scan ignored files
        ///
        /// Put another way: filter out files that can't match (without looking at file contents).
        input_paths_rx: Receiver<InputPath>,
        /// After that, if the buffer is not yet loaded, we'll figure out if it contains at least one match
        /// based on its contents on disk. This step is not performed for buffers we already have in memory.
        confirm_contents_will_match_tx: Sender<MatchingEntry>,
        confirm_contents_will_match_rx: Receiver<MatchingEntry>,
    },
    Remote,
    OpenBuffersOnly,
}

impl Search {
    pub fn local(
        fs: Arc<dyn Fs>,
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        cx: &mut App,
    ) -> Self {
        let worktrees = worktree_store.read(cx).visible_worktrees(cx).collect();
        Self {
            kind: SearchKind::Local { fs, worktrees },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn remote(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
        client_state: (AnyProtoClient, u64, Arc<Mutex<RemotelyCreatedModels>>),
    ) -> Self {
        Self {
            kind: SearchKind::Remote {
                client: client_state.0,
                remote_id: client_state.1,
                models: client_state.2,
            },
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) fn open_buffers_only(
        buffer_store: Entity<BufferStore>,
        worktree_store: Entity<WorktreeStore>,
        limit: usize,
    ) -> Self {
        Self {
            kind: SearchKind::OpenBuffersOnly,
            buffer_store,
            worktree_store,
            limit,
        }
    }

    pub(crate) const MAX_SEARCH_RESULT_FILES: usize = 5_000;
    pub(crate) const MAX_SEARCH_RESULT_RANGES: usize = 10_000;

    /// Prepares a project search run. The resulting [`SearchResultsHandle`] has to be used to specify
    /// whether you're interested in matching buffers or full search results.
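    ///
    /// A rough usage sketch (not a doctest; assumes a local project's `fs`, `buffer_store`, and
    /// `worktree_store` handles plus a `SearchQuery` named `query` are in scope):
    ///
    /// ```ignore
    /// let handle = Search::local(fs, buffer_store, worktree_store, 1_000, cx).into_handle(query, cx);
    /// // Ask for full results (matching buffers plus anchor ranges):
    /// let results = handle.results(cx);
    /// cx.spawn(async move |_cx| {
    ///     while let Ok(result) = results.rx.recv().await {
    ///         match result {
    ///             SearchResult::Buffer { buffer, ranges } => { /* show the matches */ }
    ///             SearchResult::LimitReached => break,
    ///         }
    ///     }
    /// })
    /// .detach();
    /// ```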
    pub fn into_handle(mut self, query: SearchQuery, cx: &mut App) -> SearchResultsHandle {
        let mut open_buffers = HashSet::default();
        let mut unnamed_buffers = Vec::new();
        const MAX_CONCURRENT_BUFFER_OPENS: usize = 64;
        let buffers = self.buffer_store.read(cx);
        for handle in buffers.buffers() {
            let buffer = handle.read(cx);
            if !buffers.is_searchable(&buffer.remote_id()) {
                continue;
            } else if let Some(entry_id) = buffer.entry_id(cx) {
                open_buffers.insert(entry_id);
            } else {
                self.limit = self.limit.saturating_sub(1);
                unnamed_buffers.push(handle)
            };
        }
        let open_buffers = Arc::new(open_buffers);
        let executor = cx.background_executor().clone();
        let (tx, rx) = unbounded();
        let (grab_buffer_snapshot_tx, grab_buffer_snapshot_rx) = unbounded();
        let matching_buffers = grab_buffer_snapshot_rx.clone();
        let trigger_search = Box::new(move |cx: &mut App| {
            cx.spawn(async move |cx| {
                for buffer in unnamed_buffers {
                    _ = grab_buffer_snapshot_tx.send(buffer).await;
                }

                let (find_all_matches_tx, find_all_matches_rx) =
                    bounded(MAX_CONCURRENT_BUFFER_OPENS);
                let query = Arc::new(query);
                let (candidate_searcher, tasks) = match self.kind {
                    SearchKind::OpenBuffersOnly => {
                        let open_buffers = cx.update(|cx| self.all_loaded_buffers(&query, cx));
                        let fill_requests = cx
                            .background_spawn(async move {
                                for buffer in open_buffers {
                                    if grab_buffer_snapshot_tx.send(buffer).await.is_err() {
                                        return;
                                    }
                                }
                            })
                            .boxed_local();
                        (FindSearchCandidates::OpenBuffersOnly, vec![fill_requests])
                    }
                    SearchKind::Local {
                        fs,
                        ref mut worktrees,
                    } => {
                        let (get_buffer_for_full_scan_tx, get_buffer_for_full_scan_rx) =
                            unbounded();
                        let (confirm_contents_will_match_tx, confirm_contents_will_match_rx) =
                            bounded(64);
                        let (sorted_search_results_tx, sorted_search_results_rx) = unbounded();

                        let (input_paths_tx, input_paths_rx) = unbounded();
                        let tasks = vec![
                            cx.spawn(Self::provide_search_paths(
                                std::mem::take(worktrees),
                                query.clone(),
                                input_paths_tx,
                                sorted_search_results_tx,
                            ))
                            .boxed_local(),
                            Self::open_buffers(
                                self.buffer_store,
                                get_buffer_for_full_scan_rx,
                                grab_buffer_snapshot_tx,
                                cx.clone(),
                            )
                            .boxed_local(),
                            cx.background_spawn(Self::maintain_sorted_search_results(
                                sorted_search_results_rx,
                                get_buffer_for_full_scan_tx,
                                self.limit,
                            ))
                            .boxed_local(),
                        ];
                        (
                            FindSearchCandidates::Local {
                                fs,
                                confirm_contents_will_match_tx,
                                confirm_contents_will_match_rx,
                                input_paths_rx,
                            },
                            tasks,
                        )
                    }
                    SearchKind::Remote {
                        client,
                        remote_id,
                        models,
                    } => {
                        let request = client.request(proto::FindSearchCandidates {
                            project_id: remote_id,
                            query: Some(query.to_proto()),
                            limit: self.limit as _,
                        });
                        let weak_buffer_store = self.buffer_store.downgrade();
                        let buffer_store = self.buffer_store;
                        let guard = cx.update(|cx| {
                            Project::retain_remotely_created_models_impl(
                                &models,
                                &buffer_store,
                                &self.worktree_store,
                                cx,
                            )
                        });

                        let issue_remote_buffers_request = cx
                            .spawn(async move |cx| {
                                let _ = maybe!(async move {
                                    let response = request.await?;
                                    for buffer_id in response.buffer_ids {
                                        let buffer_id = BufferId::new(buffer_id)?;
                                        let buffer = weak_buffer_store
                                            .update(cx, |buffer_store, cx| {
                                                buffer_store.wait_for_remote_buffer(buffer_id, cx)
                                            })?
                                            .await?;
                                        let _ = grab_buffer_snapshot_tx.send(buffer).await;
                                    }

                                    drop(guard);
                                    anyhow::Ok(())
                                })
                                .await
                                .log_err();
                            })
                            .boxed_local();
                        (
                            FindSearchCandidates::Remote,
                            vec![issue_remote_buffers_request],
                        )
                    }
                };

                let should_find_all_matches = !tx.is_closed();

                let _executor = executor.clone();
                let worker_pool = executor.spawn(async move {
                    let num_cpus = _executor.num_cpus();

                    assert!(num_cpus > 0);
                    _executor
                        .scoped(|scope| {
                            for _ in 0..num_cpus - 1 {
                                let worker = Worker {
                                    query: query.clone(),
                                    open_buffers: open_buffers.clone(),
                                    candidates: candidate_searcher.clone(),
                                    find_all_matches_rx: find_all_matches_rx.clone(),
                                };
                                scope.spawn(worker.run());
                            }

                            drop(find_all_matches_rx);
                            drop(candidate_searcher);
                        })
                        .await;
                });

                let (sorted_matches_tx, sorted_matches_rx) = unbounded();
                // The caller of `into_handle` decides whether they're interested in all matches (files that
                // matched plus all matching ranges) or just the files. Both cases consume the same streams
                // that drive the internals of the project search, which means we cannot pull values off of
                // those streams unless doing so is strictly needed for the search to make progress.
                //
                // Grabbing buffer snapshots is only necessary when we're looking for all matches. If the
                // caller decided that they're not interested in all matches, running that task
                // unconditionally would hinder the caller's ability to observe all matching file paths.
                let buffer_snapshots = if should_find_all_matches {
                    Some(
                        Self::grab_buffer_snapshots(
                            grab_buffer_snapshot_rx,
                            find_all_matches_tx,
                            sorted_matches_tx,
                            cx.clone(),
                        )
                        .boxed_local(),
                    )
                } else {
                    drop(find_all_matches_tx);

                    None
                };
                let ensure_matches_are_reported_in_order = if should_find_all_matches {
                    Some(
                        Self::ensure_matched_ranges_are_reported_in_order(sorted_matches_rx, tx)
                            .boxed_local(),
                    )
                } else {
                    drop(tx);
                    None
                };

                futures::future::join_all(
                    [worker_pool.boxed_local()]
                        .into_iter()
                        .chain(buffer_snapshots)
                        .chain(ensure_matches_are_reported_in_order)
                        .chain(tasks),
                )
                .await;
            })
        });

        SearchResultsHandle {
            results: rx,
            matching_buffers,
            trigger_search,
        }
    }

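    /// Walks the snapshots of all searched worktrees and emits one [`InputPath`] per file, together with a
    /// per-file oneshot receiver (sent on `results`) that preserves worktree traversal order downstream.
    /// When ignored files are included in the search, ignored directories that may contain matches are
    /// scanned into the worktree first.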
    fn provide_search_paths(
        worktrees: Vec<Entity<Worktree>>,
        query: Arc<SearchQuery>,
        tx: Sender<InputPath>,
        results: Sender<oneshot::Receiver<ProjectPath>>,
    ) -> impl AsyncFnOnce(&mut AsyncApp) {
        async move |cx| {
            _ = maybe!(async move {
                let gitignored_tracker = PathInclusionMatcher::new(query.clone());
                let include_ignored = query.include_ignored();
                for worktree in worktrees {
                    let (mut snapshot, worktree_settings) = worktree
                        .read_with(cx, |this, _| {
                            Some((this.snapshot(), this.as_local()?.settings()))
                        })
                        .context("The worktree is not local")?;
                    if query.include_ignored() {
                        // Pre-fetch all of the ignored directories as they're going to be searched.
                        let mut entries_to_refresh = vec![];

                        for entry in snapshot.entries(query.include_ignored(), 0) {
                            if gitignored_tracker.should_scan_gitignored_dir(
                                entry,
                                &snapshot,
                                &worktree_settings,
                            ) {
                                entries_to_refresh.push(entry.path.clone());
                            }
                        }
                        let barrier = worktree.update(cx, |this, _| {
                            let local = this.as_local_mut()?;
                            let barrier = entries_to_refresh
                                .into_iter()
                                .map(|path| local.add_path_prefix_to_scan(path).into_future())
                                .collect::<Vec<_>>();
                            Some(barrier)
                        });
                        if let Some(barriers) = barrier {
                            futures::future::join_all(barriers).await;
                        }
                        snapshot = worktree.read_with(cx, |this, _| this.snapshot());
                    }
                    let tx = tx.clone();
                    let results = results.clone();

                    cx.background_executor()
                        .spawn(async move {
                            for entry in snapshot.files(include_ignored, 0) {
                                let (should_scan_tx, should_scan_rx) = oneshot::channel();

                                let Ok(_) = tx
                                    .send(InputPath {
                                        entry: entry.clone(),
                                        snapshot: snapshot.clone(),
                                        should_scan_tx,
                                    })
                                    .await
                                else {
                                    return;
                                };
                                if results.send(should_scan_rx).await.is_err() {
                                    return;
                                };
                            }
                        })
                        .await;
                }
                anyhow::Ok(())
            })
            .await;
        }
    }

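    /// Receives the per-file oneshot receivers in worktree traversal order and forwards the paths that
    /// actually matched to `paths_for_full_scan`, stopping once `limit` matching files have been seen.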
    async fn maintain_sorted_search_results(
        rx: Receiver<oneshot::Receiver<ProjectPath>>,
        paths_for_full_scan: Sender<ProjectPath>,
        limit: usize,
    ) {
        let mut rx = pin!(rx);
        let mut matched = 0;
        while let Some(mut next_path_result) = rx.next().await {
            let Some(successful_path) = next_path_result.next().await else {
                // This file did not produce a match, hence skip it.
                continue;
            };
            if paths_for_full_scan.send(successful_path).await.is_err() {
                return;
            };
            matched += 1;
            if matched >= limit {
                break;
            }
        }
    }

    /// Background workers cannot open buffers by themselves, hence the main thread opens them on their behalf.
    async fn open_buffers(
        buffer_store: Entity<BufferStore>,
        rx: Receiver<ProjectPath>,
        find_all_matches_tx: Sender<Entity<Buffer>>,
        mut cx: AsyncApp,
    ) {
        let mut rx = pin!(rx.ready_chunks(64));
        _ = maybe!(async move {
            while let Some(requested_paths) = rx.next().await {
                let mut buffers = buffer_store.update(&mut cx, |this, cx| {
                    requested_paths
                        .into_iter()
                        .map(|path| this.open_buffer(path, cx))
                        .collect::<FuturesOrdered<_>>()
                });

                while let Some(buffer) = buffers.next().await {
                    if let Some(buffer) = buffer.log_err() {
                        find_all_matches_tx.send(buffer).await?;
                    }
                }
            }
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

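    /// For every buffer that needs a full scan, captures a snapshot on the main thread and hands it to the
    /// worker pool via `find_all_matches_tx`, while queueing a oneshot receiver on `results` so that match
    /// ranges can later be reported in the same order the buffers arrived in.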
    async fn grab_buffer_snapshots(
        rx: Receiver<Entity<Buffer>>,
        find_all_matches_tx: Sender<(
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        )>,
        results: Sender<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        mut cx: AsyncApp,
    ) {
        _ = maybe!(async move {
            while let Ok(buffer) = rx.recv().await {
                let snapshot = buffer.read_with(&mut cx, |this, _| this.snapshot());
                let (tx, rx) = oneshot::channel();
                find_all_matches_tx.send((buffer, snapshot, tx)).await?;
                results.send(rx).await?;
            }
            debug_assert!(rx.is_empty());
            Result::<_, anyhow::Error>::Ok(())
        })
        .await;
    }

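    /// Drains the per-buffer oneshot receivers in the order they were queued and publishes
    /// [`SearchResult`]s on `tx`, emitting [`SearchResult::LimitReached`] once either
    /// [`Search::MAX_SEARCH_RESULT_FILES`] or [`Search::MAX_SEARCH_RESULT_RANGES`] is exceeded.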
    async fn ensure_matched_ranges_are_reported_in_order(
        rx: Receiver<oneshot::Receiver<(Entity<Buffer>, Vec<Range<language::Anchor>>)>>,
        tx: Sender<SearchResult>,
    ) {
        use postage::stream::Stream;
        _ = maybe!(async move {
            let mut matched_buffers = 0;
            let mut matches = 0;
            while let Ok(mut next_buffer_matches) = rx.recv().await {
                let Some((buffer, ranges)) = next_buffer_matches.recv().await else {
                    continue;
                };

                if matched_buffers > Search::MAX_SEARCH_RESULT_FILES
                    || matches > Search::MAX_SEARCH_RESULT_RANGES
                {
                    _ = tx.send(SearchResult::LimitReached).await;
                    break;
                }
                matched_buffers += 1;
                matches += ranges.len();

                tx.send(SearchResult::Buffer { buffer, ranges }).await?;
            }
            anyhow::Ok(())
        })
        .await;
    }

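    /// Returns the query's candidate buffers that pass the path and ignore filters, sorted by file path so
    /// that results for open-buffer searches are reported in a stable order.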
    fn all_loaded_buffers(&self, search_query: &SearchQuery, cx: &App) -> Vec<Entity<Buffer>> {
        let worktree_store = self.worktree_store.read(cx);
        let mut buffers = search_query
            .buffers()
            .into_iter()
            .flatten()
            .filter(|buffer| {
                let b = buffer.read(cx);
                if let Some(file) = b.file() {
                    if !search_query.match_path(file.path()) {
                        return false;
                    }
                    if !search_query.include_ignored()
                        && let Some(entry) = b
                            .entry_id(cx)
                            .and_then(|entry_id| worktree_store.entry_for_id(entry_id, cx))
                        && entry.is_ignored
                    {
                        return false;
                    }
                }
                true
            })
            .cloned()
            .collect::<Vec<_>>();
        buffers.sort_by(|a, b| {
            let a = a.read(cx);
            let b = b.read(cx);
            match (a.file(), b.file()) {
                (None, None) => a.remote_id().cmp(&b.remote_id()),
                (None, Some(_)) => std::cmp::Ordering::Less,
                (Some(_), None) => std::cmp::Ordering::Greater,
                (Some(a), Some(b)) => compare_rel_paths((a.path(), true), (b.path(), true)),
            }
        });

        buffers
    }
}

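/// A background worker that services the three stages of the search pipeline: path filtering, cheap
/// "does this file contain at least one match?" checks against on-disk contents, and full match
/// collection over buffer snapshots.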
struct Worker {
    query: Arc<SearchQuery>,
    open_buffers: Arc<HashSet<ProjectEntryId>>,
    candidates: FindSearchCandidates,
    /// Ok, we're back in the background: run a full scan and find all matches in a given buffer snapshot.
    /// Then, when you're done, share them via the channel you were given.
    find_all_matches_rx: Receiver<(
        Entity<Buffer>,
        BufferSnapshot,
        oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
    )>,
}

impl Worker {
    async fn run(self) {
        let (
            input_paths_rx,
            confirm_contents_will_match_rx,
            mut confirm_contents_will_match_tx,
            fs,
        ) = match self.candidates {
            FindSearchCandidates::Local {
                fs,
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
            } => (
                input_paths_rx,
                confirm_contents_will_match_rx,
                confirm_contents_will_match_tx,
                Some(fs),
            ),
            FindSearchCandidates::Remote | FindSearchCandidates::OpenBuffersOnly => {
                (unbounded().1, unbounded().1, unbounded().0, None)
            }
        };
        // Workers can take wildly different amounts of time per request (e.g. worker A grabs
        // "find all matches in file/a", which takes minutes, while worker B's "find all matches in file/b"
        // takes seconds), so matches may complete out of order; ordering is restored downstream via the
        // per-entry oneshot channels.
        let mut find_all_matches = pin!(self.find_all_matches_rx.fuse());
        let mut find_first_match = pin!(confirm_contents_will_match_rx.fuse());
        let mut scan_path = pin!(input_paths_rx.fuse());

        loop {
            let handler = RequestHandler {
                query: &self.query,
                open_entries: &self.open_buffers,
                fs: fs.as_deref(),
                confirm_contents_will_match_tx: &confirm_contents_will_match_tx,
            };
            // Whenever we notice that some step of the pipeline is closed, we don't want to close subsequent
            // steps straight away. Another worker might be about to produce a value that will
            // be pushed there, thus we'll replace the current worker's pipe with a dummy one.
            // That way, we'll only ever close a next-stage channel when ALL workers do so.
            select_biased! {
                find_all_matches = find_all_matches.next() => {
                    let Some(matches) = find_all_matches else {
                        continue;
                    };
                    handler.handle_find_all_matches(matches).await;
                },
                find_first_match = find_first_match.next() => {
                    if let Some(buffer_with_at_least_one_match) = find_first_match {
                        handler.handle_find_first_match(buffer_with_at_least_one_match).await;
                    }
                },
                scan_path = scan_path.next() => {
                    if let Some(path_to_scan) = scan_path {
                        handler.handle_scan_path(path_to_scan).await;
                    } else {
                        // This stage is exhausted; drop our copy of the sender for the next stage.
                        // Once every worker has done so, that channel closes too.
                        confirm_contents_will_match_tx = bounded(1).0;
                    }
                }
                complete => {
                    break
                },
            }
        }
    }
}

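/// A borrowed, per-iteration view of a [`Worker`]'s state, with one handler per pipeline stage.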
struct RequestHandler<'worker> {
    query: &'worker SearchQuery,
    fs: Option<&'worker dyn Fs>,
    open_entries: &'worker HashSet<ProjectEntryId>,
    confirm_contents_will_match_tx: &'worker Sender<MatchingEntry>,
}

impl RequestHandler<'_> {
    async fn handle_find_all_matches(
        &self,
        (buffer, snapshot, mut report_matches): (
            Entity<Buffer>,
            BufferSnapshot,
            oneshot::Sender<(Entity<Buffer>, Vec<Range<language::Anchor>>)>,
        ),
    ) {
        let ranges = self
            .query
            .search(&snapshot, None)
            .await
            .iter()
            .map(|range| snapshot.anchor_before(range.start)..snapshot.anchor_after(range.end))
            .collect::<Vec<_>>();

        _ = report_matches.send((buffer, ranges)).await;
    }

    async fn handle_find_first_match(&self, mut entry: MatchingEntry) {
        _ = maybe!(async move {
            let abs_path = entry.worktree_root.join(entry.path.path.as_std_path());
            let Some(file) = self
                .fs
                .context("Trying to query the filesystem in a remote project search")?
                .open_sync(&abs_path)
                .await
                .log_err()
            else {
                return anyhow::Ok(());
            };

            let mut file = BufReader::new(file);
            let file_start = file.fill_buf()?;

            if let Err(Some(starting_position)) =
                std::str::from_utf8(file_start).map_err(|e| e.error_len())
            {
                // Before attempting to match the file contents, throw away files that contain invalid
                // UTF-8 sequences early on; that way we can still match files in a streaming fashion
                // without having to look at "obviously binary" files.
                log::debug!(
                    "Invalid UTF-8 sequence in file {abs_path:?} at byte position {starting_position}"
                );
                return Ok(());
            }

            if self.query.detect(file).await.unwrap_or(false) {
                // Yes, we should scan the whole file.
                entry.should_scan_tx.send(entry.path).await?;
            }
            Ok(())
        })
        .await;
    }

    async fn handle_scan_path(&self, req: InputPath) {
        _ = maybe!(async move {
            let InputPath {
                entry,
                snapshot,
                mut should_scan_tx,
            } = req;

            if entry.is_fifo || !entry.is_file() {
                return Ok(());
            }

            if self.query.filters_path() {
                let matched_path = if self.query.match_full_paths() {
                    let mut full_path = snapshot.root_name().to_owned();
                    full_path.push(&entry.path);
                    self.query.match_path(&full_path)
                } else {
                    self.query.match_path(&entry.path)
                };
                if !matched_path {
                    return Ok(());
                }
            }

            if self.open_entries.contains(&entry.id) {
                // The buffer is already in memory and that's the version we want to scan;
                // hence skip the dilly-dally and look for all matches straight away.
                should_scan_tx
                    .send(ProjectPath {
                        worktree_id: snapshot.id(),
                        path: entry.path.clone(),
                    })
                    .await?;
            } else {
                self.confirm_contents_will_match_tx
                    .send(MatchingEntry {
                        should_scan_tx,
                        worktree_root: snapshot.abs_path().clone(),
                        path: ProjectPath {
                            worktree_id: snapshot.id(),
                            path: entry.path.clone(),
                        },
                    })
                    .await?;
            }

            anyhow::Ok(())
        })
        .await;
    }
}

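/// A single worktree entry to be considered for the search, together with the oneshot sender used to
/// report the corresponding [`ProjectPath`] if the file might contain a match.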
struct InputPath {
    entry: Entry,
    snapshot: Snapshot,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

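/// A path-filtered candidate whose on-disk contents still need to be checked for at least one match.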
struct MatchingEntry {
    worktree_root: Arc<Path>,
    path: ProjectPath,
    should_scan_tx: oneshot::Sender<ProjectPath>,
}

/// Encapsulates the logic that decides whether a given gitignored directory should be scanned, based on the
/// include/exclude patterns of a search query (as include/exclude parameters may match paths inside it).
/// It is, in a sense, the inverse of glob matching: given a glob pattern like `src/**/` and a parent path
/// like `src`, we need to decide whether the parent may contain glob hits.
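///
/// For example (a sketch, not a doctest): with an include filter of `src/**/*.rs`, the retained prefix is
/// `src`, so an unloaded ignored directory such as `src/generated` should be scanned, while `target`
/// should not:
///
/// ```ignore
/// let query = Arc::new(SearchQuery::text(
///     "needle",
///     false,
///     false,
///     /* include_ignored */ true,
///     PathMatcher::new(vec!["src/**/*.rs"], PathStyle::Posix)?,
///     PathMatcher::default(),
///     false,
///     None,
/// )?);
/// let matcher = PathInclusionMatcher::new(query);
/// // `should_scan_gitignored_dir` then returns true only for ignored, unloaded directories that lie
/// // above or below the `src` prefix (and are not excluded by the query or worktree settings).
/// ```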
struct PathInclusionMatcher {
    included: BTreeSet<PathBuf>,
    query: Arc<SearchQuery>,
}

impl PathInclusionMatcher {
    fn new(query: Arc<SearchQuery>) -> Self {
        let mut included = BTreeSet::new();
        // To do an inverse glob match, we split each glob into its prefix and the glob part.
        // For example, `src/**/*.rs` becomes `src/` and `**/*.rs`; the glob part gets dropped.
        // Then, when checking whether a given directory should be scanned, we check whether it is a
        // non-empty substring of any glob prefix.
        if query.filters_path() {
            included.extend(
                query
                    .files_to_include()
                    .sources()
                    .flat_map(|glob| Some(wax::Glob::new(glob).ok()?.partition().0)),
            );
        }
        Self { included, query }
    }

    fn should_scan_gitignored_dir(
        &self,
        entry: &Entry,
        snapshot: &Snapshot,
        worktree_settings: &WorktreeSettings,
    ) -> bool {
        if !entry.is_ignored || !entry.kind.is_unloaded() {
            return false;
        }
        if !self.query.include_ignored() {
            return false;
        }
        if worktree_settings.is_path_excluded(&entry.path) {
            return false;
        }
        if !self.query.filters_path() {
            return true;
        }

        let as_abs_path = LazyCell::new(move || snapshot.absolutize(&entry.path));
        let entry_path = &entry.path;
        // Check exclusions (pruning): if the current path is a child of an excluded path, we stop.
        let is_excluded = self.path_is_definitely_excluded(&entry_path, snapshot);

        if is_excluded {
            return false;
        }

        // Check inclusions (traversal).
        if self.included.is_empty() {
            return true;
        }

        // We scan if the current path is a descendant of an include prefix
        // OR if the current path is an ancestor of an include prefix (we need to go deeper to find it).
        let is_included = self.included.iter().any(|prefix| {
            let (prefix_matches_entry, entry_matches_prefix) = if prefix.is_absolute() {
                (
                    prefix.starts_with(&**as_abs_path),
                    as_abs_path.starts_with(prefix),
                )
            } else {
                RelPath::new(prefix, snapshot.path_style()).map_or((false, false), |prefix| {
                    (
                        prefix.starts_with(entry_path),
                        entry_path.starts_with(&prefix),
                    )
                })
            };

            // Logic:
            // 1. entry_matches_prefix: we are inside the target zone (e.g. glob: `src/`, current: `src/lib/`). Keep scanning.
            // 2. prefix_matches_entry: we are above the target zone (e.g. glob: `src/foo/`, current: `src/`). Keep scanning to reach `foo`.
            prefix_matches_entry || entry_matches_prefix
        });

        is_included
    }
    fn path_is_definitely_excluded(&self, path: &RelPath, snapshot: &Snapshot) -> bool {
        if self.query.files_to_exclude().sources().next().is_some() {
            let mut path = if self.query.match_full_paths() {
                let mut full_path = snapshot.root_name().to_owned();
                full_path.push(path);
                full_path
            } else {
                path.to_owned()
            };
            loop {
                if self.query.files_to_exclude().is_match(&path) {
                    return true;
                } else if !path.pop() {
                    return false;
                }
            }
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use fs::FakeFs;
    use serde_json::json;
    use settings::Settings;
    use util::{
        path,
        paths::{PathMatcher, PathStyle},
        rel_path::RelPath,
    };
    use worktree::{Entry, EntryKind, WorktreeSettings};

    use crate::{
        Project, project_search::PathInclusionMatcher, project_tests::init_test,
        search::SearchQuery,
    };

    #[gpui::test]
    async fn test_path_inclusion_matcher(cx: &mut gpui::TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({
                ".gitignore": "src/data/\n",
                "src": {
                    "data": {
                        "main.csv": "field_1,field_2,field_3",
                    },
                    "lib": {
                        "main.txt": "Are you familiar with fields?",
                    },
                },
            }),
        )
        .await;

        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let worktree = project.update(cx, |project, cx| project.worktrees(cx).next().unwrap());
        let (worktree_settings, worktree_snapshot) = worktree.update(cx, |worktree, cx| {
            let settings_location = worktree.settings_location(cx);
            (
                WorktreeSettings::get(Some(settings_location), cx).clone(),
                worktree.snapshot(),
            )
        });

        // Manually create a test entry for the gitignored directory, since it won't
        // be loaded by the worktree.
        let entry = Entry {
            id: ProjectEntryId::from_proto(1),
            kind: EntryKind::UnloadedDir,
            path: Arc::from(RelPath::unix(Path::new("src/data")).unwrap()),
            inode: 0,
            mtime: None,
            canonical_path: None,
            is_ignored: true,
            is_hidden: false,
            is_always_included: false,
            is_external: false,
            is_private: false,
            size: 0,
            char_bag: Default::default(),
            is_fifo: false,
        };

        // 1. Search for `field`, including ignored files, without any
        // inclusion or exclusion filters.
        let include_ignored = true;
        let files_to_include = PathMatcher::default();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));

        // 2. Search for `field`, including ignored files, but restricting
        // `files_to_include` to files under `src/lib`.
        let include_ignored = true;
        let files_to_include = PathMatcher::new(vec!["src/lib"], PathStyle::Posix).unwrap();
        let files_to_exclude = PathMatcher::default();
        let match_full_paths = false;
        let search_query = SearchQuery::text(
            "field",
            false,
            false,
            include_ignored,
            files_to_include,
            files_to_exclude,
            match_full_paths,
            None,
        )
        .unwrap();

        let path_matcher = PathInclusionMatcher::new(Arc::new(search_query));
        assert!(!path_matcher.should_scan_gitignored_dir(
            &entry,
            &worktree_snapshot,
            &worktree_settings
        ));
    }
}