1mod ignore;
2mod worktree_settings;
3
4use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
5use anyhow::{Context as _, Result, anyhow};
6use chardetng::EncodingDetector;
7use clock::ReplicaId;
8use collections::{HashMap, HashSet, VecDeque};
9use encoding_rs::Encoding;
10use fs::{
11 Fs, MTime, PathEvent, PathEventKind, RemoveOptions, Watcher, copy_recursive, read_dir_items,
12};
13use futures::{
14 FutureExt as _, Stream, StreamExt,
15 channel::{
16 mpsc::{self, UnboundedSender},
17 oneshot,
18 },
19 select_biased, stream,
20 task::Poll,
21};
22use fuzzy::CharBag;
23use git::{
24 COMMIT_MESSAGE, DOT_GIT, FSMONITOR_DAEMON, GITIGNORE, INDEX_LOCK, LFS_DIR, REPO_EXCLUDE,
25 status::GitSummary,
26};
27use gpui::{
28 App, AppContext as _, AsyncApp, BackgroundExecutor, Context, Entity, EventEmitter, Priority,
29 Task,
30};
31use ignore::IgnoreStack;
32use language::DiskState;
33
34use parking_lot::Mutex;
35use paths::{local_settings_folder_name, local_vscode_folder_name};
36use postage::{
37 barrier,
38 prelude::{Sink as _, Stream as _},
39 watch,
40};
41use rpc::{
42 AnyProtoClient,
43 proto::{self, split_worktree_update},
44};
45pub use settings::WorktreeId;
46use settings::{Settings, SettingsLocation, SettingsStore};
47use smallvec::{SmallVec, smallvec};
48use smol::channel::{self, Sender};
49use std::{
50 any::Any,
51 borrow::Borrow as _,
52 cmp::Ordering,
53 collections::hash_map,
54 convert::TryFrom,
55 ffi::OsStr,
56 fmt,
57 future::Future,
58 mem::{self},
59 ops::{Deref, DerefMut, Range},
60 path::{Path, PathBuf},
61 pin::Pin,
62 sync::{
63 Arc,
64 atomic::{AtomicUsize, Ordering::SeqCst},
65 },
66 time::{Duration, Instant},
67};
68use sum_tree::{Bias, Dimensions, Edit, KeyedItem, SeekTarget, SumTree, Summary, TreeMap, TreeSet};
69use text::{LineEnding, Rope};
70use util::{
71 ResultExt, debug_panic, maybe,
72 paths::{PathMatcher, PathStyle, SanitizedPath, home_dir},
73 rel_path::RelPath,
74};
75pub use worktree_settings::WorktreeSettings;
76
77use crate::ignore::IgnoreKind;
78
/// Debounce interval for file-system events before they are handed to the
/// background scanner (passed to `Fs::watch`).
pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100);
80
/// A set of local or remote files that are being opened as part of a project.
/// Responsible for tracking related FS (for local)/collab (for remote) events and corresponding updates.
/// Stores git repositories data and the diagnostics for the file(s).
///
/// Has an absolute path, and may be set to be visible in Zed UI or not.
/// May correspond to a directory or a single file.
/// Possible examples:
/// * a drag and dropped file — may be added as an invisible, "ephemeral" entry to the current worktree
/// * a directory opened in Zed — may be added as a visible entry to the current worktree
///
/// Uses [`Entry`] to track the state of each file/directory, can look up absolute paths for entries.
pub enum Worktree {
    /// A worktree backed by the local file system, kept up to date by a
    /// background scanner watching for FS events.
    Local(LocalWorktree),
    /// A worktree whose state is replicated from a remote collaboration host
    /// via `proto::UpdateWorktree` messages.
    Remote(RemoteWorktree),
}
96
/// The outcome of creating an entry in the worktree.
#[derive(Debug)]
pub enum CreatedEntry {
    /// Got created and indexed by the worktree, receiving a corresponding entry.
    Included(Entry),
    /// Got created on disk, but not indexed due to falling under exclusion
    /// filters; only the absolute path of the created file is reported.
    Excluded { abs_path: PathBuf },
}
105
/// The result of loading a text file from a worktree, along with the encoding
/// metadata needed to round-trip the file back to disk.
#[derive(Debug)]
pub struct LoadedFile {
    /// The worktree file handle for the loaded path.
    pub file: Arc<File>,
    /// The file contents, decoded to a UTF-8 string.
    pub text: String,
    /// The encoding the file was decoded from.
    pub encoding: &'static Encoding,
    /// Whether the file began with a byte-order mark.
    pub has_bom: bool,
}
113
/// The result of loading a file's raw bytes, without any text decoding.
pub struct LoadedBinaryFile {
    /// The worktree file handle for the loaded path.
    pub file: Arc<File>,
    /// The raw byte contents of the file.
    pub content: Vec<u8>,
}
118
119impl fmt::Debug for LoadedBinaryFile {
120 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121 f.debug_struct("LoadedBinaryFile")
122 .field("file", &self.file)
123 .field("content_bytes", &self.content.len())
124 .finish()
125 }
126}
127
/// State for a worktree backed by the local file system.
pub struct LocalWorktree {
    snapshot: LocalSnapshot,
    /// Requests asking the background scanner to rescan specific relative paths.
    scan_requests_tx: channel::Sender<ScanRequest>,
    /// Requests asking the background scanner to scan everything under a path prefix.
    path_prefixes_to_scan_tx: channel::Sender<PathPrefixScanRequest>,
    /// Tracks whether a background scan is currently in progress.
    is_scanning: (watch::Sender<bool>, watch::Receiver<bool>),
    /// Subscribers waiting for a particular scan id to complete.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    _background_scanner_tasks: Vec<Task<()>>,
    update_observer: Option<UpdateObservationState>,
    fs: Arc<dyn Fs>,
    fs_case_sensitive: bool,
    visible: bool,
    /// Source of ids for newly discovered entries.
    next_entry_id: Arc<AtomicUsize>,
    settings: WorktreeSettings,
    /// When true, entries matching the private-path settings are shared anyway.
    share_private_files: bool,
    /// When false, no background file-system scanning or watching is performed.
    scanning_enabled: bool,
}
144
/// A request for the background scanner to scan all entries under a path prefix.
pub struct PathPrefixScanRequest {
    path: Arc<RelPath>,
    /// Barrier senders dropped (and thus signalled) when the scan completes.
    done: SmallVec<[barrier::Sender; 1]>,
}

/// A request for the background scanner to refresh a specific set of paths.
struct ScanRequest {
    relative_paths: Vec<Arc<RelPath>>,
    /// Barrier senders dropped (and thus signalled) when the scan completes.
    done: SmallVec<[barrier::Sender; 1]>,
}
154
/// State for a worktree whose contents are replicated from a remote host.
pub struct RemoteWorktree {
    snapshot: Snapshot,
    /// The snapshot that incoming updates are applied to on a background task,
    /// together with the updates not yet forwarded to the foreground entity.
    background_snapshot: Arc<Mutex<(Snapshot, Vec<proto::UpdateWorktree>)>>,
    project_id: u64,
    client: AnyProtoClient,
    file_scan_inclusions: PathMatcher,
    /// Enqueues updates received from the host for background application.
    updates_tx: Option<UnboundedSender<proto::UpdateWorktree>>,
    /// Forwards each applied update to an external observer, if one is registered.
    update_observer: Option<mpsc::UnboundedSender<proto::UpdateWorktree>>,
    /// Subscribers waiting for a particular scan id to be observed.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    replica_id: ReplicaId,
    visible: bool,
    disconnected: bool,
}
168
/// A view of a worktree's entries at a point in time.
///
/// Shared between local and remote worktrees; local worktrees wrap it in
/// [`LocalSnapshot`] with additional file-system-only state.
#[derive(Clone)]
pub struct Snapshot {
    id: WorktreeId,
    /// The absolute path of the worktree root.
    abs_path: Arc<SanitizedPath>,
    path_style: PathStyle,
    /// The name of the root entry (derived from the last component of `abs_path`).
    root_name: Arc<RelPath>,
    root_char_bag: CharBag,
    /// Entries keyed by path, for ordered traversal.
    entries_by_path: SumTree<Entry>,
    /// Entries keyed by id, for id-based lookup.
    entries_by_id: SumTree<PathEntry>,
    /// The "commondir" of the repository containing the worktree root, if any.
    root_repo_common_dir: Option<Arc<SanitizedPath>>,
    always_included_entries: Vec<Arc<RelPath>>,

    /// A number that increases every time the worktree begins scanning
    /// a set of paths from the filesystem. This scanning could be caused
    /// by some operation performed on the worktree, such as reading or
    /// writing a file, or by an event reported by the filesystem.
    scan_id: usize,

    /// The latest scan id that has completed, and whose preceding scans
    /// have all completed. The current `scan_id` could be more than one
    /// greater than the `completed_scan_id` if operations are performed
    /// on the worktree while it is processing a file-system event.
    completed_scan_id: usize,
}
194
/// This path corresponds to the 'content path' of a repository in relation
/// to Zed's project root.
/// In the majority of the cases, this is the folder that contains the .git folder.
/// But if a sub-folder of a git repository is opened, this corresponds to the
/// project root and the .git folder is located in a parent directory.
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum WorkDirectory {
    /// The repository's work directory lies inside the project, at the given
    /// worktree-relative path.
    InProject {
        relative_path: Arc<RelPath>,
    },
    /// The repository's work directory lies above the project root (the
    /// worktree is a sub-folder of the repository).
    AboveProject {
        // Absolute path of the repository's work directory.
        absolute_path: Arc<Path>,
        // Where the project root is located within the repository.
        location_in_repo: Arc<Path>,
    },
}
210
211impl WorkDirectory {
212 fn path_key(&self) -> PathKey {
213 match self {
214 WorkDirectory::InProject { relative_path } => PathKey(relative_path.clone()),
215 WorkDirectory::AboveProject { .. } => PathKey(RelPath::empty().into()),
216 }
217 }
218
219 /// Returns true if the given path is a child of the work directory.
220 ///
221 /// Note that the path may not be a member of this repository, if there
222 /// is a repository in a directory between these two paths
223 /// external .git folder in a parent folder of the project root.
224 #[track_caller]
225 pub fn directory_contains(&self, path: &RelPath) -> bool {
226 match self {
227 WorkDirectory::InProject { relative_path } => path.starts_with(relative_path),
228 WorkDirectory::AboveProject { .. } => true,
229 }
230 }
231}
232
233impl Default for WorkDirectory {
234 fn default() -> Self {
235 Self::InProject {
236 relative_path: Arc::from(RelPath::empty()),
237 }
238 }
239}
240
/// A [`Snapshot`] augmented with state that only exists for local worktrees.
#[derive(Clone)]
pub struct LocalSnapshot {
    snapshot: Snapshot,
    /// The parsed global gitignore, if one was found.
    global_gitignore: Option<Arc<Gitignore>>,
    /// Exclude files for all git repositories in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    repo_exclude_by_work_dir_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the gitignore files in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the git repositories in the worktree, indexed by the project entry
    /// id of their parent directory.
    git_repositories: TreeMap<ProjectEntryId, LocalRepositoryEntry>,
    /// The file handle of the worktree root
    /// (so we can find it after it's been moved)
    root_file_handle: Option<Arc<dyn fs::FileHandle>>,
}
258
/// Mutable state maintained by the background scanner while it runs.
struct BackgroundScannerState {
    snapshot: LocalSnapshot,
    /// Directories whose contents have already been scanned.
    scanned_dirs: HashSet<ProjectEntryId>,
    /// Path prefixes still pending a recursive scan.
    path_prefixes_to_scan: HashSet<Arc<RelPath>>,
    /// Individual paths still pending a scan.
    paths_to_scan: HashSet<Arc<RelPath>>,
    /// The ids of all of the entries that were removed from the snapshot
    /// as part of the current update. These entry ids may be re-used
    /// if the same inode is discovered at a new path, or if the given
    /// path is re-created after being deleted.
    removed_entries: HashMap<u64, Entry>,
    /// Paths that changed during the current update.
    changed_paths: Vec<Arc<RelPath>>,
    prev_snapshot: Snapshot,
    scanning_enabled: bool,
}
273
/// A root path under which a batch of file-system events occurred.
#[derive(Clone, Debug, Eq, PartialEq)]
struct EventRoot {
    path: Arc<RelPath>,
    // Whether this root was rescanned while handling the events.
    was_rescanned: bool,
}
279
/// A git repository discovered within (or above) a local worktree.
#[derive(Debug, Clone)]
struct LocalRepositoryEntry {
    /// Project entry id of the repository's work directory.
    work_directory_id: ProjectEntryId,
    /// Where the work directory lies relative to the project root.
    work_directory: WorkDirectory,
    /// Absolute path of the work directory.
    work_directory_abs_path: Arc<Path>,
    /// The scan id at which the git directory was last scanned.
    git_dir_scan_id: usize,
    /// Absolute path to the original .git entry that caused us to create this repository.
    ///
    /// This is normally a directory, but may be a "gitfile" that points to a directory elsewhere
    /// (whose path we then store in `repository_dir_abs_path`).
    dot_git_abs_path: Arc<Path>,
    /// Absolute path to the "commondir" for this repository.
    ///
    /// This is always a directory. For a normal repository, this is the same as
    /// `dot_git_abs_path`. For a linked worktree, this is the main repo's `.git`
    /// directory (resolved from the worktree's `commondir` file). For a submodule,
    /// this equals `repository_dir_abs_path` (submodules don't have a `commondir`
    /// file).
    common_dir_abs_path: Arc<Path>,
    /// Absolute path to the directory holding the repository's state.
    ///
    /// For a normal repository, this is a directory and coincides with `dot_git_abs_path` and
    /// `common_dir_abs_path`. For a submodule or worktree, this is some subdirectory of the
    /// commondir like `/project/.git/modules/foo`.
    repository_dir_abs_path: Arc<Path>,
}
306
307impl sum_tree::Item for LocalRepositoryEntry {
308 type Summary = PathSummary<sum_tree::NoSummary>;
309
310 fn summary(&self, _: <Self::Summary as Summary>::Context<'_>) -> Self::Summary {
311 PathSummary {
312 max_path: self.work_directory.path_key().0,
313 item_summary: sum_tree::NoSummary,
314 }
315 }
316}
317
318impl KeyedItem for LocalRepositoryEntry {
319 type Key = PathKey;
320
321 fn key(&self) -> Self::Key {
322 self.work_directory.path_key()
323 }
324}
325
// Lets `WorkDirectory` methods be called directly on a repository entry.
impl Deref for LocalRepositoryEntry {
    type Target = WorkDirectory;

    fn deref(&self) -> &Self::Target {
        &self.work_directory
    }
}
333
// Lets a `LocalSnapshot` be used anywhere a plain `Snapshot` is expected.
impl Deref for LocalSnapshot {
    type Target = Snapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}

impl DerefMut for LocalSnapshot {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.snapshot
    }
}
347
/// Messages reported by the background scanner to the worktree.
enum ScanState {
    /// A scan has begun.
    Started,
    /// The snapshot was updated with the given set of changes.
    Updated {
        snapshot: LocalSnapshot,
        changes: UpdatedEntriesSet,
        // Barrier senders dropped once this update has been handled.
        barrier: SmallVec<[barrier::Sender; 1]>,
        // Whether scanning is still in progress after this update.
        scanning: bool,
    },
    /// The worktree root was moved to a new absolute path.
    RootUpdated {
        new_path: Arc<SanitizedPath>,
    },
    /// The worktree root was deleted from disk.
    RootDeleted,
}
361
/// State backing an active observer of local worktree updates.
struct UpdateObservationState {
    /// Sends each new snapshot plus its changed entries to the observer task.
    snapshots_tx: mpsc::UnboundedSender<(LocalSnapshot, UpdatedEntriesSet)>,
    // NOTE(review): presumably signalled to resume streaming after the
    // observer has caught up — confirm against the observer implementation.
    resume_updates: watch::Sender<()>,
    _maintain_remote_snapshot: Task<Option<()>>,
}
367
/// Events emitted by a [`Worktree`] to its observers.
#[derive(Debug, Clone)]
pub enum Event {
    /// Entries were added, removed, or modified.
    UpdatedEntries(UpdatedEntriesSet),
    /// Git repository state changed for the given repositories.
    UpdatedGitRepositories(UpdatedGitRepositoriesSet),
    /// The commondir of the repository containing the worktree root changed.
    UpdatedRootRepoCommonDir,
    /// The entry with the given id was deleted.
    DeletedEntry(ProjectEntryId),
    /// The worktree root itself has been deleted (for single-file worktrees)
    Deleted,
}
377
// Worktrees broadcast `Event`s (entry updates, git updates, deletions) to observers.
impl EventEmitter<Event> for Worktree {}
379
380impl Worktree {
    /// Creates a worktree backed by the local file system at `path`.
    ///
    /// Stats the path, keeps a handle to the root (so it can be located again
    /// after a move), discovers the commondir of any enclosing git repository,
    /// seeds the snapshot with a root entry, and starts the background scanner
    /// when `scanning_enabled` is true.
    pub async fn local(
        path: impl Into<Arc<Path>>,
        visible: bool,
        fs: Arc<dyn Fs>,
        next_entry_id: Arc<AtomicUsize>,
        scanning_enabled: bool,
        worktree_id: WorktreeId,
        cx: &mut AsyncApp,
    ) -> Result<Entity<Self>> {
        let abs_path = path.into();
        let metadata = fs
            .metadata(&abs_path)
            .await
            .context("failed to stat worktree path")?;

        let fs_case_sensitive = fs.is_case_sensitive().await;

        // Only open a root handle when the path actually exists; failure to
        // open is logged but not fatal.
        let root_file_handle = if metadata.as_ref().is_some() {
            fs.open_handle(&abs_path)
                .await
                .with_context(|| {
                    format!(
                        "failed to open local worktree root at {}",
                        abs_path.display()
                    )
                })
                .log_err()
        } else {
            None
        };

        let root_repo_common_dir = discover_root_repo_common_dir(&abs_path, fs.as_ref())
            .await
            .map(SanitizedPath::from_arc);

        Ok(cx.new(move |cx: &mut Context<Worktree>| {
            let mut snapshot = LocalSnapshot {
                ignores_by_parent_abs_path: Default::default(),
                global_gitignore: Default::default(),
                repo_exclude_by_work_dir_abs_path: Default::default(),
                git_repositories: Default::default(),
                snapshot: Snapshot::new(
                    worktree_id,
                    // The root name is the final path component when it is
                    // valid unicode; otherwise the empty path.
                    abs_path
                        .file_name()
                        .and_then(|f| f.to_str())
                        .map_or(RelPath::empty().into(), |f| {
                            RelPath::unix(f).unwrap().into()
                        }),
                    abs_path.clone(),
                    PathStyle::local(),
                ),
                root_file_handle,
            };
            snapshot.root_repo_common_dir = root_repo_common_dir;

            let worktree_id = snapshot.id();
            let settings_location = Some(SettingsLocation {
                worktree_id,
                path: RelPath::empty(),
            });

            let settings = WorktreeSettings::get(settings_location, cx).clone();
            // Restart the background scanners whenever this worktree's
            // settings change.
            cx.observe_global::<SettingsStore>(move |this, cx| {
                if let Self::Local(this) = this {
                    let settings = WorktreeSettings::get(settings_location, cx).clone();
                    if this.settings != settings {
                        this.settings = settings;
                        this.restart_background_scanners(cx);
                    }
                }
            })
            .detach();

            let share_private_files = false;
            if let Some(metadata) = metadata {
                // Seed the snapshot with an entry for the root itself.
                let mut entry = Entry::new(
                    RelPath::empty().into(),
                    &metadata,
                    ProjectEntryId::new(&next_entry_id),
                    snapshot.root_char_bag,
                    None,
                );
                if metadata.is_dir {
                    if !scanning_enabled {
                        entry.kind = EntryKind::UnloadedDir;
                    }
                } else {
                    // Single-file worktree: apply privacy/hidden settings
                    // directly to the root file.
                    if let Some(file_name) = abs_path.file_name()
                        && let Some(file_name) = file_name.to_str()
                        && let Ok(path) = RelPath::unix(file_name)
                    {
                        entry.is_private = !share_private_files && settings.is_path_private(path);
                        entry.is_hidden = settings.is_path_hidden(path);
                    }
                }
                cx.foreground_executor()
                    .block_on(snapshot.insert_entry(entry, fs.as_ref()));
            }

            let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
            let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
            let mut worktree = LocalWorktree {
                share_private_files,
                next_entry_id,
                snapshot,
                is_scanning: watch::channel_with(true),
                snapshot_subscriptions: Default::default(),
                update_observer: None,
                scan_requests_tx,
                path_prefixes_to_scan_tx,
                _background_scanner_tasks: Vec::new(),
                fs,
                fs_case_sensitive,
                visible,
                settings,
                scanning_enabled,
            };
            worktree.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
            Worktree::Local(worktree)
        }))
    }
503
    /// Creates a worktree that mirrors one hosted by a remote collaborator.
    ///
    /// Incoming `UpdateWorktree` messages are first applied to a background
    /// copy of the snapshot; a foreground task then syncs the entity to that
    /// copy, emits events, and forwards updates to any observer.
    pub fn remote(
        project_id: u64,
        replica_id: ReplicaId,
        worktree: proto::WorktreeMetadata,
        client: AnyProtoClient,
        path_style: PathStyle,
        cx: &mut App,
    ) -> Entity<Self> {
        cx.new(|cx: &mut Context<Self>| {
            let snapshot = Snapshot::new(
                WorktreeId::from_proto(worktree.id),
                // Fall back to the empty path if the host sent an invalid root name.
                RelPath::from_proto(&worktree.root_name)
                    .unwrap_or_else(|_| RelPath::empty().into()),
                Path::new(&worktree.abs_path).into(),
                path_style,
            );

            let background_snapshot = Arc::new(Mutex::new((
                snapshot.clone(),
                Vec::<proto::UpdateWorktree>::new(),
            )));
            let (background_updates_tx, mut background_updates_rx) =
                mpsc::unbounded::<proto::UpdateWorktree>();
            let (mut snapshot_updated_tx, mut snapshot_updated_rx) = watch::channel();

            let worktree_id = snapshot.id();
            let settings_location = Some(SettingsLocation {
                worktree_id,
                path: RelPath::empty(),
            });

            let settings = WorktreeSettings::get(settings_location, cx).clone();
            let worktree = RemoteWorktree {
                client,
                project_id,
                replica_id,
                snapshot,
                file_scan_inclusions: settings.parent_dir_scan_inclusions.clone(),
                background_snapshot: background_snapshot.clone(),
                updates_tx: Some(background_updates_tx),
                update_observer: None,
                snapshot_subscriptions: Default::default(),
                visible: worktree.visible,
                disconnected: false,
            };

            // Apply updates to a separate snapshot in a background task, then
            // send them to a foreground task which updates the model.
            cx.background_spawn(async move {
                while let Some(update) = background_updates_rx.next().await {
                    {
                        let mut lock = background_snapshot.lock();
                        lock.0.apply_remote_update(
                            update.clone(),
                            &settings.parent_dir_scan_inclusions,
                        );
                        // Queue the raw update so the foreground task can
                        // forward it to any registered observer.
                        lock.1.push(update);
                    }
                    snapshot_updated_tx.send(()).await.ok();
                }
            })
            .detach();

            // On the foreground task, update to the latest snapshot and notify
            // any update observer of all updates that led to that snapshot.
            cx.spawn(async move |this, cx| {
                while (snapshot_updated_rx.recv().await).is_some() {
                    this.update(cx, |this, cx| {
                        let mut entries_changed = false;
                        let this = this.as_remote_mut().unwrap();
                        let old_root_repo_common_dir = this.snapshot.root_repo_common_dir.clone();
                        {
                            let mut lock = this.background_snapshot.lock();
                            this.snapshot = lock.0.clone();
                            for update in lock.1.drain(..) {
                                entries_changed |= !update.updated_entries.is_empty()
                                    || !update.removed_entries.is_empty();
                                if let Some(tx) = &this.update_observer {
                                    tx.unbounded_send(update).ok();
                                }
                            }
                        };

                        if entries_changed {
                            cx.emit(Event::UpdatedEntries(Arc::default()));
                        }
                        if this.snapshot.root_repo_common_dir != old_root_repo_common_dir {
                            cx.emit(Event::UpdatedRootRepoCommonDir);
                        }
                        cx.notify();
                        // Resolve subscriptions whose scan id has now been observed.
                        while let Some((scan_id, _)) = this.snapshot_subscriptions.front() {
                            if this.observed_snapshot(*scan_id) {
                                let (_, tx) = this.snapshot_subscriptions.pop_front().unwrap();
                                let _ = tx.send(());
                            } else {
                                break;
                            }
                        }
                    })?;
                }
                anyhow::Ok(())
            })
            .detach();

            Worktree::Remote(worktree)
        })
    }
611
612 pub fn as_local(&self) -> Option<&LocalWorktree> {
613 if let Worktree::Local(worktree) = self {
614 Some(worktree)
615 } else {
616 None
617 }
618 }
619
620 pub fn as_remote(&self) -> Option<&RemoteWorktree> {
621 if let Worktree::Remote(worktree) = self {
622 Some(worktree)
623 } else {
624 None
625 }
626 }
627
628 pub fn as_local_mut(&mut self) -> Option<&mut LocalWorktree> {
629 if let Worktree::Local(worktree) = self {
630 Some(worktree)
631 } else {
632 None
633 }
634 }
635
636 pub fn as_remote_mut(&mut self) -> Option<&mut RemoteWorktree> {
637 if let Worktree::Remote(worktree) = self {
638 Some(worktree)
639 } else {
640 None
641 }
642 }
643
644 pub fn is_local(&self) -> bool {
645 matches!(self, Worktree::Local(_))
646 }
647
648 pub fn is_remote(&self) -> bool {
649 !self.is_local()
650 }
651
652 pub fn settings_location(&self, _: &Context<Self>) -> SettingsLocation<'static> {
653 SettingsLocation {
654 worktree_id: self.id(),
655 path: RelPath::empty(),
656 }
657 }
658
659 pub fn snapshot(&self) -> Snapshot {
660 match self {
661 Worktree::Local(worktree) => worktree.snapshot.snapshot.clone(),
662 Worktree::Remote(worktree) => worktree.snapshot.clone(),
663 }
664 }
665
666 pub fn scan_id(&self) -> usize {
667 match self {
668 Worktree::Local(worktree) => worktree.snapshot.scan_id,
669 Worktree::Remote(worktree) => worktree.snapshot.scan_id,
670 }
671 }
672
673 pub fn metadata_proto(&self) -> proto::WorktreeMetadata {
674 proto::WorktreeMetadata {
675 id: self.id().to_proto(),
676 root_name: self.root_name().to_proto(),
677 visible: self.is_visible(),
678 abs_path: self.abs_path().to_string_lossy().into_owned(),
679 }
680 }
681
682 pub fn completed_scan_id(&self) -> usize {
683 match self {
684 Worktree::Local(worktree) => worktree.snapshot.completed_scan_id,
685 Worktree::Remote(worktree) => worktree.snapshot.completed_scan_id,
686 }
687 }
688
689 pub fn is_visible(&self) -> bool {
690 match self {
691 Worktree::Local(worktree) => worktree.visible,
692 Worktree::Remote(worktree) => worktree.visible,
693 }
694 }
695
696 pub fn replica_id(&self) -> ReplicaId {
697 match self {
698 Worktree::Local(_) => ReplicaId::LOCAL,
699 Worktree::Remote(worktree) => worktree.replica_id,
700 }
701 }
702
703 pub fn abs_path(&self) -> Arc<Path> {
704 match self {
705 Worktree::Local(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
706 Worktree::Remote(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
707 }
708 }
709
710 pub fn root_file(&self, cx: &Context<Self>) -> Option<Arc<File>> {
711 let entry = self.root_entry()?;
712 Some(File::for_entry(entry.clone(), cx.entity()))
713 }
714
715 pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
716 where
717 F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
718 Fut: 'static + Send + Future<Output = bool>,
719 {
720 match self {
721 Worktree::Local(this) => this.observe_updates(project_id, cx, callback),
722 Worktree::Remote(this) => this.observe_updates(project_id, cx, callback),
723 }
724 }
725
726 pub fn stop_observing_updates(&mut self) {
727 match self {
728 Worktree::Local(this) => {
729 this.update_observer.take();
730 }
731 Worktree::Remote(this) => {
732 this.update_observer.take();
733 }
734 }
735 }
736
737 pub fn wait_for_snapshot(
738 &mut self,
739 scan_id: usize,
740 ) -> impl Future<Output = Result<()>> + use<> {
741 match self {
742 Worktree::Local(this) => this.wait_for_snapshot(scan_id).boxed(),
743 Worktree::Remote(this) => this.wait_for_snapshot(scan_id).boxed(),
744 }
745 }
746
747 #[cfg(feature = "test-support")]
748 pub fn has_update_observer(&self) -> bool {
749 match self {
750 Worktree::Local(this) => this.update_observer.is_some(),
751 Worktree::Remote(this) => this.update_observer.is_some(),
752 }
753 }
754
755 pub fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
756 match self {
757 Worktree::Local(this) => this.load_file(path, cx),
758 Worktree::Remote(_) => {
759 Task::ready(Err(anyhow!("remote worktrees can't yet load files")))
760 }
761 }
762 }
763
764 pub fn load_binary_file(
765 &self,
766 path: &RelPath,
767 cx: &Context<Worktree>,
768 ) -> Task<Result<LoadedBinaryFile>> {
769 match self {
770 Worktree::Local(this) => this.load_binary_file(path, cx),
771 Worktree::Remote(_) => {
772 Task::ready(Err(anyhow!("remote worktrees can't yet load binary files")))
773 }
774 }
775 }
776
777 pub fn write_file(
778 &self,
779 path: Arc<RelPath>,
780 text: Rope,
781 line_ending: LineEnding,
782 encoding: &'static Encoding,
783 has_bom: bool,
784 cx: &Context<Worktree>,
785 ) -> Task<Result<Arc<File>>> {
786 match self {
787 Worktree::Local(this) => {
788 this.write_file(path, text, line_ending, encoding, has_bom, cx)
789 }
790 Worktree::Remote(_) => {
791 Task::ready(Err(anyhow!("remote worktree can't yet write files")))
792 }
793 }
794 }
795
    /// Creates a file or directory at `path` within the worktree.
    ///
    /// Local worktrees perform the operation directly. Remote worktrees send
    /// a `CreateProjectEntry` request to the host, then wait for the local
    /// replica to catch up to the host's scan before resolving.
    pub fn create_entry(
        &mut self,
        path: Arc<RelPath>,
        is_directory: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let worktree_id = self.id();
        match self {
            Worktree::Local(this) => this.create_entry(path, is_directory, content, cx),
            Worktree::Remote(this) => {
                let project_id = this.project_id;
                let request = this.client.request(proto::CreateProjectEntry {
                    worktree_id: worktree_id.to_proto(),
                    project_id,
                    path: path.as_ref().to_proto(),
                    content,
                    is_directory,
                });
                cx.spawn(async move |this, cx| {
                    let response = request.await?;
                    match response.entry {
                        // The host indexed the new entry: insert it and wait
                        // for our snapshot to reach the host's scan id.
                        Some(entry) => this
                            .update(cx, |worktree, cx| {
                                worktree.as_remote_mut().unwrap().insert_entry(
                                    entry,
                                    response.worktree_scan_id as usize,
                                    cx,
                                )
                            })?
                            .await
                            .map(CreatedEntry::Included),
                        // The host excluded the path from indexing: report
                        // only the absolute path it was created at.
                        None => {
                            let abs_path =
                                this.read_with(cx, |worktree, _| worktree.absolutize(&path))?;
                            Ok(CreatedEntry::Excluded { abs_path })
                        }
                    }
                })
            }
        }
    }
838
    /// Deletes the entry with the given id (moving it to the trash when
    /// `trash` is true), emitting [`Event::DeletedEntry`] for the entry and
    /// all of its descendants.
    ///
    /// Returns `None` if no such entry exists.
    pub fn delete_entry(
        &mut self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &mut Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let task = match self {
            Worktree::Local(this) => this.delete_entry(entry_id, trash, cx),
            Worktree::Remote(this) => this.delete_entry(entry_id, trash, cx),
        }?;

        // Look the entry up again (immutably) to walk its subtree.
        let entry = match &*self {
            Worktree::Local(this) => this.entry_for_id(entry_id),
            Worktree::Remote(this) => this.entry_for_id(entry_id),
        }?;

        let mut ids = vec![entry_id];
        let path = &*entry.path;

        self.get_children_ids_recursive(path, &mut ids);

        for id in ids {
            cx.emit(Event::DeletedEntry(id));
        }
        Some(task)
    }

    /// Appends the ids of all entries beneath `path` (recursively) to `ids`.
    fn get_children_ids_recursive(&self, path: &RelPath, ids: &mut Vec<ProjectEntryId>) {
        let children_iter = self.child_entries(path);
        for child in children_iter {
            ids.push(child.id);
            self.get_children_ids_recursive(&child.path, ids);
        }
    }
873
874 // pub fn rename_entry(
875 // &mut self,
876 // entry_id: ProjectEntryId,
877 // new_path: Arc<RelPath>,
878 // cx: &Context<Self>,
879 // ) -> Task<Result<CreatedEntry>> {
880 // match self {
881 // Worktree::Local(this) => this.rename_entry(entry_id, new_path, cx),
882 // Worktree::Remote(this) => this.rename_entry(entry_id, new_path, cx),
883 // }
884 // }
885
    /// Copies paths from outside the worktree into `target_directory`,
    /// returning the project entry ids of the newly created entries.
    pub fn copy_external_entries(
        &mut self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        match self {
            // The local worktree uses its own `Fs`; the `fs` argument is only
            // needed by the remote implementation.
            Worktree::Local(this) => this.copy_external_entries(target_directory, paths, cx),
            Worktree::Remote(this) => this.copy_external_entries(target_directory, paths, fs, cx),
        }
    }

    /// Ensures the given directory entry's children are loaded into the
    /// snapshot. Returns `None` if the entry does not exist.
    pub fn expand_entry(
        &mut self,
        entry_id: ProjectEntryId,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        match self {
            Worktree::Local(this) => this.expand_entry(entry_id, cx),
            Worktree::Remote(this) => {
                let response = this.client.request(proto::ExpandProjectEntry {
                    project_id: this.project_id,
                    entry_id: entry_id.to_proto(),
                });
                Some(cx.spawn(async move |this, cx| {
                    let response = response.await?;
                    // Wait for the replica to reflect the host's expansion.
                    this.update(cx, |this, _| {
                        this.as_remote_mut()
                            .unwrap()
                            .wait_for_snapshot(response.worktree_scan_id as usize)
                    })?
                    .await?;
                    Ok(())
                }))
            }
        }
    }

    /// Recursively loads the given entry and all of its descendants into the
    /// snapshot. Returns `None` if the entry does not exist.
    pub fn expand_all_for_entry(
        &mut self,
        entry_id: ProjectEntryId,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        match self {
            Worktree::Local(this) => this.expand_all_for_entry(entry_id, cx),
            Worktree::Remote(this) => {
                let response = this.client.request(proto::ExpandAllForProjectEntry {
                    project_id: this.project_id,
                    entry_id: entry_id.to_proto(),
                });
                Some(cx.spawn(async move |this, cx| {
                    let response = response.await?;
                    // Wait for the replica to reflect the host's expansion.
                    this.update(cx, |this, _| {
                        this.as_remote_mut()
                            .unwrap()
                            .wait_for_snapshot(response.worktree_scan_id as usize)
                    })?
                    .await?;
                    Ok(())
                }))
            }
        }
    }
950
    /// Handles a `CreateProjectEntry` RPC request from a collaborator,
    /// creating the entry and reporting the scan id it was created under.
    pub async fn handle_create_entry(
        this: Entity<Self>,
        request: proto::CreateProjectEntry,
        mut cx: AsyncApp,
    ) -> Result<proto::ProjectEntryResponse> {
        // Capture the scan id in the same update in which the entry is created.
        let (scan_id, entry) = this.update(&mut cx, |this, cx| {
            anyhow::Ok((
                this.scan_id(),
                this.create_entry(
                    RelPath::from_proto(&request.path).with_context(|| {
                        format!("received invalid relative path {:?}", request.path)
                    })?,
                    request.is_directory,
                    request.content,
                    cx,
                ),
            ))
        })?;
        Ok(proto::ProjectEntryResponse {
            // `entry` is `None` when the path was excluded from indexing.
            entry: match &entry.await? {
                CreatedEntry::Included(entry) => Some(entry.into()),
                CreatedEntry::Excluded { .. } => None,
            },
            worktree_scan_id: scan_id as u64,
        })
    }

    /// Handles a `DeleteProjectEntry` RPC request from a collaborator.
    pub async fn handle_delete_entry(
        this: Entity<Self>,
        request: proto::DeleteProjectEntry,
        mut cx: AsyncApp,
    ) -> Result<proto::ProjectEntryResponse> {
        // Capture the scan id before the deletion task is awaited.
        let (scan_id, task) = this.update(&mut cx, |this, cx| {
            (
                this.scan_id(),
                this.delete_entry(
                    ProjectEntryId::from_proto(request.entry_id),
                    request.use_trash,
                    cx,
                ),
            )
        });
        task.ok_or_else(|| anyhow::anyhow!("invalid entry"))?
            .await?;
        Ok(proto::ProjectEntryResponse {
            entry: None,
            worktree_scan_id: scan_id as u64,
        })
    }

    /// Handles an `ExpandProjectEntry` RPC request from a collaborator.
    pub async fn handle_expand_entry(
        this: Entity<Self>,
        request: proto::ExpandProjectEntry,
        mut cx: AsyncApp,
    ) -> Result<proto::ExpandProjectEntryResponse> {
        let task = this.update(&mut cx, |this, cx| {
            this.expand_entry(ProjectEntryId::from_proto(request.entry_id), cx)
        });
        task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
            .await?;
        // Report the scan id reached after the expansion completed.
        let scan_id = this.read_with(&cx, |this, _| this.scan_id());
        Ok(proto::ExpandProjectEntryResponse {
            worktree_scan_id: scan_id as u64,
        })
    }

    /// Handles an `ExpandAllForProjectEntry` RPC request from a collaborator.
    pub async fn handle_expand_all_for_entry(
        this: Entity<Self>,
        request: proto::ExpandAllForProjectEntry,
        mut cx: AsyncApp,
    ) -> Result<proto::ExpandAllForProjectEntryResponse> {
        let task = this.update(&mut cx, |this, cx| {
            this.expand_all_for_entry(ProjectEntryId::from_proto(request.entry_id), cx)
        });
        task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
            .await?;
        // Report the scan id reached after the expansion completed.
        let scan_id = this.read_with(&cx, |this, _| this.scan_id());
        Ok(proto::ExpandAllForProjectEntryResponse {
            worktree_scan_id: scan_id as u64,
        })
    }
1032
    /// Whether this worktree represents a single file rather than a directory.
    pub fn is_single_file(&self) -> bool {
        self.root_dir().is_none()
    }

    /// For visible worktrees, returns the path with the worktree name as the first component.
    /// Otherwise, returns an absolute path.
    pub fn full_path(&self, worktree_relative_path: &RelPath) -> PathBuf {
        if self.is_visible() {
            self.root_name()
                .join(worktree_relative_path)
                .display(self.path_style)
                .to_string()
                .into()
        } else {
            let full_path = self.abs_path();
            // Abbreviate paths under the user's home directory as `~/...`
            // for local worktrees.
            let mut full_path_string = if self.is_local()
                && let Ok(stripped) = full_path.strip_prefix(home_dir())
            {
                self.path_style
                    .join("~", &*stripped.to_string_lossy())
                    .unwrap()
            } else {
                full_path.to_string_lossy().into_owned()
            };

            // Only append a separator when the relative path is non-empty.
            if worktree_relative_path.components().next().is_some() {
                full_path_string.push_str(self.path_style.primary_separator());
                full_path_string.push_str(&worktree_relative_path.display(self.path_style));
            }

            full_path_string.into()
        }
    }
1066}
1067
1068impl LocalWorktree {
    /// The file-system implementation backing this worktree.
    pub fn fs(&self) -> &Arc<dyn Fs> {
        &self.fs
    }

    /// Whether `path` should be treated as private (not shared with
    /// collaborators), per the worktree settings and share override.
    pub fn is_path_private(&self, path: &RelPath) -> bool {
        !self.share_private_files && self.settings.is_path_private(path)
    }

    /// Whether the underlying file system is case-sensitive.
    pub fn fs_is_case_sensitive(&self) -> bool {
        self.fs_case_sensitive
    }
1080
    /// Replaces the scan-request channels and spawns a fresh background
    /// scanner, then re-queues all always-included paths so their entries get
    /// refreshed by the new scanner.
    fn restart_background_scanners(&mut self, cx: &Context<Worktree>) {
        let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
        let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
        // Overwriting the senders drops the previous channel endpoints held
        // by this worktree.
        self.scan_requests_tx = scan_requests_tx;
        self.path_prefixes_to_scan_tx = path_prefixes_to_scan_tx;

        self.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
        let always_included_entries = mem::take(&mut self.snapshot.always_included_entries);
        log::debug!(
            "refreshing entries for the following always included paths: {:?}",
            always_included_entries
        );

        // Cleans up old always included entries to ensure they get updated properly. Otherwise,
        // nested always included entries may not get updated and will result in out-of-date info.
        self.refresh_entries_for_paths(always_included_entries);
    }
1098
    /// Spawns the two tasks that keep this worktree up to date:
    /// 1. a background task running `BackgroundScanner` over filesystem events, and
    /// 2. a foreground task that applies the scanner's `ScanState` messages to
    ///    this entity (snapshot updates, root moves/deletions, scanning flag).
    fn start_background_scanner(
        &mut self,
        scan_requests_rx: channel::Receiver<ScanRequest>,
        path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
        cx: &Context<Worktree>,
    ) {
        let snapshot = self.snapshot();
        let share_private_files = self.share_private_files;
        let next_entry_id = self.next_entry_id.clone();
        let fs = self.fs.clone();
        let scanning_enabled = self.scanning_enabled;
        let settings = self.settings.clone();
        let (scan_states_tx, mut scan_states_rx) = mpsc::unbounded();
        let background_scanner = cx.background_spawn({
            let abs_path = snapshot.abs_path.as_path().to_path_buf();
            let background = cx.background_executor().clone();
            async move {
                // When scanning is disabled, substitute a never-ready event
                // stream and a no-op watcher.
                let (events, watcher) = if scanning_enabled {
                    fs.watch(&abs_path, FS_WATCH_LATENCY).await
                } else {
                    (Box::pin(stream::pending()) as _, Arc::new(NullWatcher) as _)
                };
                let fs_case_sensitive = fs.is_case_sensitive().await;

                let is_single_file = snapshot.snapshot.root_dir().is_none();
                let mut scanner = BackgroundScanner {
                    fs,
                    fs_case_sensitive,
                    status_updates_tx: scan_states_tx,
                    executor: background,
                    scan_requests_rx,
                    path_prefixes_to_scan_rx,
                    next_entry_id,
                    state: async_lock::Mutex::new(BackgroundScannerState {
                        prev_snapshot: snapshot.snapshot.clone(),
                        snapshot,
                        scanned_dirs: Default::default(),
                        scanning_enabled,
                        path_prefixes_to_scan: Default::default(),
                        paths_to_scan: Default::default(),
                        removed_entries: Default::default(),
                        changed_paths: Default::default(),
                    }),
                    phase: BackgroundScannerPhase::InitialScan,
                    share_private_files,
                    settings,
                    watcher,
                    is_single_file,
                };

                scanner
                    .run(Box::pin(events.map(|events| events.into_iter().collect())))
                    .await;
            }
        });
        // Foreground task: drain scan states and apply them to the worktree
        // entity until either the scanner or the entity is dropped.
        let scan_state_updater = cx.spawn(async move |this, cx| {
            while let Some((state, this)) = scan_states_rx.next().await.zip(this.upgrade()) {
                this.update(cx, |this, cx| {
                    let this = this.as_local_mut().unwrap();
                    match state {
                        ScanState::Started => {
                            *this.is_scanning.0.borrow_mut() = true;
                        }
                        ScanState::Updated {
                            snapshot,
                            changes,
                            barrier,
                            scanning,
                        } => {
                            *this.is_scanning.0.borrow_mut() = scanning;
                            this.set_snapshot(snapshot, changes, cx);
                            // Dropping the barrier signals any waiters that
                            // this update has been applied.
                            drop(barrier);
                        }
                        ScanState::RootUpdated { new_path } => {
                            this.update_abs_path_and_refresh(new_path, cx);
                        }
                        ScanState::RootDeleted => {
                            log::info!(
                                "worktree root {} no longer exists, closing worktree",
                                this.abs_path().display()
                            );
                            cx.emit(Event::Deleted);
                        }
                    }
                });
            }
        });
        self._background_scanner_tasks = vec![background_scanner, scan_state_updater];
        *self.is_scanning.0.borrow_mut() = true;
    }
1189
    /// Installs a new snapshot produced by the background scanner, emitting
    /// entry/repository/root-repo-change events and waking any subscribers
    /// whose awaited scan id has now completed.
    fn set_snapshot(
        &mut self,
        mut new_snapshot: LocalSnapshot,
        entry_changes: UpdatedEntriesSet,
        cx: &mut Context<Worktree>,
    ) {
        // Diff repositories against the outgoing snapshot before replacing it.
        let repo_changes = self.changed_repos(&self.snapshot, &mut new_snapshot);

        new_snapshot.root_repo_common_dir = new_snapshot
            .local_repo_for_work_directory_path(RelPath::empty())
            .map(|repo| SanitizedPath::from_arc(repo.common_dir_abs_path.clone()));

        let root_repo_common_dir_changed =
            self.snapshot.root_repo_common_dir != new_snapshot.root_repo_common_dir;
        self.snapshot = new_snapshot;

        // Forward the new snapshot to any remote-update observer.
        if let Some(share) = self.update_observer.as_mut() {
            share
                .snapshots_tx
                .unbounded_send((self.snapshot.clone(), entry_changes.clone()))
                .ok();
        }

        if !entry_changes.is_empty() {
            cx.emit(Event::UpdatedEntries(entry_changes));
        }
        if !repo_changes.is_empty() {
            cx.emit(Event::UpdatedGitRepositories(repo_changes));
        }
        if root_repo_common_dir_changed {
            cx.emit(Event::UpdatedRootRepoCommonDir);
        }

        // Subscriptions are kept sorted by scan id, so resolve from the front
        // until we hit one that hasn't completed yet.
        while let Some((scan_id, _)) = self.snapshot_subscriptions.front() {
            if self.snapshot.completed_scan_id >= *scan_id {
                let (_, tx) = self.snapshot_subscriptions.pop_front().unwrap();
                tx.send(()).ok();
            } else {
                break;
            }
        }
    }
1232
    /// Computes the set of git repositories that were added, removed, or
    /// changed between `old_snapshot` and `new_snapshot` by merge-joining the
    /// two repository maps, which iterate in work-directory-id order.
    fn changed_repos(
        &self,
        old_snapshot: &LocalSnapshot,
        new_snapshot: &mut LocalSnapshot,
    ) -> UpdatedGitRepositoriesSet {
        let mut changes = Vec::new();
        let mut old_repos = old_snapshot.git_repositories.iter().peekable();
        let new_repos = new_snapshot.git_repositories.clone();
        let mut new_repos = new_repos.iter().peekable();

        loop {
            match (new_repos.peek().map(clone), old_repos.peek().map(clone)) {
                (Some((new_entry_id, new_repo)), Some((old_entry_id, old_repo))) => {
                    match Ord::cmp(&new_entry_id, &old_entry_id) {
                        // Present only in the new snapshot: repository added.
                        Ordering::Less => {
                            changes.push(UpdatedGitRepository {
                                work_directory_id: new_entry_id,
                                old_work_directory_abs_path: None,
                                new_work_directory_abs_path: Some(
                                    new_repo.work_directory_abs_path.clone(),
                                ),
                                dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                repository_dir_abs_path: Some(
                                    new_repo.repository_dir_abs_path.clone(),
                                ),
                                common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                            });
                            new_repos.next();
                        }
                        // Present in both: report only if re-scanned or moved.
                        Ordering::Equal => {
                            if new_repo.git_dir_scan_id != old_repo.git_dir_scan_id
                                || new_repo.work_directory_abs_path
                                    != old_repo.work_directory_abs_path
                            {
                                changes.push(UpdatedGitRepository {
                                    work_directory_id: new_entry_id,
                                    old_work_directory_abs_path: Some(
                                        old_repo.work_directory_abs_path.clone(),
                                    ),
                                    new_work_directory_abs_path: Some(
                                        new_repo.work_directory_abs_path.clone(),
                                    ),
                                    dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                    repository_dir_abs_path: Some(
                                        new_repo.repository_dir_abs_path.clone(),
                                    ),
                                    common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                                });
                            }
                            new_repos.next();
                            old_repos.next();
                        }
                        // Present only in the old snapshot: repository removed.
                        Ordering::Greater => {
                            changes.push(UpdatedGitRepository {
                                work_directory_id: old_entry_id,
                                old_work_directory_abs_path: Some(
                                    old_repo.work_directory_abs_path.clone(),
                                ),
                                new_work_directory_abs_path: None,
                                dot_git_abs_path: None,
                                repository_dir_abs_path: None,
                                common_dir_abs_path: None,
                            });
                            old_repos.next();
                        }
                    }
                }
                // Old side exhausted: remaining new repos are additions.
                (Some((entry_id, repo)), None) => {
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: None,
                        new_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    new_repos.next();
                }
                // New side exhausted: remaining old repos are removals.
                (None, Some((entry_id, repo))) => {
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        new_work_directory_abs_path: None,
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    old_repos.next();
                }
                (None, None) => break,
            }
        }

        // Local helper that turns `peek`'s `&(&K, &V)` into owned `(K, V)`.
        fn clone<T: Clone, U: Clone>(value: &(&T, &U)) -> (T, U) {
            (value.0.clone(), value.1.clone())
        }

        changes.into()
    }
1332
1333 pub fn scan_complete(&self) -> impl Future<Output = ()> + use<> {
1334 let mut is_scanning_rx = self.is_scanning.1.clone();
1335 async move {
1336 let mut is_scanning = *is_scanning_rx.borrow();
1337 while is_scanning {
1338 if let Some(value) = is_scanning_rx.recv().await {
1339 is_scanning = value;
1340 } else {
1341 break;
1342 }
1343 }
1344 }
1345 }
1346
    /// Returns a future that resolves once a scan with id at least `scan_id`
    /// has completed. Errors if the subscription sender is dropped first.
    pub fn wait_for_snapshot(
        &mut self,
        scan_id: usize,
    ) -> impl Future<Output = Result<()>> + use<> {
        let (tx, rx) = oneshot::channel();
        if self.snapshot.completed_scan_id >= scan_id {
            // Already reached the requested scan: resolve immediately.
            tx.send(()).ok();
        } else {
            // Insert in sorted order so `set_snapshot` can pop completed
            // subscriptions from the front.
            match self
                .snapshot_subscriptions
                .binary_search_by_key(&scan_id, |probe| probe.0)
            {
                Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
            }
        }

        async move {
            rx.await?;
            Ok(())
        }
    }
1368
    /// Returns a clone of the current local snapshot.
    pub fn snapshot(&self) -> LocalSnapshot {
        self.snapshot.clone()
    }
1372
    /// Returns a clone of this worktree's settings.
    pub fn settings(&self) -> WorktreeSettings {
        self.settings.clone()
    }
1376
    /// Loads the raw bytes of the file at `path`, refreshing its worktree
    /// entry in parallel. For files excluded from the worktree, a detached
    /// `File` (with no `entry_id`) is constructed from fresh fs metadata.
    fn load_binary_file(
        &self,
        path: &RelPath,
        cx: &Context<Worktree>,
    ) -> Task<Result<LoadedBinaryFile>> {
        let path = Arc::from(path);
        let abs_path = self.absolutize(&path);
        let fs = self.fs.clone();
        let entry = self.refresh_entry(path.clone(), None, cx);
        let is_private = self.is_path_private(&path);

        let worktree = cx.weak_entity();
        cx.background_spawn(async move {
            let content = fs.load_bytes(&abs_path).await?;

            let worktree = worktree.upgrade().context("worktree was dropped")?;
            let file = match entry.await? {
                Some(entry) => File::for_entry(entry, worktree),
                // Excluded path: no worktree entry exists, so stat the file
                // directly to build its disk state.
                None => {
                    let metadata = fs
                        .metadata(&abs_path)
                        .await
                        .with_context(|| {
                            format!("Loading metadata for excluded file {abs_path:?}")
                        })?
                        .with_context(|| {
                            format!("Excluded file {abs_path:?} got removed during loading")
                        })?;
                    Arc::new(File {
                        entry_id: None,
                        worktree,
                        path,
                        disk_state: DiskState::Present {
                            mtime: metadata.mtime,
                            size: metadata.len,
                        },
                        is_local: true,
                        is_private,
                    })
                }
            };

            Ok(LoadedBinaryFile { file, content })
        })
    }
1422
    /// Loads and decodes the text file at `path`, refreshing its worktree
    /// entry in parallel. Files at or above 6 GB are rejected outright, and
    /// excluded files get a detached `File` built from fresh fs metadata.
    #[ztracing::instrument(skip_all)]
    fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
        let path = Arc::from(path);
        let abs_path = self.absolutize(&path);
        let fs = self.fs.clone();
        let entry = self.refresh_entry(path.clone(), None, cx);
        let is_private = self.is_path_private(path.as_ref());

        let this = cx.weak_entity();
        cx.background_spawn(async move {
            // WARN: Temporary workaround for #27283.
            // We are not efficient with our memory usage per file, and use in excess of 64GB for a 10GB file
            // Therefore, as a temporary workaround to prevent system freezes, we just bail before opening a file
            // if it is too large
            // 5GB seems to be more reasonable, peaking at ~16GB, while 6GB jumps up to >24GB which seems like a
            // reasonable limit
            {
                const FILE_SIZE_MAX: u64 = 6 * 1024 * 1024 * 1024; // 6GB
                if let Ok(Some(metadata)) = fs.metadata(&abs_path).await
                    && metadata.len >= FILE_SIZE_MAX
                {
                    anyhow::bail!("File is too large to load");
                }
            }
            // Detect the file's encoding and decode it to UTF-8 text.
            let (text, encoding, has_bom) = decode_file_text(fs.as_ref(), &abs_path).await?;

            let worktree = this.upgrade().context("worktree was dropped")?;
            let file = match entry.await? {
                Some(entry) => File::for_entry(entry, worktree),
                // Excluded path: no worktree entry exists, so stat the file
                // directly to build its disk state.
                None => {
                    let metadata = fs
                        .metadata(&abs_path)
                        .await
                        .with_context(|| {
                            format!("Loading metadata for excluded file {abs_path:?}")
                        })?
                        .with_context(|| {
                            format!("Excluded file {abs_path:?} got removed during loading")
                        })?;
                    Arc::new(File {
                        entry_id: None,
                        worktree,
                        path,
                        disk_state: DiskState::Present {
                            mtime: metadata.mtime,
                            size: metadata.len,
                        },
                        is_local: true,
                        is_private,
                    })
                }
            };

            Ok(LoadedFile {
                file,
                text,
                encoding,
                has_bom,
            })
        })
    }
1484
1485 /// Find the lowest path in the worktree's datastructures that is an ancestor
1486 fn lowest_ancestor(&self, path: &RelPath) -> Arc<RelPath> {
1487 let mut lowest_ancestor = None;
1488 for path in path.ancestors() {
1489 if self.entry_for_path(path).is_some() {
1490 lowest_ancestor = Some(path.into());
1491 break;
1492 }
1493 }
1494
1495 lowest_ancestor.unwrap_or_else(|| RelPath::empty().into())
1496 }
1497
    /// Creates a file or directory at `path` (with optional initial `content`
    /// for files), then refreshes the new entry and every newly created
    /// ancestor directory. Excluded paths are created on disk but reported as
    /// `CreatedEntry::Excluded` without a worktree entry.
    pub fn create_entry(
        &self,
        path: Arc<RelPath>,
        is_dir: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let abs_path = self.absolutize(&path);
        let path_excluded = self.settings.is_path_excluded(&path);
        let fs = self.fs.clone();
        let task_abs_path = abs_path.clone();
        let write = cx.background_spawn(async move {
            if is_dir {
                fs.create_dir(&task_abs_path)
                    .await
                    .with_context(|| format!("creating directory {task_abs_path:?}"))
            } else {
                fs.write(&task_abs_path, content.as_deref().unwrap_or(&[]))
                    .await
                    .with_context(|| format!("creating file {task_abs_path:?}"))
            }
        });

        // The deepest already-known ancestor; everything between it and `path`
        // is new and needs an explicit refresh.
        let lowest_ancestor = self.lowest_ancestor(&path);
        cx.spawn(async move |this, cx| {
            write.await?;
            if path_excluded {
                return Ok(CreatedEntry::Excluded { abs_path });
            }

            let (result, refreshes) = this.update(cx, |this, cx| {
                let mut refreshes = Vec::new();
                let refresh_paths = path.strip_prefix(&lowest_ancestor).unwrap();
                for refresh_path in refresh_paths.ancestors() {
                    if refresh_path == RelPath::empty() {
                        continue;
                    }
                    let refresh_full_path = lowest_ancestor.join(refresh_path);

                    refreshes.push(this.as_local_mut().unwrap().refresh_entry(
                        refresh_full_path,
                        None,
                        cx,
                    ));
                }
                (
                    this.as_local_mut().unwrap().refresh_entry(path, None, cx),
                    refreshes,
                )
            })?;
            // Ancestor refresh failures are logged but don't fail the create.
            for refresh in refreshes {
                refresh.await.log_err();
            }

            Ok(result
                .await?
                .map(CreatedEntry::Included)
                .unwrap_or_else(|| CreatedEntry::Excluded { abs_path }))
        })
    }
1558
    /// Writes `text` to the file at `path` using the given line ending and
    /// character encoding (re-adding a BOM when `has_bom` is set), then
    /// refreshes the entry and returns the resulting `File`. Excluded paths
    /// are written but yield a detached `File` built from fresh fs metadata.
    pub fn write_file(
        &self,
        path: Arc<RelPath>,
        text: Rope,
        line_ending: LineEnding,
        encoding: &'static Encoding,
        has_bom: bool,
        cx: &Context<Worktree>,
    ) -> Task<Result<Arc<File>>> {
        let fs = self.fs.clone();
        let is_private = self.is_path_private(&path);
        let abs_path = self.absolutize(&path);

        let write = cx.background_spawn({
            let fs = fs.clone();
            let abs_path = abs_path.clone();
            async move {
                // For UTF-8, use the optimized `fs.save` which writes Rope chunks directly to disk
                // without allocating a contiguous string.
                if encoding == encoding_rs::UTF_8 && !has_bom {
                    return fs.save(&abs_path, &text, line_ending).await;
                }

                // For legacy encodings (e.g. Shift-JIS), we fall back to converting the entire Rope
                // to a String/Bytes in memory before writing.
                //
                // Note: This is inefficient for very large files compared to the streaming approach above,
                // but supporting streaming writes for arbitrary encodings would require a significant
                // refactor of the `fs` crate to expose a Writer interface.
                let text_string = text.to_string();
                let normalized_text = match line_ending {
                    LineEnding::Unix => text_string,
                    LineEnding::Windows => text_string.replace('\n', "\r\n"),
                };

                // Create the byte vector manually for UTF-16 encodings because encoding_rs encodes to UTF-8 by default (per WHATWG standards),
                // which is not what we want for saving files.
                let bytes = if encoding == encoding_rs::UTF_16BE {
                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
                    if has_bom {
                        data.extend_from_slice(&[0xFE, 0xFF]); // BOM
                    }
                    let utf16be_bytes =
                        normalized_text.encode_utf16().flat_map(|u| u.to_be_bytes());
                    data.extend(utf16be_bytes);
                    data.into()
                } else if encoding == encoding_rs::UTF_16LE {
                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
                    if has_bom {
                        data.extend_from_slice(&[0xFF, 0xFE]); // BOM
                    }
                    let utf16le_bytes =
                        normalized_text.encode_utf16().flat_map(|u| u.to_le_bytes());
                    data.extend(utf16le_bytes);
                    data.into()
                } else {
                    // For other encodings (Shift-JIS, UTF-8 with BOM, etc.), delegate to encoding_rs.
                    let bom_bytes = if has_bom {
                        if encoding == encoding_rs::UTF_8 {
                            vec![0xEF, 0xBB, 0xBF]
                        } else {
                            vec![]
                        }
                    } else {
                        vec![]
                    };
                    let (cow, _, _) = encoding.encode(&normalized_text);
                    if !bom_bytes.is_empty() {
                        let mut bytes = bom_bytes;
                        bytes.extend_from_slice(&cow);
                        bytes.into()
                    } else {
                        cow
                    }
                };

                fs.write(&abs_path, &bytes).await
            }
        });

        cx.spawn(async move |this, cx| {
            write.await?;
            let entry = this
                .update(cx, |this, cx| {
                    this.as_local_mut()
                        .unwrap()
                        .refresh_entry(path.clone(), None, cx)
                })?
                .await?;
            let worktree = this.upgrade().context("worktree dropped")?;
            if let Some(entry) = entry {
                Ok(File::for_entry(entry, worktree))
            } else {
                // Excluded path: build a detached `File` from fresh metadata.
                let metadata = fs
                    .metadata(&abs_path)
                    .await
                    .with_context(|| {
                        format!("Fetching metadata after saving the excluded buffer {abs_path:?}")
                    })?
                    .with_context(|| {
                        format!("Excluded buffer {path:?} got removed during saving")
                    })?;
                Ok(Arc::new(File {
                    worktree,
                    path,
                    disk_state: DiskState::Present {
                        mtime: metadata.mtime,
                        size: metadata.len,
                    },
                    entry_id: None,
                    is_local: true,
                    is_private,
                }))
            }
        })
    }
1675
    /// Deletes the entry with `entry_id` from disk — moving it to the trash
    /// when `trash` is set — and then refreshes its path so the worktree
    /// reflects the removal. Returns `None` when no such entry exists.
    pub fn delete_entry(
        &self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let entry = self.entry_for_id(entry_id)?.clone();
        let abs_path = self.absolutize(&entry.path);
        let fs = self.fs.clone();

        let delete = cx.background_spawn(async move {
            if entry.is_file() {
                if trash {
                    fs.trash_file(&abs_path, Default::default()).await?;
                } else {
                    fs.remove_file(&abs_path, Default::default()).await?;
                }
            } else if trash {
                fs.trash_dir(
                    &abs_path,
                    RemoveOptions {
                        recursive: true,
                        ignore_if_not_exists: false,
                    },
                )
                .await?;
            } else {
                fs.remove_dir(
                    &abs_path,
                    RemoveOptions {
                        recursive: true,
                        ignore_if_not_exists: false,
                    },
                )
                .await?;
            }
            anyhow::Ok(entry.path)
        });

        Some(cx.spawn(async move |this, cx| {
            let path = delete.await?;
            // Re-scan the deleted path so the entry disappears from the
            // snapshot before we report completion.
            this.update(cx, |this, _| {
                this.as_local_mut()
                    .unwrap()
                    .refresh_entries_for_paths(vec![path])
            })?
            .recv()
            .await;
            Ok(())
        }))
    }
1727
    /// Copies files/directories from arbitrary absolute `paths` into
    /// `target_directory` inside this worktree, then refreshes the copied
    /// paths and returns the ids of the resulting entries. Copying a path
    /// onto itself is skipped; copy and refresh failures are logged rather
    /// than propagated.
    pub fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        let target_directory = self.absolutize(&target_directory);
        let worktree_path = self.abs_path().clone();
        let fs = self.fs.clone();
        // Pair each source with its destination inside the worktree,
        // skipping sources that would be copied onto themselves.
        let paths = paths
            .into_iter()
            .filter_map(|source| {
                let file_name = source.file_name()?;
                let mut target = target_directory.clone();
                target.push(file_name);

                // Do not allow copying the same file to itself.
                if source.as_ref() != target.as_path() {
                    Some((source, target))
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        // Worktree-relative destinations, used for refreshing and id lookup.
        let paths_to_refresh = paths
            .iter()
            .filter_map(|(_, target)| {
                RelPath::new(
                    target.strip_prefix(&worktree_path).ok()?,
                    PathStyle::local(),
                )
                .ok()
                .map(|path| path.into_arc())
            })
            .collect::<Vec<_>>();

        cx.spawn(async move |this, cx| {
            cx.background_spawn(async move {
                for (source, target) in paths {
                    copy_recursive(
                        fs.as_ref(),
                        &source,
                        &target,
                        fs::CopyOptions {
                            overwrite: true,
                            ..Default::default()
                        },
                    )
                    .await
                    .with_context(|| {
                        format!("Failed to copy file from {source:?} to {target:?}")
                    })?;
                }
                anyhow::Ok(())
            })
            .await
            .log_err();
            let mut refresh = cx.read_entity(
                &this.upgrade().with_context(|| "Dropped worktree")?,
                |this, _| {
                    anyhow::Ok::<postage::barrier::Receiver>(
                        this.as_local()
                            .with_context(|| "Worktree is not local")?
                            .refresh_entries_for_paths(paths_to_refresh.clone()),
                    )
                },
            )?;

            cx.background_spawn(async move {
                refresh.next().await;
                anyhow::Ok(())
            })
            .await
            .log_err();

            let this = this.upgrade().with_context(|| "Dropped worktree")?;
            Ok(cx.read_entity(&this, |this, _| {
                paths_to_refresh
                    .iter()
                    .filter_map(|path| Some(this.entry_for_path(path)?.id))
                    .collect()
            }))
        })
    }
1813
1814 fn expand_entry(
1815 &self,
1816 entry_id: ProjectEntryId,
1817 cx: &Context<Worktree>,
1818 ) -> Option<Task<Result<()>>> {
1819 let path = self.entry_for_id(entry_id)?.path.clone();
1820 let mut refresh = self.refresh_entries_for_paths(vec![path]);
1821 Some(cx.background_spawn(async move {
1822 refresh.next().await;
1823 Ok(())
1824 }))
1825 }
1826
1827 fn expand_all_for_entry(
1828 &self,
1829 entry_id: ProjectEntryId,
1830 cx: &Context<Worktree>,
1831 ) -> Option<Task<Result<()>>> {
1832 let path = self.entry_for_id(entry_id).unwrap().path.clone();
1833 let mut rx = self.add_path_prefix_to_scan(path);
1834 Some(cx.background_spawn(async move {
1835 rx.next().await;
1836 Ok(())
1837 }))
1838 }
1839
1840 pub fn refresh_entries_for_paths(&self, paths: Vec<Arc<RelPath>>) -> barrier::Receiver {
1841 let (tx, rx) = barrier::channel();
1842 self.scan_requests_tx
1843 .try_send(ScanRequest {
1844 relative_paths: paths,
1845 done: smallvec![tx],
1846 })
1847 .ok();
1848 rx
1849 }
1850
    /// Test-only alias for `refresh_entries_for_paths`.
    #[cfg(feature = "test-support")]
    pub fn manually_refresh_entries_for_paths(
        &self,
        paths: Vec<Arc<RelPath>>,
    ) -> barrier::Receiver {
        self.refresh_entries_for_paths(paths)
    }
1858
1859 pub fn add_path_prefix_to_scan(&self, path_prefix: Arc<RelPath>) -> barrier::Receiver {
1860 let (tx, rx) = barrier::channel();
1861 self.path_prefixes_to_scan_tx
1862 .try_send(PathPrefixScanRequest {
1863 path: path_prefix,
1864 done: smallvec![tx],
1865 })
1866 .ok();
1867 rx
1868 }
1869
    /// Re-scans `path` (and `old_path`, when a rename is involved) and
    /// resolves to the refreshed entry. Resolves to `Ok(None)` for excluded
    /// paths, and errors if the entry is still absent after the refresh.
    pub fn refresh_entry(
        &self,
        path: Arc<RelPath>,
        old_path: Option<Arc<RelPath>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Option<Entry>>> {
        if self.settings.is_path_excluded(&path) {
            return Task::ready(Ok(None));
        }
        let paths = if let Some(old_path) = old_path.as_ref() {
            vec![old_path.clone(), path.clone()]
        } else {
            vec![path.clone()]
        };
        let t0 = Instant::now();
        let mut refresh = self.refresh_entries_for_paths(paths);
        // todo(lw): Hot foreground spawn
        cx.spawn(async move |this, cx| {
            refresh.recv().await;
            log::trace!("refreshed entry {path:?} in {:?}", t0.elapsed());
            let new_entry = this.read_with(cx, |this, _| {
                this.entry_for_path(&path).cloned().with_context(|| {
                    format!("Could not find entry in worktree for {path:?} after refresh")
                })
            })??;
            Ok(Some(new_entry))
        })
    }
1898
1899 pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
1900 where
1901 F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
1902 Fut: 'static + Send + Future<Output = bool>,
1903 {
1904 if let Some(observer) = self.update_observer.as_mut() {
1905 *observer.resume_updates.borrow_mut() = ();
1906 return;
1907 }
1908
1909 let (resume_updates_tx, mut resume_updates_rx) = watch::channel::<()>();
1910 let (snapshots_tx, mut snapshots_rx) =
1911 mpsc::unbounded::<(LocalSnapshot, UpdatedEntriesSet)>();
1912 snapshots_tx
1913 .unbounded_send((self.snapshot(), Arc::default()))
1914 .ok();
1915
1916 let worktree_id = self.id.to_proto();
1917 let _maintain_remote_snapshot = cx.background_spawn(async move {
1918 let mut is_first = true;
1919 while let Some((snapshot, entry_changes)) = snapshots_rx.next().await {
1920 let update = if is_first {
1921 is_first = false;
1922 snapshot.build_initial_update(project_id, worktree_id)
1923 } else {
1924 snapshot.build_update(project_id, worktree_id, entry_changes)
1925 };
1926
1927 for update in proto::split_worktree_update(update) {
1928 let _ = resume_updates_rx.try_recv();
1929 loop {
1930 let result = callback(update.clone());
1931 if result.await {
1932 break;
1933 } else {
1934 log::info!("waiting to resume updates");
1935 if resume_updates_rx.next().await.is_none() {
1936 return Some(());
1937 }
1938 }
1939 }
1940 }
1941 }
1942 Some(())
1943 });
1944
1945 self.update_observer = Some(UpdateObservationState {
1946 snapshots_tx,
1947 resume_updates: resume_updates_tx,
1948 _maintain_remote_snapshot,
1949 });
1950 }
1951
    /// Enables sharing of private files and restarts the background scanners
    /// so previously-private entries get picked up.
    pub fn share_private_files(&mut self, cx: &Context<Worktree>) {
        self.share_private_files = true;
        self.restart_background_scanners(cx);
    }
1956
    /// Points this worktree at a new absolute path (e.g. after the root was
    /// moved on disk), clearing repository and ignore caches, and restarts
    /// the background scanners against the new location.
    pub fn update_abs_path_and_refresh(
        &mut self,
        new_path: Arc<SanitizedPath>,
        cx: &Context<Worktree>,
    ) {
        self.snapshot.git_repositories = Default::default();
        self.snapshot.ignores_by_parent_abs_path = Default::default();
        // Derive the new root name from the path's final component; fall back
        // to an empty name when there is none or it isn't valid UTF-8.
        let root_name = new_path
            .as_path()
            .file_name()
            .and_then(|f| f.to_str())
            .map_or(RelPath::empty().into(), |f| {
                RelPath::unix(f).unwrap().into()
            });
        self.snapshot.update_abs_path(new_path, root_name);
        self.restart_background_scanners(cx);
    }
1974 #[cfg(feature = "test-support")]
1975 pub fn repositories(&self) -> Vec<Arc<Path>> {
1976 self.git_repositories
1977 .values()
1978 .map(|entry| entry.work_directory_abs_path.clone())
1979 .collect::<Vec<_>>()
1980 }
1981}
1982
1983impl RemoteWorktree {
    /// The id of the remote project this worktree belongs to.
    pub fn project_id(&self) -> u64 {
        self.project_id
    }
1987
    /// A clone of the protocol client used to talk to the remote host.
    pub fn client(&self) -> AnyProtoClient {
        self.client.clone()
    }
1991
    /// Marks this worktree as disconnected: drops the update channel so
    /// further remote updates are ignored, and cancels all pending snapshot
    /// subscriptions.
    pub fn disconnected_from_host(&mut self) {
        self.updates_tx.take();
        self.snapshot_subscriptions.clear();
        self.disconnected = true;
    }
1997
1998 pub fn update_from_remote(&self, update: proto::UpdateWorktree) {
1999 if let Some(updates_tx) = &self.updates_tx {
2000 updates_tx
2001 .unbounded_send(update)
2002 .expect("consumer runs to completion");
2003 }
2004 }
2005
    /// Streams worktree updates to `callback`, starting with a full initial
    /// update built from the current snapshot, then forwarding each update
    /// queued via the observer channel. Stops when the callback returns
    /// `false` or the channel closes, clearing the observer on exit.
    fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        let (tx, mut rx) = mpsc::unbounded();
        let initial_update = self
            .snapshot
            .build_initial_update(project_id, self.id().to_proto());
        self.update_observer = Some(tx);
        cx.spawn(async move |this, cx| {
            let mut update = initial_update;
            'outer: loop {
                // SSH projects use a special project ID of 0, and we need to
                // remap it to the correct one here.
                update.project_id = project_id;

                for chunk in split_worktree_update(update) {
                    if !callback(chunk).await {
                        break 'outer;
                    }
                }

                if let Some(next_update) = rx.next().await {
                    update = next_update;
                } else {
                    break;
                }
            }
            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                this.update_observer.take();
            })
        })
        .detach();
    }
2042
    /// Whether a snapshot with the given scan id has already been observed.
    fn observed_snapshot(&self, scan_id: usize) -> bool {
        self.completed_scan_id >= scan_id
    }
2046
    /// Returns a future that resolves once a snapshot with id at least
    /// `scan_id` has been observed. If the worktree is already disconnected,
    /// the future errors immediately (the sender is dropped).
    pub fn wait_for_snapshot(
        &mut self,
        scan_id: usize,
    ) -> impl Future<Output = Result<()>> + use<> {
        let (tx, rx) = oneshot::channel();
        if self.observed_snapshot(scan_id) {
            let _ = tx.send(());
        } else if self.disconnected {
            // Dropping `tx` makes the `rx.await` below fail.
            drop(tx);
        } else {
            // Insert in sorted order so subscriptions resolve in scan order.
            match self
                .snapshot_subscriptions
                .binary_search_by_key(&scan_id, |probe| probe.0)
            {
                Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
            }
        }

        async move {
            rx.await?;
            Ok(())
        }
    }
2070
    /// Waits until the snapshot for `scan_id` has been observed, then inserts
    /// the protobuf `entry` into the background snapshot and syncs the
    /// foreground snapshot to match.
    pub fn insert_entry(
        &mut self,
        entry: proto::Entry,
        scan_id: usize,
        cx: &Context<Worktree>,
    ) -> Task<Result<Entry>> {
        let wait_for_snapshot = self.wait_for_snapshot(scan_id);
        cx.spawn(async move |this, cx| {
            wait_for_snapshot.await?;
            this.update(cx, |worktree, _| {
                let worktree = worktree.as_remote_mut().unwrap();
                let snapshot = &mut worktree.background_snapshot.lock().0;
                let entry = snapshot.insert_entry(entry, &worktree.file_scan_inclusions);
                worktree.snapshot = snapshot.clone();
                entry
            })?
        })
    }
2089
    /// Asks the remote host to delete the entry with `entry_id` (optionally
    /// to the trash), waits for the corresponding snapshot, and then removes
    /// the entry from the local background and foreground snapshots.
    fn delete_entry(
        &self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let response = self.client.request(proto::DeleteProjectEntry {
            project_id: self.project_id,
            entry_id: entry_id.to_proto(),
            use_trash: trash,
        });
        Some(cx.spawn(async move |this, cx| {
            let response = response.await?;
            let scan_id = response.worktree_scan_id as usize;

            this.update(cx, move |this, _| {
                this.as_remote_mut().unwrap().wait_for_snapshot(scan_id)
            })?
            .await?;

            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                let snapshot = &mut this.background_snapshot.lock().0;
                snapshot.delete_entry(entry_id);
                this.snapshot = snapshot.clone();
            })
        }))
    }
2118
2119 // fn rename_entry(
2120 // &self,
2121 // entry_id: ProjectEntryId,
2122 // new_path: impl Into<Arc<RelPath>>,
2123 // cx: &Context<Worktree>,
2124 // ) -> Task<Result<CreatedEntry>> {
2125 // let new_path: Arc<RelPath> = new_path.into();
2126 // let response = self.client.request(proto::RenameProjectEntry {
2127 // project_id: self.project_id,
2128 // entry_id: entry_id.to_proto(),
2129 // new_worktree_id: new_path.worktree_id,
2130 // new_path: new_path.as_ref().to_proto(),
2131 // });
2132 // cx.spawn(async move |this, cx| {
2133 // let response = response.await?;
2134 // match response.entry {
2135 // Some(entry) => this
2136 // .update(cx, |this, cx| {
2137 // this.as_remote_mut().unwrap().insert_entry(
2138 // entry,
2139 // response.worktree_scan_id as usize,
2140 // cx,
2141 // )
2142 // })?
2143 // .await
2144 // .map(CreatedEntry::Included),
2145 // None => {
2146 // let abs_path =
2147 // this.read_with(cx, |worktree, _| worktree.absolutize(&new_path))?;
2148 // Ok(CreatedEntry::Excluded { abs_path })
2149 // }
2150 // }
2151 // })
2152 // }
2153
    /// Copies external (outside-the-worktree) paths into `target_directory`
    /// of this remote worktree: each file/directory under `paths_to_copy` is
    /// read via the local fs and uploaded through `CreateProjectEntry`
    /// requests. Returns the ids of the entries created on the host.
    fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths_to_copy: Vec<Arc<Path>>,
        local_fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<anyhow::Result<Vec<ProjectEntryId>>> {
        let client = self.client.clone();
        let worktree_id = self.id().to_proto();
        let project_id = self.project_id;

        cx.background_spawn(async move {
            let mut requests = Vec::new();
            for root_path_to_copy in paths_to_copy {
                // Skip roots whose file name isn't valid UTF-8 / a valid
                // relative path component.
                let Some(filename) = root_path_to_copy
                    .file_name()
                    .and_then(|name| name.to_str())
                    .and_then(|filename| RelPath::unix(filename).ok())
                else {
                    continue;
                };
                for (abs_path, is_directory) in
                    read_dir_items(local_fs.as_ref(), &root_path_to_copy).await?
                {
                    let Some(relative_path) = abs_path
                        .strip_prefix(&root_path_to_copy)
                        .map_err(|e| anyhow::Error::from(e))
                        .and_then(|relative_path| RelPath::new(relative_path, PathStyle::local()))
                        .log_err()
                    else {
                        continue;
                    };
                    // Directories are created without content; files carry
                    // their full bytes in the request.
                    let content = if is_directory {
                        None
                    } else {
                        Some(local_fs.load_bytes(&abs_path).await?)
                    };

                    let mut target_path = target_directory.join(filename);
                    if relative_path.file_name().is_some() {
                        target_path = target_path.join(&relative_path);
                    }

                    requests.push(proto::CreateProjectEntry {
                        project_id,
                        worktree_id,
                        path: target_path.to_proto(),
                        is_directory,
                        content,
                    });
                }
            }
            // Sort so parent directories are created before their children,
            // and drop duplicate requests.
            requests.sort_unstable_by(|a, b| a.path.cmp(&b.path));
            requests.dedup();

            let mut copied_entry_ids = Vec::new();
            for request in requests {
                let response = client.request(request).await?;
                copied_entry_ids.extend(response.entry.map(|e| ProjectEntryId::from_proto(e.id)));
            }

            Ok(copied_entry_ids)
        })
    }
2218}
2219
impl Snapshot {
    /// Creates an empty snapshot for the worktree rooted at `abs_path`.
    ///
    /// `scan_id` starts at 1 while `completed_scan_id` is 0, so a fresh
    /// snapshot reports itself as not yet fully scanned.
    pub fn new(
        id: WorktreeId,
        root_name: Arc<RelPath>,
        abs_path: Arc<Path>,
        path_style: PathStyle,
    ) -> Self {
        Snapshot {
            id,
            abs_path: SanitizedPath::from_arc(abs_path),
            path_style,
            // Lowercased characters of the root name, used for fuzzy matching.
            root_char_bag: root_name
                .as_unix_str()
                .chars()
                .map(|c| c.to_ascii_lowercase())
                .collect(),
            root_name,
            always_included_entries: Default::default(),
            entries_by_path: Default::default(),
            entries_by_id: Default::default(),
            root_repo_common_dir: None,
            scan_id: 1,
            completed_scan_id: 0,
        }
    }

    /// The id of the worktree this snapshot belongs to.
    pub fn id(&self) -> WorktreeId {
        self.id
    }

    // TODO:
    // Consider the following:
    //
    // ```rust
    // let abs_path: Arc<Path> = snapshot.abs_path(); // e.g. "C:\Users\user\Desktop\project"
    // let some_non_trimmed_path = Path::new("\\\\?\\C:\\Users\\user\\Desktop\\project\\main.rs");
    // // The caller perform some actions here:
    // some_non_trimmed_path.strip_prefix(abs_path); // This fails
    // some_non_trimmed_path.starts_with(abs_path); // This fails too
    // ```
    //
    // This is definitely a bug, but it's not clear if we should handle it here or not.
    /// The absolute path of the worktree root.
    pub fn abs_path(&self) -> &Arc<Path> {
        SanitizedPath::cast_arc_ref(&self.abs_path)
    }

    /// The common directory of the repository at the worktree root, if any.
    pub fn root_repo_common_dir(&self) -> Option<&Arc<Path>> {
        self.root_repo_common_dir
            .as_ref()
            .map(SanitizedPath::cast_arc_ref)
    }

    /// Builds the `UpdateWorktree` message that transmits the entire snapshot,
    /// used when a peer first starts observing this worktree.
    fn build_initial_update(&self, project_id: u64, worktree_id: u64) -> proto::UpdateWorktree {
        let mut updated_entries = self
            .entries_by_path
            .iter()
            .map(proto::Entry::from)
            .collect::<Vec<_>>();
        updated_entries.sort_unstable_by_key(|e| e.id);

        proto::UpdateWorktree {
            project_id,
            worktree_id,
            abs_path: self.abs_path().to_string_lossy().into_owned(),
            root_name: self.root_name().to_proto(),
            root_repo_common_dir: self
                .root_repo_common_dir()
                .map(|p| p.to_string_lossy().into_owned()),
            updated_entries,
            removed_entries: Vec::new(),
            scan_id: self.scan_id as u64,
            is_last_update: self.completed_scan_id == self.scan_id,
            // Sent in separate messages.
            updated_repositories: Vec::new(),
            removed_repositories: Vec::new(),
        }
    }

    /// Returns the absolute path of a repository's working directory, which
    /// may lie inside this worktree or above its root.
    pub fn work_directory_abs_path(&self, work_directory: &WorkDirectory) -> PathBuf {
        match work_directory {
            WorkDirectory::InProject { relative_path } => self.absolutize(relative_path),
            WorkDirectory::AboveProject { absolute_path, .. } => absolute_path.as_ref().to_owned(),
        }
    }

    /// Converts a worktree-relative path into an absolute path, joining
    /// components with the snapshot's path-style separator.
    pub fn absolutize(&self, path: &RelPath) -> PathBuf {
        if path.file_name().is_some() {
            let mut abs_path = self.abs_path.to_string();
            for component in path.components() {
                // Avoid a doubled separator if the root already ends with one.
                if !abs_path.ends_with(self.path_style.primary_separator()) {
                    abs_path.push_str(self.path_style.primary_separator());
                }
                abs_path.push_str(component);
            }
            PathBuf::from(abs_path)
        } else {
            // An empty relative path refers to the worktree root itself.
            self.abs_path.as_path().to_path_buf()
        }
    }

    /// Whether an entry with the given id exists in this snapshot.
    pub fn contains_entry(&self, entry_id: ProjectEntryId) -> bool {
        self.entries_by_id.get(&entry_id, ()).is_some()
    }

    /// Inserts (or replaces) a single entry received over the wire, keeping
    /// the by-path and by-id maps consistent.
    fn insert_entry(
        &mut self,
        entry: proto::Entry,
        always_included_paths: &PathMatcher,
    ) -> Result<Entry> {
        let entry = Entry::try_from((&self.root_char_bag, always_included_paths, entry))?;
        let old_entry = self.entries_by_id.insert_or_replace(
            PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: 0,
            },
            (),
        );
        // If this id previously mapped to a different path, drop the stale
        // by-path record so the entry isn't present under two paths.
        if let Some(old_entry) = old_entry {
            self.entries_by_path.remove(&PathKey(old_entry.path), ());
        }
        self.entries_by_path.insert_or_replace(entry.clone(), ());
        Ok(entry)
    }

    /// Removes the entry with the given id along with all of its descendants.
    /// Returns the removed entry's path, or `None` if it wasn't present.
    fn delete_entry(&mut self, entry_id: ProjectEntryId) -> Option<Arc<RelPath>> {
        let removed_entry = self.entries_by_id.remove(&entry_id, ())?;
        self.entries_by_path = {
            let mut cursor = self.entries_by_path.cursor::<TraversalProgress>(());
            let mut new_entries_by_path =
                cursor.slice(&TraversalTarget::path(&removed_entry.path), Bias::Left);
            // Skip over the removed entry and everything beneath it, dropping
            // each descendant from the id map as we go.
            while let Some(entry) = cursor.item() {
                if entry.path.starts_with(&removed_entry.path) {
                    self.entries_by_id.remove(&entry.id, ());
                    cursor.next();
                } else {
                    break;
                }
            }
            new_entries_by_path.append(cursor.suffix(), ());
            new_entries_by_path
        };

        Some(removed_entry.path)
    }

    /// Replaces the root path and name, recomputing the fuzzy-match char bag
    /// only when the name actually changed.
    fn update_abs_path(&mut self, abs_path: Arc<SanitizedPath>, root_name: Arc<RelPath>) {
        self.abs_path = abs_path;
        if root_name != self.root_name {
            self.root_char_bag = root_name
                .as_unix_str()
                .chars()
                .map(|c| c.to_ascii_lowercase())
                .collect();
            self.root_name = root_name;
        }
    }

    /// Applies an `UpdateWorktree` message received from the host, bringing
    /// this snapshot up to date with the host's scan state.
    pub fn apply_remote_update(
        &mut self,
        update: proto::UpdateWorktree,
        always_included_paths: &PathMatcher,
    ) {
        log::debug!(
            "applying remote worktree update. {} entries updated, {} removed",
            update.updated_entries.len(),
            update.removed_entries.len()
        );
        if let Some(root_name) = RelPath::from_proto(&update.root_name).log_err() {
            self.update_abs_path(
                SanitizedPath::new_arc(&Path::new(&update.abs_path)),
                root_name,
            );
        }

        // Batch all modifications into edit lists so each tree is edited once.
        let mut entries_by_path_edits = Vec::new();
        let mut entries_by_id_edits = Vec::new();

        for entry_id in update.removed_entries {
            let entry_id = ProjectEntryId::from_proto(entry_id);
            entries_by_id_edits.push(Edit::Remove(entry_id));
            if let Some(entry) = self.entry_for_id(entry_id) {
                entries_by_path_edits.push(Edit::Remove(PathKey(entry.path.clone())));
            }
        }

        for entry in update.updated_entries {
            let Some(entry) =
                Entry::try_from((&self.root_char_bag, always_included_paths, entry)).log_err()
            else {
                continue;
            };
            // If this entry moved, remove it from its old path...
            if let Some(PathEntry { path, .. }) = self.entries_by_id.get(&entry.id, ()) {
                entries_by_path_edits.push(Edit::Remove(PathKey(path.clone())));
            }
            // ...and if a different entry occupied the new path, drop its id.
            if let Some(old_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
                && old_entry.id != entry.id
            {
                entries_by_id_edits.push(Edit::Remove(old_entry.id));
            }
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: 0,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.entries_by_path.edit(entries_by_path_edits, ());
        self.entries_by_id.edit(entries_by_id_edits, ());

        self.root_repo_common_dir = update
            .root_repo_common_dir
            .map(|p| SanitizedPath::new_arc(Path::new(&p)));

        self.scan_id = update.scan_id as usize;
        if update.is_last_update {
            self.completed_scan_id = update.scan_id as usize;
        }
    }

    /// Total number of entries, including ignored ones.
    pub fn entry_count(&self) -> usize {
        self.entries_by_path.summary().count
    }

    /// Number of entries that are not ignored.
    pub fn visible_entry_count(&self) -> usize {
        self.entries_by_path.summary().non_ignored_count
    }

    /// Number of directory entries, including ignored ones.
    pub fn dir_count(&self) -> usize {
        let summary = self.entries_by_path.summary();
        summary.count - summary.file_count
    }

    /// Number of directory entries that are not ignored.
    pub fn visible_dir_count(&self) -> usize {
        let summary = self.entries_by_path.summary();
        summary.non_ignored_count - summary.non_ignored_file_count
    }

    /// Number of file entries, including ignored ones.
    pub fn file_count(&self) -> usize {
        self.entries_by_path.summary().file_count
    }

    /// Number of file entries that are not ignored.
    pub fn visible_file_count(&self) -> usize {
        self.entries_by_path.summary().non_ignored_file_count
    }

    /// Starts a traversal positioned at the `start_offset`-th entry among
    /// those matching the include flags.
    fn traverse_from_offset(
        &self,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        start_offset: usize,
    ) -> Traversal<'_> {
        let mut cursor = self.entries_by_path.cursor(());
        cursor.seek(
            &TraversalTarget::Count {
                count: start_offset,
                include_files,
                include_dirs,
                include_ignored,
            },
            Bias::Right,
        );
        Traversal {
            snapshot: self,
            cursor,
            include_files,
            include_dirs,
            include_ignored,
        }
    }

    /// Starts a traversal positioned at the given path.
    pub fn traverse_from_path(
        &self,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        path: &RelPath,
    ) -> Traversal<'_> {
        Traversal::new(self, include_files, include_dirs, include_ignored, path)
    }

    /// Traverses file entries only, beginning at offset `start`.
    pub fn files(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, false, include_ignored, start)
    }

    /// Traverses directory entries only, beginning at offset `start`.
    pub fn directories(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(false, true, include_ignored, start)
    }

    /// Traverses all entries, beginning at offset `start`.
    pub fn entries(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, true, include_ignored, start)
    }

    /// Iterates over all entry paths, skipping the root (empty) path.
    pub fn paths(&self) -> impl Iterator<Item = &RelPath> {
        self.entries_by_path
            .cursor::<()>(())
            .filter(move |entry| !entry.path.is_empty())
            .map(|entry| entry.path.as_ref())
    }

    /// Iterates over the direct children of `parent_path`, including files,
    /// directories, and ignored entries.
    pub fn child_entries<'a>(&'a self, parent_path: &'a RelPath) -> ChildEntriesIter<'a> {
        let options = ChildEntriesOptions {
            include_files: true,
            include_dirs: true,
            include_ignored: true,
        };
        self.child_entries_with_options(parent_path, options)
    }

    /// Iterates over the direct children of `parent_path`, filtered by the
    /// given options.
    pub fn child_entries_with_options<'a>(
        &'a self,
        parent_path: &'a RelPath,
        options: ChildEntriesOptions,
    ) -> ChildEntriesIter<'a> {
        let mut cursor = self.entries_by_path.cursor(());
        cursor.seek(&TraversalTarget::path(parent_path), Bias::Right);
        let traversal = Traversal {
            snapshot: self,
            cursor,
            include_files: options.include_files,
            include_dirs: options.include_dirs,
            include_ignored: options.include_ignored,
        };
        ChildEntriesIter {
            traversal,
            parent_path,
        }
    }

    /// The entry representing the worktree root itself, if any entries exist.
    pub fn root_entry(&self) -> Option<&Entry> {
        self.entries_by_path.first()
    }

    /// Returns `None` for a single file worktree, or `Some(self.abs_path())` if
    /// it is a directory.
    pub fn root_dir(&self) -> Option<Arc<Path>> {
        self.root_entry()
            .filter(|entry| entry.is_dir())
            .map(|_| self.abs_path().clone())
    }

    /// The worktree root's name as a relative path.
    pub fn root_name(&self) -> &RelPath {
        &self.root_name
    }

    /// The worktree root's name as a unix-style string.
    pub fn root_name_str(&self) -> &str {
        self.root_name.as_unix_str()
    }

    /// The id of the most recently started scan.
    pub fn scan_id(&self) -> usize {
        self.scan_id
    }

    /// Looks up an entry by its exact worktree-relative path.
    pub fn entry_for_path(&self, path: &RelPath) -> Option<&Entry> {
        self.traverse_from_path(true, true, true, path)
            .entry()
            .and_then(|entry| {
                // The traversal lands on or after `path`; only an exact match counts.
                if entry.path.as_ref() == path {
                    Some(entry)
                } else {
                    None
                }
            })
    }

    /// Resolves a path to an executable using the following heuristics:
    ///
    /// 1. If the path starts with `~`, it is expanded to the user's home directory.
    /// 2. If the path is relative and contains more than one component,
    ///    it is joined to the worktree root path.
    /// 3. If the path is relative and exists in the worktree
    ///    (even if falls under an exclusion filter),
    ///    it is joined to the worktree root path.
    /// 4. Otherwise the path is returned unmodified.
    ///
    /// Relative paths that do not exist in the worktree may
    /// still be found using the `PATH` environment variable.
    pub fn resolve_relative_path(&self, path: PathBuf) -> PathBuf {
        if let Some(path_str) = path.to_str() {
            if let Some(remaining_path) = path_str.strip_prefix("~/") {
                return home_dir().join(remaining_path);
            } else if path_str == "~" {
                return home_dir().to_path_buf();
            }
        }

        if let Ok(rel_path) = RelPath::new(&path, self.path_style)
            && (path.components().count() > 1 || self.entry_for_path(&rel_path).is_some())
        {
            self.abs_path().join(path)
        } else {
            path
        }
    }

    /// Looks up an entry by id, resolving it through the by-path map.
    pub fn entry_for_id(&self, id: ProjectEntryId) -> Option<&Entry> {
        let entry = self.entries_by_id.get(&id, ())?;
        self.entry_for_path(&entry.path)
    }

    /// The path style (separator convention) used by this worktree.
    pub fn path_style(&self) -> PathStyle {
        self.path_style
    }
}
2628
impl LocalSnapshot {
    /// Finds the local git repository whose working directory is exactly `path`.
    fn local_repo_for_work_directory_path(&self, path: &RelPath) -> Option<&LocalRepositoryEntry> {
        self.git_repositories
            .iter()
            .map(|(_, entry)| entry)
            .find(|entry| entry.work_directory.path_key() == PathKey(path.into()))
    }

    /// Builds an incremental `UpdateWorktree` message from a batch of entry
    /// changes, for transmission to remote observers.
    fn build_update(
        &self,
        project_id: u64,
        worktree_id: u64,
        entry_changes: UpdatedEntriesSet,
    ) -> proto::UpdateWorktree {
        let mut updated_entries = Vec::new();
        let mut removed_entries = Vec::new();

        for (_, entry_id, path_change) in entry_changes.iter() {
            if let PathChange::Removed = path_change {
                removed_entries.push(entry_id.0 as u64);
            } else if let Some(entry) = self.entry_for_id(*entry_id) {
                updated_entries.push(proto::Entry::from(entry));
            }
        }

        removed_entries.sort_unstable();
        updated_entries.sort_unstable_by_key(|e| e.id);

        // An id that appears both removed and updated in the same batch should
        // only be reported as updated.
        // TODO - optimize, knowing that removed_entries are sorted.
        removed_entries.retain(|id| updated_entries.binary_search_by_key(id, |e| e.id).is_err());

        proto::UpdateWorktree {
            project_id,
            worktree_id,
            abs_path: self.abs_path().to_string_lossy().into_owned(),
            root_name: self.root_name().to_proto(),
            root_repo_common_dir: self
                .root_repo_common_dir()
                .map(|p| p.to_string_lossy().into_owned()),
            updated_entries,
            removed_entries,
            scan_id: self.scan_id as u64,
            is_last_update: self.completed_scan_id == self.scan_id,
            // Sent in separate messages.
            updated_repositories: Vec::new(),
            removed_repositories: Vec::new(),
        }
    }

    /// Inserts (or replaces) an entry. If the entry is a `.gitignore` file,
    /// its contents are parsed and cached for subsequent ignore checks.
    async fn insert_entry(&mut self, mut entry: Entry, fs: &dyn Fs) -> Entry {
        log::trace!("insert entry {:?}", entry.path);
        if entry.is_file() && entry.path.file_name() == Some(&GITIGNORE) {
            let abs_path = self.absolutize(&entry.path);
            match build_gitignore(&abs_path, fs).await {
                Ok(ignore) => {
                    // The `true` flag marks the ignore as needing re-application.
                    self.ignores_by_parent_abs_path
                        .insert(abs_path.parent().unwrap().into(), (Arc::new(ignore), true));
                }
                Err(error) => {
                    log::error!(
                        "error loading .gitignore file {:?} - {:?}",
                        &entry.path,
                        error
                    );
                }
            }
        }

        // Don't demote an already-loaded directory back to pending.
        if entry.kind == EntryKind::PendingDir
            && let Some(existing_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
        {
            entry.kind = existing_entry.kind;
        }

        let scan_id = self.scan_id;
        let removed = self.entries_by_path.insert_or_replace(entry.clone(), ());
        // If a different entry previously occupied this path, drop its id mapping.
        if let Some(removed) = removed
            && removed.id != entry.id
        {
            self.entries_by_id.remove(&removed.id, ());
        }
        self.entries_by_id.insert_or_replace(
            PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id,
            },
            (),
        );

        entry
    }

    /// Collects the inodes of `path`'s ancestor directories (excluding `path`
    /// itself); used by the scanner to avoid descending into cycles.
    fn ancestor_inodes_for_path(&self, path: &RelPath) -> TreeSet<u64> {
        let mut inodes = TreeSet::default();
        for ancestor in path.ancestors().skip(1) {
            if let Some(entry) = self.entry_for_path(ancestor) {
                inodes.insert(entry.inode);
            }
        }
        inodes
    }

    /// Builds the stack of ignore rules that applies to `abs_path` by walking
    /// its ancestors: per-directory `.gitignore`s up to the enclosing
    /// repository root, the repository's exclude rules, and the global
    /// gitignore.
    async fn ignore_stack_for_abs_path(
        &self,
        abs_path: &Path,
        is_dir: bool,
        fs: &dyn Fs,
    ) -> IgnoreStack {
        let mut new_ignores = Vec::new();
        let mut repo_root = None;
        for (index, ancestor) in abs_path.ancestors().enumerate() {
            // Index 0 is `abs_path` itself; only ancestors' ignore files are
            // collected here.
            if index > 0 {
                if let Some((ignore, _)) = self.ignores_by_parent_abs_path.get(ancestor) {
                    new_ignores.push((ancestor, Some(ignore.clone())));
                } else {
                    new_ignores.push((ancestor, None));
                }
            }

            // Stop at the repository root — ignore files above it don't apply.
            let metadata = fs.metadata(&ancestor.join(DOT_GIT)).await.ok().flatten();
            if metadata.is_some() {
                repo_root = Some(Arc::from(ancestor));
                break;
            }
        }

        let mut ignore_stack = if let Some(global_gitignore) = self.global_gitignore.clone() {
            IgnoreStack::global(global_gitignore)
        } else {
            IgnoreStack::none()
        };

        // Repository-level exclude rules sit beneath any `.gitignore`s.
        if let Some((repo_exclude, _)) = repo_root
            .as_ref()
            .and_then(|abs_path| self.repo_exclude_by_work_dir_abs_path.get(abs_path))
        {
            ignore_stack = ignore_stack.append(IgnoreKind::RepoExclude, repo_exclude.clone());
        }
        ignore_stack.repo_root = repo_root;
        // Apply `.gitignore`s from the outermost ancestor inward.
        for (parent_abs_path, ignore) in new_ignores.into_iter().rev() {
            if ignore_stack.is_abs_path_ignored(parent_abs_path, true) {
                // Once a parent directory is ignored, everything below it is.
                ignore_stack = IgnoreStack::all();
                break;
            } else if let Some(ignore) = ignore {
                ignore_stack =
                    ignore_stack.append(IgnoreKind::Gitignore(parent_abs_path.into()), ignore);
            }
        }

        if ignore_stack.is_abs_path_ignored(abs_path, is_dir) {
            ignore_stack = IgnoreStack::all();
        }

        ignore_stack
    }

    /// Directories that have been expanded despite being external or ignored.
    #[cfg(feature = "test-support")]
    pub fn expanded_entries(&self) -> impl Iterator<Item = &Entry> {
        self.entries_by_path
            .cursor::<()>(())
            .filter(|entry| entry.kind == EntryKind::Dir && (entry.is_external || entry.is_ignored))
    }

    /// Asserts internal consistency of the snapshot: the by-path and by-id
    /// maps agree, traversals enumerate entries consistently, and (when
    /// `git_state` is set) cached ignore files correspond to indexed entries.
    #[cfg(feature = "test-support")]
    pub fn check_invariants(&self, git_state: bool) {
        use pretty_assertions::assert_eq;

        assert_eq!(
            self.entries_by_path
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<Vec<_>>(),
            self.entries_by_id
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<collections::BTreeSet<_>>()
                .into_iter()
                .collect::<Vec<_>>(),
            "entries_by_path and entries_by_id are inconsistent"
        );

        // File traversals must visit exactly the file entries, and the
        // non-ignored traversal the visible subset.
        let mut files = self.files(true, 0);
        let mut visible_files = self.files(false, 0);
        for entry in self.entries_by_path.cursor::<()>(()) {
            if entry.is_file() {
                assert_eq!(files.next().unwrap().inode, entry.inode);
                if !entry.is_ignored || entry.is_always_included {
                    assert_eq!(visible_files.next().unwrap().inode, entry.inode);
                }
            }
        }

        assert!(files.next().is_none());
        assert!(visible_files.next().is_none());

        // A breadth-first walk via `child_entries` must produce the same
        // ordering as iterating the path tree directly.
        let mut bfs_paths = Vec::new();
        let mut stack = self
            .root_entry()
            .map(|e| e.path.as_ref())
            .into_iter()
            .collect::<Vec<_>>();
        while let Some(path) = stack.pop() {
            bfs_paths.push(path);
            let ix = stack.len();
            for child_entry in self.child_entries(path) {
                stack.insert(ix, &child_entry.path);
            }
        }

        let dfs_paths_via_iter = self
            .entries_by_path
            .cursor::<()>(())
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();
        assert_eq!(bfs_paths, dfs_paths_via_iter);

        let dfs_paths_via_traversal = self
            .entries(true, 0)
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();

        assert_eq!(dfs_paths_via_traversal, dfs_paths_via_iter);

        if git_state {
            for ignore_parent_abs_path in self.ignores_by_parent_abs_path.keys() {
                let ignore_parent_path = &RelPath::new(
                    ignore_parent_abs_path
                        .strip_prefix(self.abs_path.as_path())
                        .unwrap(),
                    PathStyle::local(),
                )
                .unwrap();
                assert!(self.entry_for_path(ignore_parent_path).is_some());
                assert!(
                    self.entry_for_path(
                        &ignore_parent_path.join(RelPath::unix(GITIGNORE).unwrap())
                    )
                    .is_some()
                );
            }
        }
    }

    /// Returns `(path, inode, is_ignored)` triples for all entries, sorted by
    /// path; intended for test assertions.
    #[cfg(feature = "test-support")]
    pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&RelPath, u64, bool)> {
        let mut paths = Vec::new();
        for entry in self.entries_by_path.cursor::<()>(()) {
            if include_ignored || !entry.is_ignored {
                paths.push((entry.path.as_ref(), entry.inode, entry.is_ignored));
            }
        }
        paths.sort_by(|a, b| a.0.cmp(b.0));
        paths
    }
}
2886
impl BackgroundScannerState {
    /// Decides whether the scanner should descend into a directory entry.
    /// Special directories (`.git`, local settings, local vscode config) are
    /// always scanned; otherwise the decision depends on ignore/external state
    /// and on paths explicitly requested for scanning.
    fn should_scan_directory(&self, entry: &Entry) -> bool {
        (self.scanning_enabled && !entry.is_external && (!entry.is_ignored || entry.is_always_included))
            || entry.path.file_name() == Some(DOT_GIT)
            || entry.path.file_name() == Some(local_settings_folder_name())
            || entry.path.file_name() == Some(local_vscode_folder_name())
            || self.scanned_dirs.contains(&entry.id) // If we've ever scanned it, keep scanning
            || self
                .paths_to_scan
                .iter()
                .any(|p| p.starts_with(&entry.path))
            || self
                .path_prefixes_to_scan
                .iter()
                .any(|p| entry.path.starts_with(p))
    }

    /// Queues a directory for scanning, computing its ignore stack up front.
    /// Directories whose inode already appears among their own ancestors are
    /// skipped to avoid symlink cycles.
    async fn enqueue_scan_dir(
        &self,
        abs_path: Arc<Path>,
        entry: &Entry,
        scan_job_tx: &Sender<ScanJob>,
        fs: &dyn Fs,
    ) {
        let path = entry.path.clone();
        let ignore_stack = self
            .snapshot
            .ignore_stack_for_abs_path(&abs_path, true, fs)
            .await;
        let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);

        if !ancestor_inodes.contains(&entry.inode) {
            ancestor_inodes.insert(entry.inode);
            scan_job_tx
                .try_send(ScanJob {
                    abs_path,
                    path,
                    ignore_stack,
                    scan_queue: scan_job_tx.clone(),
                    ancestor_inodes,
                    is_external: entry.is_external,
                })
                .unwrap();
        }
    }

    /// Assigns `entry` the id of a matching previously-known entry, if one
    /// exists, so ids stay stable across renames and updates.
    fn reuse_entry_id(&mut self, entry: &mut Entry) {
        if let Some(mtime) = entry.mtime {
            // If an entry with the same inode was removed from the worktree during this scan,
            // then it *might* represent the same file or directory. But the OS might also have
            // re-used the inode for a completely different file or directory.
            //
            // Conditionally reuse the old entry's id:
            // * if the mtime is the same, the file was probably renamed.
            // * if the path is the same, the file may just have been updated
            if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) {
                if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path {
                    entry.id = removed_entry.id;
                }
            } else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) {
                entry.id = existing_entry.id;
            }
        }
    }

    /// Returns a stable id for a path/metadata pair: reuses the id of a
    /// matching removed or existing entry, or allocates a fresh one.
    /// (Same heuristic as `reuse_entry_id`, but driven by raw fs metadata.)
    fn entry_id_for(
        &mut self,
        next_entry_id: &AtomicUsize,
        path: &RelPath,
        metadata: &fs::Metadata,
    ) -> ProjectEntryId {
        // If an entry with the same inode was removed from the worktree during this scan,
        // then it *might* represent the same file or directory. But the OS might also have
        // re-used the inode for a completely different file or directory.
        //
        // Conditionally reuse the old entry's id:
        // * if the mtime is the same, the file was probably renamed.
        // * if the path is the same, the file may just have been updated
        if let Some(removed_entry) = self.removed_entries.remove(&metadata.inode) {
            if removed_entry.mtime == Some(metadata.mtime) || *removed_entry.path == *path {
                return removed_entry.id;
            }
        } else if let Some(existing_entry) = self.snapshot.entry_for_path(path) {
            return existing_entry.id;
        }
        ProjectEntryId::new(next_entry_id)
    }

    /// Inserts an entry into the snapshot; `.git` entries additionally trigger
    /// git repository discovery.
    async fn insert_entry(&mut self, entry: Entry, fs: &dyn Fs, watcher: &dyn Watcher) -> Entry {
        let entry = self.snapshot.insert_entry(entry, fs).await;
        if entry.path.file_name() == Some(&DOT_GIT) {
            self.insert_git_repository(entry.path.clone(), fs, watcher)
                .await;
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);

        entry
    }

    /// Records the children of a scanned directory, marking the parent as
    /// fully loaded and caching its `.gitignore` if one was found.
    fn populate_dir(
        &mut self,
        parent_path: Arc<RelPath>,
        entries: impl IntoIterator<Item = Entry>,
        ignore: Option<Arc<Gitignore>>,
    ) {
        let mut parent_entry = if let Some(parent_entry) = self
            .snapshot
            .entries_by_path
            .get(&PathKey(parent_path.clone()), ())
        {
            parent_entry.clone()
        } else {
            // The parent may have been deleted while its scan was in flight.
            log::warn!(
                "populating a directory {:?} that has been removed",
                parent_path
            );
            return;
        };

        match parent_entry.kind {
            EntryKind::PendingDir | EntryKind::UnloadedDir => parent_entry.kind = EntryKind::Dir,
            EntryKind::Dir => {}
            _ => return,
        }

        if let Some(ignore) = ignore {
            let abs_parent_path = self
                .snapshot
                .abs_path
                .as_path()
                .join(parent_path.as_std_path())
                .into();
            self.snapshot
                .ignores_by_parent_abs_path
                .insert(abs_parent_path, (ignore, false));
        }

        let parent_entry_id = parent_entry.id;
        self.scanned_dirs.insert(parent_entry_id);
        // Re-insert the parent (with its updated kind) along with its children.
        let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
        let mut entries_by_id_edits = Vec::new();

        for entry in entries {
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: self.snapshot.scan_id,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        self.snapshot.entries_by_id.edit(entries_by_id_edits, ());

        // Keep `changed_paths` sorted and free of duplicates.
        if let Err(ix) = self.changed_paths.binary_search(&parent_path) {
            self.changed_paths.insert(ix, parent_path.clone());
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }

    /// Removes `path` and all of its descendants from the snapshot, remembering
    /// removed entries by inode so their ids can be reused if they reappear.
    fn remove_path(&mut self, path: &RelPath, watcher: &dyn Watcher) {
        log::trace!("background scanner removing path {path:?}");
        let mut new_entries;
        let removed_entries;
        {
            // Split the tree into [before `path`] + [path and descendants] + [after].
            let mut cursor = self
                .snapshot
                .entries_by_path
                .cursor::<TraversalProgress>(());
            new_entries = cursor.slice(&TraversalTarget::path(path), Bias::Left);
            removed_entries = cursor.slice(&TraversalTarget::successor(path), Bias::Left);
            new_entries.append(cursor.suffix(), ());
        }
        self.snapshot.entries_by_path = new_entries;

        let mut removed_ids = Vec::with_capacity(removed_entries.summary().count);
        let mut removed_dir_abs_paths = Vec::new();
        for entry in removed_entries.cursor::<()>(()) {
            if entry.is_dir() {
                removed_dir_abs_paths.push(self.snapshot.absolutize(&entry.path));
            }

            // Remember the removed entry keyed by inode; prefer the entry with
            // the highest id if several shared an inode.
            match self.removed_entries.entry(entry.inode) {
                hash_map::Entry::Occupied(mut e) => {
                    let prev_removed_entry = e.get_mut();
                    if entry.id > prev_removed_entry.id {
                        *prev_removed_entry = entry.clone();
                    }
                }
                hash_map::Entry::Vacant(e) => {
                    e.insert(entry.clone());
                }
            }

            // Removing a `.gitignore` invalidates the parent dir's cached ignore.
            if entry.path.file_name() == Some(GITIGNORE) {
                let abs_parent_path = self.snapshot.absolutize(&entry.path.parent().unwrap());
                if let Some((_, needs_update)) = self
                    .snapshot
                    .ignores_by_parent_abs_path
                    .get_mut(abs_parent_path.as_path())
                {
                    *needs_update = true;
                }
            }

            if let Err(ix) = removed_ids.binary_search(&entry.id) {
                removed_ids.insert(ix, entry.id);
            }
        }

        self.snapshot
            .entries_by_id
            .edit(removed_ids.iter().map(|&id| Edit::Remove(id)).collect(), ());
        self.snapshot
            .git_repositories
            .retain(|id, _| removed_ids.binary_search(id).is_err());

        // Stop watching directories that no longer exist.
        for removed_dir_abs_path in removed_dir_abs_paths {
            watcher.remove(&removed_dir_abs_path).log_err();
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }

    /// Registers the git repository whose `.git` directory lives at
    /// `dot_git_path` inside this worktree, skipping nested or root-level
    /// `.git` directories.
    async fn insert_git_repository(
        &mut self,
        dot_git_path: Arc<RelPath>,
        fs: &dyn Fs,
        watcher: &dyn Watcher,
    ) {
        let work_dir_path: Arc<RelPath> = match dot_git_path.parent() {
            Some(parent_dir) => {
                // Guard against repositories inside the repository metadata
                if parent_dir
                    .components()
                    .any(|component| component == DOT_GIT)
                {
                    log::debug!(
                        "not building git repository for nested `.git` directory, `.git` path in the worktree: {dot_git_path:?}"
                    );
                    return;
                };

                parent_dir.into()
            }
            None => {
                // `dot_git_path.parent().is_none()` means `.git` directory is the opened worktree itself,
                // no files inside that directory are tracked by git, so no need to build the repo around it
                log::debug!(
                    "not building git repository for the worktree itself, `.git` path in the worktree: {dot_git_path:?}"
                );
                return;
            }
        };

        let dot_git_abs_path = Arc::from(self.snapshot.absolutize(&dot_git_path).as_ref());

        self.insert_git_repository_for_path(
            WorkDirectory::InProject {
                relative_path: work_dir_path,
            },
            dot_git_abs_path,
            fs,
            watcher,
        )
        .await
        .log_err();
    }

    /// Builds a `LocalRepositoryEntry` for a repository with the given working
    /// directory, watches its git directories, and records it in the snapshot.
    ///
    /// Errors if the working directory has not been indexed by the scanner.
    async fn insert_git_repository_for_path(
        &mut self,
        work_directory: WorkDirectory,
        dot_git_abs_path: Arc<Path>,
        fs: &dyn Fs,
        watcher: &dyn Watcher,
    ) -> Result<LocalRepositoryEntry> {
        let work_dir_entry = self
            .snapshot
            .entry_for_path(&work_directory.path_key().0)
            .with_context(|| {
                format!(
                    "working directory `{}` not indexed",
                    work_directory
                        .path_key()
                        .0
                        .display(self.snapshot.path_style)
                )
            })?;
        let work_directory_abs_path = self.snapshot.work_directory_abs_path(&work_directory);

        // Resolve the actual git dir and common dir (handles worktrees and
        // `.git` files that point elsewhere), then watch both for changes.
        let (repository_dir_abs_path, common_dir_abs_path) =
            discover_git_paths(&dot_git_abs_path, fs).await;
        watcher
            .add(&common_dir_abs_path)
            .context("failed to add common directory to watcher")
            .log_err();
        watcher
            .add(&repository_dir_abs_path)
            .context("failed to add repository directory to watcher")
            .log_err();

        let work_directory_id = work_dir_entry.id;

        let local_repository = LocalRepositoryEntry {
            work_directory_id,
            work_directory,
            work_directory_abs_path: work_directory_abs_path.as_path().into(),
            git_dir_scan_id: 0,
            dot_git_abs_path,
            common_dir_abs_path,
            repository_dir_abs_path,
        };

        self.snapshot
            .git_repositories
            .insert(work_directory_id, local_repository.clone());

        log::trace!("inserting new local git repository");
        Ok(local_repository)
    }
}
3216
3217async fn is_git_dir(path: &Path, fs: &dyn Fs) -> bool {
3218 if let Some(file_name) = path.file_name()
3219 && file_name == DOT_GIT
3220 {
3221 return true;
3222 }
3223
3224 // If we're in a bare repository, we are not inside a `.git` folder. In a
3225 // bare repository, the root folder contains what would normally be in the
3226 // `.git` folder.
3227 let head_metadata = fs.metadata(&path.join("HEAD")).await;
3228 if !matches!(head_metadata, Ok(Some(_))) {
3229 return false;
3230 }
3231 let config_metadata = fs.metadata(&path.join("config")).await;
3232 matches!(config_metadata, Ok(Some(_)))
3233}
3234
3235async fn build_gitignore(abs_path: &Path, fs: &dyn Fs) -> Result<Gitignore> {
3236 let contents = fs
3237 .load(abs_path)
3238 .await
3239 .with_context(|| format!("failed to load gitignore file at {}", abs_path.display()))?;
3240 let parent = abs_path.parent().unwrap_or_else(|| Path::new("/"));
3241 let mut builder = GitignoreBuilder::new(parent);
3242 for line in contents.lines() {
3243 builder.add_line(Some(abs_path.into()), line)?;
3244 }
3245 Ok(builder.build()?)
3246}
3247
3248impl Deref for Worktree {
3249 type Target = Snapshot;
3250
3251 fn deref(&self) -> &Self::Target {
3252 match self {
3253 Worktree::Local(worktree) => &worktree.snapshot,
3254 Worktree::Remote(worktree) => &worktree.snapshot,
3255 }
3256 }
3257}
3258
impl Deref for LocalWorktree {
    type Target = LocalSnapshot;

    /// Exposes the local snapshot's API directly on `LocalWorktree`.
    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3266
impl Deref for RemoteWorktree {
    type Target = Snapshot;

    /// Exposes the snapshot's API directly on `RemoteWorktree`.
    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3274
impl fmt::Debug for LocalWorktree {
    /// Delegates to the snapshot's `Debug` implementation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.snapshot.fmt(f)
    }
}
3280
3281impl fmt::Debug for Snapshot {
3282 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3283 struct EntriesById<'a>(&'a SumTree<PathEntry>);
3284 struct EntriesByPath<'a>(&'a SumTree<Entry>);
3285
3286 impl fmt::Debug for EntriesByPath<'_> {
3287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3288 f.debug_map()
3289 .entries(self.0.iter().map(|entry| (&entry.path, entry.id)))
3290 .finish()
3291 }
3292 }
3293
3294 impl fmt::Debug for EntriesById<'_> {
3295 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3296 f.debug_list().entries(self.0.iter()).finish()
3297 }
3298 }
3299
3300 f.debug_struct("Snapshot")
3301 .field("id", &self.id)
3302 .field("root_name", &self.root_name)
3303 .field("entries_by_path", &EntriesByPath(&self.entries_by_path))
3304 .field("entries_by_id", &EntriesById(&self.entries_by_id))
3305 .finish()
3306 }
3307}
3308
/// A handle to a file within a worktree, implementing `language::File`.
#[derive(Debug, Clone, PartialEq)]
pub struct File {
    /// The worktree this file belongs to.
    pub worktree: Entity<Worktree>,
    /// Path of the file, relative to the worktree root.
    pub path: Arc<RelPath>,
    /// Last known state of the file on disk.
    pub disk_state: DiskState,
    /// The project entry backing this file, if any.
    pub entry_id: Option<ProjectEntryId>,
    /// Whether the file lives in a local (as opposed to remote) worktree.
    pub is_local: bool,
    /// Whether the file is considered private (e.g. a `.env`-style file).
    pub is_private: bool,
}
3318
impl language::File for File {
    /// Returns `self` as a `LocalFile` only when the worktree is local.
    fn as_local(&self) -> Option<&dyn language::LocalFile> {
        if self.is_local { Some(self) } else { None }
    }

    fn disk_state(&self) -> DiskState {
        self.disk_state
    }

    fn path(&self) -> &Arc<RelPath> {
        &self.path
    }

    fn full_path(&self, cx: &App) -> PathBuf {
        self.worktree.read(cx).full_path(&self.path)
    }

    /// Returns the last component of this handle's absolute path. If this handle refers to the root
    /// of its worktree, then this method will return the name of the worktree itself.
    fn file_name<'a>(&'a self, cx: &'a App) -> &'a str {
        self.path
            .file_name()
            .unwrap_or_else(|| self.worktree.read(cx).root_name_str())
    }

    fn worktree_id(&self, cx: &App) -> WorktreeId {
        self.worktree.read(cx).id()
    }

    /// Serializes this file handle for transmission over RPC.
    fn to_proto(&self, cx: &App) -> rpc::proto::File {
        rpc::proto::File {
            worktree_id: self.worktree.read(cx).id().to_proto(),
            entry_id: self.entry_id.map(|id| id.to_proto()),
            path: self.path.as_ref().to_proto(),
            mtime: self.disk_state.mtime().map(|time| time.into()),
            is_deleted: self.disk_state.is_deleted(),
            is_historic: matches!(self.disk_state, DiskState::Historic { .. }),
        }
    }

    fn is_private(&self) -> bool {
        self.is_private
    }

    fn path_style(&self, cx: &App) -> PathStyle {
        self.worktree.read(cx).path_style()
    }

    /// Worktree-backed files can always be opened.
    fn can_open(&self) -> bool {
        true
    }
}
3371
impl language::LocalFile for File {
    fn abs_path(&self, cx: &App) -> PathBuf {
        self.worktree.read(cx).absolutize(&self.path)
    }

    /// Loads the file's contents as text on a background thread.
    ///
    /// Panics if the worktree is not local; callers are expected to obtain
    /// this handle via `as_local` first.
    fn load(&self, cx: &App) -> Task<Result<String>> {
        let worktree = self.worktree.read(cx).as_local().unwrap();
        let abs_path = worktree.absolutize(&self.path);
        let fs = worktree.fs.clone();
        cx.background_spawn(async move { fs.load(&abs_path).await })
    }

    /// Loads the file's raw bytes on a background thread.
    ///
    /// Panics if the worktree is not local (see `load`).
    fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>> {
        let worktree = self.worktree.read(cx).as_local().unwrap();
        let abs_path = worktree.absolutize(&self.path);
        let fs = worktree.fs.clone();
        cx.background_spawn(async move { fs.load_bytes(&abs_path).await })
    }
}
3391
3392impl File {
3393 pub fn for_entry(entry: Entry, worktree: Entity<Worktree>) -> Arc<Self> {
3394 Arc::new(Self {
3395 worktree,
3396 path: entry.path.clone(),
3397 disk_state: if let Some(mtime) = entry.mtime {
3398 DiskState::Present {
3399 mtime,
3400 size: entry.size,
3401 }
3402 } else {
3403 DiskState::New
3404 },
3405 entry_id: Some(entry.id),
3406 is_local: true,
3407 is_private: entry.is_private,
3408 })
3409 }
3410
3411 pub fn from_proto(
3412 proto: rpc::proto::File,
3413 worktree: Entity<Worktree>,
3414 cx: &App,
3415 ) -> Result<Self> {
3416 let worktree_id = worktree.read(cx).as_remote().context("not remote")?.id();
3417
3418 anyhow::ensure!(
3419 worktree_id.to_proto() == proto.worktree_id,
3420 "worktree id does not match file"
3421 );
3422
3423 let disk_state = if proto.is_historic {
3424 DiskState::Historic {
3425 was_deleted: proto.is_deleted,
3426 }
3427 } else if proto.is_deleted {
3428 DiskState::Deleted
3429 } else if let Some(mtime) = proto.mtime.map(&Into::into) {
3430 DiskState::Present { mtime, size: 0 }
3431 } else {
3432 DiskState::New
3433 };
3434
3435 Ok(Self {
3436 worktree,
3437 path: RelPath::from_proto(&proto.path).context("invalid path in file protobuf")?,
3438 disk_state,
3439 entry_id: proto.entry_id.map(ProjectEntryId::from_proto),
3440 is_local: false,
3441 is_private: false,
3442 })
3443 }
3444
3445 pub fn from_dyn(file: Option<&Arc<dyn language::File>>) -> Option<&Self> {
3446 file.and_then(|f| {
3447 let f: &dyn language::File = f.borrow();
3448 let f: &dyn Any = f;
3449 f.downcast_ref()
3450 })
3451 }
3452
3453 pub fn worktree_id(&self, cx: &App) -> WorktreeId {
3454 self.worktree.read(cx).id()
3455 }
3456
3457 pub fn project_entry_id(&self) -> Option<ProjectEntryId> {
3458 match self.disk_state {
3459 DiskState::Deleted => None,
3460 _ => self.entry_id,
3461 }
3462 }
3463}
3464
/// A single file or directory tracked in a worktree snapshot.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
    /// Stable identifier of this entry within the project.
    pub id: ProjectEntryId,
    /// File vs. directory, including not-yet-scanned directory states.
    pub kind: EntryKind,
    /// Path of the entry, relative to the worktree root.
    pub path: Arc<RelPath>,
    /// Filesystem inode number reported by the scan.
    pub inode: u64,
    /// Modification time; `None` if the entry has not been observed on disk.
    pub mtime: Option<MTime>,

    /// The entry's canonicalized absolute path, when it differs from its
    /// location in the worktree (e.g. for symlinked entries).
    pub canonical_path: Option<Arc<Path>>,
    /// Whether this entry is ignored by Git.
    ///
    /// We only scan ignored entries once the directory is expanded and
    /// exclude them from searches.
    pub is_ignored: bool,

    /// Whether this entry is hidden or inside hidden directory.
    ///
    /// We only scan hidden entries once the directory is expanded.
    pub is_hidden: bool,

    /// Whether this entry is always included in searches.
    ///
    /// This is used for entries that are always included in searches, even
    /// if they are ignored by git. Overridden by file_scan_exclusions.
    pub is_always_included: bool,

    /// Whether this entry's canonical path is outside of the worktree.
    /// This means the entry is only accessible from the worktree root via a
    /// symlink.
    ///
    /// We only scan entries outside of the worktree once the symlinked
    /// directory is expanded.
    pub is_external: bool,

    /// Whether this entry is considered to be a `.env` file.
    pub is_private: bool,
    /// The entry's size on disk, in bytes.
    pub size: u64,
    /// Characters contained in the entry's path, used for fuzzy matching.
    pub char_bag: CharBag,
    /// Whether the entry is a FIFO (named pipe).
    pub is_fifo: bool,
}
3506
/// The kind of a worktree entry, distinguishing files from directories in
/// various states of being scanned.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EntryKind {
    /// A directory whose contents have not been loaded.
    UnloadedDir,
    /// A directory that has been discovered but not yet fully scanned.
    PendingDir,
    /// A fully scanned directory.
    Dir,
    /// A regular file.
    File,
}
3514
/// Describes how a filesystem entry changed between snapshots.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PathChange {
    /// A filesystem entry was created.
    Added,
    /// A filesystem entry was removed.
    Removed,
    /// A filesystem entry was updated.
    Updated,
    /// A filesystem entry was either updated or added. We don't know
    /// whether or not it already existed, because the path had not
    /// been loaded before the event.
    AddedOrUpdated,
    /// A filesystem entry was found during the initial scan of the worktree.
    Loaded,
}
3530
/// Describes a change to a git repository detected during scanning.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UpdatedGitRepository {
    /// ID of the repository's working directory.
    ///
    /// For a repo that's above the worktree root, this is the ID of the worktree root, and hence not unique.
    /// It's included here to aid the GitStore in detecting when a repository's working directory is renamed.
    pub work_directory_id: ProjectEntryId,
    /// The working directory's absolute path before the change, if it existed.
    pub old_work_directory_abs_path: Option<Arc<Path>>,
    /// The working directory's absolute path after the change, if it exists.
    pub new_work_directory_abs_path: Option<Arc<Path>>,
    /// For a normal git repository checkout, the absolute path to the .git directory.
    /// For a worktree, the absolute path to the worktree's subdirectory inside the .git directory.
    pub dot_git_abs_path: Option<Arc<Path>>,
    /// Absolute path of the repository-specific git directory.
    pub repository_dir_abs_path: Option<Arc<Path>>,
    /// Absolute path of the git common directory.
    pub common_dir_abs_path: Option<Arc<Path>>,
}
3546
/// Batch of entry updates produced by a scan: path, entry id, and change kind.
pub type UpdatedEntriesSet = Arc<[(Arc<RelPath>, ProjectEntryId, PathChange)]>;
/// Batch of git repository updates produced by a scan.
pub type UpdatedGitRepositoriesSet = Arc<[UpdatedGitRepository]>;
3549
/// Borrowed tracker of the rightmost path reached during a tree traversal.
#[derive(Clone, Debug)]
pub struct PathProgress<'a> {
    /// The maximum path seen so far.
    pub max_path: &'a RelPath,
}
3554
/// Sum-tree summary that pairs an item summary with the rightmost path.
#[derive(Clone, Debug)]
pub struct PathSummary<S> {
    /// The maximum path covered by the summarized subtree.
    pub max_path: Arc<RelPath>,
    /// Aggregated summary of the items in the subtree.
    pub item_summary: S,
}
3560
impl<S: Summary> Summary for PathSummary<S> {
    type Context<'a> = S::Context<'a>;

    fn zero(cx: Self::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty().into(),
            item_summary: S::zero(cx),
        }
    }

    fn add_summary(&mut self, rhs: &Self, cx: Self::Context<'_>) {
        // `rhs` covers entries to the right, so its max path supersedes ours.
        self.max_path = rhs.max_path.clone();
        self.item_summary.add_summary(&rhs.item_summary, cx);
    }
}
3576
/// Tracks the rightmost path while seeking through `PathSummary` nodes.
impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathProgress<'a> {
    fn zero(_: <PathSummary<S> as Summary>::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty(),
        }
    }

    fn add_summary(
        &mut self,
        summary: &'a PathSummary<S>,
        _: <PathSummary<S> as Summary>::Context<'_>,
    ) {
        self.max_path = summary.max_path.as_ref()
    }
}
3592
/// Accumulates git status totals across `PathSummary<GitSummary>` nodes.
impl<'a> sum_tree::Dimension<'a, PathSummary<GitSummary>> for GitSummary {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<GitSummary>, _: ()) {
        *self += summary.item_summary
    }
}
3602
/// Lets a `PathTarget` be used to seek within a tree dimensioned by both
/// traversal progress and git status.
impl<'a>
    sum_tree::SeekTarget<'a, PathSummary<GitSummary>, Dimensions<TraversalProgress<'a>, GitSummary>>
    for PathTarget<'_>
{
    fn cmp(
        &self,
        cursor_location: &Dimensions<TraversalProgress<'a>, GitSummary>,
        _: (),
    ) -> Ordering {
        // Only the path component participates in the comparison.
        self.cmp_path(cursor_location.0.max_path)
    }
}
3615
/// Tracks the rightmost path as an owned `PathKey`.
impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathKey {
    fn zero(_: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.0 = summary.max_path.clone();
    }
}
3625
/// Advances a traversal cursor's max path across `PathSummary` nodes.
impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for TraversalProgress<'a> {
    fn zero(_cx: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.max_path = summary.max_path.as_ref();
    }
}
3635
3636impl Entry {
3637 fn new(
3638 path: Arc<RelPath>,
3639 metadata: &fs::Metadata,
3640 id: ProjectEntryId,
3641 root_char_bag: CharBag,
3642 canonical_path: Option<Arc<Path>>,
3643 ) -> Self {
3644 let char_bag = char_bag_for_path(root_char_bag, &path);
3645 Self {
3646 id,
3647 kind: if metadata.is_dir {
3648 EntryKind::PendingDir
3649 } else {
3650 EntryKind::File
3651 },
3652 path,
3653 inode: metadata.inode,
3654 mtime: Some(metadata.mtime),
3655 size: metadata.len,
3656 canonical_path,
3657 is_ignored: false,
3658 is_hidden: false,
3659 is_always_included: false,
3660 is_external: false,
3661 is_private: false,
3662 char_bag,
3663 is_fifo: metadata.is_fifo,
3664 }
3665 }
3666
3667 pub fn is_created(&self) -> bool {
3668 self.mtime.is_some()
3669 }
3670
3671 pub fn is_dir(&self) -> bool {
3672 self.kind.is_dir()
3673 }
3674
3675 pub fn is_file(&self) -> bool {
3676 self.kind.is_file()
3677 }
3678}
3679
3680impl EntryKind {
3681 pub fn is_dir(&self) -> bool {
3682 matches!(
3683 self,
3684 EntryKind::Dir | EntryKind::PendingDir | EntryKind::UnloadedDir
3685 )
3686 }
3687
3688 pub fn is_unloaded(&self) -> bool {
3689 matches!(self, EntryKind::UnloadedDir)
3690 }
3691
3692 pub fn is_file(&self) -> bool {
3693 matches!(self, EntryKind::File)
3694 }
3695}
3696
3697impl sum_tree::Item for Entry {
3698 type Summary = EntrySummary;
3699
3700 fn summary(&self, _cx: ()) -> Self::Summary {
3701 let non_ignored_count = if self.is_ignored && !self.is_always_included {
3702 0
3703 } else {
3704 1
3705 };
3706 let file_count;
3707 let non_ignored_file_count;
3708 if self.is_file() {
3709 file_count = 1;
3710 non_ignored_file_count = non_ignored_count;
3711 } else {
3712 file_count = 0;
3713 non_ignored_file_count = 0;
3714 }
3715
3716 EntrySummary {
3717 max_path: self.path.clone(),
3718 count: 1,
3719 non_ignored_count,
3720 file_count,
3721 non_ignored_file_count,
3722 }
3723 }
3724}
3725
impl sum_tree::KeyedItem for Entry {
    type Key = PathKey;

    /// Entries are keyed (and thus ordered) by their worktree-relative path.
    fn key(&self) -> Self::Key {
        PathKey(self.path.clone())
    }
}
3733
/// Aggregated statistics for a subtree of worktree entries.
#[derive(Clone, Debug)]
pub struct EntrySummary {
    // Rightmost (maximum) path in the subtree.
    max_path: Arc<RelPath>,
    // Total number of entries.
    count: usize,
    // Entries that are not git-ignored (or are always included).
    non_ignored_count: usize,
    // Entries that are files.
    file_count: usize,
    // Files that are not git-ignored (or are always included).
    non_ignored_file_count: usize,
}
3742
impl Default for EntrySummary {
    /// The identity summary: empty path, all counts zero.
    fn default() -> Self {
        Self {
            max_path: Arc::from(RelPath::empty()),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}
3754
impl sum_tree::ContextLessSummary for EntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    /// Counts accumulate; the max path takes the rightmost value.
    fn add_summary(&mut self, rhs: &Self) {
        self.max_path = rhs.max_path.clone();
        self.count += rhs.count;
        self.non_ignored_count += rhs.non_ignored_count;
        self.file_count += rhs.file_count;
        self.non_ignored_file_count += rhs.non_ignored_file_count;
    }
}
3768
/// Per-entry record stored in the id-keyed tree (`entries_by_id`).
#[derive(Clone, Debug)]
struct PathEntry {
    id: ProjectEntryId,
    path: Arc<RelPath>,
    is_ignored: bool,
    // Scan in which this entry was last updated.
    scan_id: usize,
}
3776
impl sum_tree::Item for PathEntry {
    type Summary = PathEntrySummary;

    /// Summaries track only the maximum entry id in a subtree.
    fn summary(&self, _cx: ()) -> Self::Summary {
        PathEntrySummary { max_id: self.id }
    }
}
3784
impl sum_tree::KeyedItem for PathEntry {
    type Key = ProjectEntryId;

    /// Path entries are keyed (and thus ordered) by their entry id.
    fn key(&self) -> Self::Key {
        self.id
    }
}
3792
/// Summary for the id-keyed tree: the largest entry id in a subtree.
#[derive(Clone, Debug, Default)]
struct PathEntrySummary {
    max_id: ProjectEntryId,
}
3797
impl sum_tree::ContextLessSummary for PathEntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    /// The right-hand summary covers larger ids, so its max wins.
    fn add_summary(&mut self, summary: &Self) {
        self.max_id = summary.max_id;
    }
}
3807
/// Lets a cursor seek by entry id within the id-keyed tree.
impl<'a> sum_tree::Dimension<'a, PathEntrySummary> for ProjectEntryId {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathEntrySummary, _: ()) {
        *self = summary.max_id;
    }
}
3817
/// Ordered key type wrapping a worktree-relative path.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PathKey(pub Arc<RelPath>);
3820
impl Default for PathKey {
    /// The empty path, which orders before all other paths.
    fn default() -> Self {
        Self(RelPath::empty().into())
    }
}
3826
/// Lets a cursor seek by path within the path-keyed entry tree.
impl<'a> sum_tree::Dimension<'a, EntrySummary> for PathKey {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.0 = summary.max_path.clone();
    }
}
3836
/// Long-running scanner that keeps a worktree snapshot in sync with the
/// filesystem on a background task.
struct BackgroundScanner {
    // Mutable scanner state, shared with scan jobs.
    state: async_lock::Mutex<BackgroundScannerState>,
    fs: Arc<dyn Fs>,
    // Whether the underlying filesystem is case-sensitive.
    fs_case_sensitive: bool,
    // Channel used to report scan progress/results back to the worktree.
    status_updates_tx: UnboundedSender<ScanState>,
    executor: BackgroundExecutor,
    // Incoming requests to rescan specific paths.
    scan_requests_rx: channel::Receiver<ScanRequest>,
    // Incoming requests to eagerly load entire path prefixes.
    path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
    // Source of fresh ids for newly discovered entries.
    next_entry_id: Arc<AtomicUsize>,
    // Current lifecycle phase of the scanner.
    phase: BackgroundScannerPhase,
    watcher: Arc<dyn Watcher>,
    settings: WorktreeSettings,
    // NOTE(review): inferred from the name — presumably controls whether
    // private paths are shared with collaborators; confirm at call sites.
    share_private_files: bool,
    /// Whether this is a single-file worktree (root is a file, not a directory).
    /// Used to determine if we should give up after repeated canonicalization failures.
    is_single_file: bool,
}
3854
/// Lifecycle phase of the `BackgroundScanner`, used to adjust how precisely
/// filesystem events can be interpreted.
#[derive(Copy, Clone, PartialEq)]
enum BackgroundScannerPhase {
    /// Performing the initial full scan of the worktree.
    InitialScan,
    /// Processing FS events that arrived while the initial scan was running.
    EventsReceivedDuringInitialScan,
    /// Steady state: processing live FS events.
    Events,
}
3861
3862impl BackgroundScanner {
    /// Main loop of the background scanner: discovers ancestor git/ignore
    /// state, performs the initial scan of the worktree, then processes scan
    /// requests, path prefix requests, filesystem events, and global
    /// gitignore changes until the channels close or the worktree is dropped.
    async fn run(&mut self, mut fs_events_rx: Pin<Box<dyn Send + Stream<Item = Vec<PathEvent>>>>) {
        let root_abs_path;
        let scanning_enabled;
        {
            let state = self.state.lock().await;
            root_abs_path = state.snapshot.abs_path.clone();
            scanning_enabled = state.scanning_enabled;
        }

        // If the worktree root does not contain a git repository, then find
        // the git repository in an ancestor directory. Find any gitignore files
        // in ancestor directories.
        let repo = if scanning_enabled {
            let (ignores, exclude, repo) =
                discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await;
            self.state
                .lock()
                .await
                .snapshot
                .ignores_by_parent_abs_path
                .extend(ignores);
            if let Some(exclude) = exclude {
                self.state
                    .lock()
                    .await
                    .snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .insert(root_abs_path.as_path().into(), (exclude, false));
            }

            repo
        } else {
            None
        };

        let containing_git_repository = if let Some((ancestor_dot_git, work_directory)) = repo
            && scanning_enabled
        {
            maybe!(async {
                self.state
                    .lock()
                    .await
                    .insert_git_repository_for_path(
                        work_directory,
                        ancestor_dot_git.clone().into(),
                        self.fs.as_ref(),
                        self.watcher.as_ref(),
                    )
                    .await
                    .log_err()?;
                Some(ancestor_dot_git)
            })
            .await
        } else {
            None
        };

        log::trace!("containing git repository: {containing_git_repository:?}");

        // Load the global gitignore (if any) and watch it for changes; when
        // there is none, substitute a stream that never yields.
        let global_gitignore_file = paths::global_gitignore_path();
        let mut global_gitignore_events = if let Some(global_gitignore_path) =
            &global_gitignore_file
            && scanning_enabled
        {
            let is_file = self.fs.is_file(&global_gitignore_path).await;
            self.state.lock().await.snapshot.global_gitignore = if is_file {
                build_gitignore(global_gitignore_path, self.fs.as_ref())
                    .await
                    .ok()
                    .map(Arc::new)
            } else {
                None
            };
            if is_file {
                self.fs
                    .watch(global_gitignore_path, FS_WATCH_LATENCY)
                    .await
                    .0
            } else {
                Box::pin(futures::stream::pending())
            }
        } else {
            self.state.lock().await.snapshot.global_gitignore = None;
            Box::pin(futures::stream::pending())
        };

        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        {
            let mut state = self.state.lock().await;
            state.snapshot.scan_id += 1;
            if let Some(mut root_entry) = state.snapshot.root_entry().cloned() {
                let ignore_stack = state
                    .snapshot
                    .ignore_stack_for_abs_path(root_abs_path.as_path(), true, self.fs.as_ref())
                    .await;
                if ignore_stack.is_abs_path_ignored(root_abs_path.as_path(), true) {
                    root_entry.is_ignored = true;
                    let mut root_entry = root_entry.clone();
                    state.reuse_entry_id(&mut root_entry);
                    state
                        .insert_entry(root_entry, self.fs.as_ref(), self.watcher.as_ref())
                        .await;
                }
                if root_entry.is_dir() && state.scanning_enabled {
                    state
                        .enqueue_scan_dir(
                            root_abs_path.as_path().into(),
                            &root_entry,
                            &scan_job_tx,
                            self.fs.as_ref(),
                        )
                        .await;
                }
            }
        };

        // Perform an initial scan of the directory.
        drop(scan_job_tx);
        self.scan_dirs(true, scan_job_rx).await;
        {
            let mut state = self.state.lock().await;
            state.snapshot.completed_scan_id = state.snapshot.scan_id;
        }

        self.send_status_update(false, SmallVec::new(), &[]).await;

        // Process any FS events that occurred while performing the initial scan.
        // For these events, update events cannot be as precise, because we didn't
        // have the previous state loaded yet.
        self.phase = BackgroundScannerPhase::EventsReceivedDuringInitialScan;
        if let Poll::Ready(Some(mut paths)) = futures::poll!(fs_events_rx.next()) {
            while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                paths.extend(more_paths);
            }
            self.process_events(
                paths
                    .into_iter()
                    .filter(|event| event.kind.is_some())
                    .collect(),
            )
            .await;
        }
        if let Some(abs_path) = containing_git_repository {
            self.process_events(vec![PathEvent {
                path: abs_path,
                kind: Some(fs::PathEventKind::Changed),
            }])
            .await;
        }

        // Continue processing events until the worktree is dropped.
        self.phase = BackgroundScannerPhase::Events;

        loop {
            select_biased! {
                // Process any path refresh requests from the worktree. Prioritize
                // these before handling changes reported by the filesystem.
                request = self.next_scan_request().fuse() => {
                    let Ok(request) = request else { break };
                    if !self.process_scan_request(request, false).await {
                        return;
                    }
                }

                path_prefix_request = self.path_prefixes_to_scan_rx.recv().fuse() => {
                    let Ok(request) = path_prefix_request else { break };
                    log::trace!("adding path prefix {:?}", request.path);

                    let did_scan = self.forcibly_load_paths(std::slice::from_ref(&request.path)).await;
                    if did_scan {
                        let abs_path =
                        {
                            let mut state = self.state.lock().await;
                            state.path_prefixes_to_scan.insert(request.path.clone());
                            state.snapshot.absolutize(&request.path)
                        };

                        if let Some(abs_path) = self.fs.canonicalize(&abs_path).await.log_err() {
                            self.process_events(vec![PathEvent {
                                path: abs_path,
                                kind: Some(fs::PathEventKind::Changed),
                            }])
                            .await;
                        }
                    }
                    self.send_status_update(false, request.done, &[]).await;
                }

                paths = fs_events_rx.next().fuse() => {
                    let Some(mut paths) = paths else { break };
                    while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                        paths.extend(more_paths);
                    }
                    self.process_events(paths.into_iter().filter(|event| event.kind.is_some()).collect()).await;
                }

                _ = global_gitignore_events.next().fuse() => {
                    if let Some(path) = &global_gitignore_file {
                        self.update_global_gitignore(&path).await;
                    }
                }
            }
        }
    }
4067
    /// Handles an explicit request to rescan a set of relative paths,
    /// reloading their entries from disk and then sending a status update.
    /// Returns the result of that final status update send.
    async fn process_scan_request(&self, mut request: ScanRequest, scanning: bool) -> bool {
        log::debug!("rescanning paths {:?}", request.relative_paths);

        request.relative_paths.sort_unstable();
        self.forcibly_load_paths(&request.relative_paths).await;

        let root_path = self.state.lock().await.snapshot.abs_path.clone();
        let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
        let root_canonical_path = match &root_canonical_path {
            Ok(path) => SanitizedPath::new(path),
            Err(err) => {
                log::error!("failed to canonicalize root path {root_path:?}: {err:#}");
                return true;
            }
        };
        // Map each relative path to its absolute location under the
        // canonicalized root; an empty relative path means the root itself.
        let abs_paths = request
            .relative_paths
            .iter()
            .map(|path| {
                if path.file_name().is_some() {
                    root_canonical_path.as_path().join(path.as_std_path())
                } else {
                    root_canonical_path.as_path().to_path_buf()
                }
            })
            .collect::<Vec<_>>();

        {
            let mut state = self.state.lock().await;
            let is_idle = state.snapshot.completed_scan_id == state.snapshot.scan_id;
            state.snapshot.scan_id += 1;
            if is_idle {
                state.snapshot.completed_scan_id = state.snapshot.scan_id;
            }
        }

        self.reload_entries_for_paths(
            &root_path,
            &root_canonical_path,
            &request.relative_paths,
            abs_paths,
            None,
        )
        .await;

        self.send_status_update(scanning, request.done, &[]).await
    }
4115
4116 async fn process_events(&self, mut events: Vec<PathEvent>) {
4117 let root_path = self.state.lock().await.snapshot.abs_path.clone();
4118 let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
4119 let root_canonical_path = match &root_canonical_path {
4120 Ok(path) => SanitizedPath::new(path),
4121 Err(err) => {
4122 let new_path = self
4123 .state
4124 .lock()
4125 .await
4126 .snapshot
4127 .root_file_handle
4128 .clone()
4129 .and_then(|handle| match handle.current_path(&self.fs) {
4130 Ok(new_path) => Some(new_path),
4131 Err(e) => {
4132 log::error!("Failed to refresh worktree root path: {e:#}");
4133 None
4134 }
4135 })
4136 .map(|path| SanitizedPath::new_arc(&path))
4137 .filter(|new_path| *new_path != root_path);
4138
4139 if let Some(new_path) = new_path {
4140 log::info!(
4141 "root renamed from {:?} to {:?}",
4142 root_path.as_path(),
4143 new_path.as_path(),
4144 );
4145 self.status_updates_tx
4146 .unbounded_send(ScanState::RootUpdated { new_path })
4147 .ok();
4148 } else {
4149 log::error!("root path could not be canonicalized: {err:#}");
4150
4151 // For single-file worktrees, if we can't canonicalize and the file handle
4152 // fallback also failed, the file is gone - close the worktree
4153 if self.is_single_file {
4154 log::info!(
4155 "single-file worktree root {:?} no longer exists, marking as deleted",
4156 root_path.as_path()
4157 );
4158 self.status_updates_tx
4159 .unbounded_send(ScanState::RootDeleted)
4160 .ok();
4161 }
4162 }
4163 return;
4164 }
4165 };
4166
4167 // Certain directories may have FS changes, but do not lead to git data changes that Zed cares about.
4168 // Ignore these, to avoid Zed unnecessarily rescanning git metadata.
4169 let skipped_files_in_dot_git = [COMMIT_MESSAGE, INDEX_LOCK];
4170 let skipped_dirs_in_dot_git = [FSMONITOR_DAEMON, LFS_DIR];
4171
4172 let mut relative_paths = Vec::with_capacity(events.len());
4173 let mut dot_git_abs_paths = Vec::new();
4174 let mut work_dirs_needing_exclude_update = Vec::new();
4175 events.sort_unstable_by(|left, right| left.path.cmp(&right.path));
4176 events.dedup_by(|left, right| {
4177 if left.path == right.path {
4178 if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
4179 right.kind = left.kind;
4180 }
4181 true
4182 } else if left.path.starts_with(&right.path) {
4183 if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
4184 right.kind = left.kind;
4185 }
4186 true
4187 } else {
4188 false
4189 }
4190 });
4191 {
4192 let snapshot = &self.state.lock().await.snapshot;
4193
4194 let mut ranges_to_drop = SmallVec::<[Range<usize>; 4]>::new();
4195
4196 fn skip_ix(ranges: &mut SmallVec<[Range<usize>; 4]>, ix: usize) {
4197 if let Some(last_range) = ranges.last_mut()
4198 && last_range.end == ix
4199 {
4200 last_range.end += 1;
4201 } else {
4202 ranges.push(ix..ix + 1);
4203 }
4204 }
4205
4206 for (ix, event) in events.iter().enumerate() {
4207 let abs_path = SanitizedPath::new(&event.path);
4208
4209 let mut is_git_related = false;
4210 let mut dot_git_paths = None;
4211
4212 for ancestor in abs_path.as_path().ancestors() {
4213 if is_git_dir(ancestor, self.fs.as_ref()).await {
4214 let path_in_git_dir = abs_path
4215 .as_path()
4216 .strip_prefix(ancestor)
4217 .expect("stripping off the ancestor");
4218 dot_git_paths = Some((ancestor.to_owned(), path_in_git_dir.to_owned()));
4219 break;
4220 }
4221 }
4222
4223 if let Some((dot_git_abs_path, path_in_git_dir)) = dot_git_paths {
                    // We also ignore `""`, since that is the `.git` folder
                    // itself. We do not care about it: if there are changes
                    // within, we will see them. This check prevents us from
                    // accidentally observing the ignored created file, which
                    // would otherwise leave the event list non-empty after
                    // filtering.
4230
4231 let is_dot_git_changed = {
4232 path_in_git_dir == Path::new("")
4233 && event.kind == Some(PathEventKind::Changed)
4234 && abs_path
4235 .strip_prefix(root_canonical_path)
4236 .ok()
4237 .and_then(|it| RelPath::new(it, PathStyle::local()).ok())
4238 .is_some_and(|it| {
4239 snapshot
4240 .entry_for_path(&it)
4241 .is_some_and(|entry| entry.kind == EntryKind::Dir)
4242 })
4243 };
4244 let condition = skipped_files_in_dot_git.iter().any(|skipped| {
4245 OsStr::new(skipped) == path_in_git_dir.as_path().as_os_str()
4246 }) || skipped_dirs_in_dot_git
4247 .iter()
4248 .any(|skipped_git_subdir| path_in_git_dir.starts_with(skipped_git_subdir))
4249 || is_dot_git_changed;
4250 if condition {
4251 log::debug!(
4252 "ignoring event {abs_path:?} as it's in the .git directory among skipped files or directories"
4253 );
4254 skip_ix(&mut ranges_to_drop, ix);
4255 continue;
4256 }
4257
4258 is_git_related = true;
4259 if !dot_git_abs_paths.contains(&dot_git_abs_path) {
4260 dot_git_abs_paths.push(dot_git_abs_path);
4261 }
4262 }
4263
4264 let relative_path = if let Ok(path) = abs_path.strip_prefix(&root_canonical_path)
4265 && let Ok(path) = RelPath::new(path, PathStyle::local())
4266 {
4267 path
4268 } else {
4269 if is_git_related {
4270 log::debug!(
4271 "ignoring event {abs_path:?}, since it's in git dir outside of root path {root_canonical_path:?}",
4272 );
4273 } else {
4274 log::error!(
4275 "ignoring event {abs_path:?} outside of root path {root_canonical_path:?}",
4276 );
4277 }
4278 skip_ix(&mut ranges_to_drop, ix);
4279 continue;
4280 };
4281
4282 let absolute_path = abs_path.to_path_buf();
4283 if absolute_path.ends_with(Path::new(DOT_GIT).join(REPO_EXCLUDE)) {
4284 if let Some(repository) = snapshot
4285 .git_repositories
4286 .values()
4287 .find(|repo| repo.common_dir_abs_path.join(REPO_EXCLUDE) == absolute_path)
4288 {
4289 work_dirs_needing_exclude_update
4290 .push(repository.work_directory_abs_path.clone());
4291 }
4292 }
4293
4294 if abs_path.file_name() == Some(OsStr::new(GITIGNORE)) {
4295 for (_, repo) in snapshot
4296 .git_repositories
4297 .iter()
4298 .filter(|(_, repo)| repo.directory_contains(&relative_path))
4299 {
4300 if !dot_git_abs_paths.iter().any(|dot_git_abs_path| {
4301 dot_git_abs_path == repo.common_dir_abs_path.as_ref()
4302 }) {
4303 dot_git_abs_paths.push(repo.common_dir_abs_path.to_path_buf());
4304 }
4305 }
4306 }
4307
4308 let parent_dir_is_loaded = relative_path.parent().is_none_or(|parent| {
4309 snapshot
4310 .entry_for_path(parent)
4311 .is_some_and(|entry| entry.kind == EntryKind::Dir)
4312 });
4313 if !parent_dir_is_loaded {
4314 log::debug!("ignoring event {relative_path:?} within unloaded directory");
4315 skip_ix(&mut ranges_to_drop, ix);
4316 continue;
4317 }
4318
4319 if self.settings.is_path_excluded(&relative_path) {
4320 if !is_git_related {
4321 log::debug!("ignoring FS event for excluded path {relative_path:?}");
4322 }
4323 skip_ix(&mut ranges_to_drop, ix);
4324 continue;
4325 }
4326
4327 relative_paths.push(EventRoot {
4328 path: relative_path.into_arc(),
4329 was_rescanned: matches!(event.kind, Some(fs::PathEventKind::Rescan)),
4330 });
4331 }
4332
4333 for range_to_drop in ranges_to_drop.into_iter().rev() {
4334 events.drain(range_to_drop);
4335 }
4336 }
4337
4338 if relative_paths.is_empty() && dot_git_abs_paths.is_empty() {
4339 return;
4340 }
4341
4342 if !work_dirs_needing_exclude_update.is_empty() {
4343 let mut state = self.state.lock().await;
4344 for work_dir_abs_path in work_dirs_needing_exclude_update {
4345 if let Some((_, needs_update)) = state
4346 .snapshot
4347 .repo_exclude_by_work_dir_abs_path
4348 .get_mut(&work_dir_abs_path)
4349 {
4350 *needs_update = true;
4351 }
4352 }
4353 }
4354
4355 self.state.lock().await.snapshot.scan_id += 1;
4356
4357 let (scan_job_tx, scan_job_rx) = channel::unbounded();
4358 log::debug!(
4359 "received fs events {:?}",
4360 relative_paths
4361 .iter()
4362 .map(|event_root| &event_root.path)
4363 .collect::<Vec<_>>()
4364 );
4365 self.reload_entries_for_paths(
4366 &root_path,
4367 &root_canonical_path,
4368 &relative_paths
4369 .iter()
4370 .map(|event_root| event_root.path.clone())
4371 .collect::<Vec<_>>(),
4372 events
4373 .into_iter()
4374 .map(|event| event.path)
4375 .collect::<Vec<_>>(),
4376 Some(scan_job_tx.clone()),
4377 )
4378 .await;
4379
4380 let affected_repo_roots = if !dot_git_abs_paths.is_empty() {
4381 self.update_git_repositories(dot_git_abs_paths).await
4382 } else {
4383 Vec::new()
4384 };
4385
4386 {
4387 let mut ignores_to_update = self.ignores_needing_update().await;
4388 ignores_to_update.extend(affected_repo_roots);
4389 let ignores_to_update = self.order_ignores(ignores_to_update).await;
4390 let snapshot = self.state.lock().await.snapshot.clone();
4391 self.update_ignore_statuses_for_paths(scan_job_tx, snapshot, ignores_to_update)
4392 .await;
4393 self.scan_dirs(false, scan_job_rx).await;
4394 }
4395
4396 {
4397 let mut state = self.state.lock().await;
4398 state.snapshot.completed_scan_id = state.snapshot.scan_id;
4399 for (_, entry) in mem::take(&mut state.removed_entries) {
4400 state.scanned_dirs.remove(&entry.id);
4401 }
4402 }
4403 self.send_status_update(false, SmallVec::new(), &relative_paths)
4404 .await;
4405 }
4406
4407 async fn update_global_gitignore(&self, abs_path: &Path) {
4408 let ignore = build_gitignore(abs_path, self.fs.as_ref())
4409 .await
4410 .log_err()
4411 .map(Arc::new);
4412 let (prev_snapshot, ignore_stack, abs_path) = {
4413 let mut state = self.state.lock().await;
4414 state.snapshot.global_gitignore = ignore;
4415 let abs_path = state.snapshot.abs_path().clone();
4416 let ignore_stack = state
4417 .snapshot
4418 .ignore_stack_for_abs_path(&abs_path, true, self.fs.as_ref())
4419 .await;
4420 (state.snapshot.clone(), ignore_stack, abs_path)
4421 };
4422 let (scan_job_tx, scan_job_rx) = channel::unbounded();
4423 self.update_ignore_statuses_for_paths(
4424 scan_job_tx,
4425 prev_snapshot,
4426 vec![(abs_path, ignore_stack)],
4427 )
4428 .await;
4429 self.scan_dirs(false, scan_job_rx).await;
4430 self.send_status_update(false, SmallVec::new(), &[]).await;
4431 }
4432
4433 async fn forcibly_load_paths(&self, paths: &[Arc<RelPath>]) -> bool {
4434 let (scan_job_tx, scan_job_rx) = channel::unbounded();
4435 {
4436 let mut state = self.state.lock().await;
4437 let root_path = state.snapshot.abs_path.clone();
4438 for path in paths {
4439 for ancestor in path.ancestors() {
4440 if let Some(entry) = state.snapshot.entry_for_path(ancestor)
4441 && entry.kind == EntryKind::UnloadedDir
4442 {
4443 let abs_path = root_path.join(ancestor.as_std_path());
4444 state
4445 .enqueue_scan_dir(
4446 abs_path.into(),
4447 entry,
4448 &scan_job_tx,
4449 self.fs.as_ref(),
4450 )
4451 .await;
4452 state.paths_to_scan.insert(path.clone());
4453 break;
4454 }
4455 }
4456 }
4457 drop(scan_job_tx);
4458 }
4459 while let Ok(job) = scan_job_rx.recv().await {
4460 self.scan_dir(&job).await.log_err();
4461 }
4462
4463 !mem::take(&mut self.state.lock().await.paths_to_scan).is_empty()
4464 }
4465
    /// Drains `scan_jobs_rx` on a pool of low-priority background workers,
    /// recursively scanning directories. While scanning, workers also service
    /// incoming scan requests and (when `enable_progress_updates` is set)
    /// emit periodic progress updates. Returns once the job channel is closed
    /// and fully drained, or once the status channel is disconnected.
    async fn scan_dirs(
        &self,
        enable_progress_updates: bool,
        scan_jobs_rx: channel::Receiver<ScanJob>,
    ) {
        // If the status receiver is gone, there is no one to report to, so
        // skip the scan entirely.
        if self
            .status_updates_tx
            .unbounded_send(ScanState::Started)
            .is_err()
        {
            return;
        }

        // Shared across workers so that exactly one of them wins the
        // compare_exchange below and sends each interval's progress update.
        let progress_update_count = AtomicUsize::new(0);
        self.executor
            .scoped_priority(Priority::Low, |scope| {
                for _ in 0..self.executor.num_cpus() {
                    scope.spawn(async {
                        let mut last_progress_update_count = 0;
                        let progress_update_timer = self.progress_timer(enable_progress_updates).fuse();
                        futures::pin_mut!(progress_update_timer);

                        loop {
                            select_biased! {
                                // Process any path refresh requests before moving on to process
                                // the scan queue, so that user operations are prioritized.
                                request = self.next_scan_request().fuse() => {
                                    let Ok(request) = request else { break };
                                    if !self.process_scan_request(request, true).await {
                                        return;
                                    }
                                }

                                // Send periodic progress updates to the worktree. Use an atomic counter
                                // to ensure that only one of the workers sends a progress update after
                                // the update interval elapses.
                                _ = progress_update_timer => {
                                    match progress_update_count.compare_exchange(
                                        last_progress_update_count,
                                        last_progress_update_count + 1,
                                        SeqCst,
                                        SeqCst
                                    ) {
                                        Ok(_) => {
                                            // This worker won the race: publish the update.
                                            last_progress_update_count += 1;
                                            self.send_status_update(true, SmallVec::new(), &[])
                                                .await;
                                        }
                                        Err(count) => {
                                            // Another worker already updated for this interval;
                                            // resync our local counter.
                                            last_progress_update_count = count;
                                        }
                                    }
                                    // Re-arm the timer for the next interval.
                                    progress_update_timer.set(self.progress_timer(enable_progress_updates).fuse());
                                }

                                // Recursively load directories from the file system.
                                job = scan_jobs_rx.recv().fuse() => {
                                    let Ok(job) = job else { break };
                                    if let Err(err) = self.scan_dir(&job).await
                                        && job.path.is_empty() {
                                        log::error!("error scanning directory {:?}: {}", job.abs_path, err);
                                    }
                                }
                            }
                        }
                    });
                }
            })
            .await;
    }
4536
4537 async fn send_status_update(
4538 &self,
4539 scanning: bool,
4540 barrier: SmallVec<[barrier::Sender; 1]>,
4541 event_roots: &[EventRoot],
4542 ) -> bool {
4543 let mut state = self.state.lock().await;
4544 if state.changed_paths.is_empty() && event_roots.is_empty() && scanning {
4545 return true;
4546 }
4547
4548 let merged_event_roots = merge_event_roots(&state.changed_paths, event_roots);
4549
4550 let new_snapshot = state.snapshot.clone();
4551 let old_snapshot = mem::replace(&mut state.prev_snapshot, new_snapshot.snapshot.clone());
4552 let changes = build_diff(
4553 self.phase,
4554 &old_snapshot,
4555 &new_snapshot,
4556 &merged_event_roots,
4557 );
4558 state.changed_paths.clear();
4559
4560 self.status_updates_tx
4561 .unbounded_send(ScanState::Updated {
4562 snapshot: new_snapshot,
4563 changes,
4564 scanning,
4565 barrier,
4566 })
4567 .is_ok()
4568 }
4569
    /// Scans a single directory: creates entries for each child, appends any
    /// `.gitignore` found there to the ignore stack, registers a `.git`
    /// repository if present, and enqueues follow-up `ScanJob`s for child
    /// directories that should be scanned recursively.
    async fn scan_dir(&self, job: &ScanJob) -> Result<()> {
        let root_abs_path;
        let root_char_bag;
        {
            let snapshot = &self.state.lock().await.snapshot;
            if self.settings.is_path_excluded(&job.path) {
                log::error!("skipping excluded directory {:?}", job.path);
                return Ok(());
            }
            log::trace!("scanning directory {:?}", job.path);
            root_abs_path = snapshot.abs_path().clone();
            root_char_bag = snapshot.root_char_bag;
        }

        let next_entry_id = self.next_entry_id.clone();
        let mut ignore_stack = job.ignore_stack.clone();
        let mut new_ignore = None;
        // Canonicalized lazily below, only if a symlink child is encountered.
        let mut root_canonical_path = None;
        let mut new_entries: Vec<Entry> = Vec::new();
        // Parallel to the directory entries in `new_entries` (one element per
        // directory, `None` for recursive symlinks) — see the job_ix loop below.
        let mut new_jobs: Vec<Option<ScanJob>> = Vec::new();
        let mut child_paths = self
            .fs
            .read_dir(&job.abs_path)
            .await?
            .filter_map(|entry| async {
                match entry {
                    Ok(entry) => Some(entry),
                    Err(error) => {
                        log::error!("error processing entry {:?}", error);
                        None
                    }
                }
            })
            .collect::<Vec<_>>()
            .await;

        // Ensure that .git and .gitignore are processed first.
        swap_to_front(&mut child_paths, GITIGNORE);
        swap_to_front(&mut child_paths, DOT_GIT);

        // If this directory contains a .git, it is a repository root; record
        // that on the ignore stack before evaluating any children.
        if let Some(path) = child_paths.first()
            && path.ends_with(DOT_GIT)
        {
            ignore_stack.repo_root = Some(job.abs_path.clone());
        }

        for child_abs_path in child_paths {
            let child_abs_path: Arc<Path> = child_abs_path.into();
            let child_name = child_abs_path.file_name().unwrap();
            // Skip children whose names are not valid UTF-8 relative paths.
            let Some(child_path) = child_name
                .to_str()
                .and_then(|name| Some(job.path.join(RelPath::unix(name).ok()?)))
            else {
                continue;
            };

            if child_name == DOT_GIT {
                let mut state = self.state.lock().await;
                state
                    .insert_git_repository(
                        child_path.clone(),
                        self.fs.as_ref(),
                        self.watcher.as_ref(),
                    )
                    .await;
            } else if child_name == GITIGNORE {
                match build_gitignore(&child_abs_path, self.fs.as_ref()).await {
                    Ok(ignore) => {
                        let ignore = Arc::new(ignore);
                        // Applies to all remaining children, since .gitignore
                        // was moved to the front of `child_paths` above.
                        ignore_stack = ignore_stack
                            .append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
                        new_ignore = Some(ignore);
                    }
                    Err(error) => {
                        log::error!(
                            "error loading .gitignore file {:?} - {:?}",
                            child_name,
                            error
                        );
                    }
                }
            }

            if self.settings.is_path_excluded(&child_path) {
                log::debug!("skipping excluded child entry {child_path:?}");
                self.state
                    .lock()
                    .await
                    .remove_path(&child_path, self.watcher.as_ref());
                continue;
            }

            let child_metadata = match self.fs.metadata(&child_abs_path).await {
                Ok(Some(metadata)) => metadata,
                Ok(None) => continue,
                Err(err) => {
                    log::error!("error processing {:?}: {err:#}", child_abs_path.display());
                    continue;
                }
            };

            let mut child_entry = Entry::new(
                child_path.clone(),
                &child_metadata,
                ProjectEntryId::new(&next_entry_id),
                root_char_bag,
                None,
            );

            if job.is_external {
                // Children of an external directory are external too.
                child_entry.is_external = true;
            } else if child_metadata.is_symlink {
                let canonical_path = match self.fs.canonicalize(&child_abs_path).await {
                    Ok(path) => path,
                    Err(err) => {
                        log::error!("error reading target of symlink {child_abs_path:?}: {err:#}",);
                        continue;
                    }
                };

                // lazily canonicalize the root path in order to determine if
                // symlinks point outside of the worktree.
                let root_canonical_path = match &root_canonical_path {
                    Some(path) => path,
                    None => match self.fs.canonicalize(&root_abs_path).await {
                        Ok(path) => root_canonical_path.insert(path),
                        Err(err) => {
                            log::error!("error canonicalizing root {:?}: {:?}", root_abs_path, err);
                            continue;
                        }
                    },
                };

                if !canonical_path.starts_with(root_canonical_path) {
                    child_entry.is_external = true;
                }

                child_entry.canonical_path = Some(canonical_path.into());
            }

            if child_entry.is_dir() {
                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, true);
                child_entry.is_always_included =
                    self.settings.is_path_always_included(&child_path, true);

                // Avoid recursing until crash in the case of a recursive symlink
                if job.ancestor_inodes.contains(&child_entry.inode) {
                    new_jobs.push(None);
                } else {
                    let mut ancestor_inodes = job.ancestor_inodes.clone();
                    ancestor_inodes.insert(child_entry.inode);

                    new_jobs.push(Some(ScanJob {
                        abs_path: child_abs_path.clone(),
                        path: child_path,
                        is_external: child_entry.is_external,
                        ignore_stack: if child_entry.is_ignored {
                            IgnoreStack::all()
                        } else {
                            ignore_stack.clone()
                        },
                        ancestor_inodes,
                        scan_queue: job.scan_queue.clone(),
                    }));
                }
            } else {
                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
                child_entry.is_always_included =
                    self.settings.is_path_always_included(&child_path, false);
            }

            {
                let relative_path = job
                    .path
                    .join(RelPath::unix(child_name.to_str().unwrap()).unwrap());
                if self.is_path_private(&relative_path) {
                    log::debug!("detected private file: {relative_path:?}");
                    child_entry.is_private = true;
                }
                if self.settings.is_path_hidden(&relative_path) {
                    log::debug!("detected hidden file: {relative_path:?}");
                    child_entry.is_hidden = true;
                }
            }

            new_entries.push(child_entry);
        }

        let mut state = self.state.lock().await;

        // Identify any subdirectories that should not be scanned.
        // `job_ix` tracks the position in `new_jobs` corresponding to the
        // current directory entry; removing a job keeps the lists aligned.
        let mut job_ix = 0;
        for entry in &mut new_entries {
            state.reuse_entry_id(entry);
            if entry.is_dir() {
                if state.should_scan_directory(entry) {
                    job_ix += 1;
                } else {
                    log::debug!("defer scanning directory {:?}", entry.path);
                    entry.kind = EntryKind::UnloadedDir;
                    new_jobs.remove(job_ix);
                }
            }
            if entry.is_always_included {
                state
                    .snapshot
                    .always_included_entries
                    .push(entry.path.clone());
            }
        }

        state.populate_dir(job.path.clone(), new_entries, new_ignore);
        self.watcher.add(job.abs_path.as_ref()).log_err();

        for new_job in new_jobs.into_iter().flatten() {
            job.scan_queue
                .try_send(new_job)
                .expect("channel is unbounded");
        }

        Ok(())
    }
4792
4793 /// All list arguments should be sorted before calling this function
4794 async fn reload_entries_for_paths(
4795 &self,
4796 root_abs_path: &SanitizedPath,
4797 root_canonical_path: &SanitizedPath,
4798 relative_paths: &[Arc<RelPath>],
4799 abs_paths: Vec<PathBuf>,
4800 scan_queue_tx: Option<Sender<ScanJob>>,
4801 ) {
4802 // grab metadata for all requested paths
4803 let metadata = futures::future::join_all(
4804 abs_paths
4805 .iter()
4806 .map(|abs_path| async move {
4807 let metadata = self.fs.metadata(abs_path).await?;
4808 if let Some(metadata) = metadata {
4809 let canonical_path = self.fs.canonicalize(abs_path).await?;
4810
4811 // If we're on a case-insensitive filesystem (default on macOS), we want
4812 // to only ignore metadata for non-symlink files if their absolute-path matches
4813 // the canonical-path.
4814 // Because if not, this might be a case-only-renaming (`mv test.txt TEST.TXT`)
4815 // and we want to ignore the metadata for the old path (`test.txt`) so it's
4816 // treated as removed.
4817 if !self.fs_case_sensitive && !metadata.is_symlink {
4818 let canonical_file_name = canonical_path.file_name();
4819 let file_name = abs_path.file_name();
4820 if canonical_file_name != file_name {
4821 return Ok(None);
4822 }
4823 }
4824
4825 anyhow::Ok(Some((metadata, SanitizedPath::new_arc(&canonical_path))))
4826 } else {
4827 Ok(None)
4828 }
4829 })
4830 .collect::<Vec<_>>(),
4831 )
4832 .await;
4833
4834 let mut new_ancestor_repo = if relative_paths.iter().any(|path| path.is_empty()) {
4835 Some(discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await)
4836 } else {
4837 None
4838 };
4839
4840 let mut state = self.state.lock().await;
4841 let doing_recursive_update = scan_queue_tx.is_some();
4842
4843 // Remove any entries for paths that no longer exist or are being recursively
4844 // refreshed. Do this before adding any new entries, so that renames can be
4845 // detected regardless of the order of the paths.
4846 for (path, metadata) in relative_paths.iter().zip(metadata.iter()) {
4847 if matches!(metadata, Ok(None)) || doing_recursive_update {
4848 state.remove_path(path, self.watcher.as_ref());
4849 }
4850 }
4851
4852 for (path, metadata) in relative_paths.iter().zip(metadata.into_iter()) {
4853 let abs_path: Arc<Path> = root_abs_path.join(path.as_std_path()).into();
4854 match metadata {
4855 Ok(Some((metadata, canonical_path))) => {
4856 let ignore_stack = state
4857 .snapshot
4858 .ignore_stack_for_abs_path(&abs_path, metadata.is_dir, self.fs.as_ref())
4859 .await;
4860 let is_external = !canonical_path.starts_with(&root_canonical_path);
4861 let entry_id = state.entry_id_for(self.next_entry_id.as_ref(), path, &metadata);
4862 let mut fs_entry = Entry::new(
4863 path.clone(),
4864 &metadata,
4865 entry_id,
4866 state.snapshot.root_char_bag,
4867 if metadata.is_symlink {
4868 Some(canonical_path.as_path().to_path_buf().into())
4869 } else {
4870 None
4871 },
4872 );
4873
4874 let is_dir = fs_entry.is_dir();
4875 fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir);
4876 fs_entry.is_external = is_external;
4877 fs_entry.is_private = self.is_path_private(path);
4878 fs_entry.is_always_included =
4879 self.settings.is_path_always_included(path, is_dir);
4880 fs_entry.is_hidden = self.settings.is_path_hidden(path);
4881
4882 if let (Some(scan_queue_tx), true) = (&scan_queue_tx, is_dir) {
4883 if state.should_scan_directory(&fs_entry)
4884 || (fs_entry.path.is_empty()
4885 && abs_path.file_name() == Some(OsStr::new(DOT_GIT)))
4886 {
4887 state
4888 .enqueue_scan_dir(
4889 abs_path,
4890 &fs_entry,
4891 scan_queue_tx,
4892 self.fs.as_ref(),
4893 )
4894 .await;
4895 } else {
4896 fs_entry.kind = EntryKind::UnloadedDir;
4897 }
4898 }
4899
4900 state
4901 .insert_entry(fs_entry.clone(), self.fs.as_ref(), self.watcher.as_ref())
4902 .await;
4903
4904 if path.is_empty()
4905 && let Some((ignores, exclude, repo)) = new_ancestor_repo.take()
4906 {
4907 log::trace!("updating ancestor git repository");
4908 state.snapshot.ignores_by_parent_abs_path.extend(ignores);
4909 if let Some((ancestor_dot_git, work_directory)) = repo {
4910 if let Some(exclude) = exclude {
4911 let work_directory_abs_path = self
4912 .state
4913 .lock()
4914 .await
4915 .snapshot
4916 .work_directory_abs_path(&work_directory);
4917
4918 state
4919 .snapshot
4920 .repo_exclude_by_work_dir_abs_path
4921 .insert(work_directory_abs_path.into(), (exclude, false));
4922 }
4923 state
4924 .insert_git_repository_for_path(
4925 work_directory,
4926 ancestor_dot_git.into(),
4927 self.fs.as_ref(),
4928 self.watcher.as_ref(),
4929 )
4930 .await
4931 .log_err();
4932 }
4933 }
4934 }
4935 Ok(None) => {
4936 self.remove_repo_path(path.clone(), &mut state.snapshot);
4937 }
4938 Err(err) => {
4939 log::error!("error reading file {abs_path:?} on event: {err:#}");
4940 }
4941 }
4942 }
4943
4944 util::extend_sorted(
4945 &mut state.changed_paths,
4946 relative_paths.iter().cloned(),
4947 usize::MAX,
4948 Ord::cmp,
4949 );
4950 }
4951
4952 fn remove_repo_path(&self, path: Arc<RelPath>, snapshot: &mut LocalSnapshot) -> Option<()> {
4953 if !path.components().any(|component| component == DOT_GIT)
4954 && let Some(local_repo) = snapshot.local_repo_for_work_directory_path(&path)
4955 {
4956 let id = local_repo.work_directory_id;
4957 log::debug!("remove repo path: {:?}", path);
4958 snapshot.git_repositories.remove(&id);
4959 return Some(());
4960 }
4961
4962 Some(())
4963 }
4964
    /// Walks directories whose ignore state changed, re-evaluating the
    /// ignored flag of their descendants on a pool of workers.
    /// `ignores_to_update` pairs each directory with the ignore stack that
    /// applies above it; directories that become unignored are enqueued onto
    /// `scan_job_tx` for scanning.
    async fn update_ignore_statuses_for_paths(
        &self,
        scan_job_tx: Sender<ScanJob>,
        prev_snapshot: LocalSnapshot,
        ignores_to_update: Vec<(Arc<Path>, IgnoreStack)>,
    ) {
        let (ignore_queue_tx, ignore_queue_rx) = channel::unbounded();
        {
            // Seed the queue. Each job carries clones of both senders so
            // that processing it can enqueue follow-up work for
            // subdirectories. The channel is unbounded, so send_blocking
            // cannot actually block here.
            for (parent_abs_path, ignore_stack) in ignores_to_update {
                ignore_queue_tx
                    .send_blocking(UpdateIgnoreStatusJob {
                        abs_path: parent_abs_path,
                        ignore_stack,
                        ignore_queue: ignore_queue_tx.clone(),
                        scan_queue: scan_job_tx.clone(),
                    })
                    .unwrap();
            }
        }
        // Drop our sender so the workers' recv loops terminate once all jobs
        // (including recursively enqueued ones) have completed.
        drop(ignore_queue_tx);

        self.executor
            .scoped(|scope| {
                for _ in 0..self.executor.num_cpus() {
                    scope.spawn(async {
                        loop {
                            select_biased! {
                                // Process any path refresh requests before moving on to process
                                // the queue of ignore statuses.
                                request = self.next_scan_request().fuse() => {
                                    let Ok(request) = request else { break };
                                    if !self.process_scan_request(request, true).await {
                                        return;
                                    }
                                }

                                // Recursively process directories whose ignores have changed.
                                job = ignore_queue_rx.recv().fuse() => {
                                    let Ok(job) = job else { break };
                                    self.update_ignore_status(job, &prev_snapshot).await;
                                }
                            }
                        }
                    });
                }
            })
            .await;
    }
5013
    /// Collects the absolute paths of directories whose ignore files are
    /// flagged dirty, clearing the dirty flags and pruning entries whose
    /// ignore files or repositories no longer exist. Dirty
    /// `.git/info/exclude` files are reloaded from disk (outside the state
    /// lock) and swapped into the snapshot.
    async fn ignores_needing_update(&self) -> Vec<Arc<Path>> {
        let mut ignores_to_update = Vec::new();
        let mut excludes_to_load: Vec<(Arc<Path>, PathBuf)> = Vec::new();

        // First pass: collect updates and drop stale entries without awaiting.
        {
            let snapshot = &mut self.state.lock().await.snapshot;
            let abs_path = snapshot.abs_path.clone();
            let mut repo_exclude_keys_to_remove: Vec<Arc<Path>> = Vec::new();

            for (work_dir_abs_path, (_, needs_update)) in
                snapshot.repo_exclude_by_work_dir_abs_path.iter_mut()
            {
                // Look up the repository that owns this exclude entry.
                let repository = snapshot
                    .git_repositories
                    .iter()
                    .find(|(_, repo)| &repo.work_directory_abs_path == work_dir_abs_path);

                if *needs_update {
                    *needs_update = false;
                    ignores_to_update.push(work_dir_abs_path.clone());

                    // Remember where the exclude file lives; it's read from
                    // disk after the lock is released.
                    if let Some((_, repository)) = repository {
                        let exclude_abs_path = repository.common_dir_abs_path.join(REPO_EXCLUDE);
                        excludes_to_load.push((work_dir_abs_path.clone(), exclude_abs_path));
                    }
                }

                // The repository is gone; queue its exclude entry for removal.
                if repository.is_none() {
                    repo_exclude_keys_to_remove.push(work_dir_abs_path.clone());
                }
            }

            for key in repo_exclude_keys_to_remove {
                snapshot.repo_exclude_by_work_dir_abs_path.remove(&key);
            }

            snapshot
                .ignores_by_parent_abs_path
                .retain(|parent_abs_path, (_, needs_update)| {
                    if let Ok(parent_path) = parent_abs_path.strip_prefix(abs_path.as_path())
                        && let Some(parent_path) =
                            RelPath::new(&parent_path, PathStyle::local()).log_err()
                    {
                        if *needs_update {
                            *needs_update = false;
                            // Only re-evaluate directories that still exist
                            // in the snapshot.
                            if snapshot.snapshot.entry_for_path(&parent_path).is_some() {
                                ignores_to_update.push(parent_abs_path.clone());
                            }
                        }

                        // Drop the entry if its .gitignore file no longer
                        // exists in the snapshot.
                        let ignore_path = parent_path.join(RelPath::unix(GITIGNORE).unwrap());
                        if snapshot.snapshot.entry_for_path(&ignore_path).is_none() {
                            return false;
                        }
                    }
                    true
                });
        }

        // Load gitignores asynchronously (outside the lock)
        let mut loaded_excludes: Vec<(Arc<Path>, Arc<Gitignore>)> = Vec::new();
        for (work_dir_abs_path, exclude_abs_path) in excludes_to_load {
            if let Ok(current_exclude) = build_gitignore(&exclude_abs_path, self.fs.as_ref()).await
            {
                loaded_excludes.push((work_dir_abs_path, Arc::new(current_exclude)));
            }
        }

        // Second pass: apply updates.
        if !loaded_excludes.is_empty() {
            let snapshot = &mut self.state.lock().await.snapshot;

            for (work_dir_abs_path, exclude) in loaded_excludes {
                if let Some((existing_exclude, _)) = snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .get_mut(&work_dir_abs_path)
                {
                    *existing_exclude = exclude;
                }
            }
        }

        ignores_to_update
    }
5099
5100 async fn order_ignores(&self, mut ignores: Vec<Arc<Path>>) -> Vec<(Arc<Path>, IgnoreStack)> {
5101 let fs = self.fs.clone();
5102 let snapshot = self.state.lock().await.snapshot.clone();
5103 ignores.sort_unstable();
5104 let mut ignores_to_update = ignores.into_iter().peekable();
5105
5106 let mut result = vec![];
5107 while let Some(parent_abs_path) = ignores_to_update.next() {
5108 while ignores_to_update
5109 .peek()
5110 .map_or(false, |p| p.starts_with(&parent_abs_path))
5111 {
5112 ignores_to_update.next().unwrap();
5113 }
5114 let ignore_stack = snapshot
5115 .ignore_stack_for_abs_path(&parent_abs_path, true, fs.as_ref())
5116 .await;
5117 result.push((parent_abs_path, ignore_stack));
5118 }
5119
5120 result
5121 }
5122
    /// Re-evaluates the ignored status of the immediate children of
    /// `job.abs_path` against the job's ignore stack, recording snapshot
    /// edits for entries that changed and enqueueing follow-up jobs for
    /// child directories. Newly-unignored, unloaded directories are queued
    /// for scanning.
    async fn update_ignore_status(&self, job: UpdateIgnoreStatusJob, snapshot: &LocalSnapshot) {
        log::trace!("update ignore status {:?}", job.abs_path);

        let mut ignore_stack = job.ignore_stack;
        // If this directory has its own .gitignore, it applies to the
        // children examined below.
        if let Some((ignore, _)) = snapshot.ignores_by_parent_abs_path.get(&job.abs_path) {
            ignore_stack =
                ignore_stack.append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
        }

        let mut entries_by_id_edits = Vec::new();
        let mut entries_by_path_edits = Vec::new();
        let Some(path) = job
            .abs_path
            .strip_prefix(snapshot.abs_path.as_path())
            .map_err(|_| {
                anyhow::anyhow!(
                    "Failed to strip prefix '{}' from path '{}'",
                    snapshot.abs_path.as_path().display(),
                    job.abs_path.display()
                )
            })
            .log_err()
        else {
            return;
        };

        let Some(path) = RelPath::new(&path, PathStyle::local()).log_err() else {
            return;
        };

        // A .git directory marks this directory as a repository root, which
        // affects how ignore rules are resolved for its descendants.
        if let Ok(Some(metadata)) = self.fs.metadata(&job.abs_path.join(DOT_GIT)).await
            && metadata.is_dir
        {
            ignore_stack.repo_root = Some(job.abs_path.clone());
        }

        for mut entry in snapshot.child_entries(&path).cloned() {
            let was_ignored = entry.is_ignored;
            let abs_path: Arc<Path> = snapshot.absolutize(&entry.path).into();
            entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, entry.is_dir());

            if entry.is_dir() {
                let child_ignore_stack = if entry.is_ignored {
                    IgnoreStack::all()
                } else {
                    ignore_stack.clone()
                };

                // Scan any directories that were previously ignored and weren't previously scanned.
                if was_ignored && !entry.is_ignored && entry.kind.is_unloaded() {
                    let state = self.state.lock().await;
                    if state.should_scan_directory(&entry) {
                        state
                            .enqueue_scan_dir(
                                abs_path.clone(),
                                &entry,
                                &job.scan_queue,
                                self.fs.as_ref(),
                            )
                            .await;
                    }
                }

                // Recurse: queue the child directory for the same treatment.
                job.ignore_queue
                    .send(UpdateIgnoreStatusJob {
                        abs_path: abs_path.clone(),
                        ignore_stack: child_ignore_stack,
                        ignore_queue: job.ignore_queue.clone(),
                        scan_queue: job.scan_queue.clone(),
                    })
                    .await
                    .unwrap();
            }

            if entry.is_ignored != was_ignored {
                let mut path_entry = snapshot.entries_by_id.get(&entry.id, ()).unwrap().clone();
                path_entry.scan_id = snapshot.scan_id;
                path_entry.is_ignored = entry.is_ignored;
                entries_by_id_edits.push(Edit::Insert(path_entry));
                entries_by_path_edits.push(Edit::Insert(entry));
            }
        }

        // Apply the accumulated edits and record the changed paths (kept
        // sorted) for the next status update.
        let state = &mut self.state.lock().await;
        for edit in &entries_by_path_edits {
            if let Edit::Insert(entry) = edit
                && let Err(ix) = state.changed_paths.binary_search(&entry.path)
            {
                state.changed_paths.insert(ix, entry.path.clone());
            }
        }

        state
            .snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        state.snapshot.entries_by_id.edit(entries_by_id_edits, ());
    }
5221
    /// Reconciles the snapshot's git repositories with the given changed
    /// `.git` paths: registers new repositories, bumps the scan id of
    /// existing ones, and removes repositories whose `.git` entry no longer
    /// exists. Returns the work-directory roots affected by the changes.
    async fn update_git_repositories(&self, dot_git_paths: Vec<PathBuf>) -> Vec<Arc<Path>> {
        log::trace!("reloading repositories: {dot_git_paths:?}");
        let mut state = self.state.lock().await;
        let scan_id = state.snapshot.scan_id;
        let mut affected_repo_roots = Vec::new();
        for dot_git_dir in dot_git_paths {
            // Match either the common dir or the repository dir, so git
            // worktrees (where the two differ) are handled too.
            let existing_repository_entry =
                state
                    .snapshot
                    .git_repositories
                    .iter()
                    .find_map(|(_, repo)| {
                        let dot_git_dir = SanitizedPath::new(&dot_git_dir);
                        if SanitizedPath::new(repo.common_dir_abs_path.as_ref()) == dot_git_dir
                            || SanitizedPath::new(repo.repository_dir_abs_path.as_ref())
                                == dot_git_dir
                        {
                            Some(repo.clone())
                        } else {
                            None
                        }
                    });

            match existing_repository_entry {
                None => {
                    let Ok(relative) = dot_git_dir.strip_prefix(state.snapshot.abs_path()) else {
                        debug_panic!(
                            "update_git_repositories called with .git directory outside the worktree root"
                        );
                        return Vec::new();
                    };
                    affected_repo_roots.push(dot_git_dir.parent().unwrap().into());
                    state
                        .insert_git_repository(
                            RelPath::new(relative, PathStyle::local())
                                .unwrap()
                                .into_arc(),
                            self.fs.as_ref(),
                            self.watcher.as_ref(),
                        )
                        .await;
                }
                Some(local_repository) => {
                    // Known repository: just mark it as scanned in this pass.
                    state.snapshot.git_repositories.update(
                        &local_repository.work_directory_id,
                        |entry| {
                            entry.git_dir_scan_id = scan_id;
                        },
                    );
                }
            };
        }

        // Remove any git repositories whose .git entry no longer exists.
        let snapshot = &mut state.snapshot;
        let mut ids_to_preserve = HashSet::default();
        for (&work_directory_id, entry) in snapshot.git_repositories.iter() {
            let exists_in_snapshot =
                snapshot
                    .entry_for_id(work_directory_id)
                    .is_some_and(|entry| {
                        snapshot
                            .entry_for_path(&entry.path.join(RelPath::unix(DOT_GIT).unwrap()))
                            .is_some()
                    });

            // Even if absent from the snapshot, keep repositories whose
            // common dir still exists on disk (e.g. repos above the root).
            if exists_in_snapshot
                || matches!(
                    self.fs.metadata(&entry.common_dir_abs_path).await,
                    Ok(Some(_))
                )
            {
                ids_to_preserve.insert(work_directory_id);
            }
        }

        snapshot
            .git_repositories
            .retain(|work_directory_id, entry| {
                let preserve = ids_to_preserve.contains(work_directory_id);
                if !preserve {
                    affected_repo_roots.push(entry.dot_git_abs_path.parent().unwrap().into());
                    snapshot
                        .repo_exclude_by_work_dir_abs_path
                        .remove(&entry.work_directory_abs_path);
                }
                preserve
            });

        affected_repo_roots
    }
5313
5314 async fn progress_timer(&self, running: bool) {
5315 if !running {
5316 return futures::future::pending().await;
5317 }
5318
5319 #[cfg(feature = "test-support")]
5320 if self.fs.is_fake() {
5321 return self.executor.simulate_random_delay().await;
5322 }
5323
5324 self.executor.timer(FS_WATCH_LATENCY).await
5325 }
5326
5327 fn is_path_private(&self, path: &RelPath) -> bool {
5328 !self.share_private_files && self.settings.is_path_private(path)
5329 }
5330
5331 async fn next_scan_request(&self) -> Result<ScanRequest> {
5332 let mut request = self.scan_requests_rx.recv().await?;
5333 while let Ok(next_request) = self.scan_requests_rx.try_recv() {
5334 request.relative_paths.extend(next_request.relative_paths);
5335 request.done.extend(next_request.done);
5336 }
5337 Ok(request)
5338 }
5339}
5340
/// Walks up from `root_abs_path` looking for a containing git repository,
/// collecting any `.gitignore` files found along the way. Returns the
/// collected ignores, the repository's `info/exclude` rules (if readable),
/// and — when the repository root lies strictly above the worktree — the
/// canonicalized `.git` path paired with its work directory description.
async fn discover_ancestor_git_repo(
    fs: Arc<dyn Fs>,
    root_abs_path: &SanitizedPath,
) -> (
    HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    Option<Arc<Gitignore>>,
    Option<(PathBuf, WorkDirectory)>,
) {
    let mut exclude = None;
    let mut ignores = HashMap::default();
    // index 0 is the worktree root itself; ancestors proper start at 1.
    for (index, ancestor) in root_abs_path.as_path().ancestors().enumerate() {
        if index != 0 {
            if ancestor == paths::home_dir() {
                // Unless $HOME is itself the worktree root, don't consider it as a
                // containing git repository---expensive and likely unwanted.
                break;
            } else if let Ok(ignore) = build_gitignore(&ancestor.join(GITIGNORE), fs.as_ref()).await
            {
                ignores.insert(ancestor.into(), (ignore.into(), false));
            }
        }

        let ancestor_dot_git = ancestor.join(DOT_GIT);
        log::trace!("considering ancestor: {ancestor_dot_git:?}");
        // Check whether the directory or file called `.git` exists (in the
        // case of worktrees it's a file.)
        if fs
            .metadata(&ancestor_dot_git)
            .await
            .is_ok_and(|metadata| metadata.is_some())
        {
            if index != 0 {
                // We canonicalize, since the FS events use the canonicalized path.
                if let Some(ancestor_dot_git) = fs.canonicalize(&ancestor_dot_git).await.log_err() {
                    let location_in_repo = root_abs_path
                        .as_path()
                        .strip_prefix(ancestor)
                        .unwrap()
                        .into();
                    log::info!("inserting parent git repo for this worktree: {location_in_repo:?}");
                    // We associate the external git repo with our root folder and
                    // also mark where in the git repo the root folder is located.
                    return (
                        ignores,
                        exclude,
                        Some((
                            ancestor_dot_git,
                            WorkDirectory::AboveProject {
                                absolute_path: ancestor.into(),
                                location_in_repo,
                            },
                        )),
                    );
                };
            }

            // The repo root is the worktree root itself (index 0): only load
            // its info/exclude rules; no ancestor repo to report.
            let repo_exclude_abs_path = ancestor_dot_git.join(REPO_EXCLUDE);
            if let Ok(repo_exclude) = build_gitignore(&repo_exclude_abs_path, fs.as_ref()).await {
                exclude = Some(Arc::new(repo_exclude));
            }

            // Reached root of git repository.
            break;
        }
    }

    (ignores, exclude, None)
}
5409
5410fn merge_event_roots(changed_paths: &[Arc<RelPath>], event_roots: &[EventRoot]) -> Vec<EventRoot> {
5411 let mut merged_event_roots = Vec::with_capacity(changed_paths.len() + event_roots.len());
5412 let mut changed_paths = changed_paths.iter().peekable();
5413 let mut event_roots = event_roots.iter().peekable();
5414 while let (Some(path), Some(event_root)) = (changed_paths.peek(), event_roots.peek()) {
5415 match path.cmp(&&event_root.path) {
5416 Ordering::Less => {
5417 merged_event_roots.push(EventRoot {
5418 path: (*changed_paths.next().expect("peeked changed path")).clone(),
5419 was_rescanned: false,
5420 });
5421 }
5422 Ordering::Equal => {
5423 merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5424 changed_paths.next();
5425 }
5426 Ordering::Greater => {
5427 merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5428 }
5429 }
5430 }
5431 merged_event_roots.extend(changed_paths.map(|path| EventRoot {
5432 path: path.clone(),
5433 was_rescanned: false,
5434 }));
5435 merged_event_roots.extend(event_roots.cloned());
5436 merged_event_roots
5437}
5438
/// Computes the set of per-entry changes between two snapshots, limiting the
/// comparison to the subtrees rooted at `event_roots`.
///
/// `phase` affects how changes are classified: during the initial scan,
/// newly-seen entries are reported as `Loaded`; for events that arrived while
/// the initial scan was still running, adds and updates cannot be told apart
/// and are reported as `AddedOrUpdated`.
fn build_diff(
    phase: BackgroundScannerPhase,
    old_snapshot: &Snapshot,
    new_snapshot: &Snapshot,
    event_roots: &[EventRoot],
) -> UpdatedEntriesSet {
    use BackgroundScannerPhase::*;
    use PathChange::{Added, AddedOrUpdated, Loaded, Removed, Updated};

    // Identify which paths have changed. Use the known set of changed
    // parent paths to optimize the search.
    let mut changes = Vec::new();

    let mut old_paths = old_snapshot.entries_by_path.cursor::<PathKey>(());
    let mut new_paths = new_snapshot.entries_by_path.cursor::<PathKey>(());
    // While walking a newly-loaded directory, all of its descendants are
    // reported as `Loaded` rather than `Added`.
    let mut last_newly_loaded_dir_path = None;
    old_paths.next();
    new_paths.next();
    // NOTE(review): the cursors only ever move forward across roots, which
    // assumes `event_roots` is sorted by path (see `merge_event_roots`).
    for event_root in event_roots {
        let path = PathKey(event_root.path.clone());
        // Skip both cursors ahead to this event root.
        if old_paths.item().is_some_and(|e| e.path < path.0) {
            old_paths.seek_forward(&path, Bias::Left);
        }
        if new_paths.item().is_some_and(|e| e.path < path.0) {
            new_paths.seek_forward(&path, Bias::Left);
        }
        // Walk both snapshots in lockstep over the subtree rooted at `path`.
        loop {
            match (old_paths.item(), new_paths.item()) {
                (Some(old_entry), Some(new_entry)) => {
                    // Stop once both cursors have left this root's subtree.
                    if old_entry.path > path.0
                        && new_entry.path > path.0
                        && !old_entry.path.starts_with(&path.0)
                        && !new_entry.path.starts_with(&path.0)
                    {
                        break;
                    }

                    match Ord::cmp(&old_entry.path, &new_entry.path) {
                        Ordering::Less => {
                            // Present only in the old snapshot.
                            changes.push((old_entry.path.clone(), old_entry.id, Removed));
                            old_paths.next();
                        }
                        Ordering::Equal => {
                            if phase == EventsReceivedDuringInitialScan {
                                if old_entry.id != new_entry.id {
                                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                }
                                // If the worktree was not fully initialized when this event was generated,
                                // we can't know whether this entry was added during the scan or whether
                                // it was merely updated.
                                changes.push((
                                    new_entry.path.clone(),
                                    new_entry.id,
                                    AddedOrUpdated,
                                ));
                            } else if old_entry.id != new_entry.id {
                                // Same path but a different entry: treat as a
                                // removal plus an addition.
                                changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                changes.push((new_entry.path.clone(), new_entry.id, Added));
                            } else if old_entry != new_entry {
                                if old_entry.kind.is_unloaded() {
                                    // This directory's contents were just
                                    // loaded; its descendants below will also
                                    // be reported as `Loaded`.
                                    last_newly_loaded_dir_path = Some(&new_entry.path);
                                    changes.push((new_entry.path.clone(), new_entry.id, Loaded));
                                } else {
                                    changes.push((new_entry.path.clone(), new_entry.id, Updated));
                                }
                            } else if event_root.was_rescanned {
                                // A rescan reports even unchanged entries as
                                // updated so downstream state gets refreshed.
                                changes.push((new_entry.path.clone(), new_entry.id, Updated));
                            }
                            old_paths.next();
                            new_paths.next();
                        }
                        Ordering::Greater => {
                            // Present only in the new snapshot.
                            let is_newly_loaded = phase == InitialScan
                                || last_newly_loaded_dir_path
                                    .as_ref()
                                    .is_some_and(|dir| new_entry.path.starts_with(dir));
                            changes.push((
                                new_entry.path.clone(),
                                new_entry.id,
                                if is_newly_loaded { Loaded } else { Added },
                            ));
                            new_paths.next();
                        }
                    }
                }
                (Some(old_entry), None) => {
                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                    old_paths.next();
                }
                (None, Some(new_entry)) => {
                    let is_newly_loaded = phase == InitialScan
                        || last_newly_loaded_dir_path
                            .as_ref()
                            .is_some_and(|dir| new_entry.path.starts_with(dir));
                    changes.push((
                        new_entry.path.clone(),
                        new_entry.id,
                        if is_newly_loaded { Loaded } else { Added },
                    ));
                    new_paths.next();
                }
                (None, None) => break,
            }
        }
    }

    changes.into()
}
5547
/// Moves the entry of `child_paths` whose file name equals `file` to the
/// front, preserving the relative order of the other entries.
///
/// Fixes a latent panic in the previous version: `file_name()` returns `None`
/// for paths with no final component (e.g. ones ending in `..`), so such
/// entries are now skipped instead of unwrapped. If no entry matches, the
/// vector is left unchanged.
fn swap_to_front(child_paths: &mut Vec<PathBuf>, file: &str) {
    let position = child_paths
        .iter()
        .position(|path| path.file_name().is_some_and(|name| name == file));
    if let Some(position) = position {
        // Rotate the prefix instead of remove+insert: a single in-place pass
        // rather than two O(n) shifts.
        child_paths[..=position].rotate_right(1);
    }
}
5557
5558fn char_bag_for_path(root_char_bag: CharBag, path: &RelPath) -> CharBag {
5559 let mut result = root_char_bag;
5560 result.extend(path.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
5561 result
5562}
5563
/// A unit of work for the background scanner: scan one directory and enqueue
/// follow-up jobs for its subdirectories.
#[derive(Debug)]
struct ScanJob {
    // Absolute path of the directory to scan.
    abs_path: Arc<Path>,
    // Worktree-relative path of the same directory.
    path: Arc<RelPath>,
    // Gitignore state inherited from ancestor directories.
    ignore_stack: IgnoreStack,
    // Queue on which jobs for subdirectories are scheduled.
    scan_queue: Sender<ScanJob>,
    // Inodes of ancestor directories — presumably used to detect symlink
    // cycles; TODO confirm against the scanner.
    ancestor_inodes: TreeSet<u64>,
    // Whether this directory lies outside the worktree root — presumably
    // reached through a symlink; TODO confirm.
    is_external: bool,
}
5573
/// A unit of work for recomputing ignore status beneath a directory, e.g.
/// after a gitignore file changes.
struct UpdateIgnoreStatusJob {
    // Absolute path of the directory whose subtree needs its ignore status
    // refreshed.
    abs_path: Arc<Path>,
    // Gitignore state inherited from ancestor directories.
    ignore_stack: IgnoreStack,
    // Queue for scheduling ignore updates of subdirectories.
    ignore_queue: Sender<UpdateIgnoreStatusJob>,
    // Queue for scheduling full scans — presumably for directories whose
    // ignore status flipped; TODO confirm.
    scan_queue: Sender<ScanJob>,
}
5580
/// Test-support helpers on a [`Worktree`] entity for flushing pending
/// filesystem events, so tests can reach a quiescent state before asserting.
pub trait WorktreeModelHandle {
    /// Flushes redundant FS events for the worktree's own directory.
    #[cfg(feature = "test-support")]
    fn flush_fs_events<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;

    /// Flushes FS events in the root repository's `.git` folder, which may
    /// live outside the worktree.
    #[cfg(feature = "test-support")]
    fn flush_fs_events_in_root_git_repository<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;
}
5594
impl WorktreeModelHandle for Entity<Worktree> {
    // The worktree's FS event stream sometimes delivers "redundant" events for FS changes that
    // occurred before the worktree was constructed. These events can cause the worktree to perform
    // extra directory scans, and emit extra scan-state notifications.
    //
    // This function mutates the worktree's directory and waits for those mutations to be picked up,
    // to ensure that all redundant FS events have already been processed.
    #[cfg(feature = "test-support")]
    fn flush_fs_events<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()> {
        let file_name = "fs-event-sentinel";

        let tree = self.clone();
        let (fs, root_path) = self.read_with(cx, |tree, _| {
            let tree = tree.as_local().unwrap();
            (tree.fs.clone(), tree.abs_path.clone())
        });

        async move {
            // Subscribe to events BEFORE creating the file to avoid race condition
            // where events fire before subscription is set up
            let mut events = cx.events(&tree);

            // Create a sentinel file and wait until the worktree notices it.
            fs.create_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Check if condition is already met before waiting for events
            let file_exists = || {
                tree.read_with(cx, |tree, _| {
                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
                        .is_some()
                })
            };

            // Use select to avoid blocking indefinitely if events are delayed
            while !file_exists() {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            // Remove the sentinel again and wait for that to be observed too.
            fs.remove_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Check if condition is already met before waiting for events
            let file_gone = || {
                tree.read_with(cx, |tree, _| {
                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
                        .is_none()
                })
            };

            // Use select to avoid blocking indefinitely if events are delayed
            while !file_gone() {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
                .await;
        }
        .boxed_local()
    }

    // This function is similar to flush_fs_events, except that it waits for events to be flushed in
    // the .git folder of the root repository.
    // The reason for its existence is that a repository's .git folder might live *outside* of the
    // worktree and thus its FS events might go through a different path.
    // In order to flush those, we need to create artificial events in the .git folder and wait
    // for the repository to be reloaded.
    #[cfg(feature = "test-support")]
    fn flush_fs_events_in_root_git_repository<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()> {
        let file_name = "fs-event-sentinel";

        let tree = self.clone();
        let (fs, root_path, mut git_dir_scan_id) = self.read_with(cx, |tree, _| {
            let tree = tree.as_local().unwrap();
            // The repository with the minimal work directory is taken as the
            // root repository.
            let local_repo_entry = tree
                .git_repositories
                .values()
                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
                .unwrap();
            (
                tree.fs.clone(),
                local_repo_entry.common_dir_abs_path.clone(),
                local_repo_entry.git_dir_scan_id,
            )
        });

        // Returns true (and records the new id) when the root repository's
        // git_dir_scan_id has advanced past the last one we observed.
        let scan_id_increased = |tree: &mut Worktree, git_dir_scan_id: &mut usize| {
            let tree = tree.as_local().unwrap();
            let local_repo_entry = tree
                .git_repositories
                .values()
                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
                .unwrap();

            if local_repo_entry.git_dir_scan_id > *git_dir_scan_id {
                *git_dir_scan_id = local_repo_entry.git_dir_scan_id;
                true
            } else {
                false
            }
        };

        async move {
            // Subscribe to events BEFORE creating the file to avoid race condition
            // where events fire before subscription is set up
            let mut events = cx.events(&tree);

            // Create a sentinel file inside the common `.git` dir and wait for
            // the git dir scan id to advance.
            fs.create_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Use select to avoid blocking indefinitely if events are delayed
            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            fs.remove_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Use select to avoid blocking indefinitely if events are delayed
            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
                .await;
        }
        .boxed_local()
    }
}
5746
/// A sum-tree dimension used by [`Traversal`]: tracks the rightmost path seen
/// plus running entry counts, so a traversal can seek by path or by filtered
/// entry count.
#[derive(Clone, Debug)]
struct TraversalProgress<'a> {
    // Rightmost entry path covered so far.
    max_path: &'a RelPath,
    // Total entries.
    count: usize,
    // Entries not marked ignored.
    non_ignored_count: usize,
    // File entries (ignored or not).
    file_count: usize,
    // File entries not marked ignored.
    non_ignored_file_count: usize,
}
5755
5756impl TraversalProgress<'_> {
5757 fn count(&self, include_files: bool, include_dirs: bool, include_ignored: bool) -> usize {
5758 match (include_files, include_dirs, include_ignored) {
5759 (true, true, true) => self.count,
5760 (true, true, false) => self.non_ignored_count,
5761 (true, false, true) => self.file_count,
5762 (true, false, false) => self.non_ignored_file_count,
5763 (false, true, true) => self.count - self.file_count,
5764 (false, true, false) => self.non_ignored_count - self.non_ignored_file_count,
5765 (false, false, _) => 0,
5766 }
5767 }
5768}
5769
impl<'a> sum_tree::Dimension<'a, EntrySummary> for TraversalProgress<'a> {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    // Accumulates a subtree summary: remembers the rightmost path and adds the
    // subtree's entry counts to the running totals.
    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.max_path = summary.max_path.as_ref();
        self.count += summary.count;
        self.non_ignored_count += summary.non_ignored_count;
        self.file_count += summary.file_count;
        self.non_ignored_file_count += summary.non_ignored_file_count;
    }
}
5783
impl Default for TraversalProgress<'_> {
    // Zero progress, positioned at the empty (root) path.
    fn default() -> Self {
        Self {
            max_path: RelPath::empty(),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}
5795
/// An ordered, filtered iterator over the entries of a [`Snapshot`], backed by
/// a sum-tree cursor over `entries_by_path`.
#[derive(Debug)]
pub struct Traversal<'a> {
    snapshot: &'a Snapshot,
    cursor: sum_tree::Cursor<'a, 'static, Entry, TraversalProgress<'a>>,
    // Whether ignored entries are yielded; always-included entries are
    // yielded regardless of this flag.
    include_ignored: bool,
    include_files: bool,
    include_dirs: bool,
}
5804
impl<'a> Traversal<'a> {
    /// Creates a traversal positioned at the first entry at or after
    /// `start_path` that matches the include filters.
    fn new(
        snapshot: &'a Snapshot,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        start_path: &RelPath,
    ) -> Self {
        let mut cursor = snapshot.entries_by_path.cursor(());
        cursor.seek(&TraversalTarget::path(start_path), Bias::Left);
        let mut traversal = Self {
            snapshot,
            cursor,
            include_files,
            include_dirs,
            include_ignored,
        };
        // If the entry at the seek position contributes nothing to the
        // filtered count, it is filtered out — skip ahead to the first
        // included entry.
        if traversal.end_offset() == traversal.start_offset() {
            traversal.next();
        }
        traversal
    }

    /// Advances past the current entry; equivalent to `advance_by(1)`.
    pub fn advance(&mut self) -> bool {
        self.advance_by(1)
    }

    /// Advances by `count` entries that match the include filters, returning
    /// the cursor's seek result.
    pub fn advance_by(&mut self, count: usize) -> bool {
        self.cursor.seek_forward(
            &TraversalTarget::Count {
                count: self.end_offset() + count,
                include_dirs: self.include_dirs,
                include_files: self.include_files,
                include_ignored: self.include_ignored,
            },
            Bias::Left,
        )
    }

    /// Skips past the current entry's entire subtree, stopping at the next
    /// entry that matches the include filters. Returns false when no such
    /// entry exists.
    pub fn advance_to_sibling(&mut self) -> bool {
        while let Some(entry) = self.cursor.item() {
            self.cursor
                .seek_forward(&TraversalTarget::successor(&entry.path), Bias::Left);
            if let Some(entry) = self.cursor.item()
                && (self.include_files || !entry.is_file())
                && (self.include_dirs || !entry.is_dir())
                && (self.include_ignored || !entry.is_ignored || entry.is_always_included)
            {
                return true;
            }
        }
        false
    }

    /// Repositions the traversal at the current entry's parent directory,
    /// returning false when there is no current entry or no parent.
    pub fn back_to_parent(&mut self) -> bool {
        let Some(parent_path) = self.cursor.item().and_then(|entry| entry.path.parent()) else {
            return false;
        };
        self.cursor
            .seek(&TraversalTarget::path(parent_path), Bias::Left)
    }

    /// The entry the traversal is currently positioned at, if any.
    pub fn entry(&self) -> Option<&'a Entry> {
        self.cursor.item()
    }

    pub fn snapshot(&self) -> &'a Snapshot {
        self.snapshot
    }

    /// Filtered entry count up to (but excluding) the current entry.
    pub fn start_offset(&self) -> usize {
        self.cursor
            .start()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }

    /// Filtered entry count through the end of the current entry.
    pub fn end_offset(&self) -> usize {
        self.cursor
            .end()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }
}
5887
5888impl<'a> Iterator for Traversal<'a> {
5889 type Item = &'a Entry;
5890
5891 fn next(&mut self) -> Option<Self::Item> {
5892 if let Some(item) = self.entry() {
5893 self.advance();
5894 Some(item)
5895 } else {
5896 None
5897 }
5898 }
5899}
5900
/// A cursor seek target expressed in terms of entry paths.
#[derive(Debug, Clone, Copy)]
pub enum PathTarget<'a> {
    /// Seek to the given path itself.
    Path(&'a RelPath),
    /// Seek just past the given path and its entire subtree.
    Successor(&'a RelPath),
}
5906
5907impl PathTarget<'_> {
5908 fn cmp_path(&self, other: &RelPath) -> Ordering {
5909 match self {
5910 PathTarget::Path(path) => path.cmp(&other),
5911 PathTarget::Successor(path) => {
5912 if other.starts_with(path) {
5913 Ordering::Greater
5914 } else {
5915 Ordering::Equal
5916 }
5917 }
5918 }
5919 }
5920}
5921
// Lets a `PathTarget` drive cursors tracking `PathProgress` by delegating to
// the path comparison above.
impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, PathProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &PathProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}
5927
// Lets a `PathTarget` drive cursors tracking `TraversalProgress` as well.
impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, TraversalProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}
5933
/// A seek target for [`Traversal`] cursors: either a path-based position or an
/// absolute filtered entry count.
#[derive(Debug)]
enum TraversalTarget<'a> {
    /// Seek by path (exact or subtree-successor).
    Path(PathTarget<'a>),
    /// Seek to the n-th entry as counted under the given filters.
    Count {
        count: usize,
        include_files: bool,
        include_ignored: bool,
        include_dirs: bool,
    },
}
5944
5945impl<'a> TraversalTarget<'a> {
5946 fn path(path: &'a RelPath) -> Self {
5947 Self::Path(PathTarget::Path(path))
5948 }
5949
5950 fn successor(path: &'a RelPath) -> Self {
5951 Self::Path(PathTarget::Successor(path))
5952 }
5953
5954 fn cmp_progress(&self, progress: &TraversalProgress) -> Ordering {
5955 match self {
5956 TraversalTarget::Path(path) => path.cmp_path(progress.max_path),
5957 TraversalTarget::Count {
5958 count,
5959 include_files,
5960 include_dirs,
5961 include_ignored,
5962 } => Ord::cmp(
5963 count,
5964 &progress.count(*include_files, *include_dirs, *include_ignored),
5965 ),
5966 }
5967 }
5968}
5969
// Lets a `TraversalTarget` drive cursors over `EntrySummary` trees.
impl<'a> SeekTarget<'a, EntrySummary, TraversalProgress<'a>> for TraversalTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}
5975
// Same delegation for trees summarized only by path.
impl<'a> SeekTarget<'a, PathSummary<sum_tree::NoSummary>, TraversalProgress<'a>>
    for TraversalTarget<'_>
{
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}
5983
/// Filter options for iterating the direct children of a directory entry.
pub struct ChildEntriesOptions {
    pub include_files: bool,
    pub include_dirs: bool,
    pub include_ignored: bool,
}
5989
/// Iterator over the direct children of `parent_path`, skipping each child's
/// subtree via [`Traversal::advance_to_sibling`].
pub struct ChildEntriesIter<'a> {
    parent_path: &'a RelPath,
    traversal: Traversal<'a>,
}
5994
5995impl<'a> Iterator for ChildEntriesIter<'a> {
5996 type Item = &'a Entry;
5997
5998 fn next(&mut self) -> Option<Self::Item> {
5999 if let Some(item) = self.traversal.entry()
6000 && item.path.starts_with(self.parent_path)
6001 {
6002 self.traversal.advance_to_sibling();
6003 return Some(item);
6004 }
6005 None
6006 }
6007}
6008
impl<'a> From<&'a Entry> for proto::Entry {
    // Converts a local entry into its wire (protobuf) representation.
    fn from(entry: &'a Entry) -> Self {
        Self {
            id: entry.id.to_proto(),
            is_dir: entry.is_dir(),
            path: entry.path.as_ref().to_proto(),
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_external: entry.is_external,
            is_fifo: entry.is_fifo,
            size: Some(entry.size),
            // Lossy: non-UTF-8 canonical paths are sent with replacement
            // characters.
            canonical_path: entry
                .canonical_path
                .as_ref()
                .map(|path| path.to_string_lossy().into_owned()),
        }
    }
}
6029
impl TryFrom<(&CharBag, &PathMatcher, proto::Entry)> for Entry {
    type Error = anyhow::Error;

    /// Reconstructs an [`Entry`] from its wire representation.
    ///
    /// `root_char_bag` and `always_included` come from the receiving worktree
    /// and are used to recompute fuzzy-matching and always-included state,
    /// which are not transmitted. Fails when the proto path is not a valid
    /// relative path.
    fn try_from(
        (root_char_bag, always_included, entry): (&CharBag, &PathMatcher, proto::Entry),
    ) -> Result<Self> {
        let kind = if entry.is_dir {
            EntryKind::Dir
        } else {
            EntryKind::File
        };

        let path =
            RelPath::from_proto(&entry.path).context("invalid relative path in proto message")?;
        let char_bag = char_bag_for_path(*root_char_bag, &path);
        let is_always_included = always_included.is_match(&path);
        Ok(Entry {
            id: ProjectEntryId::from_proto(entry.id),
            kind,
            path,
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            size: entry.size.unwrap_or(0),
            canonical_path: entry
                .canonical_path
                .map(|path_string| Arc::from(PathBuf::from(path_string))),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_always_included,
            is_external: entry.is_external,
            // Privacy is not transmitted; presumably recomputed locally from
            // settings — TODO confirm.
            is_private: false,
            char_bag,
            is_fifo: entry.is_fifo,
        })
    }
}
6066
/// An identifier for a worktree entry, allocated from a monotonically
/// increasing counter (see [`ProjectEntryId::new`]).
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ProjectEntryId(usize);
6069
6070impl ProjectEntryId {
6071 pub const MAX: Self = Self(usize::MAX);
6072 pub const MIN: Self = Self(usize::MIN);
6073
6074 pub fn new(counter: &AtomicUsize) -> Self {
6075 Self(counter.fetch_add(1, SeqCst))
6076 }
6077
6078 pub fn from_proto(id: u64) -> Self {
6079 Self(id as usize)
6080 }
6081
6082 pub fn to_proto(self) -> u64 {
6083 self.0 as u64
6084 }
6085
6086 pub fn from_usize(id: usize) -> Self {
6087 ProjectEntryId(id)
6088 }
6089
6090 pub fn to_usize(self) -> usize {
6091 self.0
6092 }
6093}
6094
6095#[cfg(feature = "test-support")]
6096impl CreatedEntry {
6097 pub fn into_included(self) -> Option<Entry> {
6098 match self {
6099 CreatedEntry::Included(entry) => Some(entry),
6100 CreatedEntry::Excluded { .. } => None,
6101 }
6102 }
6103}
6104
6105fn parse_gitfile(content: &str) -> anyhow::Result<&Path> {
6106 let path = content
6107 .strip_prefix("gitdir:")
6108 .with_context(|| format!("parsing gitfile content {content:?}"))?;
6109 Ok(Path::new(path.trim()))
6110}
6111
6112pub async fn discover_root_repo_common_dir(root_abs_path: &Path, fs: &dyn Fs) -> Option<Arc<Path>> {
6113 let root_dot_git = root_abs_path.join(DOT_GIT);
6114 if !fs.metadata(&root_dot_git).await.is_ok_and(|m| m.is_some()) {
6115 return None;
6116 }
6117 let dot_git_path: Arc<Path> = root_dot_git.into();
6118 let (_, common_dir) = discover_git_paths(&dot_git_path, fs).await;
6119 Some(common_dir)
6120}
6121
/// Resolves the repository directory and common directory for a `.git` path.
///
/// `dot_git_abs_path` may be an ordinary `.git` directory, or a `.git` file
/// containing a `gitdir:` pointer (linked worktrees/submodules). In the latter
/// case the pointed-to directory is canonicalized, and if it contains a
/// `commondir` file, that is resolved as the common directory.
async fn discover_git_paths(dot_git_abs_path: &Arc<Path>, fs: &dyn Fs) -> (Arc<Path>, Arc<Path>) {
    // Default (ordinary repository): both paths are the `.git` path itself.
    let mut repository_dir_abs_path = dot_git_abs_path.clone();
    let mut common_dir_abs_path = dot_git_abs_path.clone();

    // If `.git` is a file, follow its `gitdir:` pointer.
    if let Some(path) = fs
        .load(dot_git_abs_path)
        .await
        .ok()
        .as_ref()
        .and_then(|contents| parse_gitfile(contents).log_err())
    {
        // The gitdir path may be relative to the directory containing `.git`.
        let path = dot_git_abs_path
            .parent()
            .unwrap_or(Path::new(""))
            .join(path);
        if let Some(path) = fs.canonicalize(&path).await.log_err() {
            repository_dir_abs_path = Path::new(&path).into();
            common_dir_abs_path = repository_dir_abs_path.clone();

            // A linked worktree's gitdir contains a `commondir` file pointing
            // at the main repository's shared `.git` directory.
            if let Some(commondir_contents) = fs.load(&path.join("commondir")).await.ok()
                && let Some(commondir_path) = fs
                    .canonicalize(&path.join(commondir_contents.trim()))
                    .await
                    .log_err()
            {
                common_dir_abs_path = commondir_path.as_path().into();
            }
        }
    };
    (repository_dir_abs_path, common_dir_abs_path)
}
6153
/// A [`fs::Watcher`] that watches nothing; used where a watcher is required
/// but no file notifications are needed.
struct NullWatcher;

impl fs::Watcher for NullWatcher {
    // No-op: accept any path without actually watching it.
    fn add(&self, _path: &Path) -> Result<()> {
        Ok(())
    }

    // No-op counterpart to `add`.
    fn remove(&self, _path: &Path) -> Result<()> {
        Ok(())
    }
}
6165
/// Number of leading bytes sampled to sniff a file's encoding and detect
/// binary content before committing to loading the whole file.
const FILE_ANALYSIS_BYTES: usize = 1024;
6167
6168async fn decode_file_text(
6169 fs: &dyn Fs,
6170 abs_path: &Path,
6171) -> Result<(String, &'static Encoding, bool)> {
6172 let mut file = fs
6173 .open_sync(&abs_path)
6174 .await
6175 .with_context(|| format!("opening file {abs_path:?}"))?;
6176
6177 // First, read the beginning of the file to determine its kind and encoding.
6178 // We do not want to load an entire large blob into memory only to discard it.
6179 let mut file_first_bytes = Vec::with_capacity(FILE_ANALYSIS_BYTES);
6180 let mut buf = [0u8; FILE_ANALYSIS_BYTES];
6181 let mut reached_eof = false;
6182 loop {
6183 if file_first_bytes.len() >= FILE_ANALYSIS_BYTES {
6184 break;
6185 }
6186 let n = file
6187 .read(&mut buf)
6188 .with_context(|| format!("reading bytes of the file {abs_path:?}"))?;
6189 if n == 0 {
6190 reached_eof = true;
6191 break;
6192 }
6193 file_first_bytes.extend_from_slice(&buf[..n]);
6194 }
6195 let (bom_encoding, byte_content) = decode_byte_header(&file_first_bytes);
6196 anyhow::ensure!(
6197 byte_content != ByteContent::Binary,
6198 "Binary files are not supported"
6199 );
6200
6201 // If the file is eligible for opening, read the rest of the file.
6202 let mut content = file_first_bytes;
6203 if !reached_eof {
6204 let mut buf = [0u8; 8 * 1024];
6205 loop {
6206 let n = file
6207 .read(&mut buf)
6208 .with_context(|| format!("reading remaining bytes of the file {abs_path:?}"))?;
6209 if n == 0 {
6210 break;
6211 }
6212 content.extend_from_slice(&buf[..n]);
6213 }
6214 }
6215 decode_byte_full(content, bom_encoding, byte_content)
6216}
6217
6218fn decode_byte_header(prefix: &[u8]) -> (Option<&'static Encoding>, ByteContent) {
6219 if let Some((encoding, _bom_len)) = Encoding::for_bom(prefix) {
6220 return (Some(encoding), ByteContent::Unknown);
6221 }
6222 (None, analyze_byte_content(prefix))
6223}
6224
6225fn decode_byte_full(
6226 bytes: Vec<u8>,
6227 bom_encoding: Option<&'static Encoding>,
6228 byte_content: ByteContent,
6229) -> Result<(String, &'static Encoding, bool)> {
6230 if let Some(encoding) = bom_encoding {
6231 let (cow, _) = encoding.decode_with_bom_removal(&bytes);
6232 return Ok((cow.into_owned(), encoding, true));
6233 }
6234
6235 match byte_content {
6236 ByteContent::Utf16Le => {
6237 let encoding = encoding_rs::UTF_16LE;
6238 let (cow, _, _) = encoding.decode(&bytes);
6239 return Ok((cow.into_owned(), encoding, false));
6240 }
6241 ByteContent::Utf16Be => {
6242 let encoding = encoding_rs::UTF_16BE;
6243 let (cow, _, _) = encoding.decode(&bytes);
6244 return Ok((cow.into_owned(), encoding, false));
6245 }
6246 ByteContent::Binary => {
6247 anyhow::bail!("Binary files are not supported");
6248 }
6249 ByteContent::Unknown => {}
6250 }
6251
6252 fn detect_encoding(bytes: Vec<u8>) -> (String, &'static Encoding) {
6253 let mut detector = EncodingDetector::new();
6254 detector.feed(&bytes, true);
6255
6256 let encoding = detector.guess(None, true); // Use None for TLD hint to ensure neutral detection logic.
6257
6258 let (cow, _, _) = encoding.decode(&bytes);
6259 (cow.into_owned(), encoding)
6260 }
6261
6262 match String::from_utf8(bytes) {
6263 Ok(text) => {
6264 // ISO-2022-JP (and other ISO-2022 variants) consists entirely of 7-bit ASCII bytes,
6265 // so it is valid UTF-8. However, it contains escape sequences starting with '\x1b'.
6266 // If we find an escape character, we double-check the encoding to prevent
6267 // displaying raw escape sequences instead of the correct characters.
6268 if text.contains('\x1b') {
6269 let (s, enc) = detect_encoding(text.into_bytes());
6270 Ok((s, enc, false))
6271 } else {
6272 Ok((text, encoding_rs::UTF_8, false))
6273 }
6274 }
6275 Err(e) => {
6276 let (s, enc) = detect_encoding(e.into_bytes());
6277 Ok((s, enc, false))
6278 }
6279 }
6280}
6281
/// Classification of a byte sample, produced by [`analyze_byte_content`].
#[derive(Debug, PartialEq)]
enum ByteContent {
    /// Likely UTF-16 little-endian text (without a BOM).
    Utf16Le,
    /// Likely UTF-16 big-endian text (without a BOM).
    Utf16Be,
    /// Likely binary data; such files are rejected.
    Binary,
    /// No strong signal; later stages decide.
    Unknown,
}
6289
/// Heuristic check using null byte distribution plus a generic text-likeness
/// heuristic. This prefers UTF-16 when many bytes are NUL and otherwise
/// distinguishes between text-like and binary-like content. Only the first
/// `FILE_ANALYSIS_BYTES` bytes are inspected.
fn analyze_byte_content(bytes: &[u8]) -> ByteContent {
    // Too short to say anything meaningful.
    if bytes.len() < 2 {
        return ByteContent::Unknown;
    }

    // Well-known magic numbers short-circuit the statistics.
    if is_known_binary_header(bytes) {
        return ByteContent::Binary;
    }

    let limit = bytes.len().min(FILE_ANALYSIS_BYTES);
    let mut even_null_count = 0usize;
    let mut odd_null_count = 0usize;
    let mut non_text_like_count = 0usize;

    for (i, &byte) in bytes[..limit].iter().enumerate() {
        if byte == 0 {
            // Track NUL position parity: UTF-16 ASCII text puts its NULs
            // consistently on one side of each 16-bit unit.
            if i % 2 == 0 {
                even_null_count += 1;
            } else {
                odd_null_count += 1;
            }
            non_text_like_count += 1;
            continue;
        }

        let is_text_like = match byte {
            b'\t' | b'\n' | b'\r' | 0x0C => true,
            0x20..=0x7E => true,
            // Treat bytes that are likely part of UTF-8 or single-byte encodings as text-like.
            0x80..=0xBF | 0xC2..=0xF4 => true,
            _ => false,
        };

        if !is_text_like {
            non_text_like_count += 1;
        }
    }

    let total_null_count = even_null_count + odd_null_count;

    // If there are no NUL bytes at all, this is overwhelmingly likely to be text.
    if total_null_count == 0 {
        return ByteContent::Unknown;
    }

    // Thresholds: at least 1/16 NULs is "significant"; a >4x parity skew
    // suggests a particular UTF-16 byte order.
    let has_significant_nulls = total_null_count >= limit / 16;
    let nulls_skew_to_even = even_null_count > odd_null_count * 4;
    let nulls_skew_to_odd = odd_null_count > even_null_count * 4;

    if has_significant_nulls {
        let sample = &bytes[..limit];

        // UTF-16BE ASCII: [0x00, char] — nulls at even positions (high byte first)
        // UTF-16LE ASCII: [char, 0x00] — nulls at odd positions (low byte first)

        if nulls_skew_to_even && is_plausible_utf16_text(sample, false) {
            return ByteContent::Utf16Be;
        }

        if nulls_skew_to_odd && is_plausible_utf16_text(sample, true) {
            return ByteContent::Utf16Le;
        }

        return ByteContent::Binary;
    }

    // Few NULs: tolerate up to 8% other non-text-like bytes before calling
    // the content binary.
    if non_text_like_count * 100 < limit * 8 {
        ByteContent::Unknown
    } else {
        ByteContent::Binary
    }
}
6365
/// Returns true when `bytes` begins with the magic number of a well-known
/// binary container or media format.
fn is_known_binary_header(bytes: &[u8]) -> bool {
    const MAGIC_NUMBERS: &[&[u8]] = &[
        b"%PDF-",              // PDF
        b"PK\x03\x04",         // ZIP local header
        b"PK\x05\x06",         // ZIP end of central directory
        b"PK\x07\x08",         // ZIP spanning/splitting
        b"\x89PNG\r\n\x1a\n",  // PNG
        b"\xFF\xD8\xFF",       // JPEG
        b"GIF87a",             // GIF87a
        b"GIF89a",             // GIF89a
        b"IWAD",               // Doom IWAD archive
        b"PWAD",               // Doom PWAD archive
        b"RIFF",               // WAV, AVI, WebP
        b"OggS",               // OGG (Vorbis, Opus, FLAC)
        b"fLaC",               // FLAC
        b"ID3",                // MP3 with ID3v2 tag
        b"\xFF\xFB",           // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xFA",           // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xF3",           // MP3 frame sync (MPEG2 Layer3)
        b"\xFF\xF2",           // MP3 frame sync (MPEG2 Layer3)
    ];
    MAGIC_NUMBERS
        .iter()
        .any(|magic| bytes.starts_with(magic))
}
6386
/// Null byte skew alone is not enough to identify UTF-16 -- binary formats with
/// small 16-bit values (like PCM audio) produce the same pattern. Decode the
/// bytes as UTF-16 and reject if too many code units land in control character
/// ranges or form unpaired surrogates, which real text almost never contains.
fn is_plausible_utf16_text(bytes: &[u8], little_endian: bool) -> bool {
    let mut suspicious_count = 0usize;
    let mut total = 0usize;

    // Walk the buffer one 16-bit code unit at a time; a trailing odd byte is
    // simply ignored by `read_u16`.
    let mut i = 0;
    while let Some(code_unit) = read_u16(bytes, i, little_endian) {
        total += 1;

        match code_unit {
            // Whitespace controls (tab, LF, FF, CR) are expected in text.
            0x0009 | 0x000A | 0x000C | 0x000D => {}
            // C0/C1 control characters and non-characters
            0x0000..=0x001F | 0x007F..=0x009F | 0xFFFE | 0xFFFF => suspicious_count += 1,
            // High surrogate: only valid when immediately followed by a low
            // surrogate, in which case both units are consumed as a pair.
            0xD800..=0xDBFF => {
                let next_offset = i + 2;
                let has_low_surrogate = read_u16(bytes, next_offset, little_endian)
                    .is_some_and(|next| (0xDC00..=0xDFFF).contains(&next));
                if has_low_surrogate {
                    total += 1;
                    i += 2;
                } else {
                    suspicious_count += 1;
                }
            }
            // Lone low surrogate without a preceding high surrogate
            0xDC00..=0xDFFF => suspicious_count += 1,
            _ => {}
        }

        i += 2;
    }

    if total == 0 {
        return false;
    }

    // Real UTF-16 text has near-zero control characters; binary data with
    // small 16-bit values typically exceeds 5%. 2% provides a safe margin.
    suspicious_count * 100 < total * 2
}
6430
/// Reads one 16-bit code unit starting at `offset`, interpreting the two
/// bytes in the requested byte order. Returns `None` when fewer than two
/// bytes remain past `offset`.
fn read_u16(bytes: &[u8], offset: usize, little_endian: bool) -> Option<u16> {
    match bytes.get(offset..) {
        Some([a, b, ..]) => {
            let pair = [*a, *b];
            Some(if little_endian {
                u16::from_le_bytes(pair)
            } else {
                u16::from_be_bytes(pair)
            })
        }
        _ => None,
    }
}
6438
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FILE_ANALYSIS_BYTES`-long PCM 16-bit mono WAV file:
    /// a genuine RIFF/WAVE header followed by synthetic little-endian
    /// samples whose high bytes are all zero. Such data has the same
    /// null-byte skew as UTF-16LE text. Reproduction of issue #50785.
    fn build_pcm16_wav_bytes() -> Vec<u8> {
        let header: Vec<u8> = vec![
            /* RIFF header */
            0x52, 0x49, 0x46, 0x46, // "RIFF"
            0xc6, 0xcf, 0x00, 0x00, // RIFF chunk size: 0xcfc6 = 53190 (file size - 8)
            0x57, 0x41, 0x56, 0x45, // "WAVE"
            /* fmt chunk */
            0x66, 0x6d, 0x74, 0x20, // "fmt "
            0x10, 0x00, 0x00, 0x00, // chunk size: 16
            0x01, 0x00, // format: PCM (1)
            0x01, 0x00, // channels: 1 (mono)
            0x80, 0x3e, 0x00, 0x00, // sample rate: 16000
            0x00, 0x7d, 0x00, 0x00, // byte rate: 32000
            0x02, 0x00, // block align: 2
            0x10, 0x00, // bits per sample: 16
            /* LIST chunk */
            0x4c, 0x49, 0x53, 0x54, // "LIST"
            0x1a, 0x00, 0x00, 0x00, // chunk size: 26 (includes the pad byte below)
            0x49, 0x4e, 0x46, 0x4f, // "INFO"
            0x49, 0x53, 0x46, 0x54, // "ISFT"
            0x0d, 0x00, 0x00, 0x00, // sub-chunk size: 13
            0x4c, 0x61, 0x76, 0x66, 0x36, 0x32, 0x2e, 0x33, // "Lavf62.3"
            0x2e, 0x31, 0x30, 0x30, 0x00, // ".100\0"
            /* ISFT data is 13 bytes (odd), so RIFF requires a pad byte */
            0x00, // word-alignment padding
            /* data chunk header */
            0x64, 0x61, 0x74, 0x61, // "data"
            0x80, 0xcf, 0x00, 0x00, // data chunk size: 0xcf80 = 53120
        ];

        let mut bytes = header;

        // fill remaining space up to `FILE_ANALYSIS_BYTES` with synthetic PCM
        let audio_bytes_needed = FILE_ANALYSIS_BYTES - bytes.len();
        for i in 0..(audio_bytes_needed / 2) {
            let sample = (i & 0xFF) as u8;
            bytes.push(sample); // low byte: varies
            bytes.push(0x00); // high byte: zero for small values
        }

        bytes
    }

    /// The "RIFF" magic number should classify a real WAV file as binary
    /// regardless of its UTF-16-like null-byte skew.
    #[test]
    fn test_pcm16_wav_detected_as_binary() {
        let wav_bytes = build_pcm16_wav_bytes();
        assert_eq!(wav_bytes.len(), FILE_ANALYSIS_BYTES);

        let result = analyze_byte_content(&wav_bytes);
        assert_eq!(
            result,
            ByteContent::Binary,
            "PCM 16-bit WAV should be detected as Binary via RIFF header"
        );
    }

    /// Little-endian 16-bit data under an unrecognized magic ("FAKE") must
    /// still be rejected: many of its code units fall in control-character
    /// ranges, which the UTF-16 plausibility check treats as suspicious.
    #[test]
    fn test_le16_binary_not_misdetected_as_utf16le() {
        let mut bytes = b"FAKE".to_vec();
        while bytes.len() < FILE_ANALYSIS_BYTES {
            // low byte cycles 0x00..=0xFF, high byte stays zero
            let sample = (bytes.len() & 0xFF) as u8;
            bytes.push(sample);
            bytes.push(0x00);
        }
        bytes.truncate(FILE_ANALYSIS_BYTES);

        let result = analyze_byte_content(&bytes);
        assert_eq!(
            result,
            ByteContent::Binary,
            "LE 16-bit binary with control characters should be detected as Binary"
        );
    }

    /// Same as the LE case above, with the zero byte first (big-endian skew).
    #[test]
    fn test_be16_binary_not_misdetected_as_utf16be() {
        let mut bytes = b"FAKE".to_vec();
        while bytes.len() < FILE_ANALYSIS_BYTES {
            bytes.push(0x00);
            let sample = (bytes.len() & 0xFF) as u8;
            bytes.push(sample);
        }
        bytes.truncate(FILE_ANALYSIS_BYTES);

        let result = analyze_byte_content(&bytes);
        assert_eq!(
            result,
            ByteContent::Binary,
            "BE 16-bit binary with control characters should be detected as Binary"
        );
    }

    /// Genuine UTF-16LE text (repeated to fill the analysis window) should
    /// survive the plausibility check and be classified as UTF-16LE.
    #[test]
    fn test_utf16le_text_detected_as_utf16le() {
        let text = "Hello, world! This is a UTF-16 test string. ";
        let mut bytes = Vec::new();
        while bytes.len() < FILE_ANALYSIS_BYTES {
            bytes.extend(text.encode_utf16().flat_map(|u| u.to_le_bytes()));
        }
        bytes.truncate(FILE_ANALYSIS_BYTES);

        assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Le);
    }

    /// Genuine UTF-16BE text should be classified as UTF-16BE.
    #[test]
    fn test_utf16be_text_detected_as_utf16be() {
        let text = "Hello, world! This is a UTF-16 test string. ";
        let mut bytes = Vec::new();
        while bytes.len() < FILE_ANALYSIS_BYTES {
            bytes.extend(text.encode_utf16().flat_map(|u| u.to_be_bytes()));
        }
        bytes.truncate(FILE_ANALYSIS_BYTES);

        assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Be);
    }

    /// Each known magic-number prefix should force a Binary classification
    /// even when the rest of the buffer is plain ASCII.
    #[test]
    fn test_known_binary_headers() {
        let cases: &[(&[u8], &str)] = &[
            (b"RIFF\x00\x00\x00\x00WAVE", "WAV"),
            (b"RIFF\x00\x00\x00\x00AVI ", "AVI"),
            (b"OggS\x00\x02", "OGG"),
            (b"fLaC\x00\x00", "FLAC"),
            (b"ID3\x03\x00", "MP3 ID3v2"),
            (b"\xFF\xFB\x90\x00", "MP3 MPEG1 Layer3"),
            (b"\xFF\xF3\x90\x00", "MP3 MPEG2 Layer3"),
        ];

        for (header, label) in cases {
            let mut bytes = header.to_vec();
            bytes.resize(FILE_ANALYSIS_BYTES, 0x41); // pad with 'A'
            assert_eq!(
                analyze_byte_content(&bytes),
                ByteContent::Binary,
                "{label} should be detected as Binary"
            );
        }
    }
}