1mod ignore;
2mod worktree_settings;
3
4use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
5use anyhow::{Context as _, Result, anyhow};
6use chardetng::EncodingDetector;
7use clock::ReplicaId;
8use collections::{HashMap, HashSet, VecDeque};
9use encoding_rs::Encoding;
10use fs::{
11 Fs, MTime, PathEvent, PathEventKind, RemoveOptions, Watcher, copy_recursive, read_dir_items,
12};
13use futures::{
14 FutureExt as _, Stream, StreamExt,
15 channel::{
16 mpsc::{self, UnboundedSender},
17 oneshot,
18 },
19 select_biased, stream,
20 task::Poll,
21};
22use fuzzy::CharBag;
23use git::{
24 COMMIT_MESSAGE, DOT_GIT, FSMONITOR_DAEMON, GITIGNORE, INDEX_LOCK, LFS_DIR, REPO_EXCLUDE,
25 status::GitSummary,
26};
27use gpui::{
28 App, AppContext as _, AsyncApp, BackgroundExecutor, Context, Entity, EventEmitter, Priority,
29 Task,
30};
31use ignore::IgnoreStack;
32use language::DiskState;
33
34use parking_lot::Mutex;
35use paths::{local_settings_folder_name, local_vscode_folder_name};
36use postage::{
37 barrier,
38 prelude::{Sink as _, Stream as _},
39 watch,
40};
41use rpc::{
42 AnyProtoClient,
43 proto::{self, split_worktree_update},
44};
45pub use settings::WorktreeId;
46use settings::{Settings, SettingsLocation, SettingsStore};
47use smallvec::{SmallVec, smallvec};
48use smol::channel::{self, Sender};
49use std::{
50 any::Any,
51 borrow::Borrow as _,
52 cmp::Ordering,
53 collections::hash_map,
54 convert::TryFrom,
55 ffi::OsStr,
56 fmt,
57 future::Future,
58 mem::{self},
59 ops::{Deref, DerefMut, Range},
60 path::{Path, PathBuf},
61 pin::Pin,
62 sync::{
63 Arc,
64 atomic::{AtomicUsize, Ordering::SeqCst},
65 },
66 time::{Duration, Instant},
67};
68use sum_tree::{Bias, Dimensions, Edit, KeyedItem, SeekTarget, SumTree, Summary, TreeMap, TreeSet};
69use text::{LineEnding, Rope};
70use util::{
71 ResultExt, debug_panic, maybe,
72 paths::{PathMatcher, PathStyle, SanitizedPath, home_dir},
73 rel_path::RelPath,
74};
75pub use worktree_settings::WorktreeSettings;
76
77use crate::ignore::IgnoreKind;
78
/// Debounce interval applied to file-system events before the worktree reacts to them.
pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100);

/// A set of local or remote files that are being opened as part of a project.
/// Responsible for tracking related FS (for local)/collab (for remote) events and corresponding updates.
/// Stores git repositories data and the diagnostics for the file(s).
///
/// Has an absolute path, and may be set to be visible in Zed UI or not.
/// May correspond to a directory or a single file.
/// Possible examples:
/// * a drag and dropped file — may be added as an invisible, "ephemeral" entry to the current worktree
/// * a directory opened in Zed — may be added as a visible entry to the current worktree
///
/// Uses [`Entry`] to track the state of each file/directory, can look up absolute paths for entries.
pub enum Worktree {
    /// A worktree backed directly by the local file system.
    Local(LocalWorktree),
    /// A worktree replicated from a collaborator over RPC.
    Remote(RemoteWorktree),
}

/// An entry, created in the worktree.
#[derive(Debug)]
pub enum CreatedEntry {
    /// Got created and indexed by the worktree, receiving a corresponding entry.
    Included(Entry),
    /// Got created, but not indexed due to falling under exclusion filters.
    Excluded { abs_path: PathBuf },
}

/// A text file read from disk, along with the encoding it was decoded with.
#[derive(Debug)]
pub struct LoadedFile {
    pub file: Arc<File>,
    /// The decoded contents of the file.
    pub text: String,
    /// The character encoding the file was decoded from.
    pub encoding: &'static Encoding,
    /// Whether the on-disk file started with a byte-order mark.
    pub has_bom: bool,
}

/// A file read from disk as raw bytes, without text decoding.
pub struct LoadedBinaryFile {
    pub file: Arc<File>,
    pub content: Vec<u8>,
}

impl fmt::Debug for LoadedBinaryFile {
    // Manual impl so `content` is reported as a byte count instead of being dumped verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("LoadedBinaryFile")
            .field("file", &self.file)
            .field("content_bytes", &self.content.len())
            .finish()
    }
}
127
/// The on-disk implementation of a [`Worktree`].
pub struct LocalWorktree {
    snapshot: LocalSnapshot,
    /// Requests asking the background scanner to rescan specific paths.
    scan_requests_tx: channel::Sender<ScanRequest>,
    /// Requests asking the background scanner to scan everything under a path prefix.
    path_prefixes_to_scan_tx: channel::Sender<PathPrefixScanRequest>,
    is_scanning: (watch::Sender<bool>, watch::Receiver<bool>),
    /// Waiters resolved once a scan with at least the given scan id has been observed.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    _background_scanner_tasks: Vec<Task<()>>,
    /// Present while someone is observing worktree updates (e.g. while sharing).
    update_observer: Option<UpdateObservationState>,
    fs: Arc<dyn Fs>,
    fs_case_sensitive: bool,
    visible: bool,
    next_entry_id: Arc<AtomicUsize>,
    settings: WorktreeSettings,
    /// When true, entries matching the private-path settings are shared anyway.
    share_private_files: bool,
    scanning_enabled: bool,
}

/// A request to scan all entries under a given path prefix.
pub struct PathPrefixScanRequest {
    path: Arc<RelPath>,
    /// Barriers dropped when the scan finishes, releasing any waiters.
    done: SmallVec<[barrier::Sender; 1]>,
}

/// A request to refresh a specific set of relative paths.
struct ScanRequest {
    relative_paths: Vec<Arc<RelPath>>,
    /// Barriers dropped when the scan finishes, releasing any waiters.
    done: SmallVec<[barrier::Sender; 1]>,
}

/// A worktree mirrored from a collaborator, kept up to date via
/// `proto::UpdateWorktree` messages.
pub struct RemoteWorktree {
    snapshot: Snapshot,
    /// Snapshot maintained on a background task, together with the updates that
    /// have been applied to it but not yet to the foreground `snapshot`.
    background_snapshot: Arc<Mutex<(Snapshot, Vec<proto::UpdateWorktree>)>>,
    project_id: u64,
    client: AnyProtoClient,
    file_scan_inclusions: PathMatcher,
    /// Sender feeding updates to the background task; `None` once updates stop.
    updates_tx: Option<UnboundedSender<proto::UpdateWorktree>>,
    /// Forwarded a copy of each applied update, when an observer is registered.
    update_observer: Option<mpsc::UnboundedSender<proto::UpdateWorktree>>,
    /// Waiters resolved once a scan with at least the given scan id has been observed.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    replica_id: ReplicaId,
    visible: bool,
    disconnected: bool,
}
168
/// An immutable view of the worktree's entries at one point in time.
#[derive(Clone)]
pub struct Snapshot {
    id: WorktreeId,
    /// The absolute path of the worktree root.
    abs_path: Arc<SanitizedPath>,
    path_style: PathStyle,
    /// The worktree root's name, stored as a relative path.
    root_name: Arc<RelPath>,
    root_char_bag: CharBag,
    /// Entries ordered by path, for in-order traversal.
    entries_by_path: SumTree<Entry>,
    /// Entries ordered by id, for lookup by [`ProjectEntryId`].
    entries_by_id: SumTree<PathEntry>,
    always_included_entries: Vec<Arc<RelPath>>,

    /// A number that increases every time the worktree begins scanning
    /// a set of paths from the filesystem. This scanning could be caused
    /// by some operation performed on the worktree, such as reading or
    /// writing a file, or by an event reported by the filesystem.
    scan_id: usize,

    /// The latest scan id that has completed, and whose preceding scans
    /// have all completed. The current `scan_id` could be more than one
    /// greater than the `completed_scan_id` if operations are performed
    /// on the worktree while it is processing a file-system event.
    completed_scan_id: usize,
}

/// This path corresponds to the 'content path' of a repository in relation
/// to Zed's project root.
/// In the majority of the cases, this is the folder that contains the .git folder.
/// But if a sub-folder of a git repository is opened, this corresponds to the
/// project root and the .git folder is located in a parent directory.
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum WorkDirectory {
    /// The repository's content path lies inside the worktree.
    InProject {
        relative_path: Arc<RelPath>,
    },
    /// The repository's content path is an ancestor of the worktree root.
    AboveProject {
        absolute_path: Arc<Path>,
        // NOTE(review): presumably the project root's location relative to the
        // repository — confirm against callers.
        location_in_repo: Arc<Path>,
    },
}
209
210impl WorkDirectory {
211 fn path_key(&self) -> PathKey {
212 match self {
213 WorkDirectory::InProject { relative_path } => PathKey(relative_path.clone()),
214 WorkDirectory::AboveProject { .. } => PathKey(RelPath::empty().into()),
215 }
216 }
217
218 /// Returns true if the given path is a child of the work directory.
219 ///
220 /// Note that the path may not be a member of this repository, if there
221 /// is a repository in a directory between these two paths
222 /// external .git folder in a parent folder of the project root.
223 #[track_caller]
224 pub fn directory_contains(&self, path: &RelPath) -> bool {
225 match self {
226 WorkDirectory::InProject { relative_path } => path.starts_with(relative_path),
227 WorkDirectory::AboveProject { .. } => true,
228 }
229 }
230}
231
232impl Default for WorkDirectory {
233 fn default() -> Self {
234 Self::InProject {
235 relative_path: Arc::from(RelPath::empty()),
236 }
237 }
238}
239
/// A [`Snapshot`] augmented with local-only state: ignore files, git
/// repositories, and a handle to the worktree root.
#[derive(Clone)]
pub struct LocalSnapshot {
    snapshot: Snapshot,
    /// The user's global gitignore, if one was found.
    global_gitignore: Option<Arc<Gitignore>>,
    /// Exclude files for all git repositories in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    repo_exclude_by_work_dir_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the gitignore files in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the git repositories in the worktree, indexed by the project entry
    /// id of their parent directory.
    git_repositories: TreeMap<ProjectEntryId, LocalRepositoryEntry>,
    /// The file handle of the worktree root
    /// (so we can find it after it's been moved)
    root_file_handle: Option<Arc<dyn fs::FileHandle>>,
}

/// Mutable state owned by the background scanner while it traverses the file system.
struct BackgroundScannerState {
    snapshot: LocalSnapshot,
    /// Directory entries whose children have already been scanned.
    scanned_dirs: HashSet<ProjectEntryId>,
    path_prefixes_to_scan: HashSet<Arc<RelPath>>,
    paths_to_scan: HashSet<Arc<RelPath>>,
    /// The ids of all of the entries that were removed from the snapshot
    /// as part of the current update. These entry ids may be re-used
    /// if the same inode is discovered at a new path, or if the given
    /// path is re-created after being deleted.
    removed_entries: HashMap<u64, Entry>,
    changed_paths: Vec<Arc<RelPath>>,
    // NOTE(review): presumably the snapshot prior to the current batch of
    // changes, kept for diffing — confirm where it is read.
    prev_snapshot: Snapshot,
    scanning_enabled: bool,
}

/// A root path of a batch of file-system events, and whether it was rescanned.
#[derive(Clone, Debug, Eq, PartialEq)]
struct EventRoot {
    path: Arc<RelPath>,
    was_rescanned: bool,
}

/// Locally-discovered metadata about one git repository inside the worktree.
#[derive(Debug, Clone)]
struct LocalRepositoryEntry {
    /// Project entry id of the repository's working directory.
    work_directory_id: ProjectEntryId,
    work_directory: WorkDirectory,
    work_directory_abs_path: Arc<Path>,
    /// Scan id at which the repository's git directory was last scanned.
    git_dir_scan_id: usize,
    /// Absolute path to the original .git entry that caused us to create this repository.
    ///
    /// This is normally a directory, but may be a "gitfile" that points to a directory elsewhere
    /// (whose path we then store in `repository_dir_abs_path`).
    dot_git_abs_path: Arc<Path>,
    /// Absolute path to the "commondir" for this repository.
    ///
    /// This is always a directory. For a normal repository, this is the same as
    /// `dot_git_abs_path`. For a linked worktree, this is the main repo's `.git`
    /// directory (resolved from the worktree's `commondir` file). For a submodule,
    /// this equals `repository_dir_abs_path` (submodules don't have a `commondir`
    /// file).
    common_dir_abs_path: Arc<Path>,
    /// Absolute path to the directory holding the repository's state.
    ///
    /// For a normal repository, this is a directory and coincides with `dot_git_abs_path` and
    /// `common_dir_abs_path`. For a submodule or worktree, this is some subdirectory of the
    /// commondir like `/project/.git/modules/foo`.
    repository_dir_abs_path: Arc<Path>,
}
305
impl sum_tree::Item for LocalRepositoryEntry {
    type Summary = PathSummary<sum_tree::NoSummary>;

    // Repositories are summarized only by their work-directory path.
    fn summary(&self, _: <Self::Summary as Summary>::Context<'_>) -> Self::Summary {
        PathSummary {
            max_path: self.work_directory.path_key().0,
            item_summary: sum_tree::NoSummary,
        }
    }
}

impl KeyedItem for LocalRepositoryEntry {
    type Key = PathKey;

    // Keyed by the work-directory path, enabling lookup by path.
    fn key(&self) -> Self::Key {
        self.work_directory.path_key()
    }
}

// Convenience: allow calling `WorkDirectory` methods directly on a repository entry.
impl Deref for LocalRepositoryEntry {
    type Target = WorkDirectory;

    fn deref(&self) -> &Self::Target {
        &self.work_directory
    }
}

// Convenience: expose the platform-agnostic `Snapshot` fields/methods on `LocalSnapshot`.
impl Deref for LocalSnapshot {
    type Target = Snapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}

impl DerefMut for LocalSnapshot {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.snapshot
    }
}
346
/// Progress messages sent from the background scanner to the worktree.
enum ScanState {
    /// A scan has begun.
    Started,
    /// The scanner produced an updated snapshot.
    Updated {
        snapshot: LocalSnapshot,
        changes: UpdatedEntriesSet,
        /// Dropped once the update is processed, releasing any waiters.
        barrier: SmallVec<[barrier::Sender; 1]>,
        /// Whether the scanner is still busy after this update.
        scanning: bool,
    },
    /// The worktree root itself moved to a new path on disk.
    RootUpdated {
        new_path: Arc<SanitizedPath>,
    },
}

/// State kept while an observer (e.g. a collaborator) is listening for updates.
struct UpdateObservationState {
    snapshots_tx: mpsc::UnboundedSender<(LocalSnapshot, UpdatedEntriesSet)>,
    resume_updates: watch::Sender<()>,
    _maintain_remote_snapshot: Task<Option<()>>,
}

/// Events emitted by a [`Worktree`] entity.
#[derive(Debug, Clone)]
pub enum Event {
    UpdatedEntries(UpdatedEntriesSet),
    UpdatedGitRepositories(UpdatedGitRepositoriesSet),
    DeletedEntry(ProjectEntryId),
}

impl EventEmitter<Event> for Worktree {}
374
375impl Worktree {
376 pub async fn local(
377 path: impl Into<Arc<Path>>,
378 visible: bool,
379 fs: Arc<dyn Fs>,
380 next_entry_id: Arc<AtomicUsize>,
381 scanning_enabled: bool,
382 worktree_id: WorktreeId,
383 cx: &mut AsyncApp,
384 ) -> Result<Entity<Self>> {
385 let abs_path = path.into();
386 let metadata = fs
387 .metadata(&abs_path)
388 .await
389 .context("failed to stat worktree path")?;
390
391 let fs_case_sensitive = fs.is_case_sensitive().await;
392
393 let root_file_handle = if metadata.as_ref().is_some() {
394 fs.open_handle(&abs_path)
395 .await
396 .with_context(|| {
397 format!(
398 "failed to open local worktree root at {}",
399 abs_path.display()
400 )
401 })
402 .log_err()
403 } else {
404 None
405 };
406
407 Ok(cx.new(move |cx: &mut Context<Worktree>| {
408 let mut snapshot = LocalSnapshot {
409 ignores_by_parent_abs_path: Default::default(),
410 global_gitignore: Default::default(),
411 repo_exclude_by_work_dir_abs_path: Default::default(),
412 git_repositories: Default::default(),
413 snapshot: Snapshot::new(
414 worktree_id,
415 abs_path
416 .file_name()
417 .and_then(|f| f.to_str())
418 .map_or(RelPath::empty().into(), |f| {
419 RelPath::unix(f).unwrap().into()
420 }),
421 abs_path.clone(),
422 PathStyle::local(),
423 ),
424 root_file_handle,
425 };
426
427 let worktree_id = snapshot.id();
428 let settings_location = Some(SettingsLocation {
429 worktree_id,
430 path: RelPath::empty(),
431 });
432
433 let settings = WorktreeSettings::get(settings_location, cx).clone();
434 cx.observe_global::<SettingsStore>(move |this, cx| {
435 if let Self::Local(this) = this {
436 let settings = WorktreeSettings::get(settings_location, cx).clone();
437 if this.settings != settings {
438 this.settings = settings;
439 this.restart_background_scanners(cx);
440 }
441 }
442 })
443 .detach();
444
445 let share_private_files = false;
446 if let Some(metadata) = metadata {
447 let mut entry = Entry::new(
448 RelPath::empty().into(),
449 &metadata,
450 ProjectEntryId::new(&next_entry_id),
451 snapshot.root_char_bag,
452 None,
453 );
454 if metadata.is_dir {
455 if !scanning_enabled {
456 entry.kind = EntryKind::UnloadedDir;
457 }
458 } else {
459 if let Some(file_name) = abs_path.file_name()
460 && let Some(file_name) = file_name.to_str()
461 && let Ok(path) = RelPath::unix(file_name)
462 {
463 entry.is_private = !share_private_files && settings.is_path_private(path);
464 entry.is_hidden = settings.is_path_hidden(path);
465 }
466 }
467 cx.foreground_executor()
468 .block_on(snapshot.insert_entry(entry, fs.as_ref()));
469 }
470
471 let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
472 let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
473 let mut worktree = LocalWorktree {
474 share_private_files,
475 next_entry_id,
476 snapshot,
477 is_scanning: watch::channel_with(true),
478 snapshot_subscriptions: Default::default(),
479 update_observer: None,
480 scan_requests_tx,
481 path_prefixes_to_scan_tx,
482 _background_scanner_tasks: Vec::new(),
483 fs,
484 fs_case_sensitive,
485 visible,
486 settings,
487 scanning_enabled,
488 };
489 worktree.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
490 Worktree::Local(worktree)
491 }))
492 }
493
    /// Creates a worktree that mirrors one hosted by a collaborator.
    ///
    /// Incoming `proto::UpdateWorktree` messages are first applied to a
    /// background snapshot, then the foreground entity is synchronized with it.
    pub fn remote(
        project_id: u64,
        replica_id: ReplicaId,
        worktree: proto::WorktreeMetadata,
        client: AnyProtoClient,
        path_style: PathStyle,
        cx: &mut App,
    ) -> Entity<Self> {
        cx.new(|cx: &mut Context<Self>| {
            let snapshot = Snapshot::new(
                WorktreeId::from_proto(worktree.id),
                RelPath::from_proto(&worktree.root_name)
                    .unwrap_or_else(|_| RelPath::empty().into()),
                Path::new(&worktree.abs_path).into(),
                path_style,
            );

            // Shared between the background task (which applies updates) and the
            // foreground task (which drains the applied updates).
            let background_snapshot = Arc::new(Mutex::new((
                snapshot.clone(),
                Vec::<proto::UpdateWorktree>::new(),
            )));
            let (background_updates_tx, mut background_updates_rx) =
                mpsc::unbounded::<proto::UpdateWorktree>();
            let (mut snapshot_updated_tx, mut snapshot_updated_rx) = watch::channel();

            let worktree_id = snapshot.id();
            let settings_location = Some(SettingsLocation {
                worktree_id,
                path: RelPath::empty(),
            });

            let settings = WorktreeSettings::get(settings_location, cx).clone();
            let worktree = RemoteWorktree {
                client,
                project_id,
                replica_id,
                snapshot,
                file_scan_inclusions: settings.parent_dir_scan_inclusions.clone(),
                background_snapshot: background_snapshot.clone(),
                updates_tx: Some(background_updates_tx),
                update_observer: None,
                snapshot_subscriptions: Default::default(),
                visible: worktree.visible,
                disconnected: false,
            };

            // Apply updates to a separate snapshot in a background task, then
            // send them to a foreground task which updates the model.
            cx.background_spawn(async move {
                while let Some(update) = background_updates_rx.next().await {
                    {
                        let mut lock = background_snapshot.lock();
                        lock.0.apply_remote_update(
                            update.clone(),
                            &settings.parent_dir_scan_inclusions,
                        );
                        // Keep the raw update so the foreground task can forward
                        // it to any registered update observer.
                        lock.1.push(update);
                    }
                    snapshot_updated_tx.send(()).await.ok();
                }
            })
            .detach();

            // On the foreground task, update to the latest snapshot and notify
            // any update observer of all updates that led to that snapshot.
            cx.spawn(async move |this, cx| {
                while (snapshot_updated_rx.recv().await).is_some() {
                    this.update(cx, |this, cx| {
                        let mut entries_changed = false;
                        let this = this.as_remote_mut().unwrap();
                        {
                            let mut lock = this.background_snapshot.lock();
                            this.snapshot = lock.0.clone();
                            for update in lock.1.drain(..) {
                                entries_changed |= !update.updated_entries.is_empty()
                                    || !update.removed_entries.is_empty();
                                if let Some(tx) = &this.update_observer {
                                    tx.unbounded_send(update).ok();
                                }
                            }
                        };

                        if entries_changed {
                            cx.emit(Event::UpdatedEntries(Arc::default()));
                        }
                        cx.notify();
                        // Resolve any waiters whose target scan id has now been observed.
                        while let Some((scan_id, _)) = this.snapshot_subscriptions.front() {
                            if this.observed_snapshot(*scan_id) {
                                let (_, tx) = this.snapshot_subscriptions.pop_front().unwrap();
                                let _ = tx.send(());
                            } else {
                                break;
                            }
                        }
                    })?;
                }
                anyhow::Ok(())
            })
            .detach();

            Worktree::Remote(worktree)
        })
    }
597
598 pub fn as_local(&self) -> Option<&LocalWorktree> {
599 if let Worktree::Local(worktree) = self {
600 Some(worktree)
601 } else {
602 None
603 }
604 }
605
606 pub fn as_remote(&self) -> Option<&RemoteWorktree> {
607 if let Worktree::Remote(worktree) = self {
608 Some(worktree)
609 } else {
610 None
611 }
612 }
613
614 pub fn as_local_mut(&mut self) -> Option<&mut LocalWorktree> {
615 if let Worktree::Local(worktree) = self {
616 Some(worktree)
617 } else {
618 None
619 }
620 }
621
622 pub fn as_remote_mut(&mut self) -> Option<&mut RemoteWorktree> {
623 if let Worktree::Remote(worktree) = self {
624 Some(worktree)
625 } else {
626 None
627 }
628 }
629
630 pub fn is_local(&self) -> bool {
631 matches!(self, Worktree::Local(_))
632 }
633
634 pub fn is_remote(&self) -> bool {
635 !self.is_local()
636 }
637
638 pub fn settings_location(&self, _: &Context<Self>) -> SettingsLocation<'static> {
639 SettingsLocation {
640 worktree_id: self.id(),
641 path: RelPath::empty(),
642 }
643 }
644
645 pub fn snapshot(&self) -> Snapshot {
646 match self {
647 Worktree::Local(worktree) => worktree.snapshot.snapshot.clone(),
648 Worktree::Remote(worktree) => worktree.snapshot.clone(),
649 }
650 }
651
652 pub fn scan_id(&self) -> usize {
653 match self {
654 Worktree::Local(worktree) => worktree.snapshot.scan_id,
655 Worktree::Remote(worktree) => worktree.snapshot.scan_id,
656 }
657 }
658
659 pub fn metadata_proto(&self) -> proto::WorktreeMetadata {
660 proto::WorktreeMetadata {
661 id: self.id().to_proto(),
662 root_name: self.root_name().to_proto(),
663 visible: self.is_visible(),
664 abs_path: self.abs_path().to_string_lossy().into_owned(),
665 }
666 }
667
668 pub fn completed_scan_id(&self) -> usize {
669 match self {
670 Worktree::Local(worktree) => worktree.snapshot.completed_scan_id,
671 Worktree::Remote(worktree) => worktree.snapshot.completed_scan_id,
672 }
673 }
674
675 pub fn is_visible(&self) -> bool {
676 match self {
677 Worktree::Local(worktree) => worktree.visible,
678 Worktree::Remote(worktree) => worktree.visible,
679 }
680 }
681
682 pub fn replica_id(&self) -> ReplicaId {
683 match self {
684 Worktree::Local(_) => ReplicaId::LOCAL,
685 Worktree::Remote(worktree) => worktree.replica_id,
686 }
687 }
688
689 pub fn abs_path(&self) -> Arc<Path> {
690 match self {
691 Worktree::Local(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
692 Worktree::Remote(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
693 }
694 }
695
696 pub fn root_file(&self, cx: &Context<Self>) -> Option<Arc<File>> {
697 let entry = self.root_entry()?;
698 Some(File::for_entry(entry.clone(), cx.entity()))
699 }
700
    /// Registers `callback` to receive worktree update messages for `project_id`.
    pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        match self {
            Worktree::Local(this) => this.observe_updates(project_id, cx, callback),
            Worktree::Remote(this) => this.observe_updates(project_id, cx, callback),
        }
    }

    /// Stops forwarding updates to any registered observer.
    pub fn stop_observing_updates(&mut self) {
        match self {
            Worktree::Local(this) => {
                this.update_observer.take();
            }
            Worktree::Remote(this) => {
                this.update_observer.take();
            }
        }
    }

    /// Returns a future that resolves once a snapshot with at least `scan_id`
    /// has been observed.
    pub fn wait_for_snapshot(
        &mut self,
        scan_id: usize,
    ) -> impl Future<Output = Result<()>> + use<> {
        match self {
            Worktree::Local(this) => this.wait_for_snapshot(scan_id).boxed(),
            Worktree::Remote(this) => this.wait_for_snapshot(scan_id).boxed(),
        }
    }

    /// Whether an update observer is currently registered (test-only).
    #[cfg(feature = "test-support")]
    pub fn has_update_observer(&self) -> bool {
        match self {
            Worktree::Local(this) => this.update_observer.is_some(),
            Worktree::Remote(this) => this.update_observer.is_some(),
        }
    }
740
741 pub fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
742 match self {
743 Worktree::Local(this) => this.load_file(path, cx),
744 Worktree::Remote(_) => {
745 Task::ready(Err(anyhow!("remote worktrees can't yet load files")))
746 }
747 }
748 }
749
750 pub fn load_binary_file(
751 &self,
752 path: &RelPath,
753 cx: &Context<Worktree>,
754 ) -> Task<Result<LoadedBinaryFile>> {
755 match self {
756 Worktree::Local(this) => this.load_binary_file(path, cx),
757 Worktree::Remote(_) => {
758 Task::ready(Err(anyhow!("remote worktrees can't yet load binary files")))
759 }
760 }
761 }
762
763 pub fn write_file(
764 &self,
765 path: Arc<RelPath>,
766 text: Rope,
767 line_ending: LineEnding,
768 encoding: &'static Encoding,
769 has_bom: bool,
770 cx: &Context<Worktree>,
771 ) -> Task<Result<Arc<File>>> {
772 match self {
773 Worktree::Local(this) => {
774 this.write_file(path, text, line_ending, encoding, has_bom, cx)
775 }
776 Worktree::Remote(_) => {
777 Task::ready(Err(anyhow!("remote worktree can't yet write files")))
778 }
779 }
780 }
781
    /// Creates a file or directory at `path` (relative to the worktree root).
    ///
    /// Local worktrees perform the operation directly; remote worktrees send a
    /// request to the host and then insert the resulting entry locally.
    pub fn create_entry(
        &mut self,
        path: Arc<RelPath>,
        is_directory: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let worktree_id = self.id();
        match self {
            Worktree::Local(this) => this.create_entry(path, is_directory, content, cx),
            Worktree::Remote(this) => {
                let project_id = this.project_id;
                let request = this.client.request(proto::CreateProjectEntry {
                    worktree_id: worktree_id.to_proto(),
                    project_id,
                    path: path.as_ref().to_proto(),
                    content,
                    is_directory,
                });
                cx.spawn(async move |this, cx| {
                    let response = request.await?;
                    match response.entry {
                        // The host indexed the entry: forward it to `insert_entry`
                        // on this replica.
                        Some(entry) => this
                            .update(cx, |worktree, cx| {
                                worktree.as_remote_mut().unwrap().insert_entry(
                                    entry,
                                    response.worktree_scan_id as usize,
                                    cx,
                                )
                            })?
                            .await
                            .map(CreatedEntry::Included),
                        // The entry was created but fell under exclusion filters,
                        // so it is not indexed; report only its absolute path.
                        None => {
                            let abs_path =
                                this.read_with(cx, |worktree, _| worktree.absolutize(&path))?;
                            Ok(CreatedEntry::Excluded { abs_path })
                        }
                    }
                })
            }
        }
    }
824
    /// Deletes the given entry (moving it to the trash when `trash` is true),
    /// emitting [`Event::DeletedEntry`] for the entry and all its descendants.
    /// Returns `None` when the entry does not exist.
    pub fn delete_entry(
        &mut self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &mut Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let task = match self {
            Worktree::Local(this) => this.delete_entry(entry_id, trash, cx),
            Worktree::Remote(this) => this.delete_entry(entry_id, trash, cx),
        }?;

        let entry = match &*self {
            Worktree::Local(this) => this.entry_for_id(entry_id),
            Worktree::Remote(this) => this.entry_for_id(entry_id),
        }?;

        // Collect the entry itself plus every descendant, so each one gets a
        // deletion event.
        let mut ids = vec![entry_id];
        let path = &*entry.path;

        self.get_children_ids_recursive(path, &mut ids);

        for id in ids {
            cx.emit(Event::DeletedEntry(id));
        }
        Some(task)
    }

    /// Appends to `ids` the entry ids of all descendants of `path`, depth-first.
    fn get_children_ids_recursive(&self, path: &RelPath, ids: &mut Vec<ProjectEntryId>) {
        let children_iter = self.child_entries(path);
        for child in children_iter {
            ids.push(child.id);
            self.get_children_ids_recursive(&child.path, ids);
        }
    }
859
860 // pub fn rename_entry(
861 // &mut self,
862 // entry_id: ProjectEntryId,
863 // new_path: Arc<RelPath>,
864 // cx: &Context<Self>,
865 // ) -> Task<Result<CreatedEntry>> {
866 // match self {
867 // Worktree::Local(this) => this.rename_entry(entry_id, new_path, cx),
868 // Worktree::Remote(this) => this.rename_entry(entry_id, new_path, cx),
869 // }
870 // }
871
    /// Copies files from outside the worktree into `target_directory`.
    pub fn copy_external_entries(
        &mut self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        match self {
            // Local worktrees already own an `Fs` handle, so `fs` is only
            // forwarded to the remote implementation.
            Worktree::Local(this) => this.copy_external_entries(target_directory, paths, cx),
            Worktree::Remote(this) => this.copy_external_entries(target_directory, paths, fs, cx),
        }
    }

    /// Expands the given entry; remote worktrees forward the request to the
    /// host and then wait until its scan has been observed locally.
    /// Returns `None` when the entry does not exist.
    pub fn expand_entry(
        &mut self,
        entry_id: ProjectEntryId,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        match self {
            Worktree::Local(this) => this.expand_entry(entry_id, cx),
            Worktree::Remote(this) => {
                let response = this.client.request(proto::ExpandProjectEntry {
                    project_id: this.project_id,
                    entry_id: entry_id.to_proto(),
                });
                Some(cx.spawn(async move |this, cx| {
                    let response = response.await?;
                    this.update(cx, |this, _| {
                        this.as_remote_mut()
                            .unwrap()
                            .wait_for_snapshot(response.worktree_scan_id as usize)
                    })?
                    .await?;
                    Ok(())
                }))
            }
        }
    }

    /// Like [`Self::expand_entry`], but expands the entry and everything below it.
    /// Returns `None` when the entry does not exist.
    pub fn expand_all_for_entry(
        &mut self,
        entry_id: ProjectEntryId,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        match self {
            Worktree::Local(this) => this.expand_all_for_entry(entry_id, cx),
            Worktree::Remote(this) => {
                let response = this.client.request(proto::ExpandAllForProjectEntry {
                    project_id: this.project_id,
                    entry_id: entry_id.to_proto(),
                });
                Some(cx.spawn(async move |this, cx| {
                    let response = response.await?;
                    this.update(cx, |this, _| {
                        this.as_remote_mut()
                            .unwrap()
                            .wait_for_snapshot(response.worktree_scan_id as usize)
                    })?
                    .await?;
                    Ok(())
                }))
            }
        }
    }
936
937 pub async fn handle_create_entry(
938 this: Entity<Self>,
939 request: proto::CreateProjectEntry,
940 mut cx: AsyncApp,
941 ) -> Result<proto::ProjectEntryResponse> {
942 let (scan_id, entry) = this.update(&mut cx, |this, cx| {
943 anyhow::Ok((
944 this.scan_id(),
945 this.create_entry(
946 RelPath::from_proto(&request.path).with_context(|| {
947 format!("received invalid relative path {:?}", request.path)
948 })?,
949 request.is_directory,
950 request.content,
951 cx,
952 ),
953 ))
954 })?;
955 Ok(proto::ProjectEntryResponse {
956 entry: match &entry.await? {
957 CreatedEntry::Included(entry) => Some(entry.into()),
958 CreatedEntry::Excluded { .. } => None,
959 },
960 worktree_scan_id: scan_id as u64,
961 })
962 }
963
964 pub async fn handle_delete_entry(
965 this: Entity<Self>,
966 request: proto::DeleteProjectEntry,
967 mut cx: AsyncApp,
968 ) -> Result<proto::ProjectEntryResponse> {
969 let (scan_id, task) = this.update(&mut cx, |this, cx| {
970 (
971 this.scan_id(),
972 this.delete_entry(
973 ProjectEntryId::from_proto(request.entry_id),
974 request.use_trash,
975 cx,
976 ),
977 )
978 });
979 task.ok_or_else(|| anyhow::anyhow!("invalid entry"))?
980 .await?;
981 Ok(proto::ProjectEntryResponse {
982 entry: None,
983 worktree_scan_id: scan_id as u64,
984 })
985 }
986
987 pub async fn handle_expand_entry(
988 this: Entity<Self>,
989 request: proto::ExpandProjectEntry,
990 mut cx: AsyncApp,
991 ) -> Result<proto::ExpandProjectEntryResponse> {
992 let task = this.update(&mut cx, |this, cx| {
993 this.expand_entry(ProjectEntryId::from_proto(request.entry_id), cx)
994 });
995 task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
996 .await?;
997 let scan_id = this.read_with(&cx, |this, _| this.scan_id());
998 Ok(proto::ExpandProjectEntryResponse {
999 worktree_scan_id: scan_id as u64,
1000 })
1001 }
1002
1003 pub async fn handle_expand_all_for_entry(
1004 this: Entity<Self>,
1005 request: proto::ExpandAllForProjectEntry,
1006 mut cx: AsyncApp,
1007 ) -> Result<proto::ExpandAllForProjectEntryResponse> {
1008 let task = this.update(&mut cx, |this, cx| {
1009 this.expand_all_for_entry(ProjectEntryId::from_proto(request.entry_id), cx)
1010 });
1011 task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
1012 .await?;
1013 let scan_id = this.read_with(&cx, |this, _| this.scan_id());
1014 Ok(proto::ExpandAllForProjectEntryResponse {
1015 worktree_scan_id: scan_id as u64,
1016 })
1017 }
1018
    /// Whether this worktree corresponds to a single file rather than a directory.
    pub fn is_single_file(&self) -> bool {
        self.root_dir().is_none()
    }

    /// For visible worktrees, returns the path with the worktree name as the first component.
    /// Otherwise, returns an absolute path.
    pub fn full_path(&self, worktree_relative_path: &RelPath) -> PathBuf {
        if self.is_visible() {
            self.root_name()
                .join(worktree_relative_path)
                .display(self.path_style)
                .to_string()
                .into()
        } else {
            let full_path = self.abs_path();
            // Abbreviate the user's home directory as "~" for local worktrees.
            let mut full_path_string = if self.is_local()
                && let Ok(stripped) = full_path.strip_prefix(home_dir())
            {
                self.path_style
                    .join("~", &*stripped.to_string_lossy())
                    .unwrap()
            } else {
                full_path.to_string_lossy().into_owned()
            };

            // Append the relative path only when it is non-empty.
            if worktree_relative_path.components().next().is_some() {
                full_path_string.push_str(self.path_style.primary_separator());
                full_path_string.push_str(&worktree_relative_path.display(self.path_style));
            }

            full_path_string.into()
        }
    }
1052}
1053
1054impl LocalWorktree {
    /// The file-system implementation backing this worktree.
    pub fn fs(&self) -> &Arc<dyn Fs> {
        &self.fs
    }

    /// Whether `path` is considered private by the worktree settings, unless
    /// private files are being shared explicitly.
    pub fn is_path_private(&self, path: &RelPath) -> bool {
        !self.share_private_files && self.settings.is_path_private(path)
    }

    /// Whether the underlying file system is case-sensitive.
    pub fn fs_is_case_sensitive(&self) -> bool {
        self.fs_case_sensitive
    }
1066
    /// Replaces the current background scanner with a fresh one, e.g. after the
    /// worktree settings changed.
    fn restart_background_scanners(&mut self, cx: &Context<Worktree>) {
        let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
        let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
        // NOTE(review): replacing the senders presumably disconnects the
        // previous scanner's request channels — confirm in `start_background_scanner`.
        self.scan_requests_tx = scan_requests_tx;
        self.path_prefixes_to_scan_tx = path_prefixes_to_scan_tx;

        self.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
        let always_included_entries = mem::take(&mut self.snapshot.always_included_entries);
        log::debug!(
            "refreshing entries for the following always included paths: {:?}",
            always_included_entries
        );

        // Cleans up old always included entries to ensure they get updated properly. Otherwise,
        // nested always included entries may not get updated and will result in out-of-date info.
        self.refresh_entries_for_paths(always_included_entries);
    }
1084
    /// Spawns the two tasks that keep this worktree up to date: a background
    /// scanner that watches the filesystem and emits `ScanState`s, and a
    /// foreground task that applies each state to the worktree entity.
    fn start_background_scanner(
        &mut self,
        scan_requests_rx: channel::Receiver<ScanRequest>,
        path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
        cx: &Context<Worktree>,
    ) {
        let snapshot = self.snapshot();
        let share_private_files = self.share_private_files;
        let next_entry_id = self.next_entry_id.clone();
        let fs = self.fs.clone();
        let scanning_enabled = self.scanning_enabled;
        let settings = self.settings.clone();
        let (scan_states_tx, mut scan_states_rx) = mpsc::unbounded();
        let background_scanner = cx.background_spawn({
            let abs_path = snapshot.abs_path.as_path().to_path_buf();
            let background = cx.background_executor().clone();
            async move {
                // With scanning disabled, substitute a never-ready event stream and a
                // no-op watcher so the scanner only serves explicit scan requests.
                let (events, watcher) = if scanning_enabled {
                    fs.watch(&abs_path, FS_WATCH_LATENCY).await
                } else {
                    (Box::pin(stream::pending()) as _, Arc::new(NullWatcher) as _)
                };
                let fs_case_sensitive = fs.is_case_sensitive().await;

                let mut scanner = BackgroundScanner {
                    fs,
                    fs_case_sensitive,
                    status_updates_tx: scan_states_tx,
                    executor: background,
                    scan_requests_rx,
                    path_prefixes_to_scan_rx,
                    next_entry_id,
                    state: async_lock::Mutex::new(BackgroundScannerState {
                        prev_snapshot: snapshot.snapshot.clone(),
                        snapshot,
                        scanned_dirs: Default::default(),
                        scanning_enabled,
                        path_prefixes_to_scan: Default::default(),
                        paths_to_scan: Default::default(),
                        removed_entries: Default::default(),
                        changed_paths: Default::default(),
                    }),
                    phase: BackgroundScannerPhase::InitialScan,
                    share_private_files,
                    settings,
                    watcher,
                };

                scanner
                    .run(Box::pin(events.map(|events| events.into_iter().collect())))
                    .await;
            }
        });
        // Foreground task: applies each scanner state to the worktree entity.
        let scan_state_updater = cx.spawn(async move |this, cx| {
            while let Some((state, this)) = scan_states_rx.next().await.zip(this.upgrade()) {
                this.update(cx, |this, cx| {
                    let this = this.as_local_mut().unwrap();
                    match state {
                        ScanState::Started => {
                            *this.is_scanning.0.borrow_mut() = true;
                        }
                        ScanState::Updated {
                            snapshot,
                            changes,
                            barrier,
                            scanning,
                        } => {
                            *this.is_scanning.0.borrow_mut() = scanning;
                            this.set_snapshot(snapshot, changes, cx);
                            // Dropping the barrier wakes any scan-request waiters.
                            drop(barrier);
                        }
                        ScanState::RootUpdated { new_path } => {
                            this.update_abs_path_and_refresh(new_path, cx);
                        }
                    }
                });
            }
        });
        self._background_scanner_tasks = vec![background_scanner, scan_state_updater];
        *self.is_scanning.0.borrow_mut() = true;
    }
1166
1167 fn set_snapshot(
1168 &mut self,
1169 mut new_snapshot: LocalSnapshot,
1170 entry_changes: UpdatedEntriesSet,
1171 cx: &mut Context<Worktree>,
1172 ) {
1173 let repo_changes = self.changed_repos(&self.snapshot, &mut new_snapshot);
1174 self.snapshot = new_snapshot;
1175
1176 if let Some(share) = self.update_observer.as_mut() {
1177 share
1178 .snapshots_tx
1179 .unbounded_send((self.snapshot.clone(), entry_changes.clone()))
1180 .ok();
1181 }
1182
1183 if !entry_changes.is_empty() {
1184 cx.emit(Event::UpdatedEntries(entry_changes));
1185 }
1186 if !repo_changes.is_empty() {
1187 cx.emit(Event::UpdatedGitRepositories(repo_changes));
1188 }
1189
1190 while let Some((scan_id, _)) = self.snapshot_subscriptions.front() {
1191 if self.snapshot.completed_scan_id >= *scan_id {
1192 let (_, tx) = self.snapshot_subscriptions.pop_front().unwrap();
1193 tx.send(()).ok();
1194 } else {
1195 break;
1196 }
1197 }
1198 }
1199
    /// Diffs the git repositories of two snapshots, producing one
    /// `UpdatedGitRepository` per added, removed, or changed repository.
    ///
    /// Walks the two repository lists in lockstep, comparing by work-directory
    /// entry id (both iterators are assumed to yield entries in id order, which
    /// the `Ord::cmp`-driven merge below relies on).
    fn changed_repos(
        &self,
        old_snapshot: &LocalSnapshot,
        new_snapshot: &mut LocalSnapshot,
    ) -> UpdatedGitRepositoriesSet {
        let mut changes = Vec::new();
        let mut old_repos = old_snapshot.git_repositories.iter().peekable();
        let new_repos = new_snapshot.git_repositories.clone();
        let mut new_repos = new_repos.iter().peekable();

        loop {
            match (new_repos.peek().map(clone), old_repos.peek().map(clone)) {
                (Some((new_entry_id, new_repo)), Some((old_entry_id, old_repo))) => {
                    match Ord::cmp(&new_entry_id, &old_entry_id) {
                        Ordering::Less => {
                            // Only in the new snapshot: repository was added.
                            changes.push(UpdatedGitRepository {
                                work_directory_id: new_entry_id,
                                old_work_directory_abs_path: None,
                                new_work_directory_abs_path: Some(
                                    new_repo.work_directory_abs_path.clone(),
                                ),
                                dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                repository_dir_abs_path: Some(
                                    new_repo.repository_dir_abs_path.clone(),
                                ),
                                common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                            });
                            new_repos.next();
                        }
                        Ordering::Equal => {
                            // In both snapshots: report only if the .git dir was
                            // rescanned or the work directory moved.
                            if new_repo.git_dir_scan_id != old_repo.git_dir_scan_id
                                || new_repo.work_directory_abs_path
                                    != old_repo.work_directory_abs_path
                            {
                                changes.push(UpdatedGitRepository {
                                    work_directory_id: new_entry_id,
                                    old_work_directory_abs_path: Some(
                                        old_repo.work_directory_abs_path.clone(),
                                    ),
                                    new_work_directory_abs_path: Some(
                                        new_repo.work_directory_abs_path.clone(),
                                    ),
                                    dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                    repository_dir_abs_path: Some(
                                        new_repo.repository_dir_abs_path.clone(),
                                    ),
                                    common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                                });
                            }
                            new_repos.next();
                            old_repos.next();
                        }
                        Ordering::Greater => {
                            // Only in the old snapshot: repository was removed.
                            changes.push(UpdatedGitRepository {
                                work_directory_id: old_entry_id,
                                old_work_directory_abs_path: Some(
                                    old_repo.work_directory_abs_path.clone(),
                                ),
                                new_work_directory_abs_path: None,
                                dot_git_abs_path: None,
                                repository_dir_abs_path: None,
                                common_dir_abs_path: None,
                            });
                            old_repos.next();
                        }
                    }
                }
                (Some((entry_id, repo)), None) => {
                    // Old snapshot exhausted: the rest are additions.
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: None,
                        new_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    new_repos.next();
                }
                (None, Some((entry_id, repo))) => {
                    // New snapshot exhausted: the rest are removals.
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        new_work_directory_abs_path: None,
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    old_repos.next();
                }
                (None, None) => break,
            }
        }

        // Helper that turns a peeked `(&K, &V)` pair into an owned `(K, V)`.
        fn clone<T: Clone, U: Clone>(value: &(&T, &U)) -> (T, U) {
            (value.0.clone(), value.1.clone())
        }

        changes.into()
    }
1299
1300 pub fn scan_complete(&self) -> impl Future<Output = ()> + use<> {
1301 let mut is_scanning_rx = self.is_scanning.1.clone();
1302 async move {
1303 let mut is_scanning = *is_scanning_rx.borrow();
1304 while is_scanning {
1305 if let Some(value) = is_scanning_rx.recv().await {
1306 is_scanning = value;
1307 } else {
1308 break;
1309 }
1310 }
1311 }
1312 }
1313
1314 pub fn wait_for_snapshot(
1315 &mut self,
1316 scan_id: usize,
1317 ) -> impl Future<Output = Result<()>> + use<> {
1318 let (tx, rx) = oneshot::channel();
1319 if self.snapshot.completed_scan_id >= scan_id {
1320 tx.send(()).ok();
1321 } else {
1322 match self
1323 .snapshot_subscriptions
1324 .binary_search_by_key(&scan_id, |probe| probe.0)
1325 {
1326 Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
1327 }
1328 }
1329
1330 async move {
1331 rx.await?;
1332 Ok(())
1333 }
1334 }
1335
    /// Returns a clone of the current local snapshot.
    pub fn snapshot(&self) -> LocalSnapshot {
        self.snapshot.clone()
    }
1339
    /// Returns a clone of this worktree's settings.
    pub fn settings(&self) -> WorktreeSettings {
        self.settings.clone()
    }
1343
1344 fn load_binary_file(
1345 &self,
1346 path: &RelPath,
1347 cx: &Context<Worktree>,
1348 ) -> Task<Result<LoadedBinaryFile>> {
1349 let path = Arc::from(path);
1350 let abs_path = self.absolutize(&path);
1351 let fs = self.fs.clone();
1352 let entry = self.refresh_entry(path.clone(), None, cx);
1353 let is_private = self.is_path_private(&path);
1354
1355 let worktree = cx.weak_entity();
1356 cx.background_spawn(async move {
1357 let content = fs.load_bytes(&abs_path).await?;
1358
1359 let worktree = worktree.upgrade().context("worktree was dropped")?;
1360 let file = match entry.await? {
1361 Some(entry) => File::for_entry(entry, worktree),
1362 None => {
1363 let metadata = fs
1364 .metadata(&abs_path)
1365 .await
1366 .with_context(|| {
1367 format!("Loading metadata for excluded file {abs_path:?}")
1368 })?
1369 .with_context(|| {
1370 format!("Excluded file {abs_path:?} got removed during loading")
1371 })?;
1372 Arc::new(File {
1373 entry_id: None,
1374 worktree,
1375 path,
1376 disk_state: DiskState::Present {
1377 mtime: metadata.mtime,
1378 size: metadata.len,
1379 },
1380 is_local: true,
1381 is_private,
1382 })
1383 }
1384 };
1385
1386 Ok(LoadedBinaryFile { file, content })
1387 })
1388 }
1389
    /// Loads and decodes the text file at `path` (detecting its character
    /// encoding and BOM via `decode_file_text`) and refreshes its worktree
    /// entry. Files at or above the hard size cap are rejected outright.
    #[ztracing::instrument(skip_all)]
    fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
        let path = Arc::from(path);
        let abs_path = self.absolutize(&path);
        let fs = self.fs.clone();
        let entry = self.refresh_entry(path.clone(), None, cx);
        let is_private = self.is_path_private(path.as_ref());

        let this = cx.weak_entity();
        cx.background_spawn(async move {
            // WARN: Temporary workaround for #27283.
            // We are not efficient with our memory usage per file, and use in excess of 64GB for a 10GB file
            // Therefore, as a temporary workaround to prevent system freezes, we just bail before opening a file
            // if it is too large
            // 5GB seems to be more reasonable, peaking at ~16GB, while 6GB jumps up to >24GB which seems like a
            // reasonable limit
            {
                const FILE_SIZE_MAX: u64 = 6 * 1024 * 1024 * 1024; // 6GB
                if let Ok(Some(metadata)) = fs.metadata(&abs_path).await
                    && metadata.len >= FILE_SIZE_MAX
                {
                    anyhow::bail!("File is too large to load");
                }
            }
            let (text, encoding, has_bom) = decode_file_text(fs.as_ref(), &abs_path).await?;

            let worktree = this.upgrade().context("worktree was dropped")?;
            let file = match entry.await? {
                Some(entry) => File::for_entry(entry, worktree),
                None => {
                    // No entry after refresh means the path is excluded from
                    // scanning; synthesize a File from filesystem metadata.
                    let metadata = fs
                        .metadata(&abs_path)
                        .await
                        .with_context(|| {
                            format!("Loading metadata for excluded file {abs_path:?}")
                        })?
                        .with_context(|| {
                            format!("Excluded file {abs_path:?} got removed during loading")
                        })?;
                    Arc::new(File {
                        entry_id: None,
                        worktree,
                        path,
                        disk_state: DiskState::Present {
                            mtime: metadata.mtime,
                            size: metadata.len,
                        },
                        is_local: true,
                        is_private,
                    })
                }
            };

            Ok(LoadedFile {
                file,
                text,
                encoding,
                has_bom,
            })
        })
    }
1451
1452 /// Find the lowest path in the worktree's datastructures that is an ancestor
1453 fn lowest_ancestor(&self, path: &RelPath) -> Arc<RelPath> {
1454 let mut lowest_ancestor = None;
1455 for path in path.ancestors() {
1456 if self.entry_for_path(path).is_some() {
1457 lowest_ancestor = Some(path.into());
1458 break;
1459 }
1460 }
1461
1462 lowest_ancestor.unwrap_or_else(|| RelPath::empty().into())
1463 }
1464
    /// Creates a file or directory at `path` (relative to the worktree root),
    /// then refreshes the new entry and every intermediate ancestor created
    /// along the way. `content` is the initial file body (empty when `None`);
    /// it is ignored for directories.
    pub fn create_entry(
        &self,
        path: Arc<RelPath>,
        is_dir: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let abs_path = self.absolutize(&path);
        let path_excluded = self.settings.is_path_excluded(&path);
        let fs = self.fs.clone();
        let task_abs_path = abs_path.clone();
        let write = cx.background_spawn(async move {
            if is_dir {
                fs.create_dir(&task_abs_path)
                    .await
                    .with_context(|| format!("creating directory {task_abs_path:?}"))
            } else {
                fs.write(&task_abs_path, content.as_deref().unwrap_or(&[]))
                    .await
                    .with_context(|| format!("creating file {task_abs_path:?}"))
            }
        });

        // Capture the deepest pre-existing ancestor before the write, so we know
        // which intermediate directories will be new and need refreshing.
        let lowest_ancestor = self.lowest_ancestor(&path);
        cx.spawn(async move |this, cx| {
            write.await?;
            if path_excluded {
                // Excluded paths never get worktree entries.
                return Ok(CreatedEntry::Excluded { abs_path });
            }

            let (result, refreshes) = this.update(cx, |this, cx| {
                let mut refreshes = Vec::new();
                // Refresh every path segment between the lowest pre-existing
                // ancestor and the new entry itself.
                let refresh_paths = path.strip_prefix(&lowest_ancestor).unwrap();
                for refresh_path in refresh_paths.ancestors() {
                    if refresh_path == RelPath::empty() {
                        continue;
                    }
                    let refresh_full_path = lowest_ancestor.join(refresh_path);

                    refreshes.push(this.as_local_mut().unwrap().refresh_entry(
                        refresh_full_path,
                        None,
                        cx,
                    ));
                }
                (
                    this.as_local_mut().unwrap().refresh_entry(path, None, cx),
                    refreshes,
                )
            })?;
            for refresh in refreshes {
                refresh.await.log_err();
            }

            Ok(result
                .await?
                .map(CreatedEntry::Included)
                .unwrap_or_else(|| CreatedEntry::Excluded { abs_path }))
        })
    }
1525
    /// Writes `text` to `path` with the given line ending and character
    /// encoding (optionally emitting a BOM), then refreshes the file's worktree
    /// entry and returns a `File` describing the saved path. For paths excluded
    /// from scanning, the `File` is built from fresh filesystem metadata.
    pub fn write_file(
        &self,
        path: Arc<RelPath>,
        text: Rope,
        line_ending: LineEnding,
        encoding: &'static Encoding,
        has_bom: bool,
        cx: &Context<Worktree>,
    ) -> Task<Result<Arc<File>>> {
        let fs = self.fs.clone();
        let is_private = self.is_path_private(&path);
        let abs_path = self.absolutize(&path);

        let write = cx.background_spawn({
            let fs = fs.clone();
            let abs_path = abs_path.clone();
            async move {
                // For UTF-8, use the optimized `fs.save` which writes Rope chunks directly to disk
                // without allocating a contiguous string.
                if encoding == encoding_rs::UTF_8 && !has_bom {
                    return fs.save(&abs_path, &text, line_ending).await;
                }

                // For legacy encodings (e.g. Shift-JIS), we fall back to converting the entire Rope
                // to a String/Bytes in memory before writing.
                //
                // Note: This is inefficient for very large files compared to the streaming approach above,
                // but supporting streaming writes for arbitrary encodings would require a significant
                // refactor of the `fs` crate to expose a Writer interface.
                let text_string = text.to_string();
                let normalized_text = match line_ending {
                    LineEnding::Unix => text_string,
                    LineEnding::Windows => text_string.replace('\n', "\r\n"),
                };

                // Create the byte vector manually for UTF-16 encodings because encoding_rs encodes to UTF-8 by default (per WHATWG standards),
                // which is not what we want for saving files.
                let bytes = if encoding == encoding_rs::UTF_16BE {
                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
                    if has_bom {
                        data.extend_from_slice(&[0xFE, 0xFF]); // BOM
                    }
                    let utf16be_bytes =
                        normalized_text.encode_utf16().flat_map(|u| u.to_be_bytes());
                    data.extend(utf16be_bytes);
                    data.into()
                } else if encoding == encoding_rs::UTF_16LE {
                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
                    if has_bom {
                        data.extend_from_slice(&[0xFF, 0xFE]); // BOM
                    }
                    let utf16le_bytes =
                        normalized_text.encode_utf16().flat_map(|u| u.to_le_bytes());
                    data.extend(utf16le_bytes);
                    data.into()
                } else {
                    // For other encodings (Shift-JIS, UTF-8 with BOM, etc.), delegate to encoding_rs.
                    let bom_bytes = if has_bom {
                        if encoding == encoding_rs::UTF_8 {
                            vec![0xEF, 0xBB, 0xBF]
                        } else {
                            vec![]
                        }
                    } else {
                        vec![]
                    };
                    let (cow, _, _) = encoding.encode(&normalized_text);
                    if !bom_bytes.is_empty() {
                        let mut bytes = bom_bytes;
                        bytes.extend_from_slice(&cow);
                        bytes.into()
                    } else {
                        cow
                    }
                };

                fs.write(&abs_path, &bytes).await
            }
        });

        cx.spawn(async move |this, cx| {
            write.await?;
            let entry = this
                .update(cx, |this, cx| {
                    this.as_local_mut()
                        .unwrap()
                        .refresh_entry(path.clone(), None, cx)
                })?
                .await?;
            let worktree = this.upgrade().context("worktree dropped")?;
            if let Some(entry) = entry {
                Ok(File::for_entry(entry, worktree))
            } else {
                // Excluded paths get no worktree entry; describe the file via
                // filesystem metadata instead.
                let metadata = fs
                    .metadata(&abs_path)
                    .await
                    .with_context(|| {
                        format!("Fetching metadata after saving the excluded buffer {abs_path:?}")
                    })?
                    .with_context(|| {
                        format!("Excluded buffer {path:?} got removed during saving")
                    })?;
                Ok(Arc::new(File {
                    worktree,
                    path,
                    disk_state: DiskState::Present {
                        mtime: metadata.mtime,
                        size: metadata.len,
                    },
                    entry_id: None,
                    is_local: true,
                    is_private,
                }))
            }
        })
    }
1642
1643 pub fn delete_entry(
1644 &self,
1645 entry_id: ProjectEntryId,
1646 trash: bool,
1647 cx: &Context<Worktree>,
1648 ) -> Option<Task<Result<()>>> {
1649 let entry = self.entry_for_id(entry_id)?.clone();
1650 let abs_path = self.absolutize(&entry.path);
1651 let fs = self.fs.clone();
1652
1653 let delete = cx.background_spawn(async move {
1654 if entry.is_file() {
1655 if trash {
1656 fs.trash_file(&abs_path, Default::default()).await?;
1657 } else {
1658 fs.remove_file(&abs_path, Default::default()).await?;
1659 }
1660 } else if trash {
1661 fs.trash_dir(
1662 &abs_path,
1663 RemoveOptions {
1664 recursive: true,
1665 ignore_if_not_exists: false,
1666 },
1667 )
1668 .await?;
1669 } else {
1670 fs.remove_dir(
1671 &abs_path,
1672 RemoveOptions {
1673 recursive: true,
1674 ignore_if_not_exists: false,
1675 },
1676 )
1677 .await?;
1678 }
1679 anyhow::Ok(entry.path)
1680 });
1681
1682 Some(cx.spawn(async move |this, cx| {
1683 let path = delete.await?;
1684 this.update(cx, |this, _| {
1685 this.as_local_mut()
1686 .unwrap()
1687 .refresh_entries_for_paths(vec![path])
1688 })?
1689 .recv()
1690 .await;
1691 Ok(())
1692 }))
1693 }
1694
    /// Copies filesystem paths from outside the worktree into
    /// `target_directory` (a worktree-relative path), then rescans the copied
    /// paths and returns the ids of the resulting entries.
    pub fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        let target_directory = self.absolutize(&target_directory);
        let worktree_path = self.abs_path().clone();
        let fs = self.fs.clone();
        // Pair each source with its destination path inside the worktree.
        let paths = paths
            .into_iter()
            .filter_map(|source| {
                let file_name = source.file_name()?;
                let mut target = target_directory.clone();
                target.push(file_name);

                // Do not allow copying the same file to itself.
                if source.as_ref() != target.as_path() {
                    Some((source, target))
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        // Worktree-relative forms of the destinations, for the rescan below.
        let paths_to_refresh = paths
            .iter()
            .filter_map(|(_, target)| {
                RelPath::new(
                    target.strip_prefix(&worktree_path).ok()?,
                    PathStyle::local(),
                )
                .ok()
                .map(|path| path.into_arc())
            })
            .collect::<Vec<_>>();

        cx.spawn(async move |this, cx| {
            cx.background_spawn(async move {
                for (source, target) in paths {
                    copy_recursive(
                        fs.as_ref(),
                        &source,
                        &target,
                        fs::CopyOptions {
                            overwrite: true,
                            ..Default::default()
                        },
                    )
                    .await
                    .with_context(|| {
                        format!("Failed to copy file from {source:?} to {target:?}")
                    })?;
                }
                anyhow::Ok(())
            })
            .await
            .log_err();
            let mut refresh = cx.read_entity(
                &this.upgrade().with_context(|| "Dropped worktree")?,
                |this, _| {
                    anyhow::Ok::<postage::barrier::Receiver>(
                        this.as_local()
                            .with_context(|| "Worktree is not local")?
                            .refresh_entries_for_paths(paths_to_refresh.clone()),
                    )
                },
            )?;

            // Wait for the rescan of the copied paths to finish.
            cx.background_spawn(async move {
                refresh.next().await;
                anyhow::Ok(())
            })
            .await
            .log_err();

            let this = this.upgrade().with_context(|| "Dropped worktree")?;
            Ok(cx.read_entity(&this, |this, _| {
                paths_to_refresh
                    .iter()
                    .filter_map(|path| Some(this.entry_for_path(path)?.id))
                    .collect()
            }))
        })
    }
1780
1781 fn expand_entry(
1782 &self,
1783 entry_id: ProjectEntryId,
1784 cx: &Context<Worktree>,
1785 ) -> Option<Task<Result<()>>> {
1786 let path = self.entry_for_id(entry_id)?.path.clone();
1787 let mut refresh = self.refresh_entries_for_paths(vec![path]);
1788 Some(cx.background_spawn(async move {
1789 refresh.next().await;
1790 Ok(())
1791 }))
1792 }
1793
1794 fn expand_all_for_entry(
1795 &self,
1796 entry_id: ProjectEntryId,
1797 cx: &Context<Worktree>,
1798 ) -> Option<Task<Result<()>>> {
1799 let path = self.entry_for_id(entry_id).unwrap().path.clone();
1800 let mut rx = self.add_path_prefix_to_scan(path);
1801 Some(cx.background_spawn(async move {
1802 rx.next().await;
1803 Ok(())
1804 }))
1805 }
1806
1807 pub fn refresh_entries_for_paths(&self, paths: Vec<Arc<RelPath>>) -> barrier::Receiver {
1808 let (tx, rx) = barrier::channel();
1809 self.scan_requests_tx
1810 .try_send(ScanRequest {
1811 relative_paths: paths,
1812 done: smallvec![tx],
1813 })
1814 .ok();
1815 rx
1816 }
1817
    /// Test-only alias for `refresh_entries_for_paths`.
    #[cfg(feature = "test-support")]
    pub fn manually_refresh_entries_for_paths(
        &self,
        paths: Vec<Arc<RelPath>>,
    ) -> barrier::Receiver {
        self.refresh_entries_for_paths(paths)
    }
1825
1826 pub fn add_path_prefix_to_scan(&self, path_prefix: Arc<RelPath>) -> barrier::Receiver {
1827 let (tx, rx) = barrier::channel();
1828 self.path_prefixes_to_scan_tx
1829 .try_send(PathPrefixScanRequest {
1830 path: path_prefix,
1831 done: smallvec![tx],
1832 })
1833 .ok();
1834 rx
1835 }
1836
1837 pub fn refresh_entry(
1838 &self,
1839 path: Arc<RelPath>,
1840 old_path: Option<Arc<RelPath>>,
1841 cx: &Context<Worktree>,
1842 ) -> Task<Result<Option<Entry>>> {
1843 if self.settings.is_path_excluded(&path) {
1844 return Task::ready(Ok(None));
1845 }
1846 let paths = if let Some(old_path) = old_path.as_ref() {
1847 vec![old_path.clone(), path.clone()]
1848 } else {
1849 vec![path.clone()]
1850 };
1851 let t0 = Instant::now();
1852 let mut refresh = self.refresh_entries_for_paths(paths);
1853 // todo(lw): Hot foreground spawn
1854 cx.spawn(async move |this, cx| {
1855 refresh.recv().await;
1856 log::trace!("refreshed entry {path:?} in {:?}", t0.elapsed());
1857 let new_entry = this.read_with(cx, |this, _| {
1858 this.entry_for_path(&path).cloned().with_context(|| {
1859 format!("Could not find entry in worktree for {path:?} after refresh")
1860 })
1861 })??;
1862 Ok(Some(new_entry))
1863 })
1864 }
1865
    /// Registers `callback` to receive an `UpdateWorktree` message for every
    /// snapshot change, starting with a full initial update. The callback
    /// returning `false` pauses the stream until updates are resumed.
    ///
    /// If an observer is already installed, this only signals it to resume.
    pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        if let Some(observer) = self.update_observer.as_mut() {
            // Nudge the existing observer to resume sending.
            *observer.resume_updates.borrow_mut() = ();
            return;
        }

        let (resume_updates_tx, mut resume_updates_rx) = watch::channel::<()>();
        let (snapshots_tx, mut snapshots_rx) =
            mpsc::unbounded::<(LocalSnapshot, UpdatedEntriesSet)>();
        // Seed the stream with the current snapshot so the first message is a
        // full initial update.
        snapshots_tx
            .unbounded_send((self.snapshot(), Arc::default()))
            .ok();

        let worktree_id = self.id.to_proto();
        let _maintain_remote_snapshot = cx.background_spawn(async move {
            let mut is_first = true;
            while let Some((snapshot, entry_changes)) = snapshots_rx.next().await {
                let update = if is_first {
                    is_first = false;
                    snapshot.build_initial_update(project_id, worktree_id)
                } else {
                    snapshot.build_update(project_id, worktree_id, entry_changes)
                };

                for update in proto::split_worktree_update(update) {
                    // Drain any stale resume signal before (possibly) waiting on one.
                    let _ = resume_updates_rx.try_recv();
                    loop {
                        let result = callback(update.clone());
                        if result.await {
                            break;
                        } else {
                            log::info!("waiting to resume updates");
                            if resume_updates_rx.next().await.is_none() {
                                return Some(());
                            }
                        }
                    }
                }
            }
            Some(())
        });

        self.update_observer = Some(UpdateObservationState {
            snapshots_tx,
            resume_updates: resume_updates_tx,
            _maintain_remote_snapshot,
        });
    }
1918
    /// Starts sharing files matched by the private-path settings, restarting the
    /// background scanners so previously-withheld entries get picked up.
    pub fn share_private_files(&mut self, cx: &Context<Worktree>) {
        self.share_private_files = true;
        self.restart_background_scanners(cx);
    }
1923
1924 pub fn update_abs_path_and_refresh(
1925 &mut self,
1926 new_path: Arc<SanitizedPath>,
1927 cx: &Context<Worktree>,
1928 ) {
1929 self.snapshot.git_repositories = Default::default();
1930 self.snapshot.ignores_by_parent_abs_path = Default::default();
1931 let root_name = new_path
1932 .as_path()
1933 .file_name()
1934 .and_then(|f| f.to_str())
1935 .map_or(RelPath::empty().into(), |f| {
1936 RelPath::unix(f).unwrap().into()
1937 });
1938 self.snapshot.update_abs_path(new_path, root_name);
1939 self.restart_background_scanners(cx);
1940 }
    /// Test-only: the work-directory paths of every git repository in this worktree.
    #[cfg(feature = "test-support")]
    pub fn repositories(&self) -> Vec<Arc<Path>> {
        self.git_repositories
            .values()
            .map(|entry| entry.work_directory_abs_path.clone())
            .collect::<Vec<_>>()
    }
1948}
1949
1950impl RemoteWorktree {
    /// The id of the remote project this worktree belongs to.
    pub fn project_id(&self) -> u64 {
        self.project_id
    }
1954
    /// The RPC client used to communicate with the host.
    pub fn client(&self) -> AnyProtoClient {
        self.client.clone()
    }
1958
    /// Marks this worktree as disconnected: stops accepting remote updates and
    /// drops all pending snapshot subscriptions.
    pub fn disconnected_from_host(&mut self) {
        self.updates_tx.take();
        self.snapshot_subscriptions.clear();
        self.disconnected = true;
    }
1964
1965 pub fn update_from_remote(&self, update: proto::UpdateWorktree) {
1966 if let Some(updates_tx) = &self.updates_tx {
1967 updates_tx
1968 .unbounded_send(update)
1969 .expect("consumer runs to completion");
1970 }
1971 }
1972
    /// Streams worktree updates to `callback`, beginning with a full initial
    /// update, and stops when the callback returns `false` or the update
    /// channel closes.
    fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        let (tx, mut rx) = mpsc::unbounded();
        let initial_update = self
            .snapshot
            .build_initial_update(project_id, self.id().to_proto());
        self.update_observer = Some(tx);
        cx.spawn(async move |this, cx| {
            let mut update = initial_update;
            'outer: loop {
                // SSH projects use a special project ID of 0, and we need to
                // remap it to the correct one here.
                update.project_id = project_id;

                for chunk in split_worktree_update(update) {
                    if !callback(chunk).await {
                        break 'outer;
                    }
                }

                if let Some(next_update) = rx.next().await {
                    update = next_update;
                } else {
                    break;
                }
            }
            // Tear down the observer once the stream ends.
            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                this.update_observer.take();
            })
        })
        .detach();
    }
2009
    /// Whether this replica has already observed a scan with id >= `scan_id`.
    fn observed_snapshot(&self, scan_id: usize) -> bool {
        self.completed_scan_id >= scan_id
    }
2013
2014 pub fn wait_for_snapshot(
2015 &mut self,
2016 scan_id: usize,
2017 ) -> impl Future<Output = Result<()>> + use<> {
2018 let (tx, rx) = oneshot::channel();
2019 if self.observed_snapshot(scan_id) {
2020 let _ = tx.send(());
2021 } else if self.disconnected {
2022 drop(tx);
2023 } else {
2024 match self
2025 .snapshot_subscriptions
2026 .binary_search_by_key(&scan_id, |probe| probe.0)
2027 {
2028 Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
2029 }
2030 }
2031
2032 async move {
2033 rx.await?;
2034 Ok(())
2035 }
2036 }
2037
2038 pub fn insert_entry(
2039 &mut self,
2040 entry: proto::Entry,
2041 scan_id: usize,
2042 cx: &Context<Worktree>,
2043 ) -> Task<Result<Entry>> {
2044 let wait_for_snapshot = self.wait_for_snapshot(scan_id);
2045 cx.spawn(async move |this, cx| {
2046 wait_for_snapshot.await?;
2047 this.update(cx, |worktree, _| {
2048 let worktree = worktree.as_remote_mut().unwrap();
2049 let snapshot = &mut worktree.background_snapshot.lock().0;
2050 let entry = snapshot.insert_entry(entry, &worktree.file_scan_inclusions);
2051 worktree.snapshot = snapshot.clone();
2052 entry
2053 })?
2054 })
2055 }
2056
    /// Asks the host to delete (or trash) the entry, waits until this replica
    /// has observed the resulting scan, then removes the entry from the local
    /// snapshots.
    fn delete_entry(
        &self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let response = self.client.request(proto::DeleteProjectEntry {
            project_id: self.project_id,
            entry_id: entry_id.to_proto(),
            use_trash: trash,
        });
        Some(cx.spawn(async move |this, cx| {
            let response = response.await?;
            let scan_id = response.worktree_scan_id as usize;

            // Wait for the host's deletion scan to reach this replica before
            // mutating local state.
            this.update(cx, move |this, _| {
                this.as_remote_mut().unwrap().wait_for_snapshot(scan_id)
            })?
            .await?;

            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                let snapshot = &mut this.background_snapshot.lock().0;
                snapshot.delete_entry(entry_id);
                this.snapshot = snapshot.clone();
            })
        }))
    }
2085
2086 // fn rename_entry(
2087 // &self,
2088 // entry_id: ProjectEntryId,
2089 // new_path: impl Into<Arc<RelPath>>,
2090 // cx: &Context<Worktree>,
2091 // ) -> Task<Result<CreatedEntry>> {
2092 // let new_path: Arc<RelPath> = new_path.into();
2093 // let response = self.client.request(proto::RenameProjectEntry {
2094 // project_id: self.project_id,
2095 // entry_id: entry_id.to_proto(),
2096 // new_worktree_id: new_path.worktree_id,
2097 // new_path: new_path.as_ref().to_proto(),
2098 // });
2099 // cx.spawn(async move |this, cx| {
2100 // let response = response.await?;
2101 // match response.entry {
2102 // Some(entry) => this
2103 // .update(cx, |this, cx| {
2104 // this.as_remote_mut().unwrap().insert_entry(
2105 // entry,
2106 // response.worktree_scan_id as usize,
2107 // cx,
2108 // )
2109 // })?
2110 // .await
2111 // .map(CreatedEntry::Included),
2112 // None => {
2113 // let abs_path =
2114 // this.read_with(cx, |worktree, _| worktree.absolutize(&new_path))?;
2115 // Ok(CreatedEntry::Excluded { abs_path })
2116 // }
2117 // }
2118 // })
2119 // }
2120
    /// Uploads external files into the remote worktree: reads each path under
    /// `paths_to_copy` with the local `Fs` and issues one `CreateProjectEntry`
    /// request per file or directory, returning the ids of the created entries.
    fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths_to_copy: Vec<Arc<Path>>,
        local_fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<anyhow::Result<Vec<ProjectEntryId>>> {
        let client = self.client.clone();
        let worktree_id = self.id().to_proto();
        let project_id = self.project_id;

        cx.background_spawn(async move {
            let mut requests = Vec::new();
            for root_path_to_copy in paths_to_copy {
                // Skip roots whose file name can't be represented as a relative path.
                let Some(filename) = root_path_to_copy
                    .file_name()
                    .and_then(|name| name.to_str())
                    .and_then(|filename| RelPath::unix(filename).ok())
                else {
                    continue;
                };
                for (abs_path, is_directory) in
                    read_dir_items(local_fs.as_ref(), &root_path_to_copy).await?
                {
                    let Some(relative_path) = abs_path
                        .strip_prefix(&root_path_to_copy)
                        .map_err(|e| anyhow::Error::from(e))
                        .and_then(|relative_path| RelPath::new(relative_path, PathStyle::local()))
                        .log_err()
                    else {
                        continue;
                    };
                    // Directories are created empty; files carry their bytes.
                    let content = if is_directory {
                        None
                    } else {
                        Some(local_fs.load_bytes(&abs_path).await?)
                    };

                    let mut target_path = target_directory.join(filename);
                    if relative_path.file_name().is_some() {
                        target_path = target_path.join(&relative_path);
                    }

                    requests.push(proto::CreateProjectEntry {
                        project_id,
                        worktree_id,
                        path: target_path.to_proto(),
                        is_directory,
                        content,
                    });
                }
            }
            // Create parents before children and drop duplicate requests.
            requests.sort_unstable_by(|a, b| a.path.cmp(&b.path));
            requests.dedup();

            let mut copied_entry_ids = Vec::new();
            for request in requests {
                let response = client.request(request).await?;
                copied_entry_ids.extend(response.entry.map(|e| ProjectEntryId::from_proto(e.id)));
            }

            Ok(copied_entry_ids)
        })
    }
2185}
2186
impl Snapshot {
    /// Creates an empty snapshot for a worktree rooted at `abs_path`.
    ///
    /// `scan_id` starts at 1 while `completed_scan_id` stays 0, so a fresh
    /// snapshot reports its initial scan as not yet complete.
    pub fn new(
        id: WorktreeId,
        root_name: Arc<RelPath>,
        abs_path: Arc<Path>,
        path_style: PathStyle,
    ) -> Self {
        Snapshot {
            id,
            abs_path: SanitizedPath::from_arc(abs_path),
            path_style,
            // Cache the lowercased characters of the root name.
            root_char_bag: root_name
                .as_unix_str()
                .chars()
                .map(|c| c.to_ascii_lowercase())
                .collect(),
            root_name,
            always_included_entries: Default::default(),
            entries_by_path: Default::default(),
            entries_by_id: Default::default(),
            scan_id: 1,
            completed_scan_id: 0,
        }
    }

    /// The id of the worktree this snapshot belongs to.
    pub fn id(&self) -> WorktreeId {
        self.id
    }

    // TODO:
    // Consider the following:
    //
    // ```rust
    // let abs_path: Arc<Path> = snapshot.abs_path(); // e.g. "C:\Users\user\Desktop\project"
    // let some_non_trimmed_path = Path::new("\\\\?\\C:\\Users\\user\\Desktop\\project\\main.rs");
    // // The caller performs some actions here:
    // some_non_trimmed_path.strip_prefix(abs_path); // This fails
    // some_non_trimmed_path.starts_with(abs_path); // This fails too
    // ```
    //
    // This is definitely a bug, but it's not clear if we should handle it here or not.
    /// The absolute (sanitized) path of the worktree root.
    pub fn abs_path(&self) -> &Arc<Path> {
        SanitizedPath::cast_arc_ref(&self.abs_path)
    }

    /// Builds a proto update containing *every* entry in this snapshot,
    /// for sending the full worktree state to a peer.
    fn build_initial_update(&self, project_id: u64, worktree_id: u64) -> proto::UpdateWorktree {
        let mut updated_entries = self
            .entries_by_path
            .iter()
            .map(proto::Entry::from)
            .collect::<Vec<_>>();
        // Entries are sent ordered by id.
        updated_entries.sort_unstable_by_key(|e| e.id);

        proto::UpdateWorktree {
            project_id,
            worktree_id,
            abs_path: self.abs_path().to_string_lossy().into_owned(),
            root_name: self.root_name().to_proto(),
            updated_entries,
            removed_entries: Vec::new(),
            scan_id: self.scan_id as u64,
            is_last_update: self.completed_scan_id == self.scan_id,
            // Sent in separate messages.
            updated_repositories: Vec::new(),
            removed_repositories: Vec::new(),
        }
    }

    /// Returns the absolute path of a repository's working directory,
    /// resolving in-project directories against this worktree's root.
    pub fn work_directory_abs_path(&self, work_directory: &WorkDirectory) -> PathBuf {
        match work_directory {
            WorkDirectory::InProject { relative_path } => self.absolutize(relative_path),
            WorkDirectory::AboveProject { absolute_path, .. } => absolute_path.as_ref().to_owned(),
        }
    }

    /// Converts a worktree-relative path into an absolute path, joining the
    /// components onto the root using this snapshot's path-style separator.
    pub fn absolutize(&self, path: &RelPath) -> PathBuf {
        // An empty relative path (no file name) refers to the worktree root
        // itself.
        if path.file_name().is_some() {
            let mut abs_path = self.abs_path.to_string();
            for component in path.components() {
                if !abs_path.ends_with(self.path_style.primary_separator()) {
                    abs_path.push_str(self.path_style.primary_separator());
                }
                abs_path.push_str(component);
            }
            PathBuf::from(abs_path)
        } else {
            self.abs_path.as_path().to_path_buf()
        }
    }

    /// Whether an entry with the given id exists in this snapshot.
    pub fn contains_entry(&self, entry_id: ProjectEntryId) -> bool {
        self.entries_by_id.get(&entry_id, ()).is_some()
    }

    /// Inserts (or replaces) a single entry received over the wire, keeping
    /// the by-path and by-id trees consistent.
    fn insert_entry(
        &mut self,
        entry: proto::Entry,
        always_included_paths: &PathMatcher,
    ) -> Result<Entry> {
        let entry = Entry::try_from((&self.root_char_bag, always_included_paths, entry))?;
        let old_entry = self.entries_by_id.insert_or_replace(
            PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: 0,
            },
            (),
        );
        // If the id previously mapped to a different path, drop the stale
        // by-path record.
        if let Some(old_entry) = old_entry {
            self.entries_by_path.remove(&PathKey(old_entry.path), ());
        }
        self.entries_by_path.insert_or_replace(entry.clone(), ());
        Ok(entry)
    }

    /// Removes an entry and everything beneath its path, returning the
    /// removed entry's path (or `None` if the id was unknown).
    fn delete_entry(&mut self, entry_id: ProjectEntryId) -> Option<Arc<RelPath>> {
        let removed_entry = self.entries_by_id.remove(&entry_id, ())?;
        self.entries_by_path = {
            let mut cursor = self.entries_by_path.cursor::<TraversalProgress>(());
            // Keep everything strictly before the removed path...
            let mut new_entries_by_path =
                cursor.slice(&TraversalTarget::path(&removed_entry.path), Bias::Left);
            // ...skip the removed path and all of its descendants, removing
            // their ids as we go...
            while let Some(entry) = cursor.item() {
                if entry.path.starts_with(&removed_entry.path) {
                    self.entries_by_id.remove(&entry.id, ());
                    cursor.next();
                } else {
                    break;
                }
            }
            // ...then keep the rest.
            new_entries_by_path.append(cursor.suffix(), ());
            new_entries_by_path
        };

        Some(removed_entry.path)
    }

    /// Updates the root path/name, recomputing the cached character bag only
    /// when the root name actually changed.
    fn update_abs_path(&mut self, abs_path: Arc<SanitizedPath>, root_name: Arc<RelPath>) {
        self.abs_path = abs_path;
        if root_name != self.root_name {
            self.root_char_bag = root_name
                .as_unix_str()
                .chars()
                .map(|c| c.to_ascii_lowercase())
                .collect();
            self.root_name = root_name;
        }
    }

    /// Applies a worktree update received from the host: removals first,
    /// then upserts, then the scan-id bookkeeping.
    pub fn apply_remote_update(
        &mut self,
        update: proto::UpdateWorktree,
        always_included_paths: &PathMatcher,
    ) {
        log::debug!(
            "applying remote worktree update. {} entries updated, {} removed",
            update.updated_entries.len(),
            update.removed_entries.len()
        );
        if let Some(root_name) = RelPath::from_proto(&update.root_name).log_err() {
            self.update_abs_path(
                SanitizedPath::new_arc(&Path::new(&update.abs_path)),
                root_name,
            );
        }

        // Batch all edits so each sum tree is edited once.
        let mut entries_by_path_edits = Vec::new();
        let mut entries_by_id_edits = Vec::new();

        for entry_id in update.removed_entries {
            let entry_id = ProjectEntryId::from_proto(entry_id);
            entries_by_id_edits.push(Edit::Remove(entry_id));
            if let Some(entry) = self.entry_for_id(entry_id) {
                entries_by_path_edits.push(Edit::Remove(PathKey(entry.path.clone())));
            }
        }

        for entry in update.updated_entries {
            let Some(entry) =
                Entry::try_from((&self.root_char_bag, always_included_paths, entry)).log_err()
            else {
                continue;
            };
            // The entry may have moved: drop its old path record.
            if let Some(PathEntry { path, .. }) = self.entries_by_id.get(&entry.id, ()) {
                entries_by_path_edits.push(Edit::Remove(PathKey(path.clone())));
            }
            // Another entry may have previously occupied this path: drop its
            // id record.
            if let Some(old_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
                && old_entry.id != entry.id
            {
                entries_by_id_edits.push(Edit::Remove(old_entry.id));
            }
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: 0,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.entries_by_path.edit(entries_by_path_edits, ());
        self.entries_by_id.edit(entries_by_id_edits, ());

        self.scan_id = update.scan_id as usize;
        if update.is_last_update {
            self.completed_scan_id = update.scan_id as usize;
        }
    }

    /// Total number of entries (files and directories).
    pub fn entry_count(&self) -> usize {
        self.entries_by_path.summary().count
    }

    /// Number of non-ignored entries.
    pub fn visible_entry_count(&self) -> usize {
        self.entries_by_path.summary().non_ignored_count
    }

    /// Number of directory entries.
    pub fn dir_count(&self) -> usize {
        let summary = self.entries_by_path.summary();
        summary.count - summary.file_count
    }

    /// Number of non-ignored directory entries.
    pub fn visible_dir_count(&self) -> usize {
        let summary = self.entries_by_path.summary();
        summary.non_ignored_count - summary.non_ignored_file_count
    }

    /// Number of file entries.
    pub fn file_count(&self) -> usize {
        self.entries_by_path.summary().file_count
    }

    /// Number of non-ignored file entries.
    pub fn visible_file_count(&self) -> usize {
        self.entries_by_path.summary().non_ignored_file_count
    }

    /// Returns a traversal positioned at the `start_offset`-th entry among
    /// the entries matching the include flags.
    fn traverse_from_offset(
        &self,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        start_offset: usize,
    ) -> Traversal<'_> {
        let mut cursor = self.entries_by_path.cursor(());
        cursor.seek(
            &TraversalTarget::Count {
                count: start_offset,
                include_files,
                include_dirs,
                include_ignored,
            },
            Bias::Right,
        );
        Traversal {
            snapshot: self,
            cursor,
            include_files,
            include_dirs,
            include_ignored,
        }
    }

    /// Returns a traversal positioned at the given path.
    pub fn traverse_from_path(
        &self,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        path: &RelPath,
    ) -> Traversal<'_> {
        Traversal::new(self, include_files, include_dirs, include_ignored, path)
    }

    /// Iterates over file entries, starting at offset `start`.
    pub fn files(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, false, include_ignored, start)
    }

    /// Iterates over directory entries, starting at offset `start`.
    pub fn directories(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(false, true, include_ignored, start)
    }

    /// Iterates over all entries, starting at offset `start`.
    pub fn entries(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, true, include_ignored, start)
    }

    /// Iterates over all entry paths, skipping the root (empty) path.
    pub fn paths(&self) -> impl Iterator<Item = &RelPath> {
        self.entries_by_path
            .cursor::<()>(())
            .filter(move |entry| !entry.path.is_empty())
            .map(|entry| entry.path.as_ref())
    }

    /// Iterates over the direct children of `parent_path`, including files,
    /// directories, and ignored entries.
    pub fn child_entries<'a>(&'a self, parent_path: &'a RelPath) -> ChildEntriesIter<'a> {
        let options = ChildEntriesOptions {
            include_files: true,
            include_dirs: true,
            include_ignored: true,
        };
        self.child_entries_with_options(parent_path, options)
    }

    /// Iterates over the direct children of `parent_path`, filtered by
    /// `options`.
    pub fn child_entries_with_options<'a>(
        &'a self,
        parent_path: &'a RelPath,
        options: ChildEntriesOptions,
    ) -> ChildEntriesIter<'a> {
        let mut cursor = self.entries_by_path.cursor(());
        // Seek past the parent itself so iteration starts at its first child.
        cursor.seek(&TraversalTarget::path(parent_path), Bias::Right);
        let traversal = Traversal {
            snapshot: self,
            cursor,
            include_files: options.include_files,
            include_dirs: options.include_dirs,
            include_ignored: options.include_ignored,
        };
        ChildEntriesIter {
            traversal,
            parent_path,
        }
    }

    /// The entry for the worktree root, if any entries exist.
    pub fn root_entry(&self) -> Option<&Entry> {
        self.entries_by_path.first()
    }

    /// Returns `None` for a single file worktree, or `Some(self.abs_path())` if
    /// it is a directory.
    pub fn root_dir(&self) -> Option<Arc<Path>> {
        self.root_entry()
            .filter(|entry| entry.is_dir())
            .map(|_| self.abs_path().clone())
    }

    /// The worktree's root name as a relative path.
    pub fn root_name(&self) -> &RelPath {
        &self.root_name
    }

    /// The worktree's root name as a unix-style string.
    pub fn root_name_str(&self) -> &str {
        self.root_name.as_unix_str()
    }

    /// The id of the most recently started scan.
    pub fn scan_id(&self) -> usize {
        self.scan_id
    }

    /// Returns the entry at exactly `path`, or `None` if no such entry
    /// exists.
    pub fn entry_for_path(&self, path: &RelPath) -> Option<&Entry> {
        self.traverse_from_path(true, true, true, path)
            .entry()
            .and_then(|entry| {
                // The traversal lands on the first entry at or after `path`;
                // only return it if it is an exact match.
                if entry.path.as_ref() == path {
                    Some(entry)
                } else {
                    None
                }
            })
    }

    /// Resolves a path to an executable using the following heuristics:
    ///
    /// 1. If the path starts with `~`, it is expanded to the user's home directory.
    /// 2. If the path is relative and contains more than one component,
    ///    it is joined to the worktree root path.
    /// 3. If the path is relative and exists in the worktree
    ///    (even if falls under an exclusion filter),
    ///    it is joined to the worktree root path.
    /// 4. Otherwise the path is returned unmodified.
    ///
    /// Relative paths that do not exist in the worktree may
    /// still be found using the `PATH` environment variable.
    pub fn resolve_relative_path(&self, path: PathBuf) -> PathBuf {
        if let Some(path_str) = path.to_str() {
            if let Some(remaining_path) = path_str.strip_prefix("~/") {
                return home_dir().join(remaining_path);
            } else if path_str == "~" {
                return home_dir().to_path_buf();
            }
        }

        if let Ok(rel_path) = RelPath::new(&path, self.path_style)
            && (path.components().count() > 1 || self.entry_for_path(&rel_path).is_some())
        {
            self.abs_path().join(path)
        } else {
            path
        }
    }

    /// Looks up an entry by id, returning its full `Entry` record.
    pub fn entry_for_id(&self, id: ProjectEntryId) -> Option<&Entry> {
        let entry = self.entries_by_id.get(&id, ())?;
        self.entry_for_path(&entry.path)
    }

    /// The path style (unix/windows separators) of this worktree.
    pub fn path_style(&self) -> PathStyle {
        self.path_style
    }
}
2581
impl LocalSnapshot {
    /// Finds the local repository whose working directory is exactly `path`,
    /// by linear search over the known repositories.
    fn local_repo_for_work_directory_path(&self, path: &RelPath) -> Option<&LocalRepositoryEntry> {
        self.git_repositories
            .iter()
            .map(|(_, entry)| entry)
            .find(|entry| entry.work_directory.path_key() == PathKey(path.into()))
    }

    /// Converts a set of entry changes into an incremental proto update for
    /// peers, listing updated entries and removed entry ids.
    fn build_update(
        &self,
        project_id: u64,
        worktree_id: u64,
        entry_changes: UpdatedEntriesSet,
    ) -> proto::UpdateWorktree {
        let mut updated_entries = Vec::new();
        let mut removed_entries = Vec::new();

        for (_, entry_id, path_change) in entry_changes.iter() {
            if let PathChange::Removed = path_change {
                removed_entries.push(entry_id.0 as u64);
            } else if let Some(entry) = self.entry_for_id(*entry_id) {
                updated_entries.push(proto::Entry::from(entry));
            }
        }

        removed_entries.sort_unstable();
        updated_entries.sort_unstable_by_key(|e| e.id);

        // An id that re-appears in `updated_entries` must not also be
        // reported as removed.
        // TODO - optimize, knowing that removed_entries are sorted.
        removed_entries.retain(|id| updated_entries.binary_search_by_key(id, |e| e.id).is_err());

        proto::UpdateWorktree {
            project_id,
            worktree_id,
            abs_path: self.abs_path().to_string_lossy().into_owned(),
            root_name: self.root_name().to_proto(),
            updated_entries,
            removed_entries,
            scan_id: self.scan_id as u64,
            is_last_update: self.completed_scan_id == self.scan_id,
            // Sent in separate messages.
            updated_repositories: Vec::new(),
            removed_repositories: Vec::new(),
        }
    }

    /// Inserts an entry into the snapshot. If the entry is a `.gitignore`
    /// file, its contents are (re)parsed and cached by parent directory.
    async fn insert_entry(&mut self, mut entry: Entry, fs: &dyn Fs) -> Entry {
        log::trace!("insert entry {:?}", entry.path);
        if entry.is_file() && entry.path.file_name() == Some(&GITIGNORE) {
            let abs_path = self.absolutize(&entry.path);
            match build_gitignore(&abs_path, fs).await {
                Ok(ignore) => {
                    self.ignores_by_parent_abs_path
                        .insert(abs_path.parent().unwrap().into(), (Arc::new(ignore), true));
                }
                Err(error) => {
                    log::error!(
                        "error loading .gitignore file {:?} - {:?}",
                        &entry.path,
                        error
                    );
                }
            }
        }

        // Don't downgrade a directory that has already been loaded back to
        // "pending".
        if entry.kind == EntryKind::PendingDir
            && let Some(existing_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
        {
            entry.kind = existing_entry.kind;
        }

        let scan_id = self.scan_id;
        let removed = self.entries_by_path.insert_or_replace(entry.clone(), ());
        // If a different entry previously occupied this path, drop its id
        // record.
        if let Some(removed) = removed
            && removed.id != entry.id
        {
            self.entries_by_id.remove(&removed.id, ());
        }
        self.entries_by_id.insert_or_replace(
            PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id,
            },
            (),
        );

        entry
    }

    /// Collects the inodes of all ancestor directories of `path` that are
    /// present in the snapshot (excluding `path` itself).
    fn ancestor_inodes_for_path(&self, path: &RelPath) -> TreeSet<u64> {
        let mut inodes = TreeSet::default();
        for ancestor in path.ancestors().skip(1) {
            if let Some(entry) = self.entry_for_path(ancestor) {
                inodes.insert(entry.inode);
            }
        }
        inodes
    }

    /// Builds the stack of ignore rules that apply to `abs_path`, walking up
    /// its ancestors and collecting cached `.gitignore` files until a
    /// repository root (a directory containing `.git`) is found.
    async fn ignore_stack_for_abs_path(
        &self,
        abs_path: &Path,
        is_dir: bool,
        fs: &dyn Fs,
    ) -> IgnoreStack {
        let mut new_ignores = Vec::new();
        let mut repo_root = None;
        for (index, ancestor) in abs_path.ancestors().enumerate() {
            // index 0 is `abs_path` itself; only its ancestors contribute
            // ignore files.
            if index > 0 {
                if let Some((ignore, _)) = self.ignores_by_parent_abs_path.get(ancestor) {
                    new_ignores.push((ancestor, Some(ignore.clone())));
                } else {
                    new_ignores.push((ancestor, None));
                }
            }

            let metadata = fs.metadata(&ancestor.join(DOT_GIT)).await.ok().flatten();
            if metadata.is_some() {
                repo_root = Some(Arc::from(ancestor));
                break;
            }
        }

        // The global gitignore (if any) forms the base of the stack.
        let mut ignore_stack = if let Some(global_gitignore) = self.global_gitignore.clone() {
            IgnoreStack::global(global_gitignore)
        } else {
            IgnoreStack::none()
        };

        // Then the repository's `info/exclude` rules, if the repo has any.
        if let Some((repo_exclude, _)) = repo_root
            .as_ref()
            .and_then(|abs_path| self.repo_exclude_by_work_dir_abs_path.get(abs_path))
        {
            ignore_stack = ignore_stack.append(IgnoreKind::RepoExclude, repo_exclude.clone());
        }
        ignore_stack.repo_root = repo_root;
        // Apply `.gitignore`s from the outermost ancestor inward; if any
        // ancestor directory is itself ignored, everything below it is too.
        for (parent_abs_path, ignore) in new_ignores.into_iter().rev() {
            if ignore_stack.is_abs_path_ignored(parent_abs_path, true) {
                ignore_stack = IgnoreStack::all();
                break;
            } else if let Some(ignore) = ignore {
                ignore_stack =
                    ignore_stack.append(IgnoreKind::Gitignore(parent_abs_path.into()), ignore);
            }
        }

        if ignore_stack.is_abs_path_ignored(abs_path, is_dir) {
            ignore_stack = IgnoreStack::all();
        }

        ignore_stack
    }

    /// Test helper: directories that have been loaded despite being external
    /// or ignored.
    #[cfg(feature = "test-support")]
    pub fn expanded_entries(&self) -> impl Iterator<Item = &Entry> {
        self.entries_by_path
            .cursor::<()>(())
            .filter(|entry| entry.kind == EntryKind::Dir && (entry.is_external || entry.is_ignored))
    }

    /// Test helper: asserts internal consistency of the snapshot — the two
    /// entry trees agree, traversal counts match, traversal orders match,
    /// and (optionally) cached ignore files correspond to real entries.
    #[cfg(feature = "test-support")]
    pub fn check_invariants(&self, git_state: bool) {
        use pretty_assertions::assert_eq;

        // The by-path and by-id trees must contain exactly the same
        // (path, id) pairs.
        assert_eq!(
            self.entries_by_path
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<Vec<_>>(),
            self.entries_by_id
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<collections::BTreeSet<_>>()
                .into_iter()
                .collect::<Vec<_>>(),
            "entries_by_path and entries_by_id are inconsistent"
        );

        // File traversals must visit exactly the file entries, and the
        // visible traversal exactly the non-ignored/always-included ones.
        let mut files = self.files(true, 0);
        let mut visible_files = self.files(false, 0);
        for entry in self.entries_by_path.cursor::<()>(()) {
            if entry.is_file() {
                assert_eq!(files.next().unwrap().inode, entry.inode);
                if (!entry.is_ignored && !entry.is_external) || entry.is_always_included {
                    assert_eq!(visible_files.next().unwrap().inode, entry.inode);
                }
            }
        }

        assert!(files.next().is_none());
        assert!(visible_files.next().is_none());

        // Walk the tree via `child_entries`; inserting children at a fixed
        // index reverses them on the stack, so popping yields them
        // first-to-last, producing a depth-first preorder.
        let mut bfs_paths = Vec::new();
        let mut stack = self
            .root_entry()
            .map(|e| e.path.as_ref())
            .into_iter()
            .collect::<Vec<_>>();
        while let Some(path) = stack.pop() {
            bfs_paths.push(path);
            let ix = stack.len();
            for child_entry in self.child_entries(path) {
                stack.insert(ix, &child_entry.path);
            }
        }

        let dfs_paths_via_iter = self
            .entries_by_path
            .cursor::<()>(())
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();
        assert_eq!(bfs_paths, dfs_paths_via_iter);

        let dfs_paths_via_traversal = self
            .entries(true, 0)
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();

        assert_eq!(dfs_paths_via_traversal, dfs_paths_via_iter);

        // Every cached `.gitignore` must correspond to an indexed directory
        // that actually contains a `.gitignore` entry.
        if git_state {
            for ignore_parent_abs_path in self.ignores_by_parent_abs_path.keys() {
                let ignore_parent_path = &RelPath::new(
                    ignore_parent_abs_path
                        .strip_prefix(self.abs_path.as_path())
                        .unwrap(),
                    PathStyle::local(),
                )
                .unwrap();
                assert!(self.entry_for_path(ignore_parent_path).is_some());
                assert!(
                    self.entry_for_path(
                        &ignore_parent_path.join(RelPath::unix(GITIGNORE).unwrap())
                    )
                    .is_some()
                );
            }
        }
    }

    /// Test helper: all (path, inode, is_ignored) triples, sorted by path.
    #[cfg(feature = "test-support")]
    pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&RelPath, u64, bool)> {
        let mut paths = Vec::new();
        for entry in self.entries_by_path.cursor::<()>(()) {
            if include_ignored || !entry.is_ignored {
                paths.push((entry.path.as_ref(), entry.inode, entry.is_ignored));
            }
        }
        paths.sort_by(|a, b| a.0.cmp(b.0));
        paths
    }
}
2836
impl BackgroundScannerState {
    /// Decides whether the scanner should descend into a directory entry.
    /// `.git`, local-settings, and local-vscode folders are always scanned;
    /// otherwise scanning is gated on ignore/external status and on the
    /// explicitly requested paths and prefixes.
    fn should_scan_directory(&self, entry: &Entry) -> bool {
        (self.scanning_enabled && !entry.is_external && (!entry.is_ignored || entry.is_always_included))
            || entry.path.file_name() == Some(DOT_GIT)
            || entry.path.file_name() == Some(local_settings_folder_name())
            || entry.path.file_name() == Some(local_vscode_folder_name())
            || self.scanned_dirs.contains(&entry.id) // If we've ever scanned it, keep scanning
            || self
                .paths_to_scan
                .iter()
                .any(|p| p.starts_with(&entry.path))
            || self
                .path_prefixes_to_scan
                .iter()
                .any(|p| entry.path.starts_with(p))
    }

    /// Queues a directory for scanning, unless its inode already appears
    /// among its ancestors' inodes (which would indicate a filesystem
    /// cycle).
    async fn enqueue_scan_dir(
        &self,
        abs_path: Arc<Path>,
        entry: &Entry,
        scan_job_tx: &Sender<ScanJob>,
        fs: &dyn Fs,
    ) {
        let path = entry.path.clone();
        let ignore_stack = self
            .snapshot
            .ignore_stack_for_abs_path(&abs_path, true, fs)
            .await;
        let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);

        if !ancestor_inodes.contains(&entry.inode) {
            ancestor_inodes.insert(entry.inode);
            scan_job_tx
                .try_send(ScanJob {
                    abs_path,
                    path,
                    ignore_stack,
                    scan_queue: scan_job_tx.clone(),
                    ancestor_inodes,
                    is_external: entry.is_external,
                })
                .unwrap();
        }
    }

    /// Re-assigns a freshly discovered entry the id of a matching prior
    /// entry, so ids stay stable across renames and updates.
    fn reuse_entry_id(&mut self, entry: &mut Entry) {
        if let Some(mtime) = entry.mtime {
            // If an entry with the same inode was removed from the worktree during this scan,
            // then it *might* represent the same file or directory. But the OS might also have
            // re-used the inode for a completely different file or directory.
            //
            // Conditionally reuse the old entry's id:
            // * if the mtime is the same, the file was probably renamed.
            // * if the path is the same, the file may just have been updated
            if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) {
                if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path {
                    entry.id = removed_entry.id;
                }
            } else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) {
                entry.id = existing_entry.id;
            }
        }
    }

    /// Like [`Self::reuse_entry_id`], but for entries that have not been
    /// constructed yet: returns the id to use for `path` with `metadata`,
    /// minting a fresh id when no prior entry matches.
    fn entry_id_for(
        &mut self,
        next_entry_id: &AtomicUsize,
        path: &RelPath,
        metadata: &fs::Metadata,
    ) -> ProjectEntryId {
        // If an entry with the same inode was removed from the worktree during this scan,
        // then it *might* represent the same file or directory. But the OS might also have
        // re-used the inode for a completely different file or directory.
        //
        // Conditionally reuse the old entry's id:
        // * if the mtime is the same, the file was probably renamed.
        // * if the path is the same, the file may just have been updated
        if let Some(removed_entry) = self.removed_entries.remove(&metadata.inode) {
            if removed_entry.mtime == Some(metadata.mtime) || *removed_entry.path == *path {
                return removed_entry.id;
            }
        } else if let Some(existing_entry) = self.snapshot.entry_for_path(path) {
            return existing_entry.id;
        }
        ProjectEntryId::new(next_entry_id)
    }

    /// Inserts an entry into the snapshot; if the entry is a `.git`
    /// directory, also registers the corresponding git repository.
    async fn insert_entry(&mut self, entry: Entry, fs: &dyn Fs, watcher: &dyn Watcher) -> Entry {
        let entry = self.snapshot.insert_entry(entry, fs).await;
        if entry.path.file_name() == Some(&DOT_GIT) {
            self.insert_git_repository(entry.path.clone(), fs, watcher)
                .await;
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);

        entry
    }

    /// Records the children of a just-scanned directory: marks the parent as
    /// a loaded `Dir`, caches its `.gitignore` (if provided), and
    /// batch-inserts the child entries.
    fn populate_dir(
        &mut self,
        parent_path: Arc<RelPath>,
        entries: impl IntoIterator<Item = Entry>,
        ignore: Option<Arc<Gitignore>>,
    ) {
        let mut parent_entry = if let Some(parent_entry) = self
            .snapshot
            .entries_by_path
            .get(&PathKey(parent_path.clone()), ())
        {
            parent_entry.clone()
        } else {
            // The parent may have been deleted while its scan was in flight.
            log::warn!(
                "populating a directory {:?} that has been removed",
                parent_path
            );
            return;
        };

        match parent_entry.kind {
            EntryKind::PendingDir | EntryKind::UnloadedDir => parent_entry.kind = EntryKind::Dir,
            EntryKind::Dir => {}
            _ => return,
        }

        if let Some(ignore) = ignore {
            let abs_parent_path = self
                .snapshot
                .abs_path
                .as_path()
                .join(parent_path.as_std_path())
                .into();
            self.snapshot
                .ignores_by_parent_abs_path
                .insert(abs_parent_path, (ignore, false));
        }

        let parent_entry_id = parent_entry.id;
        self.scanned_dirs.insert(parent_entry_id);
        let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
        let mut entries_by_id_edits = Vec::new();

        for entry in entries {
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: self.snapshot.scan_id,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        self.snapshot.entries_by_id.edit(entries_by_id_edits, ());

        // Keep `changed_paths` sorted for later binary searches.
        if let Err(ix) = self.changed_paths.binary_search(&parent_path) {
            self.changed_paths.insert(ix, parent_path.clone());
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }

    /// Removes `path` and all of its descendants from the snapshot, keeping
    /// removed entries around (keyed by inode) so their ids can be reused if
    /// the same file reappears during this scan.
    fn remove_path(&mut self, path: &RelPath, watcher: &dyn Watcher) {
        log::trace!("background scanner removing path {path:?}");
        let mut new_entries;
        let removed_entries;
        {
            // Split the tree into [before path] + [path and descendants] +
            // [after], keeping the outer two pieces.
            let mut cursor = self
                .snapshot
                .entries_by_path
                .cursor::<TraversalProgress>(());
            new_entries = cursor.slice(&TraversalTarget::path(path), Bias::Left);
            removed_entries = cursor.slice(&TraversalTarget::successor(path), Bias::Left);
            new_entries.append(cursor.suffix(), ());
        }
        self.snapshot.entries_by_path = new_entries;

        let mut removed_ids = Vec::with_capacity(removed_entries.summary().count);
        let mut removed_dir_abs_paths = Vec::new();
        for entry in removed_entries.cursor::<()>(()) {
            if entry.is_dir() {
                removed_dir_abs_paths.push(self.snapshot.absolutize(&entry.path));
            }

            // Remember the removed entry by inode; prefer the newest id when
            // several removed entries shared an inode.
            match self.removed_entries.entry(entry.inode) {
                hash_map::Entry::Occupied(mut e) => {
                    let prev_removed_entry = e.get_mut();
                    if entry.id > prev_removed_entry.id {
                        *prev_removed_entry = entry.clone();
                    }
                }
                hash_map::Entry::Vacant(e) => {
                    e.insert(entry.clone());
                }
            }

            // Removing a `.gitignore` means its parent's cached ignore rules
            // are stale.
            if entry.path.file_name() == Some(GITIGNORE) {
                let abs_parent_path = self.snapshot.absolutize(&entry.path.parent().unwrap());
                if let Some((_, needs_update)) = self
                    .snapshot
                    .ignores_by_parent_abs_path
                    .get_mut(abs_parent_path.as_path())
                {
                    *needs_update = true;
                }
            }

            // Collect removed ids in sorted order for the binary searches
            // below.
            if let Err(ix) = removed_ids.binary_search(&entry.id) {
                removed_ids.insert(ix, entry.id);
            }
        }

        self.snapshot
            .entries_by_id
            .edit(removed_ids.iter().map(|&id| Edit::Remove(id)).collect(), ());
        self.snapshot
            .git_repositories
            .retain(|id, _| removed_ids.binary_search(id).is_err());

        for removed_dir_abs_path in removed_dir_abs_paths {
            watcher.remove(&removed_dir_abs_path).log_err();
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }

    /// Registers a git repository for the `.git` entry at `dot_git_path`,
    /// unless the `.git` directory is nested inside another `.git` directory
    /// or is the worktree root itself.
    async fn insert_git_repository(
        &mut self,
        dot_git_path: Arc<RelPath>,
        fs: &dyn Fs,
        watcher: &dyn Watcher,
    ) {
        let work_dir_path: Arc<RelPath> = match dot_git_path.parent() {
            Some(parent_dir) => {
                // Guard against repositories inside the repository metadata
                if parent_dir
                    .components()
                    .any(|component| component == DOT_GIT)
                {
                    log::debug!(
                        "not building git repository for nested `.git` directory, `.git` path in the worktree: {dot_git_path:?}"
                    );
                    return;
                };

                parent_dir.into()
            }
            None => {
                // `dot_git_path.parent().is_none()` means `.git` directory is the opened worktree itself,
                // no files inside that directory are tracked by git, so no need to build the repo around it
                log::debug!(
                    "not building git repository for the worktree itself, `.git` path in the worktree: {dot_git_path:?}"
                );
                return;
            }
        };

        let dot_git_abs_path = Arc::from(self.snapshot.absolutize(&dot_git_path).as_ref());

        self.insert_git_repository_for_path(
            WorkDirectory::InProject {
                relative_path: work_dir_path,
            },
            dot_git_abs_path,
            fs,
            watcher,
        )
        .await
        .log_err();
    }

    /// Builds and records a [`LocalRepositoryEntry`] for `work_directory`,
    /// discovering the repository/common git directories and watching both.
    ///
    /// Fails if the working directory is not indexed in the snapshot.
    async fn insert_git_repository_for_path(
        &mut self,
        work_directory: WorkDirectory,
        dot_git_abs_path: Arc<Path>,
        fs: &dyn Fs,
        watcher: &dyn Watcher,
    ) -> Result<LocalRepositoryEntry> {
        let work_dir_entry = self
            .snapshot
            .entry_for_path(&work_directory.path_key().0)
            .with_context(|| {
                format!(
                    "working directory `{}` not indexed",
                    work_directory
                        .path_key()
                        .0
                        .display(self.snapshot.path_style)
                )
            })?;
        let work_directory_abs_path = self.snapshot.work_directory_abs_path(&work_directory);

        let (repository_dir_abs_path, common_dir_abs_path) =
            discover_git_paths(&dot_git_abs_path, fs).await;
        watcher
            .add(&common_dir_abs_path)
            .context("failed to add common directory to watcher")
            .log_err();
        watcher
            .add(&repository_dir_abs_path)
            .context("failed to add repository directory to watcher")
            .log_err();

        let work_directory_id = work_dir_entry.id;

        let local_repository = LocalRepositoryEntry {
            work_directory_id,
            work_directory,
            work_directory_abs_path: work_directory_abs_path.as_path().into(),
            git_dir_scan_id: 0,
            dot_git_abs_path,
            common_dir_abs_path,
            repository_dir_abs_path,
        };

        self.snapshot
            .git_repositories
            .insert(work_directory_id, local_repository.clone());

        log::trace!("inserting new local git repository");
        Ok(local_repository)
    }
}
3166
3167async fn is_git_dir(path: &Path, fs: &dyn Fs) -> bool {
3168 if let Some(file_name) = path.file_name()
3169 && file_name == DOT_GIT
3170 {
3171 return true;
3172 }
3173
3174 // If we're in a bare repository, we are not inside a `.git` folder. In a
3175 // bare repository, the root folder contains what would normally be in the
3176 // `.git` folder.
3177 let head_metadata = fs.metadata(&path.join("HEAD")).await;
3178 if !matches!(head_metadata, Ok(Some(_))) {
3179 return false;
3180 }
3181 let config_metadata = fs.metadata(&path.join("config")).await;
3182 matches!(config_metadata, Ok(Some(_)))
3183}
3184
3185async fn build_gitignore(abs_path: &Path, fs: &dyn Fs) -> Result<Gitignore> {
3186 let contents = fs
3187 .load(abs_path)
3188 .await
3189 .with_context(|| format!("failed to load gitignore file at {}", abs_path.display()))?;
3190 let parent = abs_path.parent().unwrap_or_else(|| Path::new("/"));
3191 let mut builder = GitignoreBuilder::new(parent);
3192 for line in contents.lines() {
3193 builder.add_line(Some(abs_path.into()), line)?;
3194 }
3195 Ok(builder.build()?)
3196}
3197
3198impl Deref for Worktree {
3199 type Target = Snapshot;
3200
3201 fn deref(&self) -> &Self::Target {
3202 match self {
3203 Worktree::Local(worktree) => &worktree.snapshot,
3204 Worktree::Remote(worktree) => &worktree.snapshot,
3205 }
3206 }
3207}
3208
impl Deref for LocalWorktree {
    type Target = LocalSnapshot;

    /// A local worktree dereferences to its current local snapshot.
    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3216
impl Deref for RemoteWorktree {
    type Target = Snapshot;

    /// A remote worktree dereferences to its current snapshot.
    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3224
impl fmt::Debug for LocalWorktree {
    // Delegate debug formatting to the snapshot, which holds the state worth
    // printing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.snapshot.fmt(f)
    }
}
3230
3231impl fmt::Debug for Snapshot {
3232 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3233 struct EntriesById<'a>(&'a SumTree<PathEntry>);
3234 struct EntriesByPath<'a>(&'a SumTree<Entry>);
3235
3236 impl fmt::Debug for EntriesByPath<'_> {
3237 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3238 f.debug_map()
3239 .entries(self.0.iter().map(|entry| (&entry.path, entry.id)))
3240 .finish()
3241 }
3242 }
3243
3244 impl fmt::Debug for EntriesById<'_> {
3245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3246 f.debug_list().entries(self.0.iter()).finish()
3247 }
3248 }
3249
3250 f.debug_struct("Snapshot")
3251 .field("id", &self.id)
3252 .field("root_name", &self.root_name)
3253 .field("entries_by_path", &EntriesByPath(&self.entries_by_path))
3254 .field("entries_by_id", &EntriesById(&self.entries_by_id))
3255 .finish()
3256 }
3257}
3258
/// A handle to a file that belongs to a particular [`Worktree`].
#[derive(Debug, Clone, PartialEq)]
pub struct File {
    pub worktree: Entity<Worktree>,
    /// Path of the file, relative to the worktree root.
    pub path: Arc<RelPath>,
    /// Last known on-disk state of the file.
    pub disk_state: DiskState,
    /// The worktree entry backing this file, if one exists.
    pub entry_id: Option<ProjectEntryId>,
    /// Whether the file lives in a local (as opposed to remote) worktree.
    pub is_local: bool,
    /// Whether the file is considered private (see `Entry::is_private`).
    pub is_private: bool,
}
3268
3269impl language::File for File {
3270 fn as_local(&self) -> Option<&dyn language::LocalFile> {
3271 if self.is_local { Some(self) } else { None }
3272 }
3273
3274 fn disk_state(&self) -> DiskState {
3275 self.disk_state
3276 }
3277
3278 fn path(&self) -> &Arc<RelPath> {
3279 &self.path
3280 }
3281
3282 fn full_path(&self, cx: &App) -> PathBuf {
3283 self.worktree.read(cx).full_path(&self.path)
3284 }
3285
3286 /// Returns the last component of this handle's absolute path. If this handle refers to the root
3287 /// of its worktree, then this method will return the name of the worktree itself.
3288 fn file_name<'a>(&'a self, cx: &'a App) -> &'a str {
3289 self.path
3290 .file_name()
3291 .unwrap_or_else(|| self.worktree.read(cx).root_name_str())
3292 }
3293
3294 fn worktree_id(&self, cx: &App) -> WorktreeId {
3295 self.worktree.read(cx).id()
3296 }
3297
3298 fn to_proto(&self, cx: &App) -> rpc::proto::File {
3299 rpc::proto::File {
3300 worktree_id: self.worktree.read(cx).id().to_proto(),
3301 entry_id: self.entry_id.map(|id| id.to_proto()),
3302 path: self.path.as_ref().to_proto(),
3303 mtime: self.disk_state.mtime().map(|time| time.into()),
3304 is_deleted: self.disk_state.is_deleted(),
3305 is_historic: matches!(self.disk_state, DiskState::Historic { .. }),
3306 }
3307 }
3308
3309 fn is_private(&self) -> bool {
3310 self.is_private
3311 }
3312
3313 fn path_style(&self, cx: &App) -> PathStyle {
3314 self.worktree.read(cx).path_style()
3315 }
3316
3317 fn can_open(&self) -> bool {
3318 true
3319 }
3320}
3321
3322impl language::LocalFile for File {
3323 fn abs_path(&self, cx: &App) -> PathBuf {
3324 self.worktree.read(cx).absolutize(&self.path)
3325 }
3326
3327 fn load(&self, cx: &App) -> Task<Result<String>> {
3328 let worktree = self.worktree.read(cx).as_local().unwrap();
3329 let abs_path = worktree.absolutize(&self.path);
3330 let fs = worktree.fs.clone();
3331 cx.background_spawn(async move { fs.load(&abs_path).await })
3332 }
3333
3334 fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>> {
3335 let worktree = self.worktree.read(cx).as_local().unwrap();
3336 let abs_path = worktree.absolutize(&self.path);
3337 let fs = worktree.fs.clone();
3338 cx.background_spawn(async move { fs.load_bytes(&abs_path).await })
3339 }
3340}
3341
3342impl File {
3343 pub fn for_entry(entry: Entry, worktree: Entity<Worktree>) -> Arc<Self> {
3344 Arc::new(Self {
3345 worktree,
3346 path: entry.path.clone(),
3347 disk_state: if let Some(mtime) = entry.mtime {
3348 DiskState::Present {
3349 mtime,
3350 size: entry.size,
3351 }
3352 } else {
3353 DiskState::New
3354 },
3355 entry_id: Some(entry.id),
3356 is_local: true,
3357 is_private: entry.is_private,
3358 })
3359 }
3360
3361 pub fn from_proto(
3362 proto: rpc::proto::File,
3363 worktree: Entity<Worktree>,
3364 cx: &App,
3365 ) -> Result<Self> {
3366 let worktree_id = worktree.read(cx).as_remote().context("not remote")?.id();
3367
3368 anyhow::ensure!(
3369 worktree_id.to_proto() == proto.worktree_id,
3370 "worktree id does not match file"
3371 );
3372
3373 let disk_state = if proto.is_historic {
3374 DiskState::Historic {
3375 was_deleted: proto.is_deleted,
3376 }
3377 } else if proto.is_deleted {
3378 DiskState::Deleted
3379 } else if let Some(mtime) = proto.mtime.map(&Into::into) {
3380 DiskState::Present { mtime, size: 0 }
3381 } else {
3382 DiskState::New
3383 };
3384
3385 Ok(Self {
3386 worktree,
3387 path: RelPath::from_proto(&proto.path).context("invalid path in file protobuf")?,
3388 disk_state,
3389 entry_id: proto.entry_id.map(ProjectEntryId::from_proto),
3390 is_local: false,
3391 is_private: false,
3392 })
3393 }
3394
3395 pub fn from_dyn(file: Option<&Arc<dyn language::File>>) -> Option<&Self> {
3396 file.and_then(|f| {
3397 let f: &dyn language::File = f.borrow();
3398 let f: &dyn Any = f;
3399 f.downcast_ref()
3400 })
3401 }
3402
3403 pub fn worktree_id(&self, cx: &App) -> WorktreeId {
3404 self.worktree.read(cx).id()
3405 }
3406
3407 pub fn project_entry_id(&self) -> Option<ProjectEntryId> {
3408 match self.disk_state {
3409 DiskState::Deleted => None,
3410 _ => self.entry_id,
3411 }
3412 }
3413}
3414
/// A single file or directory tracked by a worktree snapshot.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
    /// Stable identifier for this entry within the project.
    pub id: ProjectEntryId,
    /// Whether this is a file or a (possibly not-yet-scanned) directory.
    pub kind: EntryKind,
    /// Path relative to the worktree root.
    pub path: Arc<RelPath>,
    pub inode: u64,
    /// Modification time; `None` for entries not yet written to disk.
    pub mtime: Option<MTime>,

    /// Canonicalized path, set for symlinked entries — NOTE(review): inferred
    /// from usage alongside `is_external`; confirm at construction sites.
    pub canonical_path: Option<Arc<Path>>,
    /// Whether this entry is ignored by Git.
    ///
    /// We only scan ignored entries once the directory is expanded and
    /// exclude them from searches.
    pub is_ignored: bool,

    /// Whether this entry is hidden or inside hidden directory.
    ///
    /// We only scan hidden entries once the directory is expanded.
    pub is_hidden: bool,

    /// Whether this entry is always included in searches.
    ///
    /// This is used for entries that are always included in searches, even
    /// if they are ignored by git. Overridden by file_scan_exclusions.
    pub is_always_included: bool,

    /// Whether this entry's canonical path is outside of the worktree.
    /// This means the entry is only accessible from the worktree root via a
    /// symlink.
    ///
    /// We only scan entries outside of the worktree once the symlinked
    /// directory is expanded. External entries are treated like gitignored
    /// entries in that they are not included in searches.
    pub is_external: bool,

    /// Whether this entry is considered to be a `.env` file.
    pub is_private: bool,
    /// The entry's size on disk, in bytes.
    pub size: u64,
    /// Characters of the full path, used for fuzzy matching.
    pub char_bag: CharBag,
    /// Whether the entry is a FIFO (named pipe).
    pub is_fifo: bool,
}
3457
/// The kind of a worktree [`Entry`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EntryKind {
    /// A directory whose contents have not been scanned.
    UnloadedDir,
    /// A directory that has been discovered but not yet fully scanned.
    PendingDir,
    /// A fully scanned directory.
    Dir,
    /// A regular file.
    File,
}
3465
/// The kind of change observed for a path in the worktree.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PathChange {
    /// A filesystem entry was created.
    Added,
    /// A filesystem entry was removed.
    Removed,
    /// A filesystem entry was updated.
    Updated,
    /// A filesystem entry was either updated or added. We don't know
    /// whether or not it already existed, because the path had not
    /// been loaded before the event.
    AddedOrUpdated,
    /// A filesystem entry was found during the initial scan of the worktree.
    Loaded,
}
3481
/// Describes a change to a git repository observed by the worktree scanner.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UpdatedGitRepository {
    /// ID of the repository's working directory.
    ///
    /// For a repo that's above the worktree root, this is the ID of the worktree root, and hence not unique.
    /// It's included here to aid the GitStore in detecting when a repository's working directory is renamed.
    pub work_directory_id: ProjectEntryId,
    /// Working-directory path before the change — NOTE(review): presumably
    /// `None` when the repository was newly added; confirm at call sites.
    pub old_work_directory_abs_path: Option<Arc<Path>>,
    /// Working-directory path after the change — NOTE(review): presumably
    /// `None` when the repository was removed; confirm at call sites.
    pub new_work_directory_abs_path: Option<Arc<Path>>,
    /// For a normal git repository checkout, the absolute path to the .git directory.
    /// For a worktree, the absolute path to the worktree's subdirectory inside the .git directory.
    pub dot_git_abs_path: Option<Arc<Path>>,
    pub repository_dir_abs_path: Option<Arc<Path>>,
    /// Absolute path of the repository's common dir (shared across git worktrees).
    pub common_dir_abs_path: Option<Arc<Path>>,
}

/// A batch of entry updates: `(path, id, kind of change)`.
pub type UpdatedEntriesSet = Arc<[(Arc<RelPath>, ProjectEntryId, PathChange)]>;
/// A batch of git repository updates.
pub type UpdatedGitRepositoriesSet = Arc<[UpdatedGitRepository]>;
3500
/// Traversal dimension tracking the maximal (rightmost) path seen so far.
#[derive(Clone, Debug)]
pub struct PathProgress<'a> {
    pub max_path: &'a RelPath,
}

/// A sum-tree summary that pairs an inner item summary `S` with the maximal
/// path covered by the summarized subtree.
#[derive(Clone, Debug)]
pub struct PathSummary<S> {
    pub max_path: Arc<RelPath>,
    pub item_summary: S,
}
3511
impl<S: Summary> Summary for PathSummary<S> {
    type Context<'a> = S::Context<'a>;

    fn zero(cx: Self::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty().into(),
            item_summary: S::zero(cx),
        }
    }

    fn add_summary(&mut self, rhs: &Self, cx: Self::Context<'_>) {
        // Items are ordered by path, so the right-hand summary always holds
        // the larger max_path.
        self.max_path = rhs.max_path.clone();
        self.item_summary.add_summary(&rhs.item_summary, cx);
    }
}
3527
// Dimension impls let cursors over `PathSummary`-summarized trees track the
// maximal path (borrowed or owned) and accumulated git status.

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathProgress<'a> {
    fn zero(_: <PathSummary<S> as Summary>::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty(),
        }
    }

    fn add_summary(
        &mut self,
        summary: &'a PathSummary<S>,
        _: <PathSummary<S> as Summary>::Context<'_>,
    ) {
        self.max_path = summary.max_path.as_ref()
    }
}

impl<'a> sum_tree::Dimension<'a, PathSummary<GitSummary>> for GitSummary {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<GitSummary>, _: ()) {
        *self += summary.item_summary
    }
}

impl<'a>
    sum_tree::SeekTarget<'a, PathSummary<GitSummary>, Dimensions<TraversalProgress<'a>, GitSummary>>
    for PathTarget<'_>
{
    fn cmp(
        &self,
        cursor_location: &Dimensions<TraversalProgress<'a>, GitSummary>,
        _: (),
    ) -> Ordering {
        // Seeking compares only against the path component of the location.
        self.cmp_path(cursor_location.0.max_path)
    }
}

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathKey {
    fn zero(_: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.0 = summary.max_path.clone();
    }
}

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for TraversalProgress<'a> {
    fn zero(_cx: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.max_path = summary.max_path.as_ref();
    }
}
3586
3587impl Entry {
3588 fn new(
3589 path: Arc<RelPath>,
3590 metadata: &fs::Metadata,
3591 id: ProjectEntryId,
3592 root_char_bag: CharBag,
3593 canonical_path: Option<Arc<Path>>,
3594 ) -> Self {
3595 let char_bag = char_bag_for_path(root_char_bag, &path);
3596 Self {
3597 id,
3598 kind: if metadata.is_dir {
3599 EntryKind::PendingDir
3600 } else {
3601 EntryKind::File
3602 },
3603 path,
3604 inode: metadata.inode,
3605 mtime: Some(metadata.mtime),
3606 size: metadata.len,
3607 canonical_path,
3608 is_ignored: false,
3609 is_hidden: false,
3610 is_always_included: false,
3611 is_external: false,
3612 is_private: false,
3613 char_bag,
3614 is_fifo: metadata.is_fifo,
3615 }
3616 }
3617
3618 pub fn is_created(&self) -> bool {
3619 self.mtime.is_some()
3620 }
3621
3622 pub fn is_dir(&self) -> bool {
3623 self.kind.is_dir()
3624 }
3625
3626 pub fn is_file(&self) -> bool {
3627 self.kind.is_file()
3628 }
3629}
3630
3631impl EntryKind {
3632 pub fn is_dir(&self) -> bool {
3633 matches!(
3634 self,
3635 EntryKind::Dir | EntryKind::PendingDir | EntryKind::UnloadedDir
3636 )
3637 }
3638
3639 pub fn is_unloaded(&self) -> bool {
3640 matches!(self, EntryKind::UnloadedDir)
3641 }
3642
3643 pub fn is_file(&self) -> bool {
3644 matches!(self, EntryKind::File)
3645 }
3646}
3647
3648impl sum_tree::Item for Entry {
3649 type Summary = EntrySummary;
3650
3651 fn summary(&self, _cx: ()) -> Self::Summary {
3652 let non_ignored_count = if (self.is_ignored || self.is_external) && !self.is_always_included
3653 {
3654 0
3655 } else {
3656 1
3657 };
3658 let file_count;
3659 let non_ignored_file_count;
3660 if self.is_file() {
3661 file_count = 1;
3662 non_ignored_file_count = non_ignored_count;
3663 } else {
3664 file_count = 0;
3665 non_ignored_file_count = 0;
3666 }
3667
3668 EntrySummary {
3669 max_path: self.path.clone(),
3670 count: 1,
3671 non_ignored_count,
3672 file_count,
3673 non_ignored_file_count,
3674 }
3675 }
3676}
3677
impl sum_tree::KeyedItem for Entry {
    type Key = PathKey;

    // Entries are keyed (and therefore ordered) by their relative path.
    fn key(&self) -> Self::Key {
        PathKey(self.path.clone())
    }
}

/// Aggregate statistics for a subtree of [`Entry`] items.
#[derive(Clone, Debug)]
pub struct EntrySummary {
    /// The maximal (rightmost) path in the subtree.
    max_path: Arc<RelPath>,
    /// Total number of entries.
    count: usize,
    /// Entries that are neither gitignored nor external (or always included).
    non_ignored_count: usize,
    /// Number of file entries.
    file_count: usize,
    /// File entries that are neither gitignored nor external.
    non_ignored_file_count: usize,
}

impl Default for EntrySummary {
    fn default() -> Self {
        Self {
            max_path: Arc::from(RelPath::empty()),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}
3706
impl sum_tree::ContextLessSummary for EntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    fn add_summary(&mut self, rhs: &Self) {
        // Entries are ordered by path, so the right-hand side's max_path wins;
        // the counters simply accumulate.
        self.max_path = rhs.max_path.clone();
        self.count += rhs.count;
        self.non_ignored_count += rhs.non_ignored_count;
        self.file_count += rhs.file_count;
        self.non_ignored_file_count += rhs.non_ignored_file_count;
    }
}
3720
/// An entry keyed by its [`ProjectEntryId`], enabling id → path lookups.
#[derive(Clone, Debug)]
struct PathEntry {
    id: ProjectEntryId,
    path: Arc<RelPath>,
    is_ignored: bool,
    // Scan counter value when this entry was last written — see
    // `Snapshot::scan_id` usage in the scanner.
    scan_id: usize,
}

impl sum_tree::Item for PathEntry {
    type Summary = PathEntrySummary;

    fn summary(&self, _cx: ()) -> Self::Summary {
        PathEntrySummary { max_id: self.id }
    }
}

impl sum_tree::KeyedItem for PathEntry {
    type Key = ProjectEntryId;

    fn key(&self) -> Self::Key {
        self.id
    }
}

/// Summary for [`PathEntry`] subtrees: the maximum entry id they contain.
#[derive(Clone, Debug, Default)]
struct PathEntrySummary {
    max_id: ProjectEntryId,
}

impl sum_tree::ContextLessSummary for PathEntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &Self) {
        // Items are ordered by id, so the right-hand summary holds the max.
        self.max_id = summary.max_id;
    }
}

impl<'a> sum_tree::Dimension<'a, PathEntrySummary> for ProjectEntryId {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathEntrySummary, _: ()) {
        *self = summary.max_id;
    }
}
3769
/// Sum-tree key ordering [`Entry`] items by relative path.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PathKey(pub Arc<RelPath>);

impl Default for PathKey {
    fn default() -> Self {
        Self(RelPath::empty().into())
    }
}

impl<'a> sum_tree::Dimension<'a, EntrySummary> for PathKey {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.0 = summary.max_path.clone();
    }
}
3788
/// Scans a worktree's filesystem in the background, keeping the shared
/// snapshot inside `state` up to date.
struct BackgroundScanner {
    /// Mutable scanner state, guarded for concurrent access.
    state: async_lock::Mutex<BackgroundScannerState>,
    fs: Arc<dyn Fs>,
    /// Whether the underlying filesystem is case-sensitive.
    fs_case_sensitive: bool,
    /// Channel for reporting scan progress and results back to the worktree.
    status_updates_tx: UnboundedSender<ScanState>,
    executor: BackgroundExecutor,
    /// Incoming requests to rescan specific paths.
    scan_requests_rx: channel::Receiver<ScanRequest>,
    /// Incoming requests to load everything under a path prefix.
    path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
    /// Source of fresh entry ids, shared with the worktree.
    next_entry_id: Arc<AtomicUsize>,
    /// Current stage of the scanner's lifecycle (see `run`).
    phase: BackgroundScannerPhase,
    watcher: Arc<dyn Watcher>,
    settings: WorktreeSettings,
    share_private_files: bool,
}

/// Lifecycle stage of the [`BackgroundScanner`].
#[derive(Copy, Clone, PartialEq)]
enum BackgroundScannerPhase {
    /// Performing the first full scan of the worktree.
    InitialScan,
    /// Handling FS events that arrived while the initial scan was running.
    EventsReceivedDuringInitialScan,
    /// Steady state: processing FS events as they arrive.
    Events,
}
3810
3811impl BackgroundScanner {
    /// Main loop of the scanner: discovers any ancestor git repository and
    /// gitignore files, performs the initial scan, then processes scan
    /// requests, path-prefix requests, FS events, and global gitignore
    /// changes until the channels close or the worktree is dropped.
    async fn run(&mut self, mut fs_events_rx: Pin<Box<dyn Send + Stream<Item = Vec<PathEvent>>>>) {
        let root_abs_path;
        let scanning_enabled;
        {
            let state = self.state.lock().await;
            root_abs_path = state.snapshot.abs_path.clone();
            scanning_enabled = state.scanning_enabled;
        }

        // If the worktree root does not contain a git repository, then find
        // the git repository in an ancestor directory. Find any gitignore files
        // in ancestor directories.
        let repo = if scanning_enabled {
            let (ignores, exclude, repo) =
                discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await;
            self.state
                .lock()
                .await
                .snapshot
                .ignores_by_parent_abs_path
                .extend(ignores);
            if let Some(exclude) = exclude {
                self.state
                    .lock()
                    .await
                    .snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .insert(root_abs_path.as_path().into(), (exclude, false));
            }

            repo
        } else {
            None
        };

        let containing_git_repository = if let Some((ancestor_dot_git, work_directory)) = repo
            && scanning_enabled
        {
            maybe!(async {
                self.state
                    .lock()
                    .await
                    .insert_git_repository_for_path(
                        work_directory,
                        ancestor_dot_git.clone().into(),
                        self.fs.as_ref(),
                        self.watcher.as_ref(),
                    )
                    .await
                    .log_err()?;
                Some(ancestor_dot_git)
            })
            .await
        } else {
            None
        };

        log::trace!("containing git repository: {containing_git_repository:?}");

        // Load and watch the user's global gitignore file, if one exists.
        let global_gitignore_file = paths::global_gitignore_path();
        let mut global_gitignore_events = if let Some(global_gitignore_path) =
            &global_gitignore_file
            && scanning_enabled
        {
            let is_file = self.fs.is_file(&global_gitignore_path).await;
            self.state.lock().await.snapshot.global_gitignore = if is_file {
                build_gitignore(global_gitignore_path, self.fs.as_ref())
                    .await
                    .ok()
                    .map(Arc::new)
            } else {
                None
            };
            if is_file {
                self.fs
                    .watch(global_gitignore_path, FS_WATCH_LATENCY)
                    .await
                    .0
            } else {
                Box::pin(futures::stream::pending())
            }
        } else {
            self.state.lock().await.snapshot.global_gitignore = None;
            Box::pin(futures::stream::pending())
        };

        // Seed the scan queue with the worktree root (unless it is ignored
        // or scanning is disabled).
        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        {
            let mut state = self.state.lock().await;
            state.snapshot.scan_id += 1;
            if let Some(mut root_entry) = state.snapshot.root_entry().cloned() {
                let ignore_stack = state
                    .snapshot
                    .ignore_stack_for_abs_path(root_abs_path.as_path(), true, self.fs.as_ref())
                    .await;
                if ignore_stack.is_abs_path_ignored(root_abs_path.as_path(), true) {
                    root_entry.is_ignored = true;
                    let mut root_entry = root_entry.clone();
                    state.reuse_entry_id(&mut root_entry);
                    state
                        .insert_entry(root_entry, self.fs.as_ref(), self.watcher.as_ref())
                        .await;
                }
                if root_entry.is_dir() && state.scanning_enabled {
                    state
                        .enqueue_scan_dir(
                            root_abs_path.as_path().into(),
                            &root_entry,
                            &scan_job_tx,
                            self.fs.as_ref(),
                        )
                        .await;
                }
            }
        };

        // Perform an initial scan of the directory.
        drop(scan_job_tx);
        self.scan_dirs(true, scan_job_rx).await;
        {
            let mut state = self.state.lock().await;
            state.snapshot.completed_scan_id = state.snapshot.scan_id;
        }

        self.send_status_update(false, SmallVec::new(), &[]).await;

        // Process any FS events that occurred while performing the initial scan.
        // For these events, update events cannot be as precise, because we didn't
        // have the previous state loaded yet.
        self.phase = BackgroundScannerPhase::EventsReceivedDuringInitialScan;
        if let Poll::Ready(Some(mut paths)) = futures::poll!(fs_events_rx.next()) {
            while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                paths.extend(more_paths);
            }
            self.process_events(
                paths
                    .into_iter()
                    .filter(|event| event.kind.is_some())
                    .collect(),
            )
            .await;
        }
        if let Some(abs_path) = containing_git_repository {
            self.process_events(vec![PathEvent {
                path: abs_path,
                kind: Some(fs::PathEventKind::Changed),
            }])
            .await;
        }

        // Continue processing events until the worktree is dropped.
        self.phase = BackgroundScannerPhase::Events;

        loop {
            select_biased! {
                // Process any path refresh requests from the worktree. Prioritize
                // these before handling changes reported by the filesystem.
                request = self.next_scan_request().fuse() => {
                    let Ok(request) = request else { break };
                    if !self.process_scan_request(request, false).await {
                        return;
                    }
                }

                path_prefix_request = self.path_prefixes_to_scan_rx.recv().fuse() => {
                    let Ok(request) = path_prefix_request else { break };
                    log::trace!("adding path prefix {:?}", request.path);

                    let did_scan = self.forcibly_load_paths(std::slice::from_ref(&request.path)).await;
                    if did_scan {
                        let abs_path =
                            {
                                let mut state = self.state.lock().await;
                                state.path_prefixes_to_scan.insert(request.path.clone());
                                state.snapshot.absolutize(&request.path)
                            };

                        if let Some(abs_path) = self.fs.canonicalize(&abs_path).await.log_err() {
                            self.process_events(vec![PathEvent {
                                path: abs_path,
                                kind: Some(fs::PathEventKind::Changed),
                            }])
                            .await;
                        }
                    }
                    self.send_status_update(false, request.done, &[]).await;
                }

                paths = fs_events_rx.next().fuse() => {
                    let Some(mut paths) = paths else { break };
                    // Drain any already-queued event batches so they are
                    // handled in a single pass.
                    while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                        paths.extend(more_paths);
                    }
                    self.process_events(paths.into_iter().filter(|event| event.kind.is_some()).collect()).await;
                }

                _ = global_gitignore_events.next().fuse() => {
                    if let Some(path) = &global_gitignore_file {
                        self.update_global_gitignore(&path).await;
                    }
                }
            }
        }
    }
4016
    /// Reloads the entries for an explicitly requested set of paths.
    ///
    /// Returns the result of sending the final status update; `false`
    /// signals the caller that updates can no longer be delivered.
    async fn process_scan_request(&self, mut request: ScanRequest, scanning: bool) -> bool {
        log::debug!("rescanning paths {:?}", request.relative_paths);

        request.relative_paths.sort_unstable();
        self.forcibly_load_paths(&request.relative_paths).await;

        let root_path = self.state.lock().await.snapshot.abs_path.clone();
        let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
        let root_canonical_path = match &root_canonical_path {
            Ok(path) => SanitizedPath::new(path),
            Err(err) => {
                log::error!("failed to canonicalize root path {root_path:?}: {err:#}");
                return true;
            }
        };
        // Resolve each relative path against the canonicalized root; an empty
        // relative path (no file name) denotes the root itself.
        let abs_paths = request
            .relative_paths
            .iter()
            .map(|path| {
                if path.file_name().is_some() {
                    root_canonical_path.as_path().join(path.as_std_path())
                } else {
                    root_canonical_path.as_path().to_path_buf()
                }
            })
            .collect::<Vec<_>>();

        {
            let mut state = self.state.lock().await;
            let is_idle = state.snapshot.completed_scan_id == state.snapshot.scan_id;
            state.snapshot.scan_id += 1;
            if is_idle {
                state.snapshot.completed_scan_id = state.snapshot.scan_id;
            }
        }

        self.reload_entries_for_paths(
            &root_path,
            &root_canonical_path,
            &request.relative_paths,
            abs_paths,
            None,
        )
        .await;

        self.send_status_update(scanning, request.done, &[]).await
    }
4064
    /// Processes a batch of filesystem events: dedups and filters them,
    /// reloads the affected entries, refreshes git repository state, and
    /// re-resolves ignore statuses, then broadcasts a status update.
    async fn process_events(&self, mut events: Vec<PathEvent>) {
        let root_path = self.state.lock().await.snapshot.abs_path.clone();
        let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
        let root_canonical_path = match &root_canonical_path {
            Ok(path) => SanitizedPath::new(path),
            Err(err) => {
                // Canonicalization failing may mean the root was renamed;
                // try to recover its current path from the open file handle.
                let new_path = self
                    .state
                    .lock()
                    .await
                    .snapshot
                    .root_file_handle
                    .clone()
                    .and_then(|handle| match handle.current_path(&self.fs) {
                        Ok(new_path) => Some(new_path),
                        Err(e) => {
                            log::error!("Failed to refresh worktree root path: {e:#}");
                            None
                        }
                    })
                    .map(|path| SanitizedPath::new_arc(&path))
                    .filter(|new_path| *new_path != root_path);

                if let Some(new_path) = new_path {
                    log::info!(
                        "root renamed from {:?} to {:?}",
                        root_path.as_path(),
                        new_path.as_path(),
                    );
                    self.status_updates_tx
                        .unbounded_send(ScanState::RootUpdated { new_path })
                        .ok();
                } else {
                    log::error!("root path could not be canonicalized: {err:#}");
                }
                return;
            }
        };

        // Certain directories may have FS changes, but do not lead to git data changes that Zed cares about.
        // Ignore these, to avoid Zed unnecessarily rescanning git metadata.
        let skipped_files_in_dot_git = [COMMIT_MESSAGE, INDEX_LOCK];
        let skipped_dirs_in_dot_git = [FSMONITOR_DAEMON, LFS_DIR];

        let mut relative_paths = Vec::with_capacity(events.len());
        let mut dot_git_abs_paths = Vec::new();
        let mut work_dirs_needing_exclude_update = Vec::new();
        // Sort, then collapse duplicate events and events for paths that are
        // covered by a following ancestor path, keeping `Rescan` kinds.
        events.sort_unstable_by(|left, right| left.path.cmp(&right.path));
        events.dedup_by(|left, right| {
            if left.path == right.path {
                if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
                    right.kind = left.kind;
                }
                true
            } else if left.path.starts_with(&right.path) {
                if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
                    right.kind = left.kind;
                }
                true
            } else {
                false
            }
        });
        {
            let snapshot = &self.state.lock().await.snapshot;

            // Indices of events to discard, tracked as coalesced ranges.
            let mut ranges_to_drop = SmallVec::<[Range<usize>; 4]>::new();

            fn skip_ix(ranges: &mut SmallVec<[Range<usize>; 4]>, ix: usize) {
                if let Some(last_range) = ranges.last_mut()
                    && last_range.end == ix
                {
                    last_range.end += 1;
                } else {
                    ranges.push(ix..ix + 1);
                }
            }

            for (ix, event) in events.iter().enumerate() {
                let abs_path = SanitizedPath::new(&event.path);

                let mut is_git_related = false;
                let mut dot_git_paths = None;

                // Find the nearest enclosing git directory, if any.
                for ancestor in abs_path.as_path().ancestors() {
                    if is_git_dir(ancestor, self.fs.as_ref()).await {
                        let path_in_git_dir = abs_path
                            .as_path()
                            .strip_prefix(ancestor)
                            .expect("stripping off the ancestor");
                        dot_git_paths = Some((ancestor.to_owned(), path_in_git_dir.to_owned()));
                        break;
                    }
                }

                if let Some((dot_git_abs_path, path_in_git_dir)) = dot_git_paths {
                    // We ignore `""` as well, as that is going to be the
                    // `.git` folder itself. We do not care about it, if
                    // there are changes within we will see them, we need
                    // this ignore to prevent us from accidentally observing
                    // the ignored created file due to the events not being
                    // empty after filtering.

                    let is_dot_git_changed = {
                        path_in_git_dir == Path::new("")
                            && event.kind == Some(PathEventKind::Changed)
                            && abs_path
                                .strip_prefix(root_canonical_path)
                                .ok()
                                .and_then(|it| RelPath::new(it, PathStyle::local()).ok())
                                .is_some_and(|it| {
                                    snapshot
                                        .entry_for_path(&it)
                                        .is_some_and(|entry| entry.kind == EntryKind::Dir)
                                })
                    };
                    let condition = skipped_files_in_dot_git.iter().any(|skipped| {
                        OsStr::new(skipped) == path_in_git_dir.as_path().as_os_str()
                    }) || skipped_dirs_in_dot_git
                        .iter()
                        .any(|skipped_git_subdir| path_in_git_dir.starts_with(skipped_git_subdir))
                        || is_dot_git_changed;
                    if condition {
                        log::debug!(
                            "ignoring event {abs_path:?} as it's in the .git directory among skipped files or directories"
                        );
                        skip_ix(&mut ranges_to_drop, ix);
                        continue;
                    }

                    is_git_related = true;
                    if !dot_git_abs_paths.contains(&dot_git_abs_path) {
                        dot_git_abs_paths.push(dot_git_abs_path);
                    }
                }

                let relative_path = if let Ok(path) = abs_path.strip_prefix(&root_canonical_path)
                    && let Ok(path) = RelPath::new(path, PathStyle::local())
                {
                    path
                } else {
                    if is_git_related {
                        log::debug!(
                            "ignoring event {abs_path:?}, since it's in git dir outside of root path {root_canonical_path:?}",
                        );
                    } else {
                        log::error!(
                            "ignoring event {abs_path:?} outside of root path {root_canonical_path:?}",
                        );
                    }
                    skip_ix(&mut ranges_to_drop, ix);
                    continue;
                };

                // Changes to a repository's `exclude` file require that
                // repo's ignore state to be recomputed.
                let absolute_path = abs_path.to_path_buf();
                if absolute_path.ends_with(Path::new(DOT_GIT).join(REPO_EXCLUDE)) {
                    if let Some(repository) = snapshot
                        .git_repositories
                        .values()
                        .find(|repo| repo.common_dir_abs_path.join(REPO_EXCLUDE) == absolute_path)
                    {
                        work_dirs_needing_exclude_update
                            .push(repository.work_directory_abs_path.clone());
                    }
                }

                // A changed `.gitignore` affects the status of every repo
                // whose working directory contains it.
                if abs_path.file_name() == Some(OsStr::new(GITIGNORE)) {
                    for (_, repo) in snapshot
                        .git_repositories
                        .iter()
                        .filter(|(_, repo)| repo.directory_contains(&relative_path))
                    {
                        if !dot_git_abs_paths.iter().any(|dot_git_abs_path| {
                            dot_git_abs_path == repo.common_dir_abs_path.as_ref()
                        }) {
                            dot_git_abs_paths.push(repo.common_dir_abs_path.to_path_buf());
                        }
                    }
                }

                let parent_dir_is_loaded = relative_path.parent().is_none_or(|parent| {
                    snapshot
                        .entry_for_path(parent)
                        .is_some_and(|entry| entry.kind == EntryKind::Dir)
                });
                if !parent_dir_is_loaded {
                    log::debug!("ignoring event {relative_path:?} within unloaded directory");
                    skip_ix(&mut ranges_to_drop, ix);
                    continue;
                }

                if self.settings.is_path_excluded(&relative_path) {
                    if !is_git_related {
                        log::debug!("ignoring FS event for excluded path {relative_path:?}");
                    }
                    skip_ix(&mut ranges_to_drop, ix);
                    continue;
                }

                relative_paths.push(EventRoot {
                    path: relative_path.into_arc(),
                    was_rescanned: matches!(event.kind, Some(fs::PathEventKind::Rescan)),
                });
            }

            // Drop in reverse so earlier ranges keep valid indices.
            for range_to_drop in ranges_to_drop.into_iter().rev() {
                events.drain(range_to_drop);
            }
        }

        if relative_paths.is_empty() && dot_git_abs_paths.is_empty() {
            return;
        }

        if !work_dirs_needing_exclude_update.is_empty() {
            let mut state = self.state.lock().await;
            for work_dir_abs_path in work_dirs_needing_exclude_update {
                if let Some((_, needs_update)) = state
                    .snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .get_mut(&work_dir_abs_path)
                {
                    *needs_update = true;
                }
            }
        }

        self.state.lock().await.snapshot.scan_id += 1;

        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        log::debug!(
            "received fs events {:?}",
            relative_paths
                .iter()
                .map(|event_root| &event_root.path)
                .collect::<Vec<_>>()
        );
        self.reload_entries_for_paths(
            &root_path,
            &root_canonical_path,
            &relative_paths
                .iter()
                .map(|event_root| event_root.path.clone())
                .collect::<Vec<_>>(),
            events
                .into_iter()
                .map(|event| event.path)
                .collect::<Vec<_>>(),
            Some(scan_job_tx.clone()),
        )
        .await;

        let affected_repo_roots = if !dot_git_abs_paths.is_empty() {
            self.update_git_repositories(dot_git_abs_paths).await
        } else {
            Vec::new()
        };

        {
            let mut ignores_to_update = self.ignores_needing_update().await;
            ignores_to_update.extend(affected_repo_roots);
            let ignores_to_update = self.order_ignores(ignores_to_update).await;
            let snapshot = self.state.lock().await.snapshot.clone();
            self.update_ignore_statuses_for_paths(scan_job_tx, snapshot, ignores_to_update)
                .await;
            self.scan_dirs(false, scan_job_rx).await;
        }

        {
            let mut state = self.state.lock().await;
            state.snapshot.completed_scan_id = state.snapshot.scan_id;
            for (_, entry) in mem::take(&mut state.removed_entries) {
                state.scanned_dirs.remove(&entry.id);
            }
        }
        self.send_status_update(false, SmallVec::new(), &relative_paths)
            .await;
    }
4343
    /// Rebuilds the cached global gitignore from `abs_path` and re-evaluates
    /// ignore statuses across the entire worktree.
    async fn update_global_gitignore(&self, abs_path: &Path) {
        // Parse the new global excludes file; on failure the cached global
        // gitignore is cleared (`None`).
        let ignore = build_gitignore(abs_path, self.fs.as_ref())
            .await
            .log_err()
            .map(Arc::new);
        let (prev_snapshot, ignore_stack, abs_path) = {
            let mut state = self.state.lock().await;
            state.snapshot.global_gitignore = ignore;
            let abs_path = state.snapshot.abs_path().clone();
            let ignore_stack = state
                .snapshot
                .ignore_stack_for_abs_path(&abs_path, true, self.fs.as_ref())
                .await;
            (state.snapshot.clone(), ignore_stack, abs_path)
        };
        // Recompute ignore statuses starting from the worktree root, scan any
        // directories that became unignored, then notify observers once.
        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        self.update_ignore_statuses_for_paths(
            scan_job_tx,
            prev_snapshot,
            vec![(abs_path, ignore_stack)],
        )
        .await;
        self.scan_dirs(false, scan_job_rx).await;
        self.send_status_update(false, SmallVec::new(), &[]).await;
    }
4369
    /// Ensures the given `paths` are loaded by scanning any unloaded ancestor
    /// directories synchronously (draining the scan jobs inline rather than
    /// handing them to the worker pool).
    ///
    /// Returns `true` if any of the paths required scanning.
    async fn forcibly_load_paths(&self, paths: &[Arc<RelPath>]) -> bool {
        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        {
            let mut state = self.state.lock().await;
            let root_path = state.snapshot.abs_path.clone();
            for path in paths {
                // Walk outward from each path; the first (deepest) unloaded
                // ancestor found becomes the root of an enqueued scan.
                for ancestor in path.ancestors() {
                    if let Some(entry) = state.snapshot.entry_for_path(ancestor)
                        && entry.kind == EntryKind::UnloadedDir
                    {
                        let abs_path = root_path.join(ancestor.as_std_path());
                        state
                            .enqueue_scan_dir(
                                abs_path.into(),
                                entry,
                                &scan_job_tx,
                                self.fs.as_ref(),
                            )
                            .await;
                        state.paths_to_scan.insert(path.clone());
                        break;
                    }
                }
            }
            // Close our sender so the drain loop below terminates once all
            // jobs (including recursively enqueued children) are processed.
            drop(scan_job_tx);
        }
        while let Ok(job) = scan_job_rx.recv().await {
            self.scan_dir(&job).await.log_err();
        }

        // Report (and reset) whether any work was actually scheduled.
        !mem::take(&mut self.state.lock().await.paths_to_scan).is_empty()
    }
4402
    /// Drains the scan-job queue using one low-priority worker per CPU,
    /// emitting periodic progress updates while `enable_progress_updates`
    /// is set.
    ///
    /// Returns immediately if the status channel is closed (the worktree was
    /// dropped). Workers also service incoming scan requests so that
    /// user-initiated refreshes take priority over the background crawl.
    async fn scan_dirs(
        &self,
        enable_progress_updates: bool,
        scan_jobs_rx: channel::Receiver<ScanJob>,
    ) {
        if self
            .status_updates_tx
            .unbounded_send(ScanState::Started)
            .is_err()
        {
            return;
        }

        let progress_update_count = AtomicUsize::new(0);
        self.executor
            .scoped_priority(Priority::Low, |scope| {
                for _ in 0..self.executor.num_cpus() {
                    scope.spawn(async {
                        let mut last_progress_update_count = 0;
                        let progress_update_timer = self.progress_timer(enable_progress_updates).fuse();
                        futures::pin_mut!(progress_update_timer);

                        loop {
                            select_biased! {
                                // Process any path refresh requests before moving on to process
                                // the scan queue, so that user operations are prioritized.
                                request = self.next_scan_request().fuse() => {
                                    let Ok(request) = request else { break };
                                    if !self.process_scan_request(request, true).await {
                                        return;
                                    }
                                }

                                // Send periodic progress updates to the worktree. Use an atomic counter
                                // to ensure that only one of the workers sends a progress update after
                                // the update interval elapses.
                                _ = progress_update_timer => {
                                    match progress_update_count.compare_exchange(
                                        last_progress_update_count,
                                        last_progress_update_count + 1,
                                        SeqCst,
                                        SeqCst
                                    ) {
                                        Ok(_) => {
                                            last_progress_update_count += 1;
                                            self.send_status_update(true, SmallVec::new(), &[])
                                                .await;
                                        }
                                        Err(count) => {
                                            // Another worker won the race; adopt its count so we
                                            // don't send a duplicate update for this interval.
                                            last_progress_update_count = count;
                                        }
                                    }
                                    progress_update_timer.set(self.progress_timer(enable_progress_updates).fuse());
                                }

                                // Recursively load directories from the file system.
                                job = scan_jobs_rx.recv().fuse() => {
                                    let Ok(job) = job else { break };
                                    // Errors are only logged for the worktree root (empty path);
                                    // NOTE(review): confirm silently dropping subdirectory scan
                                    // errors is intentional.
                                    if let Err(err) = self.scan_dir(&job).await
                                        && job.path.is_empty() {
                                        log::error!("error scanning directory {:?}: {}", job.abs_path, err);
                                    }
                                }
                            }
                        }
                    });
                }
            })
            .await;
    }
4473
    /// Publishes the current snapshot, along with a diff against the
    /// previously published one, to the status channel.
    ///
    /// Returns `false` when the channel is closed (the receiver was
    /// dropped), signaling callers to stop scanning.
    async fn send_status_update(
        &self,
        scanning: bool,
        barrier: SmallVec<[barrier::Sender; 1]>,
        event_roots: &[EventRoot],
    ) -> bool {
        let mut state = self.state.lock().await;
        // While a scan is still in progress, skip updates that would carry no
        // changes; a final (non-scanning) update is always sent.
        if state.changed_paths.is_empty() && event_roots.is_empty() && scanning {
            return true;
        }

        let merged_event_roots = merge_event_roots(&state.changed_paths, event_roots);

        // Diff the last published snapshot against the current one, then
        // advance the published snapshot to the current state.
        let new_snapshot = state.snapshot.clone();
        let old_snapshot = mem::replace(&mut state.prev_snapshot, new_snapshot.snapshot.clone());
        let changes = build_diff(
            self.phase,
            &old_snapshot,
            &new_snapshot,
            &merged_event_roots,
        );
        state.changed_paths.clear();

        self.status_updates_tx
            .unbounded_send(ScanState::Updated {
                snapshot: new_snapshot,
                changes,
                scanning,
                barrier,
            })
            .is_ok()
    }
4506
    /// Reads the children of `job.abs_path`, builds entries for them,
    /// inserts them into the snapshot, and enqueues scan jobs for any
    /// subdirectories that should be loaded.
    async fn scan_dir(&self, job: &ScanJob) -> Result<()> {
        let root_abs_path;
        let root_char_bag;
        {
            let snapshot = &self.state.lock().await.snapshot;
            if self.settings.is_path_excluded(&job.path) {
                // NOTE(review): logged at error level although this is an
                // expected skip — confirm the level is intentional.
                log::error!("skipping excluded directory {:?}", job.path);
                return Ok(());
            }
            log::trace!("scanning directory {:?}", job.path);
            root_abs_path = snapshot.abs_path().clone();
            root_char_bag = snapshot.root_char_bag;
        }

        let next_entry_id = self.next_entry_id.clone();
        let mut ignore_stack = job.ignore_stack.clone();
        let mut new_ignore = None;
        // Canonicalized lazily below, only if a symlink child is found.
        let mut root_canonical_path = None;
        let mut new_entries: Vec<Entry> = Vec::new();
        let mut new_jobs: Vec<Option<ScanJob>> = Vec::new();
        let mut child_paths = self
            .fs
            .read_dir(&job.abs_path)
            .await?
            .filter_map(|entry| async {
                match entry {
                    Ok(entry) => Some(entry),
                    Err(error) => {
                        log::error!("error processing entry {:?}", error);
                        None
                    }
                }
            })
            .collect::<Vec<_>>()
            .await;

        // Ensure that .git and .gitignore are processed first.
        swap_to_front(&mut child_paths, GITIGNORE);
        swap_to_front(&mut child_paths, DOT_GIT);

        // A .git child marks this directory as a repository work-dir root
        // for ignore-evaluation purposes.
        if let Some(path) = child_paths.first()
            && path.ends_with(DOT_GIT)
        {
            ignore_stack.repo_root = Some(job.abs_path.clone());
        }

        for child_abs_path in child_paths {
            let child_abs_path: Arc<Path> = child_abs_path.into();
            let child_name = child_abs_path.file_name().unwrap();
            // Skip children whose names are not valid unicode relative paths.
            let Some(child_path) = child_name
                .to_str()
                .and_then(|name| Some(job.path.join(RelPath::unix(name).ok()?)))
            else {
                continue;
            };

            if child_name == DOT_GIT {
                let mut state = self.state.lock().await;
                state
                    .insert_git_repository(
                        child_path.clone(),
                        self.fs.as_ref(),
                        self.watcher.as_ref(),
                    )
                    .await;
            } else if child_name == GITIGNORE {
                // Layer this directory's .gitignore onto the inherited stack
                // before any sibling entries are evaluated (see ordering above).
                match build_gitignore(&child_abs_path, self.fs.as_ref()).await {
                    Ok(ignore) => {
                        let ignore = Arc::new(ignore);
                        ignore_stack = ignore_stack
                            .append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
                        new_ignore = Some(ignore);
                    }
                    Err(error) => {
                        log::error!(
                            "error loading .gitignore file {:?} - {:?}",
                            child_name,
                            error
                        );
                    }
                }
            }

            if self.settings.is_path_excluded(&child_path) {
                log::debug!("skipping excluded child entry {child_path:?}");
                self.state
                    .lock()
                    .await
                    .remove_path(&child_path, self.watcher.as_ref());
                continue;
            }

            let child_metadata = match self.fs.metadata(&child_abs_path).await {
                Ok(Some(metadata)) => metadata,
                Ok(None) => continue,
                Err(err) => {
                    log::error!("error processing {:?}: {err:#}", child_abs_path.display());
                    continue;
                }
            };

            let mut child_entry = Entry::new(
                child_path.clone(),
                &child_metadata,
                ProjectEntryId::new(&next_entry_id),
                root_char_bag,
                None,
            );

            if job.is_external {
                // Children of external directories are external too.
                child_entry.is_external = true;
            } else if child_metadata.is_symlink {
                let canonical_path = match self.fs.canonicalize(&child_abs_path).await {
                    Ok(path) => path,
                    Err(err) => {
                        log::error!("error reading target of symlink {child_abs_path:?}: {err:#}",);
                        continue;
                    }
                };

                // lazily canonicalize the root path in order to determine if
                // symlinks point outside of the worktree.
                let root_canonical_path = match &root_canonical_path {
                    Some(path) => path,
                    None => match self.fs.canonicalize(&root_abs_path).await {
                        Ok(path) => root_canonical_path.insert(path),
                        Err(err) => {
                            log::error!("error canonicalizing root {:?}: {:?}", root_abs_path, err);
                            continue;
                        }
                    },
                };

                if !canonical_path.starts_with(root_canonical_path) {
                    child_entry.is_external = true;
                }

                child_entry.canonical_path = Some(canonical_path.into());
            }

            if child_entry.is_dir() {
                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, true);
                child_entry.is_always_included =
                    self.settings.is_path_always_included(&child_path, true);

                // Avoid recursing until crash in the case of a recursive symlink
                if job.ancestor_inodes.contains(&child_entry.inode) {
                    // Placeholder keeps new_jobs index-aligned with the
                    // directory entries (see job_ix loop below).
                    new_jobs.push(None);
                } else {
                    let mut ancestor_inodes = job.ancestor_inodes.clone();
                    ancestor_inodes.insert(child_entry.inode);

                    new_jobs.push(Some(ScanJob {
                        abs_path: child_abs_path.clone(),
                        path: child_path,
                        is_external: child_entry.is_external,
                        ignore_stack: if child_entry.is_ignored {
                            IgnoreStack::all()
                        } else {
                            ignore_stack.clone()
                        },
                        ancestor_inodes,
                        scan_queue: job.scan_queue.clone(),
                    }));
                }
            } else {
                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
                child_entry.is_always_included =
                    self.settings.is_path_always_included(&child_path, false);
            }

            {
                let relative_path = job
                    .path
                    .join(RelPath::unix(child_name.to_str().unwrap()).unwrap());
                if self.is_path_private(&relative_path) {
                    log::debug!("detected private file: {relative_path:?}");
                    child_entry.is_private = true;
                }
                if self.settings.is_path_hidden(&relative_path) {
                    log::debug!("detected hidden file: {relative_path:?}");
                    child_entry.is_hidden = true;
                }
            }

            new_entries.push(child_entry);
        }

        let mut state = self.state.lock().await;

        // Identify any subdirectories that should not be scanned.
        // `job_ix` tracks position in `new_jobs`, which holds one element per
        // directory entry in `new_entries` (in order).
        let mut job_ix = 0;
        for entry in &mut new_entries {
            state.reuse_entry_id(entry);
            if entry.is_dir() {
                if state.should_scan_directory(entry) {
                    job_ix += 1;
                } else {
                    log::debug!("defer scanning directory {:?}", entry.path);
                    entry.kind = EntryKind::UnloadedDir;
                    new_jobs.remove(job_ix);
                }
            }
            if entry.is_always_included {
                state
                    .snapshot
                    .always_included_entries
                    .push(entry.path.clone());
            }
        }

        state.populate_dir(job.path.clone(), new_entries, new_ignore);
        self.watcher.add(job.abs_path.as_ref()).log_err();

        // `None` placeholders (recursive symlinks) are discarded by flatten().
        for new_job in new_jobs.into_iter().flatten() {
            job.scan_queue
                .try_send(new_job)
                .expect("channel is unbounded");
        }

        Ok(())
    }
4729
4730 /// All list arguments should be sorted before calling this function
4731 async fn reload_entries_for_paths(
4732 &self,
4733 root_abs_path: &SanitizedPath,
4734 root_canonical_path: &SanitizedPath,
4735 relative_paths: &[Arc<RelPath>],
4736 abs_paths: Vec<PathBuf>,
4737 scan_queue_tx: Option<Sender<ScanJob>>,
4738 ) {
4739 // grab metadata for all requested paths
4740 let metadata = futures::future::join_all(
4741 abs_paths
4742 .iter()
4743 .map(|abs_path| async move {
4744 let metadata = self.fs.metadata(abs_path).await?;
4745 if let Some(metadata) = metadata {
4746 let canonical_path = self.fs.canonicalize(abs_path).await?;
4747
4748 // If we're on a case-insensitive filesystem (default on macOS), we want
4749 // to only ignore metadata for non-symlink files if their absolute-path matches
4750 // the canonical-path.
4751 // Because if not, this might be a case-only-renaming (`mv test.txt TEST.TXT`)
4752 // and we want to ignore the metadata for the old path (`test.txt`) so it's
4753 // treated as removed.
4754 if !self.fs_case_sensitive && !metadata.is_symlink {
4755 let canonical_file_name = canonical_path.file_name();
4756 let file_name = abs_path.file_name();
4757 if canonical_file_name != file_name {
4758 return Ok(None);
4759 }
4760 }
4761
4762 anyhow::Ok(Some((metadata, SanitizedPath::new_arc(&canonical_path))))
4763 } else {
4764 Ok(None)
4765 }
4766 })
4767 .collect::<Vec<_>>(),
4768 )
4769 .await;
4770
4771 let mut new_ancestor_repo = if relative_paths.iter().any(|path| path.is_empty()) {
4772 Some(discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await)
4773 } else {
4774 None
4775 };
4776
4777 let mut state = self.state.lock().await;
4778 let doing_recursive_update = scan_queue_tx.is_some();
4779
4780 // Remove any entries for paths that no longer exist or are being recursively
4781 // refreshed. Do this before adding any new entries, so that renames can be
4782 // detected regardless of the order of the paths.
4783 for (path, metadata) in relative_paths.iter().zip(metadata.iter()) {
4784 if matches!(metadata, Ok(None)) || doing_recursive_update {
4785 state.remove_path(path, self.watcher.as_ref());
4786 }
4787 }
4788
4789 for (path, metadata) in relative_paths.iter().zip(metadata.into_iter()) {
4790 let abs_path: Arc<Path> = root_abs_path.join(path.as_std_path()).into();
4791 match metadata {
4792 Ok(Some((metadata, canonical_path))) => {
4793 let ignore_stack = state
4794 .snapshot
4795 .ignore_stack_for_abs_path(&abs_path, metadata.is_dir, self.fs.as_ref())
4796 .await;
4797 let is_external = !canonical_path.starts_with(&root_canonical_path);
4798 let entry_id = state.entry_id_for(self.next_entry_id.as_ref(), path, &metadata);
4799 let mut fs_entry = Entry::new(
4800 path.clone(),
4801 &metadata,
4802 entry_id,
4803 state.snapshot.root_char_bag,
4804 if metadata.is_symlink {
4805 Some(canonical_path.as_path().to_path_buf().into())
4806 } else {
4807 None
4808 },
4809 );
4810
4811 let is_dir = fs_entry.is_dir();
4812 fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir);
4813 fs_entry.is_external = is_external;
4814 fs_entry.is_private = self.is_path_private(path);
4815 fs_entry.is_always_included =
4816 self.settings.is_path_always_included(path, is_dir);
4817 fs_entry.is_hidden = self.settings.is_path_hidden(path);
4818
4819 if let (Some(scan_queue_tx), true) = (&scan_queue_tx, is_dir) {
4820 if state.should_scan_directory(&fs_entry)
4821 || (fs_entry.path.is_empty()
4822 && abs_path.file_name() == Some(OsStr::new(DOT_GIT)))
4823 {
4824 state
4825 .enqueue_scan_dir(
4826 abs_path,
4827 &fs_entry,
4828 scan_queue_tx,
4829 self.fs.as_ref(),
4830 )
4831 .await;
4832 } else {
4833 fs_entry.kind = EntryKind::UnloadedDir;
4834 }
4835 }
4836
4837 state
4838 .insert_entry(fs_entry.clone(), self.fs.as_ref(), self.watcher.as_ref())
4839 .await;
4840
4841 if path.is_empty()
4842 && let Some((ignores, exclude, repo)) = new_ancestor_repo.take()
4843 {
4844 log::trace!("updating ancestor git repository");
4845 state.snapshot.ignores_by_parent_abs_path.extend(ignores);
4846 if let Some((ancestor_dot_git, work_directory)) = repo {
4847 if let Some(exclude) = exclude {
4848 let work_directory_abs_path = self
4849 .state
4850 .lock()
4851 .await
4852 .snapshot
4853 .work_directory_abs_path(&work_directory);
4854
4855 state
4856 .snapshot
4857 .repo_exclude_by_work_dir_abs_path
4858 .insert(work_directory_abs_path.into(), (exclude, false));
4859 }
4860 state
4861 .insert_git_repository_for_path(
4862 work_directory,
4863 ancestor_dot_git.into(),
4864 self.fs.as_ref(),
4865 self.watcher.as_ref(),
4866 )
4867 .await
4868 .log_err();
4869 }
4870 }
4871 }
4872 Ok(None) => {
4873 self.remove_repo_path(path.clone(), &mut state.snapshot);
4874 }
4875 Err(err) => {
4876 log::error!("error reading file {abs_path:?} on event: {err:#}");
4877 }
4878 }
4879 }
4880
4881 util::extend_sorted(
4882 &mut state.changed_paths,
4883 relative_paths.iter().cloned(),
4884 usize::MAX,
4885 Ord::cmp,
4886 );
4887 }
4888
4889 fn remove_repo_path(&self, path: Arc<RelPath>, snapshot: &mut LocalSnapshot) -> Option<()> {
4890 if !path.components().any(|component| component == DOT_GIT)
4891 && let Some(local_repo) = snapshot.local_repo_for_work_directory_path(&path)
4892 {
4893 let id = local_repo.work_directory_id;
4894 log::debug!("remove repo path: {:?}", path);
4895 snapshot.git_repositories.remove(&id);
4896 return Some(());
4897 }
4898
4899 Some(())
4900 }
4901
    /// Re-evaluates ignore statuses beneath each root in
    /// `ignores_to_update`, processing the work queue concurrently on all
    /// CPUs.
    ///
    /// Directories that become unignored are forwarded to `scan_job_tx` for
    /// loading. Statuses are computed against `prev_snapshot` so the result
    /// can be diffed afterwards.
    async fn update_ignore_statuses_for_paths(
        &self,
        scan_job_tx: Sender<ScanJob>,
        prev_snapshot: LocalSnapshot,
        ignores_to_update: Vec<(Arc<Path>, IgnoreStack)>,
    ) {
        let (ignore_queue_tx, ignore_queue_rx) = channel::unbounded();
        {
            // Seed the queue; each job carries a clone of the sender so it can
            // enqueue its child directories recursively.
            for (parent_abs_path, ignore_stack) in ignores_to_update {
                ignore_queue_tx
                    .send_blocking(UpdateIgnoreStatusJob {
                        abs_path: parent_abs_path,
                        ignore_stack,
                        ignore_queue: ignore_queue_tx.clone(),
                        scan_queue: scan_job_tx.clone(),
                    })
                    .unwrap();
            }
        }
        // Drop our sender so the channel closes once all jobs (and their
        // recursively-enqueued children) have been drained.
        drop(ignore_queue_tx);

        self.executor
            .scoped(|scope| {
                for _ in 0..self.executor.num_cpus() {
                    scope.spawn(async {
                        loop {
                            select_biased! {
                                // Process any path refresh requests before moving on to process
                                // the queue of ignore statuses.
                                request = self.next_scan_request().fuse() => {
                                    let Ok(request) = request else { break };
                                    if !self.process_scan_request(request, true).await {
                                        return;
                                    }
                                }

                                // Recursively process directories whose ignores have changed.
                                job = ignore_queue_rx.recv().fuse() => {
                                    let Ok(job) = job else { break };
                                    self.update_ignore_status(job, &prev_snapshot).await;
                                }
                            }
                        }
                    });
                }
            })
            .await;
    }
4950
    /// Collects the absolute paths whose ignore rules changed and need to be
    /// re-applied, refreshing cached repository `info/exclude` files and
    /// pruning stale cache entries along the way.
    async fn ignores_needing_update(&self) -> Vec<Arc<Path>> {
        let mut ignores_to_update = Vec::new();
        let mut excludes_to_load: Vec<(Arc<Path>, PathBuf)> = Vec::new();

        // First pass: collect updates and drop stale entries without awaiting.
        {
            let snapshot = &mut self.state.lock().await.snapshot;
            let abs_path = snapshot.abs_path.clone();
            let mut repo_exclude_keys_to_remove: Vec<Arc<Path>> = Vec::new();

            for (work_dir_abs_path, (_, needs_update)) in
                snapshot.repo_exclude_by_work_dir_abs_path.iter_mut()
            {
                // Find the repository (if any) still rooted at this work dir.
                let repository = snapshot
                    .git_repositories
                    .iter()
                    .find(|(_, repo)| &repo.work_directory_abs_path == work_dir_abs_path);

                if *needs_update {
                    *needs_update = false;
                    ignores_to_update.push(work_dir_abs_path.clone());

                    // Remember where to reload this repo's info/exclude from
                    // (file reads happen after the lock is released).
                    if let Some((_, repository)) = repository {
                        let exclude_abs_path = repository.common_dir_abs_path.join(REPO_EXCLUDE);
                        excludes_to_load.push((work_dir_abs_path.clone(), exclude_abs_path));
                    }
                }

                // The repository is gone — drop its cached exclude below.
                if repository.is_none() {
                    repo_exclude_keys_to_remove.push(work_dir_abs_path.clone());
                }
            }

            for key in repo_exclude_keys_to_remove {
                snapshot.repo_exclude_by_work_dir_abs_path.remove(&key);
            }

            snapshot
                .ignores_by_parent_abs_path
                .retain(|parent_abs_path, (_, needs_update)| {
                    if let Ok(parent_path) = parent_abs_path.strip_prefix(abs_path.as_path())
                        && let Some(parent_path) =
                            RelPath::new(&parent_path, PathStyle::local()).log_err()
                    {
                        if *needs_update {
                            *needs_update = false;
                            if snapshot.snapshot.entry_for_path(&parent_path).is_some() {
                                ignores_to_update.push(parent_abs_path.clone());
                            }
                        }

                        // Evict cached gitignores whose file no longer exists.
                        let ignore_path = parent_path.join(RelPath::unix(GITIGNORE).unwrap());
                        if snapshot.snapshot.entry_for_path(&ignore_path).is_none() {
                            return false;
                        }
                    }
                    true
                });
        }

        // Load gitignores asynchronously (outside the lock)
        let mut loaded_excludes: Vec<(Arc<Path>, Arc<Gitignore>)> = Vec::new();
        for (work_dir_abs_path, exclude_abs_path) in excludes_to_load {
            if let Ok(current_exclude) = build_gitignore(&exclude_abs_path, self.fs.as_ref()).await
            {
                loaded_excludes.push((work_dir_abs_path, Arc::new(current_exclude)));
            }
        }

        // Second pass: apply updates.
        if !loaded_excludes.is_empty() {
            let snapshot = &mut self.state.lock().await.snapshot;

            for (work_dir_abs_path, exclude) in loaded_excludes {
                if let Some((existing_exclude, _)) = snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .get_mut(&work_dir_abs_path)
                {
                    *existing_exclude = exclude;
                }
            }
        }

        ignores_to_update
    }
5036
5037 async fn order_ignores(&self, mut ignores: Vec<Arc<Path>>) -> Vec<(Arc<Path>, IgnoreStack)> {
5038 let fs = self.fs.clone();
5039 let snapshot = self.state.lock().await.snapshot.clone();
5040 ignores.sort_unstable();
5041 let mut ignores_to_update = ignores.into_iter().peekable();
5042
5043 let mut result = vec![];
5044 while let Some(parent_abs_path) = ignores_to_update.next() {
5045 while ignores_to_update
5046 .peek()
5047 .map_or(false, |p| p.starts_with(&parent_abs_path))
5048 {
5049 ignores_to_update.next().unwrap();
5050 }
5051 let ignore_stack = snapshot
5052 .ignore_stack_for_abs_path(&parent_abs_path, true, fs.as_ref())
5053 .await;
5054 result.push((parent_abs_path, ignore_stack));
5055 }
5056
5057 result
5058 }
5059
    /// Applies `job`'s ignore stack to the direct children of its directory,
    /// recording snapshot edits for entries whose ignored status changed and
    /// enqueuing recursive jobs for child directories.
    async fn update_ignore_status(&self, job: UpdateIgnoreStatusJob, snapshot: &LocalSnapshot) {
        log::trace!("update ignore status {:?}", job.abs_path);

        let mut ignore_stack = job.ignore_stack;
        // Layer this directory's own cached .gitignore (if any) on top of the
        // inherited stack before evaluating the children.
        if let Some((ignore, _)) = snapshot.ignores_by_parent_abs_path.get(&job.abs_path) {
            ignore_stack =
                ignore_stack.append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
        }

        let mut entries_by_id_edits = Vec::new();
        let mut entries_by_path_edits = Vec::new();
        // Convert the job's absolute path back to a worktree-relative path;
        // bail (with a logged error) if it lies outside the worktree.
        let Some(path) = job
            .abs_path
            .strip_prefix(snapshot.abs_path.as_path())
            .map_err(|_| {
                anyhow::anyhow!(
                    "Failed to strip prefix '{}' from path '{}'",
                    snapshot.abs_path.as_path().display(),
                    job.abs_path.display()
                )
            })
            .log_err()
        else {
            return;
        };

        let Some(path) = RelPath::new(&path, PathStyle::local()).log_err() else {
            return;
        };

        // A .git directory here marks a repository root for ignore purposes.
        if let Ok(Some(metadata)) = self.fs.metadata(&job.abs_path.join(DOT_GIT)).await
            && metadata.is_dir
        {
            ignore_stack.repo_root = Some(job.abs_path.clone());
        }

        for mut entry in snapshot.child_entries(&path).cloned() {
            let was_ignored = entry.is_ignored;
            let abs_path: Arc<Path> = snapshot.absolutize(&entry.path).into();
            entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, entry.is_dir());

            if entry.is_dir() {
                let child_ignore_stack = if entry.is_ignored {
                    IgnoreStack::all()
                } else {
                    ignore_stack.clone()
                };

                // Scan any directories that were previously ignored and weren't previously scanned.
                if was_ignored && !entry.is_ignored && entry.kind.is_unloaded() {
                    let state = self.state.lock().await;
                    if state.should_scan_directory(&entry) {
                        state
                            .enqueue_scan_dir(
                                abs_path.clone(),
                                &entry,
                                &job.scan_queue,
                                self.fs.as_ref(),
                            )
                            .await;
                    }
                }

                // Recurse into the child directory via the ignore queue.
                job.ignore_queue
                    .send(UpdateIgnoreStatusJob {
                        abs_path: abs_path.clone(),
                        ignore_stack: child_ignore_stack,
                        ignore_queue: job.ignore_queue.clone(),
                        scan_queue: job.scan_queue.clone(),
                    })
                    .await
                    .unwrap();
            }

            if entry.is_ignored != was_ignored {
                // Mirror the change into the by-id tree, bumping its scan id.
                let mut path_entry = snapshot.entries_by_id.get(&entry.id, ()).unwrap().clone();
                path_entry.scan_id = snapshot.scan_id;
                path_entry.is_ignored = entry.is_ignored;
                entries_by_id_edits.push(Edit::Insert(path_entry));
                entries_by_path_edits.push(Edit::Insert(entry));
            }
        }

        let state = &mut self.state.lock().await;
        // Record the changed paths (kept sorted) so the next status update
        // includes these entries in its diff.
        for edit in &entries_by_path_edits {
            if let Edit::Insert(entry) = edit
                && let Err(ix) = state.changed_paths.binary_search(&entry.path)
            {
                state.changed_paths.insert(ix, entry.path.clone());
            }
        }

        state
            .snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        state.snapshot.entries_by_id.edit(entries_by_id_edits, ());
    }
5158
    /// Reloads state for the repositories whose `.git` directories received
    /// events: newly-discovered repositories are inserted, known ones have
    /// their git-dir scan id bumped, and repositories whose `.git` no longer
    /// exists are removed.
    ///
    /// Returns the work-directory roots affected by additions/removals so
    /// ignore statuses beneath them can be recomputed.
    async fn update_git_repositories(&self, dot_git_paths: Vec<PathBuf>) -> Vec<Arc<Path>> {
        log::trace!("reloading repositories: {dot_git_paths:?}");
        let mut state = self.state.lock().await;
        let scan_id = state.snapshot.scan_id;
        let mut affected_repo_roots = Vec::new();
        for dot_git_dir in dot_git_paths {
            // Match against both the common dir and the repository dir so git
            // worktrees (where the two differ) are recognized.
            let existing_repository_entry =
                state
                    .snapshot
                    .git_repositories
                    .iter()
                    .find_map(|(_, repo)| {
                        let dot_git_dir = SanitizedPath::new(&dot_git_dir);
                        if SanitizedPath::new(repo.common_dir_abs_path.as_ref()) == dot_git_dir
                            || SanitizedPath::new(repo.repository_dir_abs_path.as_ref())
                                == dot_git_dir
                        {
                            Some(repo.clone())
                        } else {
                            None
                        }
                    });

            match existing_repository_entry {
                None => {
                    let Ok(relative) = dot_git_dir.strip_prefix(state.snapshot.abs_path()) else {
                        debug_panic!(
                            "update_git_repositories called with .git directory outside the worktree root"
                        );
                        return Vec::new();
                    };
                    affected_repo_roots.push(dot_git_dir.parent().unwrap().into());
                    state
                        .insert_git_repository(
                            RelPath::new(relative, PathStyle::local())
                                .unwrap()
                                .into_arc(),
                            self.fs.as_ref(),
                            self.watcher.as_ref(),
                        )
                        .await;
                }
                Some(local_repository) => {
                    // Known repository: just record that its git dir changed
                    // during this scan.
                    state.snapshot.git_repositories.update(
                        &local_repository.work_directory_id,
                        |entry| {
                            entry.git_dir_scan_id = scan_id;
                        },
                    );
                }
            };
        }

        // Remove any git repositories whose .git entry no longer exists.
        let snapshot = &mut state.snapshot;
        let mut ids_to_preserve = HashSet::default();
        for (&work_directory_id, entry) in snapshot.git_repositories.iter() {
            let exists_in_snapshot =
                snapshot
                    .entry_for_id(work_directory_id)
                    .is_some_and(|entry| {
                        snapshot
                            .entry_for_path(&entry.path.join(RelPath::unix(DOT_GIT).unwrap()))
                            .is_some()
                    });

            // Keep the repo if its .git is in the snapshot, or still exists
            // on disk (e.g. repositories above the worktree root).
            if exists_in_snapshot
                || matches!(
                    self.fs.metadata(&entry.common_dir_abs_path).await,
                    Ok(Some(_))
                )
            {
                ids_to_preserve.insert(work_directory_id);
            }
        }

        snapshot
            .git_repositories
            .retain(|work_directory_id, entry| {
                let preserve = ids_to_preserve.contains(work_directory_id);
                if !preserve {
                    affected_repo_roots.push(entry.dot_git_abs_path.parent().unwrap().into());
                    snapshot
                        .repo_exclude_by_work_dir_abs_path
                        .remove(&entry.work_directory_abs_path);
                }
                preserve
            });

        affected_repo_roots
    }
5250
5251 async fn progress_timer(&self, running: bool) {
5252 if !running {
5253 return futures::future::pending().await;
5254 }
5255
5256 #[cfg(feature = "test-support")]
5257 if self.fs.is_fake() {
5258 return self.executor.simulate_random_delay().await;
5259 }
5260
5261 self.executor.timer(FS_WATCH_LATENCY).await
5262 }
5263
5264 fn is_path_private(&self, path: &RelPath) -> bool {
5265 !self.share_private_files && self.settings.is_path_private(path)
5266 }
5267
5268 async fn next_scan_request(&self) -> Result<ScanRequest> {
5269 let mut request = self.scan_requests_rx.recv().await?;
5270 while let Ok(next_request) = self.scan_requests_rx.try_recv() {
5271 request.relative_paths.extend(next_request.relative_paths);
5272 request.done.extend(next_request.done);
5273 }
5274 Ok(request)
5275 }
5276}
5277
/// Walks up from `root_abs_path` looking for a git repository that contains
/// the worktree.
///
/// Returns the gitignores found in ancestor directories, the repository's
/// `info/exclude` rules (if readable), and — when a containing repository is
/// found above the worktree — its canonical `.git` path paired with the
/// work directory descriptor.
async fn discover_ancestor_git_repo(
    fs: Arc<dyn Fs>,
    root_abs_path: &SanitizedPath,
) -> (
    HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    Option<Arc<Gitignore>>,
    Option<(PathBuf, WorkDirectory)>,
) {
    let mut exclude = None;
    let mut ignores = HashMap::default();
    for (index, ancestor) in root_abs_path.as_path().ancestors().enumerate() {
        if index != 0 {
            if ancestor == paths::home_dir() {
                // Unless $HOME is itself the worktree root, don't consider it as a
                // containing git repository---expensive and likely unwanted.
                break;
            } else if let Ok(ignore) = build_gitignore(&ancestor.join(GITIGNORE), fs.as_ref()).await
            {
                ignores.insert(ancestor.into(), (ignore.into(), false));
            }
        }

        let ancestor_dot_git = ancestor.join(DOT_GIT);
        log::trace!("considering ancestor: {ancestor_dot_git:?}");
        // Check whether the directory or file called `.git` exists (in the
        // case of worktrees it's a file.)
        if fs
            .metadata(&ancestor_dot_git)
            .await
            .is_ok_and(|metadata| metadata.is_some())
        {
            if index != 0 {
                // We canonicalize, since the FS events use the canonicalized path.
                if let Some(ancestor_dot_git) = fs.canonicalize(&ancestor_dot_git).await.log_err() {
                    let location_in_repo = root_abs_path
                        .as_path()
                        .strip_prefix(ancestor)
                        .unwrap()
                        .into();
                    log::info!("inserting parent git repo for this worktree: {location_in_repo:?}");
                    // We associate the external git repo with our root folder and
                    // also mark where in the git repo the root folder is located.
                    return (
                        ignores,
                        exclude,
                        Some((
                            ancestor_dot_git,
                            WorkDirectory::AboveProject {
                                absolute_path: ancestor.into(),
                                location_in_repo,
                            },
                        )),
                    );
                };
            }

            // The worktree root is itself the repository root; pick up its
            // info/exclude rules but report no ancestor repository.
            let repo_exclude_abs_path = ancestor_dot_git.join(REPO_EXCLUDE);
            if let Ok(repo_exclude) = build_gitignore(&repo_exclude_abs_path, fs.as_ref()).await {
                exclude = Some(Arc::new(repo_exclude));
            }

            // Reached root of git repository.
            break;
        }
    }

    (ignores, exclude, None)
}
5346
5347fn merge_event_roots(changed_paths: &[Arc<RelPath>], event_roots: &[EventRoot]) -> Vec<EventRoot> {
5348 let mut merged_event_roots = Vec::with_capacity(changed_paths.len() + event_roots.len());
5349 let mut changed_paths = changed_paths.iter().peekable();
5350 let mut event_roots = event_roots.iter().peekable();
5351 while let (Some(path), Some(event_root)) = (changed_paths.peek(), event_roots.peek()) {
5352 match path.cmp(&&event_root.path) {
5353 Ordering::Less => {
5354 merged_event_roots.push(EventRoot {
5355 path: (*changed_paths.next().expect("peeked changed path")).clone(),
5356 was_rescanned: false,
5357 });
5358 }
5359 Ordering::Equal => {
5360 merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5361 changed_paths.next();
5362 }
5363 Ordering::Greater => {
5364 merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5365 }
5366 }
5367 }
5368 merged_event_roots.extend(changed_paths.map(|path| EventRoot {
5369 path: path.clone(),
5370 was_rescanned: false,
5371 }));
5372 merged_event_roots.extend(event_roots.cloned());
5373 merged_event_roots
5374}
5375
/// Computes the set of path changes between two snapshots, restricted to the
/// subtrees rooted at `event_roots`.
///
/// Both snapshots' entry lists are walked in parallel (both are sorted by
/// path) and each difference is classified as `Added`/`Removed`/`Updated`/
/// `Loaded`, with special handling for events received during the initial
/// scan, when added-vs-updated cannot be distinguished.
fn build_diff(
    phase: BackgroundScannerPhase,
    old_snapshot: &Snapshot,
    new_snapshot: &Snapshot,
    event_roots: &[EventRoot],
) -> UpdatedEntriesSet {
    use BackgroundScannerPhase::*;
    use PathChange::{Added, AddedOrUpdated, Loaded, Removed, Updated};

    // Identify which paths have changed. Use the known set of changed
    // parent paths to optimize the search.
    let mut changes = Vec::new();

    let mut old_paths = old_snapshot.entries_by_path.cursor::<PathKey>(());
    let mut new_paths = new_snapshot.entries_by_path.cursor::<PathKey>(());
    let mut last_newly_loaded_dir_path = None;
    old_paths.next();
    new_paths.next();
    for event_root in event_roots {
        let path = PathKey(event_root.path.clone());
        // Catch both cursors up to the current event root if they are behind.
        if old_paths.item().is_some_and(|e| e.path < path.0) {
            old_paths.seek_forward(&path, Bias::Left);
        }
        if new_paths.item().is_some_and(|e| e.path < path.0) {
            new_paths.seek_forward(&path, Bias::Left);
        }
        loop {
            match (old_paths.item(), new_paths.item()) {
                (Some(old_entry), Some(new_entry)) => {
                    // Both cursors have moved beyond this event root's
                    // subtree; continue with the next root.
                    if old_entry.path > path.0
                        && new_entry.path > path.0
                        && !old_entry.path.starts_with(&path.0)
                        && !new_entry.path.starts_with(&path.0)
                    {
                        break;
                    }

                    match Ord::cmp(&old_entry.path, &new_entry.path) {
                        // Path present only in the old snapshot: removed.
                        Ordering::Less => {
                            changes.push((old_entry.path.clone(), old_entry.id, Removed));
                            old_paths.next();
                        }
                        Ordering::Equal => {
                            if phase == EventsReceivedDuringInitialScan {
                                if old_entry.id != new_entry.id {
                                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                }
                                // If the worktree was not fully initialized when this event was generated,
                                // we can't know whether this entry was added during the scan or whether
                                // it was merely updated.
                                changes.push((
                                    new_entry.path.clone(),
                                    new_entry.id,
                                    AddedOrUpdated,
                                ));
                            } else if old_entry.id != new_entry.id {
                                // Same path but a different entry id: the old
                                // entry was replaced with a new one.
                                changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                changes.push((new_entry.path.clone(), new_entry.id, Added));
                            } else if old_entry != new_entry {
                                if old_entry.kind.is_unloaded() {
                                    // This directory's contents were just
                                    // loaded; descendants encountered below
                                    // are reported as Loaded, not Added.
                                    last_newly_loaded_dir_path = Some(&new_entry.path);
                                    changes.push((new_entry.path.clone(), new_entry.id, Loaded));
                                } else {
                                    changes.push((new_entry.path.clone(), new_entry.id, Updated));
                                }
                            } else if event_root.was_rescanned {
                                // Entry is unchanged, but this root was force-
                                // rescanned, so still report it as updated.
                                changes.push((new_entry.path.clone(), new_entry.id, Updated));
                            }
                            old_paths.next();
                            new_paths.next();
                        }
                        // Path present only in the new snapshot: added, or
                        // newly loaded if we're inside a just-loaded dir.
                        Ordering::Greater => {
                            let is_newly_loaded = phase == InitialScan
                                || last_newly_loaded_dir_path
                                    .as_ref()
                                    .is_some_and(|dir| new_entry.path.starts_with(dir));
                            changes.push((
                                new_entry.path.clone(),
                                new_entry.id,
                                if is_newly_loaded { Loaded } else { Added },
                            ));
                            new_paths.next();
                        }
                    }
                }
                // Old snapshot has trailing entries: all removed.
                (Some(old_entry), None) => {
                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                    old_paths.next();
                }
                // New snapshot has trailing entries: all added or loaded.
                (None, Some(new_entry)) => {
                    let is_newly_loaded = phase == InitialScan
                        || last_newly_loaded_dir_path
                            .as_ref()
                            .is_some_and(|dir| new_entry.path.starts_with(dir));
                    changes.push((
                        new_entry.path.clone(),
                        new_entry.id,
                        if is_newly_loaded { Loaded } else { Added },
                    ));
                    new_paths.next();
                }
                (None, None) => break,
            }
        }
    }

    changes.into()
}
5484
/// Moves the entry whose file name equals `file` to the front of
/// `child_paths`, preserving the relative order of all other entries.
///
/// Paths that have no file name (such as paths ending in `..`) are skipped
/// instead of panicking, and if no entry matches, the vector is unchanged.
fn swap_to_front(child_paths: &mut Vec<PathBuf>, file: &str) {
    let position = child_paths
        .iter()
        .position(|path| path.file_name() == Some(OsStr::new(file)));
    if let Some(position) = position {
        // A single right-rotation of the prefix moves the match to the front
        // with one shift instead of the two a remove+insert pair would do.
        child_paths[..=position].rotate_right(1);
    }
}
5494
5495fn char_bag_for_path(root_char_bag: CharBag, path: &RelPath) -> CharBag {
5496 let mut result = root_char_bag;
5497 result.extend(path.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
5498 result
5499}
5500
/// A queued request for the background scanner to scan one directory.
#[derive(Debug)]
struct ScanJob {
    /// Absolute path of the directory to scan.
    abs_path: Arc<Path>,
    /// The same directory, relative to the worktree root.
    path: Arc<RelPath>,
    /// Ignore state inherited from ancestor directories.
    ignore_stack: IgnoreStack,
    /// Channel on which newly discovered subdirectory jobs are enqueued.
    scan_queue: Sender<ScanJob>,
    // NOTE(review): presumably the inodes of ancestor directories, used to
    // detect filesystem cycles through symlinks — confirm against the scanner.
    ancestor_inodes: TreeSet<u64>,
    is_external: bool,
}
5510
/// A queued request to recompute ignore statuses beneath `abs_path`.
/// NOTE(review): presumably issued when gitignore contents change — confirm
/// against the scanner's ignore-update path.
struct UpdateIgnoreStatusJob {
    abs_path: Arc<Path>,
    /// Ignore state in effect at `abs_path`.
    ignore_stack: IgnoreStack,
    /// Channel for enqueueing further ignore-status jobs.
    ignore_queue: Sender<UpdateIgnoreStatusJob>,
    /// Channel for enqueueing directory scans that become necessary.
    scan_queue: Sender<ScanJob>,
}
5517
/// Test-support extensions for worktree handles.
pub trait WorktreeModelHandle {
    /// Waits until all pending file-system events for the worktree's directory
    /// have been processed (see the impl for details).
    #[cfg(feature = "test-support")]
    fn flush_fs_events<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;

    /// Like `flush_fs_events`, but flushes events for the root repository's
    /// `.git` folder, which may live outside the worktree.
    #[cfg(feature = "test-support")]
    fn flush_fs_events_in_root_git_repository<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;
}
5531
impl WorktreeModelHandle for Entity<Worktree> {
    // The worktree's FS event stream sometimes delivers "redundant" events for FS changes that
    // occurred before the worktree was constructed. These events can cause the worktree to perform
    // extra directory scans, and emit extra scan-state notifications.
    //
    // This function mutates the worktree's directory and waits for those mutations to be picked up,
    // to ensure that all redundant FS events have already been processed.
    #[cfg(feature = "test-support")]
    fn flush_fs_events<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()> {
        let file_name = "fs-event-sentinel";

        let tree = self.clone();
        let (fs, root_path) = self.read_with(cx, |tree, _| {
            let tree = tree.as_local().unwrap();
            (tree.fs.clone(), tree.abs_path.clone())
        });

        async move {
            // Subscribe to events BEFORE creating the file to avoid race condition
            // where events fire before subscription is set up
            let mut events = cx.events(&tree);

            fs.create_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Check if condition is already met before waiting for events
            let file_exists = || {
                tree.read_with(cx, |tree, _| {
                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
                        .is_some()
                })
            };

            // Use select to avoid blocking indefinitely if events are delayed
            while !file_exists() {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            fs.remove_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Check if condition is already met before waiting for events
            let file_gone = || {
                tree.read_with(cx, |tree, _| {
                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
                        .is_none()
                })
            };

            // Use select to avoid blocking indefinitely if events are delayed
            while !file_gone() {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            // Wait for any scans triggered by the sentinel churn to finish.
            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
                .await;
        }
        .boxed_local()
    }

    // This function is similar to flush_fs_events, except that it waits for events to be flushed in
    // the .git folder of the root repository.
    // The reason for its existence is that a repository's .git folder might live *outside* of the
    // worktree and thus its FS events might go through a different path.
    // In order to flush those, we need to create artificial events in the .git folder and wait
    // for the repository to be reloaded.
    #[cfg(feature = "test-support")]
    fn flush_fs_events_in_root_git_repository<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()> {
        let file_name = "fs-event-sentinel";

        let tree = self.clone();
        let (fs, root_path, mut git_dir_scan_id) = self.read_with(cx, |tree, _| {
            let tree = tree.as_local().unwrap();
            // Pick the repository with the minimal work directory path —
            // presumably the root repository.
            let local_repo_entry = tree
                .git_repositories
                .values()
                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
                .unwrap();
            (
                tree.fs.clone(),
                local_repo_entry.common_dir_abs_path.clone(),
                local_repo_entry.git_dir_scan_id,
            )
        });

        // Returns true (and records the new scan id) whenever the root
        // repository's git dir has been rescanned since the last call.
        let scan_id_increased = |tree: &mut Worktree, git_dir_scan_id: &mut usize| {
            let tree = tree.as_local().unwrap();
            let local_repo_entry = tree
                .git_repositories
                .values()
                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
                .unwrap();

            if local_repo_entry.git_dir_scan_id > *git_dir_scan_id {
                *git_dir_scan_id = local_repo_entry.git_dir_scan_id;
                true
            } else {
                false
            }
        };

        async move {
            // Subscribe to events BEFORE creating the file to avoid race condition
            // where events fire before subscription is set up
            let mut events = cx.events(&tree);

            fs.create_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Use select to avoid blocking indefinitely if events are delayed
            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            fs.remove_file(&root_path.join(file_name), Default::default())
                .await
                .unwrap();

            // Use select to avoid blocking indefinitely if events are delayed
            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
                futures::select_biased! {
                    _ = events.next() => {}
                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
                }
            }

            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
                .await;
        }
        .boxed_local()
    }
}
5683
/// Running totals accumulated while seeking through `entries_by_path`; used as
/// a `sum_tree` dimension so traversals can seek by filtered entry index.
#[derive(Clone, Debug)]
struct TraversalProgress<'a> {
    /// The maximal path reached so far.
    max_path: &'a RelPath,
    /// Total number of entries.
    count: usize,
    /// Number of non-ignored entries.
    non_ignored_count: usize,
    /// Number of file entries.
    file_count: usize,
    /// Number of non-ignored file entries.
    non_ignored_file_count: usize,
}
5692
5693impl TraversalProgress<'_> {
5694 fn count(&self, include_files: bool, include_dirs: bool, include_ignored: bool) -> usize {
5695 match (include_files, include_dirs, include_ignored) {
5696 (true, true, true) => self.count,
5697 (true, true, false) => self.non_ignored_count,
5698 (true, false, true) => self.file_count,
5699 (true, false, false) => self.non_ignored_file_count,
5700 (false, true, true) => self.count - self.file_count,
5701 (false, true, false) => self.non_ignored_count - self.non_ignored_file_count,
5702 (false, false, _) => 0,
5703 }
5704 }
5705}
5706
impl<'a> sum_tree::Dimension<'a, EntrySummary> for TraversalProgress<'a> {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    // Fold another subtree summary into the running totals, tracking the
    // rightmost path reached.
    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.max_path = summary.max_path.as_ref();
        self.count += summary.count;
        self.non_ignored_count += summary.non_ignored_count;
        self.file_count += summary.file_count;
        self.non_ignored_file_count += summary.non_ignored_file_count;
    }
}
5720
impl Default for TraversalProgress<'_> {
    fn default() -> Self {
        Self {
            // The empty path is the minimal value, so any summary advances it.
            max_path: RelPath::empty(),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}
5732
/// A cursor-based iterator over a snapshot's entries in path order, filtered
/// by the file/directory/ignored include flags.
#[derive(Debug)]
pub struct Traversal<'a> {
    snapshot: &'a Snapshot,
    cursor: sum_tree::Cursor<'a, 'static, Entry, TraversalProgress<'a>>,
    include_ignored: bool,
    include_files: bool,
    include_dirs: bool,
}
5741
impl<'a> Traversal<'a> {
    /// Creates a traversal positioned at the first entry at or after
    /// `start_path` that matches the include flags.
    fn new(
        snapshot: &'a Snapshot,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        start_path: &RelPath,
    ) -> Self {
        let mut cursor = snapshot.entries_by_path.cursor(());
        cursor.seek(&TraversalTarget::path(start_path), Bias::Left);
        let mut traversal = Self {
            snapshot,
            cursor,
            include_files,
            include_dirs,
            include_ignored,
        };
        // If the entry the cursor landed on is excluded by the include flags
        // (it contributes nothing to the filtered offset), advance to the
        // first entry that does count.
        if traversal.end_offset() == traversal.start_offset() {
            traversal.next();
        }
        traversal
    }

    /// Advances to the next matching entry.
    pub fn advance(&mut self) -> bool {
        self.advance_by(1)
    }

    /// Advances by `count` matching entries.
    pub fn advance_by(&mut self, count: usize) -> bool {
        self.cursor.seek_forward(
            &TraversalTarget::Count {
                count: self.end_offset() + count,
                include_dirs: self.include_dirs,
                include_files: self.include_files,
                include_ignored: self.include_ignored,
            },
            Bias::Left,
        )
    }

    /// Skips past the current entry and all of its descendants, stopping at
    /// the next entry outside that subtree which matches the include flags.
    pub fn advance_to_sibling(&mut self) -> bool {
        while let Some(entry) = self.cursor.item() {
            self.cursor
                .seek_forward(&TraversalTarget::successor(&entry.path), Bias::Left);
            if let Some(entry) = self.cursor.item()
                && (self.include_files || !entry.is_file())
                && (self.include_dirs || !entry.is_dir())
                && (self.include_ignored || !entry.is_ignored || entry.is_always_included)
            {
                return true;
            }
        }
        false
    }

    /// Repositions the traversal at the current entry's parent directory.
    /// Returns false when there is no current entry or it has no parent.
    pub fn back_to_parent(&mut self) -> bool {
        let Some(parent_path) = self.cursor.item().and_then(|entry| entry.path.parent()) else {
            return false;
        };
        self.cursor
            .seek(&TraversalTarget::path(parent_path), Bias::Left)
    }

    /// The entry the traversal is currently positioned at, if any.
    pub fn entry(&self) -> Option<&'a Entry> {
        self.cursor.item()
    }

    pub fn snapshot(&self) -> &'a Snapshot {
        self.snapshot
    }

    /// Filtered index of the current entry.
    pub fn start_offset(&self) -> usize {
        self.cursor
            .start()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }

    /// Filtered index just past the current entry.
    pub fn end_offset(&self) -> usize {
        self.cursor
            .end()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }
}
5824
5825impl<'a> Iterator for Traversal<'a> {
5826 type Item = &'a Entry;
5827
5828 fn next(&mut self) -> Option<Self::Item> {
5829 if let Some(item) = self.entry() {
5830 self.advance();
5831 Some(item)
5832 } else {
5833 None
5834 }
5835 }
5836}
5837
/// A path-based seek target: either an exact path, or the "successor" of a
/// path — the first entry that is not inside the subtree rooted at it.
#[derive(Debug, Clone, Copy)]
pub enum PathTarget<'a> {
    Path(&'a RelPath),
    Successor(&'a RelPath),
}
5843
5844impl PathTarget<'_> {
5845 fn cmp_path(&self, other: &RelPath) -> Ordering {
5846 match self {
5847 PathTarget::Path(path) => path.cmp(&other),
5848 PathTarget::Successor(path) => {
5849 if other.starts_with(path) {
5850 Ordering::Greater
5851 } else {
5852 Ordering::Equal
5853 }
5854 }
5855 }
5856 }
5857}
5858
// Allow a `PathTarget` to drive cursor seeks over trees tracking either
// `PathProgress` or `TraversalProgress`.
impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, PathProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &PathProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}

impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, TraversalProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}
5870
/// Seek target for a `Traversal` cursor: either a path-based position, or an
/// absolute index among the entries matching the given include flags.
#[derive(Debug)]
enum TraversalTarget<'a> {
    Path(PathTarget<'a>),
    Count {
        count: usize,
        include_files: bool,
        include_ignored: bool,
        include_dirs: bool,
    },
}
5881
5882impl<'a> TraversalTarget<'a> {
5883 fn path(path: &'a RelPath) -> Self {
5884 Self::Path(PathTarget::Path(path))
5885 }
5886
5887 fn successor(path: &'a RelPath) -> Self {
5888 Self::Path(PathTarget::Successor(path))
5889 }
5890
5891 fn cmp_progress(&self, progress: &TraversalProgress) -> Ordering {
5892 match self {
5893 TraversalTarget::Path(path) => path.cmp_path(progress.max_path),
5894 TraversalTarget::Count {
5895 count,
5896 include_files,
5897 include_dirs,
5898 include_ignored,
5899 } => Ord::cmp(
5900 count,
5901 &progress.count(*include_files, *include_dirs, *include_ignored),
5902 ),
5903 }
5904 }
5905}
5906
// Allow a `TraversalTarget` to drive cursor seeks over entry trees summarized
// by either `EntrySummary` or a bare path summary.
impl<'a> SeekTarget<'a, EntrySummary, TraversalProgress<'a>> for TraversalTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}

impl<'a> SeekTarget<'a, PathSummary<sum_tree::NoSummary>, TraversalProgress<'a>>
    for TraversalTarget<'_>
{
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}
5920
/// Filter flags for child-entry iteration, mirroring the include flags of
/// `Traversal`.
pub struct ChildEntriesOptions {
    pub include_files: bool,
    pub include_dirs: bool,
    pub include_ignored: bool,
}
5926
/// Iterator over the direct children of `parent_path`, backed by a
/// `Traversal` that is advanced sibling-to-sibling.
pub struct ChildEntriesIter<'a> {
    parent_path: &'a RelPath,
    traversal: Traversal<'a>,
}
5931
5932impl<'a> Iterator for ChildEntriesIter<'a> {
5933 type Item = &'a Entry;
5934
5935 fn next(&mut self) -> Option<Self::Item> {
5936 if let Some(item) = self.traversal.entry()
5937 && item.path.starts_with(self.parent_path)
5938 {
5939 self.traversal.advance_to_sibling();
5940 return Some(item);
5941 }
5942 None
5943 }
5944}
5945
impl<'a> From<&'a Entry> for proto::Entry {
    /// Serializes an entry for transmission over RPC.
    fn from(entry: &'a Entry) -> Self {
        Self {
            id: entry.id.to_proto(),
            is_dir: entry.is_dir(),
            path: entry.path.as_ref().to_proto(),
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_external: entry.is_external,
            is_fifo: entry.is_fifo,
            size: Some(entry.size),
            // Canonical paths are host paths, sent as lossy UTF-8 strings.
            canonical_path: entry
                .canonical_path
                .as_ref()
                .map(|path| path.to_string_lossy().into_owned()),
        }
    }
}
5966
impl TryFrom<(&CharBag, &PathMatcher, proto::Entry)> for Entry {
    type Error = anyhow::Error;

    /// Reconstructs an `Entry` from its protobuf representation.
    ///
    /// `root_char_bag` seeds the entry's fuzzy-match char bag, and
    /// `always_included` decides whether the path is always included despite
    /// ignore rules. Fails when the proto carries an invalid relative path.
    fn try_from(
        (root_char_bag, always_included, entry): (&CharBag, &PathMatcher, proto::Entry),
    ) -> Result<Self> {
        let kind = if entry.is_dir {
            EntryKind::Dir
        } else {
            EntryKind::File
        };

        let path =
            RelPath::from_proto(&entry.path).context("invalid relative path in proto message")?;
        let char_bag = char_bag_for_path(*root_char_bag, &path);
        let is_always_included = always_included.is_match(&path);
        Ok(Entry {
            id: ProjectEntryId::from_proto(entry.id),
            kind,
            path,
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            size: entry.size.unwrap_or(0),
            canonical_path: entry
                .canonical_path
                .map(|path_string| Arc::from(PathBuf::from(path_string))),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_always_included,
            is_external: entry.is_external,
            // Privacy is not carried over the wire; defaults to non-private.
            is_private: false,
            char_bag,
            is_fifo: entry.is_fifo,
        })
    }
}
6003
/// A numeric identifier for a worktree entry, allocated from a monotonically
/// increasing atomic counter and convertible to/from its wire representation.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ProjectEntryId(usize);

impl ProjectEntryId {
    pub const MAX: Self = Self(usize::MAX);
    pub const MIN: Self = Self(usize::MIN);

    /// Allocates the next id by atomically incrementing `counter`.
    pub fn new(counter: &AtomicUsize) -> Self {
        let id = counter.fetch_add(1, SeqCst);
        ProjectEntryId(id)
    }

    pub fn from_proto(id: u64) -> Self {
        ProjectEntryId(id as usize)
    }

    pub fn to_proto(self) -> u64 {
        self.0 as u64
    }

    pub fn from_usize(id: usize) -> Self {
        ProjectEntryId(id)
    }

    pub fn to_usize(self) -> usize {
        self.0
    }
}
6031
6032#[cfg(feature = "test-support")]
6033impl CreatedEntry {
6034 pub fn into_included(self) -> Option<Entry> {
6035 match self {
6036 CreatedEntry::Included(entry) => Some(entry),
6037 CreatedEntry::Excluded { .. } => None,
6038 }
6039 }
6040}
6041
6042fn parse_gitfile(content: &str) -> anyhow::Result<&Path> {
6043 let path = content
6044 .strip_prefix("gitdir:")
6045 .with_context(|| format!("parsing gitfile content {content:?}"))?;
6046 Ok(Path::new(path.trim()))
6047}
6048
/// Determines a repository's git dir and common dir from its `.git` path.
///
/// If `.git` is a regular file (a "gitfile"), the `gitdir:` pointer inside it
/// is followed; if the resulting directory contains a `commondir` file, that
/// is followed as well to locate the shared common dir. Both results fall
/// back to `dot_git_abs_path` itself.
async fn discover_git_paths(dot_git_abs_path: &Arc<Path>, fs: &dyn Fs) -> (Arc<Path>, Arc<Path>) {
    let mut repository_dir_abs_path = dot_git_abs_path.clone();
    let mut common_dir_abs_path = dot_git_abs_path.clone();

    // For an ordinary `.git` directory, loading/parsing fails and the
    // defaults above are returned.
    if let Some(path) = fs
        .load(dot_git_abs_path)
        .await
        .ok()
        .as_ref()
        .and_then(|contents| parse_gitfile(contents).log_err())
    {
        // The gitdir pointer may be relative to the directory containing
        // the `.git` file.
        let path = dot_git_abs_path
            .parent()
            .unwrap_or(Path::new(""))
            .join(path);
        if let Some(path) = fs.canonicalize(&path).await.log_err() {
            repository_dir_abs_path = Path::new(&path).into();
            common_dir_abs_path = repository_dir_abs_path.clone();

            if let Some(commondir_contents) = fs.load(&path.join("commondir")).await.ok()
                && let Some(commondir_path) = fs
                    .canonicalize(&path.join(commondir_contents.trim()))
                    .await
                    .log_err()
            {
                common_dir_abs_path = commondir_path.as_path().into();
            }
        }
    };
    (repository_dir_abs_path, common_dir_abs_path)
}
6080
/// A no-op `fs::Watcher`, for call sites that require a watcher but don't
/// need file-system notifications.
struct NullWatcher;

impl fs::Watcher for NullWatcher {
    fn add(&self, _path: &Path) -> Result<()> {
        Ok(())
    }

    fn remove(&self, _path: &Path) -> Result<()> {
        Ok(())
    }
}
6092
/// Number of leading bytes sampled to sniff a file's encoding and
/// text-vs-binary kind before committing to reading the whole file.
const FILE_ANALYSIS_BYTES: usize = 1024;
6094
/// Opens the file at `abs_path` and decodes its contents into a string.
///
/// Returns the decoded text, the encoding that was used, and whether a byte
/// order mark was present. Only a prefix of roughly `FILE_ANALYSIS_BYTES`
/// bytes is read for binary/encoding sniffing, so binary blobs are rejected
/// without loading them fully into memory.
async fn decode_file_text(
    fs: &dyn Fs,
    abs_path: &Path,
) -> Result<(String, &'static Encoding, bool)> {
    let mut file = fs
        .open_sync(&abs_path)
        .await
        .with_context(|| format!("opening file {abs_path:?}"))?;

    // First, read the beginning of the file to determine its kind and encoding.
    // We do not want to load an entire large blob into memory only to discard it.
    let mut file_first_bytes = Vec::with_capacity(FILE_ANALYSIS_BYTES);
    let mut buf = [0u8; FILE_ANALYSIS_BYTES];
    let mut reached_eof = false;
    loop {
        if file_first_bytes.len() >= FILE_ANALYSIS_BYTES {
            break;
        }
        let n = file
            .read(&mut buf)
            .with_context(|| format!("reading bytes of the file {abs_path:?}"))?;
        if n == 0 {
            // EOF: the entire file fit within the analysis prefix.
            reached_eof = true;
            break;
        }
        file_first_bytes.extend_from_slice(&buf[..n]);
    }
    let (bom_encoding, byte_content) = decode_byte_header(&file_first_bytes);
    anyhow::ensure!(
        byte_content != ByteContent::Binary,
        "Binary files are not supported"
    );

    // If the file is eligible for opening, read the rest of the file.
    let mut content = file_first_bytes;
    if !reached_eof {
        let mut buf = [0u8; 8 * 1024];
        loop {
            let n = file
                .read(&mut buf)
                .with_context(|| format!("reading remaining bytes of the file {abs_path:?}"))?;
            if n == 0 {
                break;
            }
            content.extend_from_slice(&buf[..n]);
        }
    }
    decode_byte_full(content, bom_encoding, byte_content)
}
6144
6145fn decode_byte_header(prefix: &[u8]) -> (Option<&'static Encoding>, ByteContent) {
6146 if let Some((encoding, _bom_len)) = Encoding::for_bom(prefix) {
6147 return (Some(encoding), ByteContent::Unknown);
6148 }
6149 (None, analyze_byte_content(prefix))
6150}
6151
6152fn decode_byte_full(
6153 bytes: Vec<u8>,
6154 bom_encoding: Option<&'static Encoding>,
6155 byte_content: ByteContent,
6156) -> Result<(String, &'static Encoding, bool)> {
6157 if let Some(encoding) = bom_encoding {
6158 let (cow, _) = encoding.decode_with_bom_removal(&bytes);
6159 return Ok((cow.into_owned(), encoding, true));
6160 }
6161
6162 match byte_content {
6163 ByteContent::Utf16Le => {
6164 let encoding = encoding_rs::UTF_16LE;
6165 let (cow, _, _) = encoding.decode(&bytes);
6166 return Ok((cow.into_owned(), encoding, false));
6167 }
6168 ByteContent::Utf16Be => {
6169 let encoding = encoding_rs::UTF_16BE;
6170 let (cow, _, _) = encoding.decode(&bytes);
6171 return Ok((cow.into_owned(), encoding, false));
6172 }
6173 ByteContent::Binary => {
6174 anyhow::bail!("Binary files are not supported");
6175 }
6176 ByteContent::Unknown => {}
6177 }
6178
6179 fn detect_encoding(bytes: Vec<u8>) -> (String, &'static Encoding) {
6180 let mut detector = EncodingDetector::new();
6181 detector.feed(&bytes, true);
6182
6183 let encoding = detector.guess(None, true); // Use None for TLD hint to ensure neutral detection logic.
6184
6185 let (cow, _, _) = encoding.decode(&bytes);
6186 (cow.into_owned(), encoding)
6187 }
6188
6189 match String::from_utf8(bytes) {
6190 Ok(text) => {
6191 // ISO-2022-JP (and other ISO-2022 variants) consists entirely of 7-bit ASCII bytes,
6192 // so it is valid UTF-8. However, it contains escape sequences starting with '\x1b'.
6193 // If we find an escape character, we double-check the encoding to prevent
6194 // displaying raw escape sequences instead of the correct characters.
6195 if text.contains('\x1b') {
6196 let (s, enc) = detect_encoding(text.into_bytes());
6197 Ok((s, enc, false))
6198 } else {
6199 Ok((text, encoding_rs::UTF_8, false))
6200 }
6201 }
6202 Err(e) => {
6203 let (s, enc) = detect_encoding(e.into_bytes());
6204 Ok((s, enc, false))
6205 }
6206 }
6207}
6208
/// Result of sniffing a byte buffer's likely content kind.
#[derive(Debug, PartialEq)]
enum ByteContent {
    /// Looks like UTF-16 little-endian text.
    Utf16Le,
    /// Looks like UTF-16 big-endian text.
    Utf16Be,
    /// Looks like non-text data.
    Binary,
    /// No confident determination; decoding falls back to UTF-8/detection.
    Unknown,
}
6216
// Heuristic check using null byte distribution plus a generic text-likeness
// heuristic. This prefers UTF-16 when many bytes are NUL and otherwise
// distinguishes between text-like and binary-like content.
fn analyze_byte_content(bytes: &[u8]) -> ByteContent {
    // Too little data for any meaningful statistics.
    if bytes.len() < 2 {
        return ByteContent::Unknown;
    }

    // Well-known magic numbers beat statistics.
    if is_known_binary_header(bytes) {
        return ByteContent::Binary;
    }

    let limit = bytes.len().min(FILE_ANALYSIS_BYTES);
    // NUL bytes are tallied per byte-offset parity: UTF-16-encoded ASCII puts
    // its zero (high) bytes at consistently even (BE) or odd (LE) offsets.
    let mut even_null_count = 0usize;
    let mut odd_null_count = 0usize;
    let mut non_text_like_count = 0usize;

    for (i, &byte) in bytes[..limit].iter().enumerate() {
        if byte == 0 {
            if i % 2 == 0 {
                even_null_count += 1;
            } else {
                odd_null_count += 1;
            }
            non_text_like_count += 1;
            continue;
        }

        let is_text_like = match byte {
            b'\t' | b'\n' | b'\r' | 0x0C => true,
            0x20..=0x7E => true,
            // Treat bytes that are likely part of UTF-8 or single-byte encodings as text-like.
            0x80..=0xBF | 0xC2..=0xF4 => true,
            _ => false,
        };

        if !is_text_like {
            non_text_like_count += 1;
        }
    }

    let total_null_count = even_null_count + odd_null_count;

    // If there are no NUL bytes at all, this is overwhelmingly likely to be text.
    if total_null_count == 0 {
        return ByteContent::Unknown;
    }

    // At least ~6% NULs suggests a 16-bit encoding or binary data.
    let has_significant_nulls = total_null_count >= limit / 16;
    let nulls_skew_to_even = even_null_count > odd_null_count * 4;
    let nulls_skew_to_odd = odd_null_count > even_null_count * 4;

    if has_significant_nulls {
        let sample = &bytes[..limit];

        // UTF-16BE ASCII: [0x00, char] — nulls at even positions (high byte first)
        // UTF-16LE ASCII: [char, 0x00] — nulls at odd positions (low byte first)

        if nulls_skew_to_even && is_plausible_utf16_text(sample, false) {
            return ByteContent::Utf16Be;
        }

        if nulls_skew_to_odd && is_plausible_utf16_text(sample, true) {
            return ByteContent::Utf16Le;
        }

        // Many NULs but no plausible UTF-16 interpretation: binary.
        return ByteContent::Binary;
    }

    // Sparse NULs: classify by the overall share of non-text-like bytes (8%).
    if non_text_like_count * 100 < limit * 8 {
        ByteContent::Unknown
    } else {
        ByteContent::Binary
    }
}
6292
/// Returns true when `bytes` begins with the magic number of a well-known
/// binary file format.
fn is_known_binary_header(bytes: &[u8]) -> bool {
    const MAGIC_PREFIXES: &[&[u8]] = &[
        b"%PDF-",              // PDF
        b"PK\x03\x04",         // ZIP local header
        b"PK\x05\x06",         // ZIP end of central directory
        b"PK\x07\x08",         // ZIP spanning/splitting
        b"\x89PNG\r\n\x1a\n",  // PNG
        b"\xFF\xD8\xFF",       // JPEG
        b"GIF87a",             // GIF87a
        b"GIF89a",             // GIF89a
        b"IWAD",               // Doom IWAD archive
        b"PWAD",               // Doom PWAD archive
        b"RIFF",               // WAV, AVI, WebP
        b"OggS",               // OGG (Vorbis, Opus, FLAC)
        b"fLaC",               // FLAC
        b"ID3",                // MP3 with ID3v2 tag
        b"\xFF\xFB",           // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xFA",           // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xF3",           // MP3 frame sync (MPEG2 Layer3)
        b"\xFF\xF2",           // MP3 frame sync (MPEG2 Layer3)
    ];
    MAGIC_PREFIXES
        .iter()
        .any(|prefix| bytes.starts_with(prefix))
}
6313
// Null byte skew alone is not enough to identify UTF-16 -- binary formats with
// small 16-bit values (like PCM audio) produce the same pattern. Decode the
// bytes as UTF-16 and reject if too many code units land in control character
// ranges or form unpaired surrogates, which real text almost never contains.
fn is_plausible_utf16_text(bytes: &[u8], little_endian: bool) -> bool {
    let mut units_seen = 0usize;
    let mut suspicious_units = 0usize;

    let mut offset = 0usize;
    while let Some(unit) = read_u16(bytes, offset, little_endian) {
        units_seen += 1;

        match unit {
            // Common whitespace controls are legitimate in text.
            0x0009 | 0x000A | 0x000C | 0x000D => {}
            // Other C0/C1 controls and the two non-characters.
            0x0000..=0x001F | 0x007F..=0x009F | 0xFFFE | 0xFFFF => suspicious_units += 1,
            // A high surrogate is valid only when a low surrogate follows.
            0xD800..=0xDBFF => match read_u16(bytes, offset + 2, little_endian) {
                Some(low) if (0xDC00..=0xDFFF).contains(&low) => {
                    // Consume the paired low surrogate as well.
                    units_seen += 1;
                    offset += 2;
                }
                _ => suspicious_units += 1,
            },
            // A lone low surrogate without a preceding high surrogate.
            0xDC00..=0xDFFF => suspicious_units += 1,
            _ => {}
        }

        offset += 2;
    }

    if units_seen == 0 {
        return false;
    }

    // Real UTF-16 text has near-zero control characters; binary data with
    // small 16-bit values typically exceeds 5%. 2% provides a safe margin.
    suspicious_units * 100 < units_seen * 2
}

/// Reads the 16-bit code unit at `offset`, or `None` when fewer than two
/// bytes remain.
fn read_u16(bytes: &[u8], offset: usize, little_endian: bool) -> Option<u16> {
    let pair = [*bytes.get(offset)?, *bytes.get(offset + 1)?];
    Some(if little_endian {
        u16::from_le_bytes(pair)
    } else {
        u16::from_be_bytes(pair)
    })
}
6365
6366#[cfg(test)]
6367mod tests {
6368 use super::*;
6369
6370 /// reproduction of issue #50785
6371 fn build_pcm16_wav_bytes() -> Vec<u8> {
6372 let header: Vec<u8> = vec![
6373 /* RIFF header */
6374 0x52, 0x49, 0x46, 0x46, // "RIFF"
6375 0xc6, 0xcf, 0x00, 0x00, // file size: 8
6376 0x57, 0x41, 0x56, 0x45, // "WAVE"
6377 /* fmt chunk */
6378 0x66, 0x6d, 0x74, 0x20, // "fmt "
6379 0x10, 0x00, 0x00, 0x00, // chunk size: 16
6380 0x01, 0x00, // format: PCM (1)
6381 0x01, 0x00, // channels: 1 (mono)
6382 0x80, 0x3e, 0x00, 0x00, // sample rate: 16000
6383 0x00, 0x7d, 0x00, 0x00, // byte rate: 32000
6384 0x02, 0x00, // block align: 2
6385 0x10, 0x00, // bits per sample: 16
6386 /* LIST chunk */
6387 0x4c, 0x49, 0x53, 0x54, // "LIST"
6388 0x1a, 0x00, 0x00, 0x00, // chunk size: 26
6389 0x49, 0x4e, 0x46, 0x4f, // "INFO"
6390 0x49, 0x53, 0x46, 0x54, // "ISFT"
6391 0x0d, 0x00, 0x00, 0x00, // sub-chunk size: 13
6392 0x4c, 0x61, 0x76, 0x66, 0x36, 0x32, 0x2e, 0x33, // "Lavf62.3"
6393 0x2e, 0x31, 0x30, 0x30, 0x00, // ".100\0"
6394 /* padding byte for word alignment */
6395 0x00, // data chunk header
6396 0x64, 0x61, 0x74, 0x61, // "data"
6397 0x80, 0xcf, 0x00, 0x00, // chunk size
6398 ];
6399
6400 let mut bytes = header;
6401
6402 // fill remaining space up to `FILE_ANALYSIS_BYTES` with synthetic PCM
6403 let audio_bytes_needed = FILE_ANALYSIS_BYTES - bytes.len();
6404 for i in 0..(audio_bytes_needed / 2) {
6405 let sample = (i & 0xFF) as u8;
6406 bytes.push(sample); // low byte: varies
6407 bytes.push(0x00); // high byte: zero for small values
6408 }
6409
6410 bytes
6411 }
6412
    #[test]
    fn test_pcm16_wav_detected_as_binary() {
        let wav_bytes = build_pcm16_wav_bytes();
        // The fixture is sized to exactly fill the analysis window.
        assert_eq!(wav_bytes.len(), FILE_ANALYSIS_BYTES);

        let result = analyze_byte_content(&wav_bytes);
        assert_eq!(
            result,
            ByteContent::Binary,
            "PCM 16-bit WAV should be detected as Binary via RIFF header"
        );
    }
6425
6426 #[test]
6427 fn test_le16_binary_not_misdetected_as_utf16le() {
6428 let mut bytes = b"FAKE".to_vec();
6429 while bytes.len() < FILE_ANALYSIS_BYTES {
6430 let sample = (bytes.len() & 0xFF) as u8;
6431 bytes.push(sample);
6432 bytes.push(0x00);
6433 }
6434 bytes.truncate(FILE_ANALYSIS_BYTES);
6435
6436 let result = analyze_byte_content(&bytes);
6437 assert_eq!(
6438 result,
6439 ByteContent::Binary,
6440 "LE 16-bit binary with control characters should be detected as Binary"
6441 );
6442 }
6443
6444 #[test]
6445 fn test_be16_binary_not_misdetected_as_utf16be() {
6446 let mut bytes = b"FAKE".to_vec();
6447 while bytes.len() < FILE_ANALYSIS_BYTES {
6448 bytes.push(0x00);
6449 let sample = (bytes.len() & 0xFF) as u8;
6450 bytes.push(sample);
6451 }
6452 bytes.truncate(FILE_ANALYSIS_BYTES);
6453
6454 let result = analyze_byte_content(&bytes);
6455 assert_eq!(
6456 result,
6457 ByteContent::Binary,
6458 "BE 16-bit binary with control characters should be detected as Binary"
6459 );
6460 }
6461
6462 #[test]
6463 fn test_utf16le_text_detected_as_utf16le() {
6464 let text = "Hello, world! This is a UTF-16 test string. ";
6465 let mut bytes = Vec::new();
6466 while bytes.len() < FILE_ANALYSIS_BYTES {
6467 bytes.extend(text.encode_utf16().flat_map(|u| u.to_le_bytes()));
6468 }
6469 bytes.truncate(FILE_ANALYSIS_BYTES);
6470
6471 assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Le);
6472 }
6473
6474 #[test]
6475 fn test_utf16be_text_detected_as_utf16be() {
6476 let text = "Hello, world! This is a UTF-16 test string. ";
6477 let mut bytes = Vec::new();
6478 while bytes.len() < FILE_ANALYSIS_BYTES {
6479 bytes.extend(text.encode_utf16().flat_map(|u| u.to_be_bytes()));
6480 }
6481 bytes.truncate(FILE_ANALYSIS_BYTES);
6482
6483 assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Be);
6484 }
6485
6486 #[test]
6487 fn test_known_binary_headers() {
6488 let cases: &[(&[u8], &str)] = &[
6489 (b"RIFF\x00\x00\x00\x00WAVE", "WAV"),
6490 (b"RIFF\x00\x00\x00\x00AVI ", "AVI"),
6491 (b"OggS\x00\x02", "OGG"),
6492 (b"fLaC\x00\x00", "FLAC"),
6493 (b"ID3\x03\x00", "MP3 ID3v2"),
6494 (b"\xFF\xFB\x90\x00", "MP3 MPEG1 Layer3"),
6495 (b"\xFF\xF3\x90\x00", "MP3 MPEG2 Layer3"),
6496 ];
6497
6498 for (header, label) in cases {
6499 let mut bytes = header.to_vec();
6500 bytes.resize(FILE_ANALYSIS_BYTES, 0x41); // pad with 'A'
6501 assert_eq!(
6502 analyze_byte_content(&bytes),
6503 ByteContent::Binary,
6504 "{label} should be detected as Binary"
6505 );
6506 }
6507 }
6508}