worktree.rs

   1mod ignore;
   2mod worktree_settings;
   3
   4use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
   5use anyhow::{Context as _, Result, anyhow};
   6use chardetng::EncodingDetector;
   7use clock::ReplicaId;
   8use collections::{HashMap, HashSet, VecDeque};
   9use encoding_rs::Encoding;
  10use fs::{
  11    Fs, MTime, PathEvent, PathEventKind, RemoveOptions, Watcher, copy_recursive, read_dir_items,
  12};
  13use futures::{
  14    FutureExt as _, Stream, StreamExt,
  15    channel::{
  16        mpsc::{self, UnboundedSender},
  17        oneshot,
  18    },
  19    select_biased, stream,
  20    task::Poll,
  21};
  22use fuzzy::CharBag;
  23use git::{
  24    COMMIT_MESSAGE, DOT_GIT, FSMONITOR_DAEMON, GITIGNORE, INDEX_LOCK, LFS_DIR, REPO_EXCLUDE,
  25    status::GitSummary,
  26};
  27use gpui::{
  28    App, AppContext as _, AsyncApp, BackgroundExecutor, Context, Entity, EventEmitter, Priority,
  29    Task,
  30};
  31use ignore::IgnoreStack;
  32use language::DiskState;
  33
  34use parking_lot::Mutex;
  35use paths::{local_settings_folder_name, local_vscode_folder_name};
  36use postage::{
  37    barrier,
  38    prelude::{Sink as _, Stream as _},
  39    watch,
  40};
  41use rpc::{
  42    AnyProtoClient,
  43    proto::{self, split_worktree_update},
  44};
  45pub use settings::WorktreeId;
  46use settings::{Settings, SettingsLocation, SettingsStore};
  47use smallvec::{SmallVec, smallvec};
  48use smol::channel::{self, Sender};
  49use std::{
  50    any::Any,
  51    borrow::Borrow as _,
  52    cmp::Ordering,
  53    collections::hash_map,
  54    convert::TryFrom,
  55    ffi::OsStr,
  56    fmt,
  57    future::Future,
  58    mem::{self},
  59    ops::{Deref, DerefMut, Range},
  60    path::{Path, PathBuf},
  61    pin::Pin,
  62    sync::{
  63        Arc,
  64        atomic::{AtomicUsize, Ordering::SeqCst},
  65    },
  66    time::{Duration, Instant},
  67};
  68use sum_tree::{Bias, Dimensions, Edit, KeyedItem, SeekTarget, SumTree, Summary, TreeMap, TreeSet};
  69use text::{LineEnding, Rope};
  70use util::{
  71    ResultExt, debug_panic, maybe,
  72    paths::{PathMatcher, PathStyle, SanitizedPath, home_dir},
  73    rel_path::RelPath,
  74};
  75pub use worktree_settings::WorktreeSettings;
  76
  77use crate::ignore::IgnoreKind;
  78
/// Debounce window applied to filesystem watch events before they are processed.
pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100);
  80
/// A set of local or remote files that are being opened as part of a project.
/// Responsible for tracking related FS (for local)/collab (for remote) events and corresponding updates.
/// Stores git repositories data and the diagnostics for the file(s).
///
/// Has an absolute path, and may be set to be visible in Zed UI or not.
/// May correspond to a directory or a single file.
/// Possible examples:
/// * a drag and dropped file — may be added as an invisible, "ephemeral" entry to the current worktree
/// * a directory opened in Zed — may be added as a visible entry to the current worktree
///
/// Uses [`Entry`] to track the state of each file/directory, can look up absolute paths for entries.
pub enum Worktree {
    /// A worktree backed directly by the local filesystem, kept up to date by a background scanner.
    Local(LocalWorktree),
    /// A worktree mirrored from a collaborator, updated via [`proto::UpdateWorktree`] messages.
    Remote(RemoteWorktree),
}
  96
/// An entry, created in the worktree.
///
/// Returned by entry-creation APIs such as [`Worktree::create_entry`].
#[derive(Debug)]
pub enum CreatedEntry {
    /// Got created and indexed by the worktree, receiving a corresponding entry.
    Included(Entry),
    /// Got created, but not indexed due to falling under exclusion filters.
    Excluded { abs_path: PathBuf },
}
 105
/// A text file loaded from disk, along with its detected encoding metadata.
#[derive(Debug)]
pub struct LoadedFile {
    /// Worktree metadata for the file.
    pub file: Arc<File>,
    /// The file's contents, decoded to a UTF-8 string.
    pub text: String,
    /// The encoding the file's bytes were decoded from.
    pub encoding: &'static Encoding,
    /// Whether the file began with a byte-order mark.
    pub has_bom: bool,
}
 113
/// A file loaded from disk as raw bytes, with no text decoding applied.
pub struct LoadedBinaryFile {
    /// Worktree metadata for the file.
    pub file: Arc<File>,
    /// The file's raw contents.
    pub content: Vec<u8>,
}
 118
 119impl fmt::Debug for LoadedBinaryFile {
 120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 121        f.debug_struct("LoadedBinaryFile")
 122            .field("file", &self.file)
 123            .field("content_bytes", &self.content.len())
 124            .finish()
 125    }
 126}
 127
/// State for a [`Worktree::Local`]: the latest snapshot plus the channels,
/// tasks, and settings that drive the background filesystem scanner.
pub struct LocalWorktree {
    snapshot: LocalSnapshot,
    /// Requests to rescan specific paths, consumed by the background scanner.
    scan_requests_tx: channel::Sender<ScanRequest>,
    /// Requests to scan everything under a path prefix.
    path_prefixes_to_scan_tx: channel::Sender<PathPrefixScanRequest>,
    /// Tracks whether a scan is currently in progress.
    is_scanning: (watch::Sender<bool>, watch::Receiver<bool>),
    /// Subscribers waiting for a particular scan id to complete.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    _background_scanner_tasks: Vec<Task<()>>,
    /// Present while a remote peer is observing this worktree's updates.
    update_observer: Option<UpdateObservationState>,
    fs: Arc<dyn Fs>,
    fs_case_sensitive: bool,
    visible: bool,
    /// Source of fresh [`ProjectEntryId`]s for newly discovered entries.
    next_entry_id: Arc<AtomicUsize>,
    settings: WorktreeSettings,
    share_private_files: bool,
    scanning_enabled: bool,
}
 144
/// A request to scan all entries under a given path prefix.
pub struct PathPrefixScanRequest {
    path: Arc<RelPath>,
    /// Barrier senders used to signal when the scan has finished.
    done: SmallVec<[barrier::Sender; 1]>,
}

/// A request to rescan a specific set of worktree-relative paths.
struct ScanRequest {
    relative_paths: Vec<Arc<RelPath>>,
    /// Barrier senders used to signal when the scan has finished.
    done: SmallVec<[barrier::Sender; 1]>,
}
 154
/// State for a [`Worktree::Remote`]: a snapshot mirrored from the host over RPC.
pub struct RemoteWorktree {
    snapshot: Snapshot,
    /// The snapshot maintained on a background task, plus the updates that have
    /// been applied to it but not yet to `snapshot`.
    background_snapshot: Arc<Mutex<(Snapshot, Vec<proto::UpdateWorktree>)>>,
    project_id: u64,
    client: AnyProtoClient,
    file_scan_inclusions: PathMatcher,
    /// Feeds incoming updates into the background apply task.
    updates_tx: Option<UnboundedSender<proto::UpdateWorktree>>,
    /// Present while an observer is forwarding this worktree's updates.
    update_observer: Option<mpsc::UnboundedSender<proto::UpdateWorktree>>,
    /// Subscribers waiting for a particular scan id to be observed.
    snapshot_subscriptions: VecDeque<(usize, oneshot::Sender<()>)>,
    replica_id: ReplicaId,
    visible: bool,
    disconnected: bool,
}
 168
/// An immutable view of a worktree's file and directory entries at a
/// particular point in time.
#[derive(Clone)]
pub struct Snapshot {
    id: WorktreeId,
    /// The absolute path of the worktree root.
    abs_path: Arc<SanitizedPath>,
    path_style: PathStyle,
    /// The root's name as a relative path (derived from the last component of
    /// `abs_path` for local worktrees).
    root_name: Arc<RelPath>,
    root_char_bag: CharBag,
    /// All entries, ordered by path.
    entries_by_path: SumTree<Entry>,
    /// All entries, ordered by id.
    entries_by_id: SumTree<PathEntry>,
    always_included_entries: Vec<Arc<RelPath>>,

    /// A number that increases every time the worktree begins scanning
    /// a set of paths from the filesystem. This scanning could be caused
    /// by some operation performed on the worktree, such as reading or
    /// writing a file, or by an event reported by the filesystem.
    scan_id: usize,

    /// The latest scan id that has completed, and whose preceding scans
    /// have all completed. The current `scan_id` could be more than one
    /// greater than the `completed_scan_id` if operations are performed
    /// on the worktree while it is processing a file-system event.
    completed_scan_id: usize,
}
 193
/// This path corresponds to the 'content path' of a repository in relation
/// to Zed's project root.
/// In the majority of the cases, this is the folder that contains the .git folder.
/// But if a sub-folder of a git repository is opened, this corresponds to the
/// project root and the .git folder is located in a parent directory.
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum WorkDirectory {
    /// The repository's working directory lies within the worktree.
    InProject {
        relative_path: Arc<RelPath>,
    },
    /// The repository's working directory is an ancestor of the worktree root
    /// (the project was opened inside a larger repository).
    AboveProject {
        absolute_path: Arc<Path>,
        location_in_repo: Arc<Path>,
    },
}
 209
impl WorkDirectory {
    /// The key under which this work directory is stored in path-keyed trees:
    /// the relative path for in-project repositories, or the empty path for
    /// repositories above the project root.
    fn path_key(&self) -> PathKey {
        match self {
            WorkDirectory::InProject { relative_path } => PathKey(relative_path.clone()),
            WorkDirectory::AboveProject { .. } => PathKey(RelPath::empty().into()),
        }
    }

    /// Returns true if the given path is a child of the work directory.
    ///
    /// Note that the path may still not be a member of this repository: there
    /// may be another repository in a directory between these two paths, or
    /// the repository may be an external .git folder in a parent folder of
    /// the project root.
    #[track_caller]
    pub fn directory_contains(&self, path: &RelPath) -> bool {
        match self {
            WorkDirectory::InProject { relative_path } => path.starts_with(relative_path),
            WorkDirectory::AboveProject { .. } => true,
        }
    }
}
 231
 232impl Default for WorkDirectory {
 233    fn default() -> Self {
 234        Self::InProject {
 235            relative_path: Arc::from(RelPath::empty()),
 236        }
 237    }
 238}
 239
/// The local-filesystem extension of [`Snapshot`]: adds gitignore state,
/// discovered git repositories, and a handle to the root directory.
#[derive(Clone)]
pub struct LocalSnapshot {
    snapshot: Snapshot,
    /// The global gitignore, if one has been loaded.
    global_gitignore: Option<Arc<Gitignore>>,
    /// Exclude files for all git repositories in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    repo_exclude_by_work_dir_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the gitignore files in the worktree, indexed by their absolute path.
    /// The boolean indicates whether the gitignore needs to be updated.
    ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    /// All of the git repositories in the worktree, indexed by the project entry
    /// id of their parent directory.
    git_repositories: TreeMap<ProjectEntryId, LocalRepositoryEntry>,
    /// The file handle of the worktree root
    /// (so we can find it after it's been moved)
    root_file_handle: Option<Arc<dyn fs::FileHandle>>,
}
 257
/// Mutable state owned by the background scanner while it processes
/// filesystem events and scan requests.
struct BackgroundScannerState {
    snapshot: LocalSnapshot,
    /// Entry ids of directories that have been scanned.
    scanned_dirs: HashSet<ProjectEntryId>,
    /// Path prefixes queued for scanning.
    path_prefixes_to_scan: HashSet<Arc<RelPath>>,
    /// Individual paths queued for scanning.
    paths_to_scan: HashSet<Arc<RelPath>>,
    /// The ids of all of the entries that were removed from the snapshot
    /// as part of the current update. These entry ids may be re-used
    /// if the same inode is discovered at a new path, or if the given
    /// path is re-created after being deleted.
    removed_entries: HashMap<u64, Entry>,
    changed_paths: Vec<Arc<RelPath>>,
    prev_snapshot: Snapshot,
    scanning_enabled: bool,
}
 272
/// A root path for a batch of filesystem events, and whether the subtree
/// beneath it was rescanned.
#[derive(Clone, Debug, Eq, PartialEq)]
struct EventRoot {
    path: Arc<RelPath>,
    was_rescanned: bool,
}
 278
/// A git repository discovered within (or above) the worktree.
#[derive(Debug, Clone)]
struct LocalRepositoryEntry {
    /// Project entry id of the repository's working directory.
    work_directory_id: ProjectEntryId,
    work_directory: WorkDirectory,
    /// Absolute path of the repository's working directory.
    work_directory_abs_path: Arc<Path>,
    /// The scan id at which the repository's git directory was last scanned.
    git_dir_scan_id: usize,
    /// Absolute path to the original .git entry that caused us to create this repository.
    ///
    /// This is normally a directory, but may be a "gitfile" that points to a directory elsewhere
    /// (whose path we then store in `repository_dir_abs_path`).
    dot_git_abs_path: Arc<Path>,
    /// Absolute path to the "commondir" for this repository.
    ///
    /// This is always a directory. For a normal repository, this is the same as
    /// `dot_git_abs_path`. For a linked worktree, this is the main repo's `.git`
    /// directory (resolved from the worktree's `commondir` file). For a submodule,
    /// this equals `repository_dir_abs_path` (submodules don't have a `commondir`
    /// file).
    common_dir_abs_path: Arc<Path>,
    /// Absolute path to the directory holding the repository's state.
    ///
    /// For a normal repository, this is a directory and coincides with `dot_git_abs_path` and
    /// `common_dir_abs_path`. For a submodule or worktree, this is some subdirectory of the
    /// commondir like `/project/.git/modules/foo`.
    repository_dir_abs_path: Arc<Path>,
}
 305
impl sum_tree::Item for LocalRepositoryEntry {
    type Summary = PathSummary<sum_tree::NoSummary>;

    /// Repositories are summarized only by their work directory's path key.
    fn summary(&self, _: <Self::Summary as Summary>::Context<'_>) -> Self::Summary {
        PathSummary {
            max_path: self.work_directory.path_key().0,
            item_summary: sum_tree::NoSummary,
        }
    }
}

impl KeyedItem for LocalRepositoryEntry {
    type Key = PathKey;

    /// Repositories are keyed by their work directory's path key.
    fn key(&self) -> Self::Key {
        self.work_directory.path_key()
    }
}
 324
/// Lets a repository entry be used wherever its [`WorkDirectory`] is needed.
impl Deref for LocalRepositoryEntry {
    type Target = WorkDirectory;

    fn deref(&self) -> &Self::Target {
        &self.work_directory
    }
}

/// Lets a [`LocalSnapshot`] be used wherever a base [`Snapshot`] is needed.
impl Deref for LocalSnapshot {
    type Target = Snapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}

impl DerefMut for LocalSnapshot {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.snapshot
    }
}
 346
/// Progress messages sent from the background scanner to the worktree.
enum ScanState {
    /// A new scan has begun.
    Started,
    /// The scanner produced an updated snapshot.
    Updated {
        snapshot: LocalSnapshot,
        changes: UpdatedEntriesSet,
        /// Dropped after the update is processed, releasing any waiters.
        barrier: SmallVec<[barrier::Sender; 1]>,
        /// Whether the scanner is still working after this update.
        scanning: bool,
    },
    /// The worktree root itself was moved to a new path.
    RootUpdated {
        new_path: Arc<SanitizedPath>,
    },
}

/// State held while a remote peer is observing this worktree's updates.
struct UpdateObservationState {
    /// Sends each new snapshot plus its changed entries to the observer task.
    snapshots_tx: mpsc::UnboundedSender<(LocalSnapshot, UpdatedEntriesSet)>,
    resume_updates: watch::Sender<()>,
    _maintain_remote_snapshot: Task<Option<()>>,
}
 365
/// Events emitted by a [`Worktree`] to its observers.
#[derive(Debug, Clone)]
pub enum Event {
    /// Entries were added, removed, or modified.
    UpdatedEntries(UpdatedEntriesSet),
    /// Git repository state changed.
    UpdatedGitRepositories(UpdatedGitRepositoriesSet),
    /// A single entry was deleted.
    DeletedEntry(ProjectEntryId),
}

impl EventEmitter<Event> for Worktree {}
 374
 375impl Worktree {
    /// Creates a worktree rooted at `path` on the local filesystem.
    ///
    /// Stats the path, builds an initial snapshot containing just the root
    /// entry, then starts the background scanner that populates the rest.
    /// If `path` does not exist yet, the worktree is still created, with no
    /// root entry.
    pub async fn local(
        path: impl Into<Arc<Path>>,
        visible: bool,
        fs: Arc<dyn Fs>,
        next_entry_id: Arc<AtomicUsize>,
        scanning_enabled: bool,
        worktree_id: WorktreeId,
        cx: &mut AsyncApp,
    ) -> Result<Entity<Self>> {
        let abs_path = path.into();
        let metadata = fs
            .metadata(&abs_path)
            .await
            .context("failed to stat worktree path")?;

        let fs_case_sensitive = fs.is_case_sensitive().await;

        // Keep a handle to the root so the worktree can be located again
        // if it is moved on disk while open.
        let root_file_handle = if metadata.as_ref().is_some() {
            fs.open_handle(&abs_path)
                .await
                .with_context(|| {
                    format!(
                        "failed to open local worktree root at {}",
                        abs_path.display()
                    )
                })
                .log_err()
        } else {
            None
        };

        Ok(cx.new(move |cx: &mut Context<Worktree>| {
            let mut snapshot = LocalSnapshot {
                ignores_by_parent_abs_path: Default::default(),
                global_gitignore: Default::default(),
                repo_exclude_by_work_dir_abs_path: Default::default(),
                git_repositories: Default::default(),
                snapshot: Snapshot::new(
                    worktree_id,
                    // The root name is the last path component, falling back
                    // to the empty path when there is none (or it isn't UTF-8).
                    abs_path
                        .file_name()
                        .and_then(|f| f.to_str())
                        .map_or(RelPath::empty().into(), |f| {
                            RelPath::unix(f).unwrap().into()
                        }),
                    abs_path.clone(),
                    PathStyle::local(),
                ),
                root_file_handle,
            };

            let worktree_id = snapshot.id();
            let settings_location = Some(SettingsLocation {
                worktree_id,
                path: RelPath::empty(),
            });

            let settings = WorktreeSettings::get(settings_location, cx).clone();
            // Restart the background scanners whenever this worktree's
            // settings change.
            cx.observe_global::<SettingsStore>(move |this, cx| {
                if let Self::Local(this) = this {
                    let settings = WorktreeSettings::get(settings_location, cx).clone();
                    if this.settings != settings {
                        this.settings = settings;
                        this.restart_background_scanners(cx);
                    }
                }
            })
            .detach();

            let share_private_files = false;
            if let Some(metadata) = metadata {
                // Seed the snapshot with an entry for the root itself.
                let mut entry = Entry::new(
                    RelPath::empty().into(),
                    &metadata,
                    ProjectEntryId::new(&next_entry_id),
                    snapshot.root_char_bag,
                    None,
                );
                if metadata.is_dir {
                    if !scanning_enabled {
                        entry.kind = EntryKind::UnloadedDir;
                    }
                } else {
                    // Single-file worktree: apply privacy/hidden settings to
                    // the file itself.
                    if let Some(file_name) = abs_path.file_name()
                        && let Some(file_name) = file_name.to_str()
                        && let Ok(path) = RelPath::unix(file_name)
                    {
                        entry.is_private = !share_private_files && settings.is_path_private(path);
                        entry.is_hidden = settings.is_path_hidden(path);
                    }
                }
                cx.foreground_executor()
                    .block_on(snapshot.insert_entry(entry, fs.as_ref()));
            }

            let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
            let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
            let mut worktree = LocalWorktree {
                share_private_files,
                next_entry_id,
                snapshot,
                is_scanning: watch::channel_with(true),
                snapshot_subscriptions: Default::default(),
                update_observer: None,
                scan_requests_tx,
                path_prefixes_to_scan_tx,
                _background_scanner_tasks: Vec::new(),
                fs,
                fs_case_sensitive,
                visible,
                settings,
                scanning_enabled,
            };
            worktree.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
            Worktree::Local(worktree)
        }))
    }
 493
    /// Creates a worktree that mirrors a collaborator's worktree over RPC.
    ///
    /// Incoming [`proto::UpdateWorktree`] messages are applied to a snapshot
    /// on a background task first; a foreground task then copies the result
    /// into the model and notifies observers.
    pub fn remote(
        project_id: u64,
        replica_id: ReplicaId,
        worktree: proto::WorktreeMetadata,
        client: AnyProtoClient,
        path_style: PathStyle,
        cx: &mut App,
    ) -> Entity<Self> {
        cx.new(|cx: &mut Context<Self>| {
            let snapshot = Snapshot::new(
                WorktreeId::from_proto(worktree.id),
                RelPath::from_proto(&worktree.root_name)
                    .unwrap_or_else(|_| RelPath::empty().into()),
                Path::new(&worktree.abs_path).into(),
                path_style,
            );

            let background_snapshot = Arc::new(Mutex::new((
                snapshot.clone(),
                Vec::<proto::UpdateWorktree>::new(),
            )));
            let (background_updates_tx, mut background_updates_rx) =
                mpsc::unbounded::<proto::UpdateWorktree>();
            let (mut snapshot_updated_tx, mut snapshot_updated_rx) = watch::channel();

            let worktree_id = snapshot.id();
            let settings_location = Some(SettingsLocation {
                worktree_id,
                path: RelPath::empty(),
            });

            let settings = WorktreeSettings::get(settings_location, cx).clone();
            let worktree = RemoteWorktree {
                client,
                project_id,
                replica_id,
                snapshot,
                file_scan_inclusions: settings.parent_dir_scan_inclusions.clone(),
                background_snapshot: background_snapshot.clone(),
                updates_tx: Some(background_updates_tx),
                update_observer: None,
                snapshot_subscriptions: Default::default(),
                visible: worktree.visible,
                disconnected: false,
            };

            // Apply updates to a separate snapshot in a background task, then
            // send them to a foreground task which updates the model.
            cx.background_spawn(async move {
                while let Some(update) = background_updates_rx.next().await {
                    {
                        // Hold the lock only while applying; the foreground
                        // task drains the accumulated updates separately.
                        let mut lock = background_snapshot.lock();
                        lock.0.apply_remote_update(
                            update.clone(),
                            &settings.parent_dir_scan_inclusions,
                        );
                        lock.1.push(update);
                    }
                    snapshot_updated_tx.send(()).await.ok();
                }
            })
            .detach();

            // On the foreground task, update to the latest snapshot and notify
            // any update observer of all updates that led to that snapshot.
            cx.spawn(async move |this, cx| {
                while (snapshot_updated_rx.recv().await).is_some() {
                    this.update(cx, |this, cx| {
                        let mut entries_changed = false;
                        let this = this.as_remote_mut().unwrap();
                        {
                            let mut lock = this.background_snapshot.lock();
                            this.snapshot = lock.0.clone();
                            for update in lock.1.drain(..) {
                                entries_changed |= !update.updated_entries.is_empty()
                                    || !update.removed_entries.is_empty();
                                if let Some(tx) = &this.update_observer {
                                    tx.unbounded_send(update).ok();
                                }
                            }
                        };

                        if entries_changed {
                            cx.emit(Event::UpdatedEntries(Arc::default()));
                        }
                        cx.notify();
                        // Wake any subscribers whose target scan id has now
                        // been observed.
                        while let Some((scan_id, _)) = this.snapshot_subscriptions.front() {
                            if this.observed_snapshot(*scan_id) {
                                let (_, tx) = this.snapshot_subscriptions.pop_front().unwrap();
                                let _ = tx.send(());
                            } else {
                                break;
                            }
                        }
                    })?;
                }
                anyhow::Ok(())
            })
            .detach();

            Worktree::Remote(worktree)
        })
    }
 597
 598    pub fn as_local(&self) -> Option<&LocalWorktree> {
 599        if let Worktree::Local(worktree) = self {
 600            Some(worktree)
 601        } else {
 602            None
 603        }
 604    }
 605
 606    pub fn as_remote(&self) -> Option<&RemoteWorktree> {
 607        if let Worktree::Remote(worktree) = self {
 608            Some(worktree)
 609        } else {
 610            None
 611        }
 612    }
 613
 614    pub fn as_local_mut(&mut self) -> Option<&mut LocalWorktree> {
 615        if let Worktree::Local(worktree) = self {
 616            Some(worktree)
 617        } else {
 618            None
 619        }
 620    }
 621
 622    pub fn as_remote_mut(&mut self) -> Option<&mut RemoteWorktree> {
 623        if let Worktree::Remote(worktree) = self {
 624            Some(worktree)
 625        } else {
 626            None
 627        }
 628    }
 629
 630    pub fn is_local(&self) -> bool {
 631        matches!(self, Worktree::Local(_))
 632    }
 633
 634    pub fn is_remote(&self) -> bool {
 635        !self.is_local()
 636    }
 637
    /// The settings location corresponding to this worktree's root.
    pub fn settings_location(&self, _: &Context<Self>) -> SettingsLocation<'static> {
        SettingsLocation {
            worktree_id: self.id(),
            path: RelPath::empty(),
        }
    }
 644
    /// Returns a clone of the base [`Snapshot`].
    pub fn snapshot(&self) -> Snapshot {
        match self {
            Worktree::Local(worktree) => worktree.snapshot.snapshot.clone(),
            Worktree::Remote(worktree) => worktree.snapshot.clone(),
        }
    }

    /// The id of the most recently started scan.
    pub fn scan_id(&self) -> usize {
        match self {
            Worktree::Local(worktree) => worktree.snapshot.scan_id,
            Worktree::Remote(worktree) => worktree.snapshot.scan_id,
        }
    }
 658
    /// Serializes this worktree's identifying metadata for the wire.
    pub fn metadata_proto(&self) -> proto::WorktreeMetadata {
        proto::WorktreeMetadata {
            id: self.id().to_proto(),
            root_name: self.root_name().to_proto(),
            visible: self.is_visible(),
            abs_path: self.abs_path().to_string_lossy().into_owned(),
        }
    }
 667
    /// The latest scan id whose scan, and all preceding scans, completed.
    pub fn completed_scan_id(&self) -> usize {
        match self {
            Worktree::Local(worktree) => worktree.snapshot.completed_scan_id,
            Worktree::Remote(worktree) => worktree.snapshot.completed_scan_id,
        }
    }

    /// Whether this worktree is shown in the UI.
    pub fn is_visible(&self) -> bool {
        match self {
            Worktree::Local(worktree) => worktree.visible,
            Worktree::Remote(worktree) => worktree.visible,
        }
    }

    /// This replica's id; local worktrees are always [`ReplicaId::LOCAL`].
    pub fn replica_id(&self) -> ReplicaId {
        match self {
            Worktree::Local(_) => ReplicaId::LOCAL,
            Worktree::Remote(worktree) => worktree.replica_id,
        }
    }
 688
    /// The absolute path of the worktree root.
    pub fn abs_path(&self) -> Arc<Path> {
        match self {
            Worktree::Local(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
            Worktree::Remote(worktree) => SanitizedPath::cast_arc(worktree.abs_path.clone()),
        }
    }

    /// A [`File`] for the root entry, if a root entry exists.
    pub fn root_file(&self, cx: &Context<Self>) -> Option<Arc<File>> {
        let entry = self.root_entry()?;
        Some(File::for_entry(entry.clone(), cx.entity()))
    }
 700
    /// Begins streaming this worktree's updates for `project_id` to `callback`.
    ///
    /// The callback's future yields a bool — presumably whether the update was
    /// delivered successfully; confirm against the delegated implementations.
    pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        match self {
            Worktree::Local(this) => this.observe_updates(project_id, cx, callback),
            Worktree::Remote(this) => this.observe_updates(project_id, cx, callback),
        }
    }

    /// Drops any previously-registered update observer, stopping the stream.
    pub fn stop_observing_updates(&mut self) {
        match self {
            Worktree::Local(this) => {
                this.update_observer.take();
            }
            Worktree::Remote(this) => {
                this.update_observer.take();
            }
        }
    }
 722
    /// Returns a future that resolves once a snapshot with the given scan id
    /// has been observed.
    pub fn wait_for_snapshot(
        &mut self,
        scan_id: usize,
    ) -> impl Future<Output = Result<()>> + use<> {
        match self {
            Worktree::Local(this) => this.wait_for_snapshot(scan_id).boxed(),
            Worktree::Remote(this) => this.wait_for_snapshot(scan_id).boxed(),
        }
    }

    /// Whether an update observer is currently registered (test support only).
    #[cfg(feature = "test-support")]
    pub fn has_update_observer(&self) -> bool {
        match self {
            Worktree::Local(this) => this.update_observer.is_some(),
            Worktree::Remote(this) => this.update_observer.is_some(),
        }
    }
 740
    /// Loads the file at `path` as text. Only supported on local worktrees;
    /// remote worktrees return an error.
    pub fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
        match self {
            Worktree::Local(this) => this.load_file(path, cx),
            Worktree::Remote(_) => {
                Task::ready(Err(anyhow!("remote worktrees can't yet load files")))
            }
        }
    }

    /// Loads the file at `path` as raw bytes. Only supported on local
    /// worktrees; remote worktrees return an error.
    pub fn load_binary_file(
        &self,
        path: &RelPath,
        cx: &Context<Worktree>,
    ) -> Task<Result<LoadedBinaryFile>> {
        match self {
            Worktree::Local(this) => this.load_binary_file(path, cx),
            Worktree::Remote(_) => {
                Task::ready(Err(anyhow!("remote worktrees can't yet load binary files")))
            }
        }
    }
 762
    /// Writes `text` to `path` with the given line ending and encoding.
    /// Only supported on local worktrees; remote worktrees return an error.
    pub fn write_file(
        &self,
        path: Arc<RelPath>,
        text: Rope,
        line_ending: LineEnding,
        encoding: &'static Encoding,
        has_bom: bool,
        cx: &Context<Worktree>,
    ) -> Task<Result<Arc<File>>> {
        match self {
            Worktree::Local(this) => {
                this.write_file(path, text, line_ending, encoding, has_bom, cx)
            }
            Worktree::Remote(_) => {
                Task::ready(Err(anyhow!("remote worktree can't yet write files")))
            }
        }
    }
 781
    /// Creates a file or directory at `path`, optionally with initial `content`.
    ///
    /// Local worktrees create the entry directly; remote worktrees send a
    /// `CreateProjectEntry` RPC and insert the returned entry at the host's
    /// scan id. Returns [`CreatedEntry::Excluded`] when the path was created
    /// but fell under exclusion filters and was not indexed.
    pub fn create_entry(
        &mut self,
        path: Arc<RelPath>,
        is_directory: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let worktree_id = self.id();
        match self {
            Worktree::Local(this) => this.create_entry(path, is_directory, content, cx),
            Worktree::Remote(this) => {
                let project_id = this.project_id;
                let request = this.client.request(proto::CreateProjectEntry {
                    worktree_id: worktree_id.to_proto(),
                    project_id,
                    path: path.as_ref().to_proto(),
                    content,
                    is_directory,
                });
                cx.spawn(async move |this, cx| {
                    let response = request.await?;
                    match response.entry {
                        Some(entry) => this
                            .update(cx, |worktree, cx| {
                                worktree.as_remote_mut().unwrap().insert_entry(
                                    entry,
                                    response.worktree_scan_id as usize,
                                    cx,
                                )
                            })?
                            .await
                            .map(CreatedEntry::Included),
                        // No entry in the response means the host excluded the
                        // path from indexing.
                        None => {
                            let abs_path =
                                this.read_with(cx, |worktree, _| worktree.absolutize(&path))?;
                            Ok(CreatedEntry::Excluded { abs_path })
                        }
                    }
                })
            }
        }
    }
 824
    /// Deletes the entry with the given id, optionally moving it to the trash.
    ///
    /// Returns `None` if the deletion could not be started or the entry does
    /// not exist. Emits [`Event::DeletedEntry`] for the entry and every
    /// descendant beneath it.
    pub fn delete_entry(
        &mut self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &mut Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let task = match self {
            Worktree::Local(this) => this.delete_entry(entry_id, trash, cx),
            Worktree::Remote(this) => this.delete_entry(entry_id, trash, cx),
        }?;

        let entry = match &*self {
            Worktree::Local(this) => this.entry_for_id(entry_id),
            Worktree::Remote(this) => this.entry_for_id(entry_id),
        }?;

        let mut ids = vec![entry_id];
        let path = &*entry.path;

        self.get_children_ids_recursive(path, &mut ids);

        for id in ids {
            cx.emit(Event::DeletedEntry(id));
        }
        Some(task)
    }

    /// Collects the ids of all entries beneath `path` into `ids`, depth-first.
    fn get_children_ids_recursive(&self, path: &RelPath, ids: &mut Vec<ProjectEntryId>) {
        let children_iter = self.child_entries(path);
        for child in children_iter {
            ids.push(child.id);
            self.get_children_ids_recursive(&child.path, ids);
        }
    }
 859
 860    // pub fn rename_entry(
 861    //     &mut self,
 862    //     entry_id: ProjectEntryId,
 863    //     new_path: Arc<RelPath>,
 864    //     cx: &Context<Self>,
 865    // ) -> Task<Result<CreatedEntry>> {
 866    //     match self {
 867    //         Worktree::Local(this) => this.rename_entry(entry_id, new_path, cx),
 868    //         Worktree::Remote(this) => this.rename_entry(entry_id, new_path, cx),
 869    //     }
 870    // }
 871
    /// Copies files from absolute `paths` (outside this worktree) into
    /// `target_directory`, a worktree-relative path. Returns the project
    /// entry ids of the copied entries.
    pub fn copy_external_entries(
        &mut self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        match self {
            // NOTE(review): only the remote variant consumes `fs` here —
            // presumably the local worktree uses its own filesystem handle;
            // confirm the omission is intentional.
            Worktree::Local(this) => this.copy_external_entries(target_directory, paths, cx),
            Worktree::Remote(this) => this.copy_external_entries(target_directory, paths, fs, cx),
        }
    }
 884
 885    pub fn expand_entry(
 886        &mut self,
 887        entry_id: ProjectEntryId,
 888        cx: &Context<Worktree>,
 889    ) -> Option<Task<Result<()>>> {
 890        match self {
 891            Worktree::Local(this) => this.expand_entry(entry_id, cx),
 892            Worktree::Remote(this) => {
 893                let response = this.client.request(proto::ExpandProjectEntry {
 894                    project_id: this.project_id,
 895                    entry_id: entry_id.to_proto(),
 896                });
 897                Some(cx.spawn(async move |this, cx| {
 898                    let response = response.await?;
 899                    this.update(cx, |this, _| {
 900                        this.as_remote_mut()
 901                            .unwrap()
 902                            .wait_for_snapshot(response.worktree_scan_id as usize)
 903                    })?
 904                    .await?;
 905                    Ok(())
 906                }))
 907            }
 908        }
 909    }
 910
 911    pub fn expand_all_for_entry(
 912        &mut self,
 913        entry_id: ProjectEntryId,
 914        cx: &Context<Worktree>,
 915    ) -> Option<Task<Result<()>>> {
 916        match self {
 917            Worktree::Local(this) => this.expand_all_for_entry(entry_id, cx),
 918            Worktree::Remote(this) => {
 919                let response = this.client.request(proto::ExpandAllForProjectEntry {
 920                    project_id: this.project_id,
 921                    entry_id: entry_id.to_proto(),
 922                });
 923                Some(cx.spawn(async move |this, cx| {
 924                    let response = response.await?;
 925                    this.update(cx, |this, _| {
 926                        this.as_remote_mut()
 927                            .unwrap()
 928                            .wait_for_snapshot(response.worktree_scan_id as usize)
 929                    })?
 930                    .await?;
 931                    Ok(())
 932                }))
 933            }
 934        }
 935    }
 936
 937    pub async fn handle_create_entry(
 938        this: Entity<Self>,
 939        request: proto::CreateProjectEntry,
 940        mut cx: AsyncApp,
 941    ) -> Result<proto::ProjectEntryResponse> {
 942        let (scan_id, entry) = this.update(&mut cx, |this, cx| {
 943            anyhow::Ok((
 944                this.scan_id(),
 945                this.create_entry(
 946                    RelPath::from_proto(&request.path).with_context(|| {
 947                        format!("received invalid relative path {:?}", request.path)
 948                    })?,
 949                    request.is_directory,
 950                    request.content,
 951                    cx,
 952                ),
 953            ))
 954        })?;
 955        Ok(proto::ProjectEntryResponse {
 956            entry: match &entry.await? {
 957                CreatedEntry::Included(entry) => Some(entry.into()),
 958                CreatedEntry::Excluded { .. } => None,
 959            },
 960            worktree_scan_id: scan_id as u64,
 961        })
 962    }
 963
 964    pub async fn handle_delete_entry(
 965        this: Entity<Self>,
 966        request: proto::DeleteProjectEntry,
 967        mut cx: AsyncApp,
 968    ) -> Result<proto::ProjectEntryResponse> {
 969        let (scan_id, task) = this.update(&mut cx, |this, cx| {
 970            (
 971                this.scan_id(),
 972                this.delete_entry(
 973                    ProjectEntryId::from_proto(request.entry_id),
 974                    request.use_trash,
 975                    cx,
 976                ),
 977            )
 978        });
 979        task.ok_or_else(|| anyhow::anyhow!("invalid entry"))?
 980            .await?;
 981        Ok(proto::ProjectEntryResponse {
 982            entry: None,
 983            worktree_scan_id: scan_id as u64,
 984        })
 985    }
 986
 987    pub async fn handle_expand_entry(
 988        this: Entity<Self>,
 989        request: proto::ExpandProjectEntry,
 990        mut cx: AsyncApp,
 991    ) -> Result<proto::ExpandProjectEntryResponse> {
 992        let task = this.update(&mut cx, |this, cx| {
 993            this.expand_entry(ProjectEntryId::from_proto(request.entry_id), cx)
 994        });
 995        task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
 996            .await?;
 997        let scan_id = this.read_with(&cx, |this, _| this.scan_id());
 998        Ok(proto::ExpandProjectEntryResponse {
 999            worktree_scan_id: scan_id as u64,
1000        })
1001    }
1002
1003    pub async fn handle_expand_all_for_entry(
1004        this: Entity<Self>,
1005        request: proto::ExpandAllForProjectEntry,
1006        mut cx: AsyncApp,
1007    ) -> Result<proto::ExpandAllForProjectEntryResponse> {
1008        let task = this.update(&mut cx, |this, cx| {
1009            this.expand_all_for_entry(ProjectEntryId::from_proto(request.entry_id), cx)
1010        });
1011        task.ok_or_else(|| anyhow::anyhow!("no such entry"))?
1012            .await?;
1013        let scan_id = this.read_with(&cx, |this, _| this.scan_id());
1014        Ok(proto::ExpandAllForProjectEntryResponse {
1015            worktree_scan_id: scan_id as u64,
1016        })
1017    }
1018
    /// Whether this worktree represents a single file rather than a
    /// directory tree (i.e. it has no root directory).
    pub fn is_single_file(&self) -> bool {
        self.root_dir().is_none()
    }
1022
    /// For visible worktrees, returns the path with the worktree name as the first component.
    /// Otherwise, returns an absolute path.
    pub fn full_path(&self, worktree_relative_path: &RelPath) -> PathBuf {
        if self.is_visible() {
            self.root_name()
                .join(worktree_relative_path)
                .display(self.path_style)
                .to_string()
                .into()
        } else {
            let full_path = self.abs_path();
            // Abbreviate the user's home directory as "~" for local worktrees.
            let mut full_path_string = if self.is_local()
                && let Ok(stripped) = full_path.strip_prefix(home_dir())
            {
                self.path_style
                    .join("~", &*stripped.to_string_lossy())
                    .unwrap()
            } else {
                full_path.to_string_lossy().into_owned()
            };

            // Only append a separator when the relative path is non-empty.
            if worktree_relative_path.components().next().is_some() {
                full_path_string.push_str(self.path_style.primary_separator());
                full_path_string.push_str(&worktree_relative_path.display(self.path_style));
            }

            full_path_string.into()
        }
    }
1052}
1053
1054impl LocalWorktree {
    /// The filesystem implementation backing this worktree.
    pub fn fs(&self) -> &Arc<dyn Fs> {
        &self.fs
    }
1058
1059    pub fn is_path_private(&self, path: &RelPath) -> bool {
1060        !self.share_private_files && self.settings.is_path_private(path)
1061    }
1062
    /// Whether the filesystem underlying this worktree is case-sensitive.
    pub fn fs_is_case_sensitive(&self) -> bool {
        self.fs_case_sensitive
    }
1066
    /// Replaces the background scanner tasks with fresh ones, then
    /// re-requests a scan of any always-included entries so their state does
    /// not go stale across the restart.
    fn restart_background_scanners(&mut self, cx: &Context<Worktree>) {
        let (scan_requests_tx, scan_requests_rx) = channel::unbounded();
        let (path_prefixes_to_scan_tx, path_prefixes_to_scan_rx) = channel::unbounded();
        // Replacing the senders drops the previous ones, closing the channels
        // that the old scanner was reading requests from.
        self.scan_requests_tx = scan_requests_tx;
        self.path_prefixes_to_scan_tx = path_prefixes_to_scan_tx;

        self.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
        let always_included_entries = mem::take(&mut self.snapshot.always_included_entries);
        log::debug!(
            "refreshing entries for the following always included paths: {:?}",
            always_included_entries
        );

        // Cleans up old always included entries to ensure they get updated properly. Otherwise,
        // nested always included entries may not get updated and will result in out-of-date info.
        self.refresh_entries_for_paths(always_included_entries);
    }
1084
    /// Spawns the two long-lived tasks that keep this worktree in sync with
    /// the filesystem:
    ///
    /// 1. a background task running the `BackgroundScanner`, which watches
    ///    the root path (when scanning is enabled) and performs scans;
    /// 2. a foreground task that receives `ScanState` messages from the
    ///    scanner and applies them to this worktree.
    fn start_background_scanner(
        &mut self,
        scan_requests_rx: channel::Receiver<ScanRequest>,
        path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
        cx: &Context<Worktree>,
    ) {
        let snapshot = self.snapshot();
        let share_private_files = self.share_private_files;
        let next_entry_id = self.next_entry_id.clone();
        let fs = self.fs.clone();
        let scanning_enabled = self.scanning_enabled;
        let settings = self.settings.clone();
        let (scan_states_tx, mut scan_states_rx) = mpsc::unbounded();
        let background_scanner = cx.background_spawn({
            let abs_path = snapshot.abs_path.as_path().to_path_buf();
            let background = cx.background_executor().clone();
            async move {
                // When scanning is disabled, substitute an event stream that
                // never yields and a watcher that does nothing.
                let (events, watcher) = if scanning_enabled {
                    fs.watch(&abs_path, FS_WATCH_LATENCY).await
                } else {
                    (Box::pin(stream::pending()) as _, Arc::new(NullWatcher) as _)
                };
                let fs_case_sensitive = fs.is_case_sensitive().await;

                let mut scanner = BackgroundScanner {
                    fs,
                    fs_case_sensitive,
                    status_updates_tx: scan_states_tx,
                    executor: background,
                    scan_requests_rx,
                    path_prefixes_to_scan_rx,
                    next_entry_id,
                    state: async_lock::Mutex::new(BackgroundScannerState {
                        prev_snapshot: snapshot.snapshot.clone(),
                        snapshot,
                        scanned_dirs: Default::default(),
                        scanning_enabled,
                        path_prefixes_to_scan: Default::default(),
                        paths_to_scan: Default::default(),
                        removed_entries: Default::default(),
                        changed_paths: Default::default(),
                    }),
                    phase: BackgroundScannerPhase::InitialScan,
                    share_private_files,
                    settings,
                    watcher,
                };

                scanner
                    .run(Box::pin(events.map(|events| events.into_iter().collect())))
                    .await;
            }
        });
        // Apply each scan-state message to the worktree entity as it arrives;
        // the loop ends when the scanner or the worktree is dropped.
        let scan_state_updater = cx.spawn(async move |this, cx| {
            while let Some((state, this)) = scan_states_rx.next().await.zip(this.upgrade()) {
                this.update(cx, |this, cx| {
                    let this = this.as_local_mut().unwrap();
                    match state {
                        ScanState::Started => {
                            *this.is_scanning.0.borrow_mut() = true;
                        }
                        ScanState::Updated {
                            snapshot,
                            changes,
                            barrier,
                            scanning,
                        } => {
                            *this.is_scanning.0.borrow_mut() = scanning;
                            this.set_snapshot(snapshot, changes, cx);
                            drop(barrier);
                        }
                        ScanState::RootUpdated { new_path } => {
                            this.update_abs_path_and_refresh(new_path, cx);
                        }
                    }
                });
            }
        });
        // Keep both tasks alive for the lifetime of the worktree (replacing
        // any tasks from a previous scanner).
        self._background_scanner_tasks = vec![background_scanner, scan_state_updater];
        *self.is_scanning.0.borrow_mut() = true;
    }
1166
    /// Installs `new_snapshot` as the current snapshot, forwarding it to any
    /// remote observer, emitting events for entry and git-repository changes,
    /// and waking `wait_for_snapshot` subscribers whose scan has completed.
    fn set_snapshot(
        &mut self,
        mut new_snapshot: LocalSnapshot,
        entry_changes: UpdatedEntriesSet,
        cx: &mut Context<Worktree>,
    ) {
        // Diff the repositories before the old snapshot is replaced.
        let repo_changes = self.changed_repos(&self.snapshot, &mut new_snapshot);
        self.snapshot = new_snapshot;

        if let Some(share) = self.update_observer.as_mut() {
            share
                .snapshots_tx
                .unbounded_send((self.snapshot.clone(), entry_changes.clone()))
                .ok();
        }

        if !entry_changes.is_empty() {
            cx.emit(Event::UpdatedEntries(entry_changes));
        }
        if !repo_changes.is_empty() {
            cx.emit(Event::UpdatedGitRepositories(repo_changes));
        }

        // Subscriptions are kept sorted by scan id, so satisfied waiters can
        // be popped from the front until one is still pending.
        while let Some((scan_id, _)) = self.snapshot_subscriptions.front() {
            if self.snapshot.completed_scan_id >= *scan_id {
                let (_, tx) = self.snapshot_subscriptions.pop_front().unwrap();
                tx.send(()).ok();
            } else {
                break;
            }
        }
    }
1199
    /// Computes which git repositories were added, removed, or changed
    /// between two snapshots by merge-joining the two repository collections,
    /// which are iterated in work-directory-entry-id order.
    fn changed_repos(
        &self,
        old_snapshot: &LocalSnapshot,
        new_snapshot: &mut LocalSnapshot,
    ) -> UpdatedGitRepositoriesSet {
        let mut changes = Vec::new();
        let mut old_repos = old_snapshot.git_repositories.iter().peekable();
        let new_repos = new_snapshot.git_repositories.clone();
        let mut new_repos = new_repos.iter().peekable();

        loop {
            match (new_repos.peek().map(clone), old_repos.peek().map(clone)) {
                (Some((new_entry_id, new_repo)), Some((old_entry_id, old_repo))) => {
                    match Ord::cmp(&new_entry_id, &old_entry_id) {
                        // Present only in the new snapshot: repository added.
                        Ordering::Less => {
                            changes.push(UpdatedGitRepository {
                                work_directory_id: new_entry_id,
                                old_work_directory_abs_path: None,
                                new_work_directory_abs_path: Some(
                                    new_repo.work_directory_abs_path.clone(),
                                ),
                                dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                repository_dir_abs_path: Some(
                                    new_repo.repository_dir_abs_path.clone(),
                                ),
                                common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                            });
                            new_repos.next();
                        }
                        // Present in both: report only if the .git dir was
                        // rescanned or the work directory moved.
                        Ordering::Equal => {
                            if new_repo.git_dir_scan_id != old_repo.git_dir_scan_id
                                || new_repo.work_directory_abs_path
                                    != old_repo.work_directory_abs_path
                            {
                                changes.push(UpdatedGitRepository {
                                    work_directory_id: new_entry_id,
                                    old_work_directory_abs_path: Some(
                                        old_repo.work_directory_abs_path.clone(),
                                    ),
                                    new_work_directory_abs_path: Some(
                                        new_repo.work_directory_abs_path.clone(),
                                    ),
                                    dot_git_abs_path: Some(new_repo.dot_git_abs_path.clone()),
                                    repository_dir_abs_path: Some(
                                        new_repo.repository_dir_abs_path.clone(),
                                    ),
                                    common_dir_abs_path: Some(new_repo.common_dir_abs_path.clone()),
                                });
                            }
                            new_repos.next();
                            old_repos.next();
                        }
                        // Present only in the old snapshot: repository removed.
                        Ordering::Greater => {
                            changes.push(UpdatedGitRepository {
                                work_directory_id: old_entry_id,
                                old_work_directory_abs_path: Some(
                                    old_repo.work_directory_abs_path.clone(),
                                ),
                                new_work_directory_abs_path: None,
                                dot_git_abs_path: None,
                                repository_dir_abs_path: None,
                                common_dir_abs_path: None,
                            });
                            old_repos.next();
                        }
                    }
                }
                // Old side exhausted: everything remaining is an addition.
                (Some((entry_id, repo)), None) => {
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: None,
                        new_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    new_repos.next();
                }
                // New side exhausted: everything remaining is a removal.
                (None, Some((entry_id, repo))) => {
                    changes.push(UpdatedGitRepository {
                        work_directory_id: entry_id,
                        old_work_directory_abs_path: Some(repo.work_directory_abs_path.clone()),
                        new_work_directory_abs_path: None,
                        dot_git_abs_path: Some(repo.dot_git_abs_path.clone()),
                        repository_dir_abs_path: Some(repo.repository_dir_abs_path.clone()),
                        common_dir_abs_path: Some(repo.common_dir_abs_path.clone()),
                    });
                    old_repos.next();
                }
                (None, None) => break,
            }
        }

        // Turns the `(&K, &V)` pairs yielded by `peek` into owned `(K, V)`.
        fn clone<T: Clone, U: Clone>(value: &(&T, &U)) -> (T, U) {
            (value.0.clone(), value.1.clone())
        }

        changes.into()
    }
1299
1300    pub fn scan_complete(&self) -> impl Future<Output = ()> + use<> {
1301        let mut is_scanning_rx = self.is_scanning.1.clone();
1302        async move {
1303            let mut is_scanning = *is_scanning_rx.borrow();
1304            while is_scanning {
1305                if let Some(value) = is_scanning_rx.recv().await {
1306                    is_scanning = value;
1307                } else {
1308                    break;
1309                }
1310            }
1311        }
1312    }
1313
1314    pub fn wait_for_snapshot(
1315        &mut self,
1316        scan_id: usize,
1317    ) -> impl Future<Output = Result<()>> + use<> {
1318        let (tx, rx) = oneshot::channel();
1319        if self.snapshot.completed_scan_id >= scan_id {
1320            tx.send(()).ok();
1321        } else {
1322            match self
1323                .snapshot_subscriptions
1324                .binary_search_by_key(&scan_id, |probe| probe.0)
1325            {
1326                Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
1327            }
1328        }
1329
1330        async move {
1331            rx.await?;
1332            Ok(())
1333        }
1334    }
1335
    /// Returns a clone of the current local snapshot.
    pub fn snapshot(&self) -> LocalSnapshot {
        self.snapshot.clone()
    }
1339
    /// Returns a clone of this worktree's settings.
    pub fn settings(&self) -> WorktreeSettings {
        self.settings.clone()
    }
1343
    /// Loads the raw bytes of the file at `path`, refreshing its worktree
    /// entry in parallel. If the path turns out to be excluded from the
    /// worktree (no entry after the refresh), a detached `File` without an
    /// entry id is constructed from the on-disk metadata instead.
    fn load_binary_file(
        &self,
        path: &RelPath,
        cx: &Context<Worktree>,
    ) -> Task<Result<LoadedBinaryFile>> {
        let path = Arc::from(path);
        let abs_path = self.absolutize(&path);
        let fs = self.fs.clone();
        let entry = self.refresh_entry(path.clone(), None, cx);
        let is_private = self.is_path_private(&path);

        let worktree = cx.weak_entity();
        cx.background_spawn(async move {
            let content = fs.load_bytes(&abs_path).await?;

            let worktree = worktree.upgrade().context("worktree was dropped")?;
            let file = match entry.await? {
                Some(entry) => File::for_entry(entry, worktree),
                None => {
                    // Excluded file: it has no worktree entry, so read its
                    // metadata directly to build a standalone `File`.
                    let metadata = fs
                        .metadata(&abs_path)
                        .await
                        .with_context(|| {
                            format!("Loading metadata for excluded file {abs_path:?}")
                        })?
                        .with_context(|| {
                            format!("Excluded file {abs_path:?} got removed during loading")
                        })?;
                    Arc::new(File {
                        entry_id: None,
                        worktree,
                        path,
                        disk_state: DiskState::Present {
                            mtime: metadata.mtime,
                            size: metadata.len,
                        },
                        is_local: true,
                        is_private,
                    })
                }
            };

            Ok(LoadedBinaryFile { file, content })
        })
    }
1389
    /// Loads and decodes the text of the file at `path`, refreshing its
    /// worktree entry in parallel. Returns the decoded text along with the
    /// detected encoding and whether a byte-order mark was present. Excluded
    /// paths (no entry after the refresh) yield a detached `File` built from
    /// on-disk metadata.
    #[ztracing::instrument(skip_all)]
    fn load_file(&self, path: &RelPath, cx: &Context<Worktree>) -> Task<Result<LoadedFile>> {
        let path = Arc::from(path);
        let abs_path = self.absolutize(&path);
        let fs = self.fs.clone();
        let entry = self.refresh_entry(path.clone(), None, cx);
        let is_private = self.is_path_private(path.as_ref());

        let this = cx.weak_entity();
        cx.background_spawn(async move {
            // WARN: Temporary workaround for #27283.
            //       We are not efficient with our memory usage per file, and use in excess of 64GB for a 10GB file
            //       Therefore, as a temporary workaround to prevent system freezes, we just bail before opening a file
            //       if it is too large
            //       5GB seems to be more reasonable, peaking at ~16GB, while 6GB jumps up to >24GB which seems like a
            //       reasonable limit
            {
                const FILE_SIZE_MAX: u64 = 6 * 1024 * 1024 * 1024; // 6GB
                if let Ok(Some(metadata)) = fs.metadata(&abs_path).await
                    && metadata.len >= FILE_SIZE_MAX
                {
                    anyhow::bail!("File is too large to load");
                }
            }
            let (text, encoding, has_bom) = decode_file_text(fs.as_ref(), &abs_path).await?;

            let worktree = this.upgrade().context("worktree was dropped")?;
            let file = match entry.await? {
                Some(entry) => File::for_entry(entry, worktree),
                None => {
                    // Excluded file: it has no worktree entry, so read its
                    // metadata directly to build a standalone `File`.
                    let metadata = fs
                        .metadata(&abs_path)
                        .await
                        .with_context(|| {
                            format!("Loading metadata for excluded file {abs_path:?}")
                        })?
                        .with_context(|| {
                            format!("Excluded file {abs_path:?} got removed during loading")
                        })?;
                    Arc::new(File {
                        entry_id: None,
                        worktree,
                        path,
                        disk_state: DiskState::Present {
                            mtime: metadata.mtime,
                            size: metadata.len,
                        },
                        is_local: true,
                        is_private,
                    })
                }
            };

            Ok(LoadedFile {
                file,
                text,
                encoding,
                has_bom,
            })
        })
    }
1451
1452    /// Find the lowest path in the worktree's datastructures that is an ancestor
1453    fn lowest_ancestor(&self, path: &RelPath) -> Arc<RelPath> {
1454        let mut lowest_ancestor = None;
1455        for path in path.ancestors() {
1456            if self.entry_for_path(path).is_some() {
1457                lowest_ancestor = Some(path.into());
1458                break;
1459            }
1460        }
1461
1462        lowest_ancestor.unwrap_or_else(|| RelPath::empty().into())
1463    }
1464
    /// Creates a file or directory at the worktree-relative `path`, writing
    /// `content` for files (empty when `None`). After the write, refreshes
    /// snapshot entries for the new path and for each ancestor directory
    /// created below the lowest pre-existing ancestor.
    ///
    /// Returns `CreatedEntry::Excluded` when the path is excluded by the
    /// worktree settings; the file is still written to disk in that case.
    pub fn create_entry(
        &self,
        path: Arc<RelPath>,
        is_dir: bool,
        content: Option<Vec<u8>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<CreatedEntry>> {
        let abs_path = self.absolutize(&path);
        let path_excluded = self.settings.is_path_excluded(&path);
        let fs = self.fs.clone();
        let task_abs_path = abs_path.clone();
        // Perform the filesystem write off the main thread.
        let write = cx.background_spawn(async move {
            if is_dir {
                fs.create_dir(&task_abs_path)
                    .await
                    .with_context(|| format!("creating directory {task_abs_path:?}"))
            } else {
                fs.write(&task_abs_path, content.as_deref().unwrap_or(&[]))
                    .await
                    .with_context(|| format!("creating file {task_abs_path:?}"))
            }
        });

        // Captured before the write so newly created ancestors can be
        // distinguished from pre-existing ones afterwards.
        let lowest_ancestor = self.lowest_ancestor(&path);
        cx.spawn(async move |this, cx| {
            write.await?;
            if path_excluded {
                return Ok(CreatedEntry::Excluded { abs_path });
            }

            let (result, refreshes) = this.update(cx, |this, cx| {
                let mut refreshes = Vec::new();
                // Refresh every ancestor between the lowest pre-existing
                // ancestor (exclusive) and the new path (exclusive).
                let refresh_paths = path.strip_prefix(&lowest_ancestor).unwrap();
                for refresh_path in refresh_paths.ancestors() {
                    if refresh_path == RelPath::empty() {
                        continue;
                    }
                    let refresh_full_path = lowest_ancestor.join(refresh_path);

                    refreshes.push(this.as_local_mut().unwrap().refresh_entry(
                        refresh_full_path,
                        None,
                        cx,
                    ));
                }
                (
                    this.as_local_mut().unwrap().refresh_entry(path, None, cx),
                    refreshes,
                )
            })?;
            // Ancestor refresh failures are logged but don't fail the call.
            for refresh in refreshes {
                refresh.await.log_err();
            }

            Ok(result
                .await?
                .map(CreatedEntry::Included)
                .unwrap_or_else(|| CreatedEntry::Excluded { abs_path }))
        })
    }
1525
1526    pub fn write_file(
1527        &self,
1528        path: Arc<RelPath>,
1529        text: Rope,
1530        line_ending: LineEnding,
1531        encoding: &'static Encoding,
1532        has_bom: bool,
1533        cx: &Context<Worktree>,
1534    ) -> Task<Result<Arc<File>>> {
1535        let fs = self.fs.clone();
1536        let is_private = self.is_path_private(&path);
1537        let abs_path = self.absolutize(&path);
1538
1539        let write = cx.background_spawn({
1540            let fs = fs.clone();
1541            let abs_path = abs_path.clone();
1542            async move {
1543                // For UTF-8, use the optimized `fs.save` which writes Rope chunks directly to disk
1544                // without allocating a contiguous string.
1545                if encoding == encoding_rs::UTF_8 && !has_bom {
1546                    return fs.save(&abs_path, &text, line_ending).await;
1547                }
1548
1549                // For legacy encodings (e.g. Shift-JIS), we fall back to converting the entire Rope
1550                // to a String/Bytes in memory before writing.
1551                //
1552                // Note: This is inefficient for very large files compared to the streaming approach above,
1553                // but supporting streaming writes for arbitrary encodings would require a significant
1554                // refactor of the `fs` crate to expose a Writer interface.
1555                let text_string = text.to_string();
1556                let normalized_text = match line_ending {
1557                    LineEnding::Unix => text_string,
1558                    LineEnding::Windows => text_string.replace('\n', "\r\n"),
1559                };
1560
1561                // Create the byte vector manually for UTF-16 encodings because encoding_rs encodes to UTF-8 by default (per WHATWG standards),
1562                //  which is not what we want for saving files.
1563                let bytes = if encoding == encoding_rs::UTF_16BE {
1564                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
1565                    if has_bom {
1566                        data.extend_from_slice(&[0xFE, 0xFF]); // BOM
1567                    }
1568                    let utf16be_bytes =
1569                        normalized_text.encode_utf16().flat_map(|u| u.to_be_bytes());
1570                    data.extend(utf16be_bytes);
1571                    data.into()
1572                } else if encoding == encoding_rs::UTF_16LE {
1573                    let mut data = Vec::with_capacity(normalized_text.len() * 2 + 2);
1574                    if has_bom {
1575                        data.extend_from_slice(&[0xFF, 0xFE]); // BOM
1576                    }
1577                    let utf16le_bytes =
1578                        normalized_text.encode_utf16().flat_map(|u| u.to_le_bytes());
1579                    data.extend(utf16le_bytes);
1580                    data.into()
1581                } else {
1582                    // For other encodings (Shift-JIS, UTF-8 with BOM, etc.), delegate to encoding_rs.
1583                    let bom_bytes = if has_bom {
1584                        if encoding == encoding_rs::UTF_8 {
1585                            vec![0xEF, 0xBB, 0xBF]
1586                        } else {
1587                            vec![]
1588                        }
1589                    } else {
1590                        vec![]
1591                    };
1592                    let (cow, _, _) = encoding.encode(&normalized_text);
1593                    if !bom_bytes.is_empty() {
1594                        let mut bytes = bom_bytes;
1595                        bytes.extend_from_slice(&cow);
1596                        bytes.into()
1597                    } else {
1598                        cow
1599                    }
1600                };
1601
1602                fs.write(&abs_path, &bytes).await
1603            }
1604        });
1605
1606        cx.spawn(async move |this, cx| {
1607            write.await?;
1608            let entry = this
1609                .update(cx, |this, cx| {
1610                    this.as_local_mut()
1611                        .unwrap()
1612                        .refresh_entry(path.clone(), None, cx)
1613                })?
1614                .await?;
1615            let worktree = this.upgrade().context("worktree dropped")?;
1616            if let Some(entry) = entry {
1617                Ok(File::for_entry(entry, worktree))
1618            } else {
1619                let metadata = fs
1620                    .metadata(&abs_path)
1621                    .await
1622                    .with_context(|| {
1623                        format!("Fetching metadata after saving the excluded buffer {abs_path:?}")
1624                    })?
1625                    .with_context(|| {
1626                        format!("Excluded buffer {path:?} got removed during saving")
1627                    })?;
1628                Ok(Arc::new(File {
1629                    worktree,
1630                    path,
1631                    disk_state: DiskState::Present {
1632                        mtime: metadata.mtime,
1633                        size: metadata.len,
1634                    },
1635                    entry_id: None,
1636                    is_local: true,
1637                    is_private,
1638                }))
1639            }
1640        })
1641    }
1642
1643    pub fn delete_entry(
1644        &self,
1645        entry_id: ProjectEntryId,
1646        trash: bool,
1647        cx: &Context<Worktree>,
1648    ) -> Option<Task<Result<()>>> {
1649        let entry = self.entry_for_id(entry_id)?.clone();
1650        let abs_path = self.absolutize(&entry.path);
1651        let fs = self.fs.clone();
1652
1653        let delete = cx.background_spawn(async move {
1654            if entry.is_file() {
1655                if trash {
1656                    fs.trash_file(&abs_path, Default::default()).await?;
1657                } else {
1658                    fs.remove_file(&abs_path, Default::default()).await?;
1659                }
1660            } else if trash {
1661                fs.trash_dir(
1662                    &abs_path,
1663                    RemoveOptions {
1664                        recursive: true,
1665                        ignore_if_not_exists: false,
1666                    },
1667                )
1668                .await?;
1669            } else {
1670                fs.remove_dir(
1671                    &abs_path,
1672                    RemoveOptions {
1673                        recursive: true,
1674                        ignore_if_not_exists: false,
1675                    },
1676                )
1677                .await?;
1678            }
1679            anyhow::Ok(entry.path)
1680        });
1681
1682        Some(cx.spawn(async move |this, cx| {
1683            let path = delete.await?;
1684            this.update(cx, |this, _| {
1685                this.as_local_mut()
1686                    .unwrap()
1687                    .refresh_entries_for_paths(vec![path])
1688            })?
1689            .recv()
1690            .await;
1691            Ok(())
1692        }))
1693    }
1694
    /// Copies files/directories from outside this worktree into
    /// `target_directory` (a worktree-relative path), rescans the copied
    /// paths, and returns the project entry ids created for them.
    ///
    /// Sources without a file name, and sources whose resolved target equals
    /// the source itself, are skipped.
    pub fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths: Vec<Arc<Path>>,
        cx: &Context<Worktree>,
    ) -> Task<Result<Vec<ProjectEntryId>>> {
        let target_directory = self.absolutize(&target_directory);
        let worktree_path = self.abs_path().clone();
        let fs = self.fs.clone();
        // Pair each source path with its destination: target_directory/<file_name>.
        let paths = paths
            .into_iter()
            .filter_map(|source| {
                let file_name = source.file_name()?;
                let mut target = target_directory.clone();
                target.push(file_name);

                // Do not allow copying the same file to itself.
                if source.as_ref() != target.as_path() {
                    Some((source, target))
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        // Worktree-relative forms of the targets, used both to request the
        // rescan and to look up the resulting entries afterwards.
        let paths_to_refresh = paths
            .iter()
            .filter_map(|(_, target)| {
                RelPath::new(
                    target.strip_prefix(&worktree_path).ok()?,
                    PathStyle::local(),
                )
                .ok()
                .map(|path| path.into_arc())
            })
            .collect::<Vec<_>>();

        cx.spawn(async move |this, cx| {
            // Perform the copies on the background executor. Failures are
            // logged rather than aborting: we still refresh whatever copied.
            cx.background_spawn(async move {
                for (source, target) in paths {
                    copy_recursive(
                        fs.as_ref(),
                        &source,
                        &target,
                        fs::CopyOptions {
                            overwrite: true,
                            ..Default::default()
                        },
                    )
                    .await
                    .with_context(|| {
                        format!("Failed to copy file from {source:?} to {target:?}")
                    })?;
                }
                anyhow::Ok(())
            })
            .await
            .log_err();
            // Request a rescan of the copied paths and wait for it to complete.
            let mut refresh = cx.read_entity(
                &this.upgrade().with_context(|| "Dropped worktree")?,
                |this, _| {
                    anyhow::Ok::<postage::barrier::Receiver>(
                        this.as_local()
                            .with_context(|| "Worktree is not local")?
                            .refresh_entries_for_paths(paths_to_refresh.clone()),
                    )
                },
            )?;

            cx.background_spawn(async move {
                refresh.next().await;
                anyhow::Ok(())
            })
            .await
            .log_err();

            // Resolve the refreshed paths to entry ids in the updated snapshot.
            let this = this.upgrade().with_context(|| "Dropped worktree")?;
            Ok(cx.read_entity(&this, |this, _| {
                paths_to_refresh
                    .iter()
                    .filter_map(|path| Some(this.entry_for_path(path)?.id))
                    .collect()
            }))
        })
    }
1780
1781    fn expand_entry(
1782        &self,
1783        entry_id: ProjectEntryId,
1784        cx: &Context<Worktree>,
1785    ) -> Option<Task<Result<()>>> {
1786        let path = self.entry_for_id(entry_id)?.path.clone();
1787        let mut refresh = self.refresh_entries_for_paths(vec![path]);
1788        Some(cx.background_spawn(async move {
1789            refresh.next().await;
1790            Ok(())
1791        }))
1792    }
1793
1794    fn expand_all_for_entry(
1795        &self,
1796        entry_id: ProjectEntryId,
1797        cx: &Context<Worktree>,
1798    ) -> Option<Task<Result<()>>> {
1799        let path = self.entry_for_id(entry_id).unwrap().path.clone();
1800        let mut rx = self.add_path_prefix_to_scan(path);
1801        Some(cx.background_spawn(async move {
1802            rx.next().await;
1803            Ok(())
1804        }))
1805    }
1806
1807    pub fn refresh_entries_for_paths(&self, paths: Vec<Arc<RelPath>>) -> barrier::Receiver {
1808        let (tx, rx) = barrier::channel();
1809        self.scan_requests_tx
1810            .try_send(ScanRequest {
1811                relative_paths: paths,
1812                done: smallvec![tx],
1813            })
1814            .ok();
1815        rx
1816    }
1817
    /// Test-only alias for [`Self::refresh_entries_for_paths`], letting tests
    /// trigger a rescan explicitly.
    #[cfg(feature = "test-support")]
    pub fn manually_refresh_entries_for_paths(
        &self,
        paths: Vec<Arc<RelPath>>,
    ) -> barrier::Receiver {
        self.refresh_entries_for_paths(paths)
    }
1825
1826    pub fn add_path_prefix_to_scan(&self, path_prefix: Arc<RelPath>) -> barrier::Receiver {
1827        let (tx, rx) = barrier::channel();
1828        self.path_prefixes_to_scan_tx
1829            .try_send(PathPrefixScanRequest {
1830                path: path_prefix,
1831                done: smallvec![tx],
1832            })
1833            .ok();
1834        rx
1835    }
1836
1837    pub fn refresh_entry(
1838        &self,
1839        path: Arc<RelPath>,
1840        old_path: Option<Arc<RelPath>>,
1841        cx: &Context<Worktree>,
1842    ) -> Task<Result<Option<Entry>>> {
1843        if self.settings.is_path_excluded(&path) {
1844            return Task::ready(Ok(None));
1845        }
1846        let paths = if let Some(old_path) = old_path.as_ref() {
1847            vec![old_path.clone(), path.clone()]
1848        } else {
1849            vec![path.clone()]
1850        };
1851        let t0 = Instant::now();
1852        let mut refresh = self.refresh_entries_for_paths(paths);
1853        // todo(lw): Hot foreground spawn
1854        cx.spawn(async move |this, cx| {
1855            refresh.recv().await;
1856            log::trace!("refreshed entry {path:?} in {:?}", t0.elapsed());
1857            let new_entry = this.read_with(cx, |this, _| {
1858                this.entry_for_path(&path).cloned().with_context(|| {
1859                    format!("Could not find entry in worktree for {path:?} after refresh")
1860                })
1861            })??;
1862            Ok(Some(new_entry))
1863        })
1864    }
1865
    /// Begins streaming this worktree's state to a remote peer via `callback`.
    ///
    /// The first message is a full snapshot; subsequent messages carry only
    /// changed entries. If an observer is already installed, this merely
    /// signals it to resume sending.
    ///
    /// `callback` returns `false` to signal backpressure; sending then pauses
    /// until the resume channel is signalled again, and stops entirely if the
    /// resume channel closes.
    pub fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        if let Some(observer) = self.update_observer.as_mut() {
            *observer.resume_updates.borrow_mut() = ();
            return;
        }

        let (resume_updates_tx, mut resume_updates_rx) = watch::channel::<()>();
        let (snapshots_tx, mut snapshots_rx) =
            mpsc::unbounded::<(LocalSnapshot, UpdatedEntriesSet)>();
        // Seed the stream with the current snapshot so the peer receives the
        // initial state immediately.
        snapshots_tx
            .unbounded_send((self.snapshot(), Arc::default()))
            .ok();

        let worktree_id = self.id.to_proto();
        let _maintain_remote_snapshot = cx.background_spawn(async move {
            let mut is_first = true;
            while let Some((snapshot, entry_changes)) = snapshots_rx.next().await {
                let update = if is_first {
                    is_first = false;
                    snapshot.build_initial_update(project_id, worktree_id)
                } else {
                    snapshot.build_update(project_id, worktree_id, entry_changes)
                };

                // Large updates are split into size-limited chunks.
                for update in proto::split_worktree_update(update) {
                    // Drain any stale resume signal before attempting the send.
                    let _ = resume_updates_rx.try_recv();
                    loop {
                        let result = callback(update.clone());
                        if result.await {
                            break;
                        } else {
                            log::info!("waiting to resume updates");
                            if resume_updates_rx.next().await.is_none() {
                                return Some(());
                            }
                        }
                    }
                }
            }
            Some(())
        });

        self.update_observer = Some(UpdateObservationState {
            snapshots_tx,
            resume_updates: resume_updates_tx,
            _maintain_remote_snapshot,
        });
    }
1918
    /// Starts sharing files previously treated as private, restarting the
    /// background scanners so their entries get picked up.
    pub fn share_private_files(&mut self, cx: &Context<Worktree>) {
        self.share_private_files = true;
        self.restart_background_scanners(cx);
    }
1923
1924    pub fn update_abs_path_and_refresh(
1925        &mut self,
1926        new_path: Arc<SanitizedPath>,
1927        cx: &Context<Worktree>,
1928    ) {
1929        self.snapshot.git_repositories = Default::default();
1930        self.snapshot.ignores_by_parent_abs_path = Default::default();
1931        let root_name = new_path
1932            .as_path()
1933            .file_name()
1934            .and_then(|f| f.to_str())
1935            .map_or(RelPath::empty().into(), |f| {
1936                RelPath::unix(f).unwrap().into()
1937            });
1938        self.snapshot.update_abs_path(new_path, root_name);
1939        self.restart_background_scanners(cx);
1940    }
1941    #[cfg(feature = "test-support")]
1942    pub fn repositories(&self) -> Vec<Arc<Path>> {
1943        self.git_repositories
1944            .values()
1945            .map(|entry| entry.work_directory_abs_path.clone())
1946            .collect::<Vec<_>>()
1947    }
1948}
1949
impl RemoteWorktree {
    /// The id of the remote project this worktree belongs to.
    pub fn project_id(&self) -> u64 {
        self.project_id
    }

    /// A handle to the RPC client used to communicate with the host.
    pub fn client(&self) -> AnyProtoClient {
        self.client.clone()
    }

    /// Marks the worktree as disconnected: stops accepting remote updates and
    /// drops all pending snapshot subscriptions, causing their waiters to
    /// resolve with an error.
    pub fn disconnected_from_host(&mut self) {
        self.updates_tx.take();
        self.snapshot_subscriptions.clear();
        self.disconnected = true;
    }

    /// Forwards an update received from the host to the update consumer.
    /// Updates are dropped silently once `disconnected_from_host` has run.
    pub fn update_from_remote(&self, update: proto::UpdateWorktree) {
        if let Some(updates_tx) = &self.updates_tx {
            updates_tx
                .unbounded_send(update)
                .expect("consumer runs to completion");
        }
    }

    /// Re-broadcasts this worktree's state to another peer via `callback`:
    /// sends a full initial snapshot followed by updates received on the
    /// observer channel. Stops when `callback` returns `false` or the channel
    /// closes, then uninstalls the observer.
    fn observe_updates<F, Fut>(&mut self, project_id: u64, cx: &Context<Worktree>, callback: F)
    where
        F: 'static + Send + Fn(proto::UpdateWorktree) -> Fut,
        Fut: 'static + Send + Future<Output = bool>,
    {
        let (tx, mut rx) = mpsc::unbounded();
        let initial_update = self
            .snapshot
            .build_initial_update(project_id, self.id().to_proto());
        self.update_observer = Some(tx);
        cx.spawn(async move |this, cx| {
            let mut update = initial_update;
            'outer: loop {
                // SSH projects use a special project ID of 0, and we need to
                // remap it to the correct one here.
                update.project_id = project_id;

                // Large updates are split into size-limited chunks.
                for chunk in split_worktree_update(update) {
                    if !callback(chunk).await {
                        break 'outer;
                    }
                }

                if let Some(next_update) = rx.next().await {
                    update = next_update;
                } else {
                    break;
                }
            }
            // Tear down the observer so a future call can install a new one.
            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                this.update_observer.take();
            })
        })
        .detach();
    }

    /// Whether a scan with the given id has already been fully applied.
    fn observed_snapshot(&self, scan_id: usize) -> bool {
        self.completed_scan_id >= scan_id
    }

    /// Returns a future that resolves once this worktree's snapshot has
    /// caught up to `scan_id`. Resolves immediately if it already has, and
    /// errors (via the dropped sender) if the worktree is disconnected.
    pub fn wait_for_snapshot(
        &mut self,
        scan_id: usize,
    ) -> impl Future<Output = Result<()>> + use<> {
        let (tx, rx) = oneshot::channel();
        if self.observed_snapshot(scan_id) {
            let _ = tx.send(());
        } else if self.disconnected {
            drop(tx);
        } else {
            // Keep subscriptions sorted by scan id; insert at the position
            // binary search reports regardless of whether a match exists.
            match self
                .snapshot_subscriptions
                .binary_search_by_key(&scan_id, |probe| probe.0)
            {
                Ok(ix) | Err(ix) => self.snapshot_subscriptions.insert(ix, (scan_id, tx)),
            }
        }

        async move {
            rx.await?;
            Ok(())
        }
    }

    /// Inserts an entry received from the host into the background snapshot
    /// once the snapshot has caught up to `scan_id`, then publishes the
    /// updated snapshot on the worktree.
    pub fn insert_entry(
        &mut self,
        entry: proto::Entry,
        scan_id: usize,
        cx: &Context<Worktree>,
    ) -> Task<Result<Entry>> {
        let wait_for_snapshot = self.wait_for_snapshot(scan_id);
        cx.spawn(async move |this, cx| {
            wait_for_snapshot.await?;
            this.update(cx, |worktree, _| {
                let worktree = worktree.as_remote_mut().unwrap();
                let snapshot = &mut worktree.background_snapshot.lock().0;
                let entry = snapshot.insert_entry(entry, &worktree.file_scan_inclusions);
                worktree.snapshot = snapshot.clone();
                entry
            })?
        })
    }

    /// Asks the host to delete the entry (optionally to the trash), then
    /// mirrors the deletion locally once the corresponding scan has been
    /// observed.
    fn delete_entry(
        &self,
        entry_id: ProjectEntryId,
        trash: bool,
        cx: &Context<Worktree>,
    ) -> Option<Task<Result<()>>> {
        let response = self.client.request(proto::DeleteProjectEntry {
            project_id: self.project_id,
            entry_id: entry_id.to_proto(),
            use_trash: trash,
        });
        Some(cx.spawn(async move |this, cx| {
            let response = response.await?;
            let scan_id = response.worktree_scan_id as usize;

            // Wait until our snapshot reflects the host-side deletion scan.
            this.update(cx, move |this, _| {
                this.as_remote_mut().unwrap().wait_for_snapshot(scan_id)
            })?
            .await?;

            this.update(cx, |this, _| {
                let this = this.as_remote_mut().unwrap();
                let snapshot = &mut this.background_snapshot.lock().0;
                snapshot.delete_entry(entry_id);
                this.snapshot = snapshot.clone();
            })
        }))
    }

    // fn rename_entry(
    //     &self,
    //     entry_id: ProjectEntryId,
    //     new_path: impl Into<Arc<RelPath>>,
    //     cx: &Context<Worktree>,
    // ) -> Task<Result<CreatedEntry>> {
    //     let new_path: Arc<RelPath> = new_path.into();
    //     let response = self.client.request(proto::RenameProjectEntry {
    //         project_id: self.project_id,
    //         entry_id: entry_id.to_proto(),
    //         new_worktree_id: new_path.worktree_id,
    //         new_path: new_path.as_ref().to_proto(),
    //     });
    //     cx.spawn(async move |this, cx| {
    //         let response = response.await?;
    //         match response.entry {
    //             Some(entry) => this
    //                 .update(cx, |this, cx| {
    //                     this.as_remote_mut().unwrap().insert_entry(
    //                         entry,
    //                         response.worktree_scan_id as usize,
    //                         cx,
    //                     )
    //                 })?
    //                 .await
    //                 .map(CreatedEntry::Included),
    //             None => {
    //                 let abs_path =
    //                     this.read_with(cx, |worktree, _| worktree.absolutize(&new_path))?;
    //                 Ok(CreatedEntry::Excluded { abs_path })
    //             }
    //         }
    //     })
    // }

    /// Uploads external files to the host so it can create the corresponding
    /// entries: walks each source path with the local fs, then issues one
    /// `CreateProjectEntry` request per item (directories carry no content),
    /// returning the ids of the entries the host created.
    fn copy_external_entries(
        &self,
        target_directory: Arc<RelPath>,
        paths_to_copy: Vec<Arc<Path>>,
        local_fs: Arc<dyn Fs>,
        cx: &Context<Worktree>,
    ) -> Task<anyhow::Result<Vec<ProjectEntryId>>> {
        let client = self.client.clone();
        let worktree_id = self.id().to_proto();
        let project_id = self.project_id;

        cx.background_spawn(async move {
            let mut requests = Vec::new();
            for root_path_to_copy in paths_to_copy {
                // Skip sources whose file name is absent or not valid UTF-8.
                let Some(filename) = root_path_to_copy
                    .file_name()
                    .and_then(|name| name.to_str())
                    .and_then(|filename| RelPath::unix(filename).ok())
                else {
                    continue;
                };
                for (abs_path, is_directory) in
                    read_dir_items(local_fs.as_ref(), &root_path_to_copy).await?
                {
                    // Skip (but log) items whose path cannot be made relative.
                    let Some(relative_path) = abs_path
                        .strip_prefix(&root_path_to_copy)
                        .map_err(|e| anyhow::Error::from(e))
                        .and_then(|relative_path| RelPath::new(relative_path, PathStyle::local()))
                        .log_err()
                    else {
                        continue;
                    };
                    let content = if is_directory {
                        None
                    } else {
                        Some(local_fs.load_bytes(&abs_path).await?)
                    };

                    let mut target_path = target_directory.join(filename);
                    if relative_path.file_name().is_some() {
                        target_path = target_path.join(&relative_path);
                    }

                    requests.push(proto::CreateProjectEntry {
                        project_id,
                        worktree_id,
                        path: target_path.to_proto(),
                        is_directory,
                        content,
                    });
                }
            }
            // Sort so parent directories are created before their children,
            // and drop duplicate requests.
            requests.sort_unstable_by(|a, b| a.path.cmp(&b.path));
            requests.dedup();

            let mut copied_entry_ids = Vec::new();
            for request in requests {
                let response = client.request(request).await?;
                copied_entry_ids.extend(response.entry.map(|e| ProjectEntryId::from_proto(e.id)));
            }

            Ok(copied_entry_ids)
        })
    }
}
2186
2187impl Snapshot {
2188    pub fn new(
2189        id: WorktreeId,
2190        root_name: Arc<RelPath>,
2191        abs_path: Arc<Path>,
2192        path_style: PathStyle,
2193    ) -> Self {
2194        Snapshot {
2195            id,
2196            abs_path: SanitizedPath::from_arc(abs_path),
2197            path_style,
2198            root_char_bag: root_name
2199                .as_unix_str()
2200                .chars()
2201                .map(|c| c.to_ascii_lowercase())
2202                .collect(),
2203            root_name,
2204            always_included_entries: Default::default(),
2205            entries_by_path: Default::default(),
2206            entries_by_id: Default::default(),
2207            scan_id: 1,
2208            completed_scan_id: 0,
2209        }
2210    }
2211
    /// The identifier of the worktree this snapshot belongs to.
    pub fn id(&self) -> WorktreeId {
        self.id
    }
2215
    // TODO:
    // Consider the following:
    //
    // ```rust
    // let abs_path: Arc<Path> = snapshot.abs_path(); // e.g. "C:\Users\user\Desktop\project"
    // let some_non_trimmed_path = Path::new("\\\\?\\C:\\Users\\user\\Desktop\\project\\main.rs");
    // // The caller perform some actions here:
    // some_non_trimmed_path.strip_prefix(abs_path);  // This fails
    // some_non_trimmed_path.starts_with(abs_path);   // This fails too
    // ```
    //
    // This is definitely a bug, but it's not clear if we should handle it here or not.
    /// The absolute path of the worktree root, viewed as a plain `Path`.
    pub fn abs_path(&self) -> &Arc<Path> {
        SanitizedPath::cast_arc_ref(&self.abs_path)
    }
2231
    /// Builds the first `UpdateWorktree` message for a peer that has not yet
    /// seen this worktree: it contains every entry in the snapshot.
    /// Repositories are sent in separate messages.
    fn build_initial_update(&self, project_id: u64, worktree_id: u64) -> proto::UpdateWorktree {
        let mut updated_entries = self
            .entries_by_path
            .iter()
            .map(proto::Entry::from)
            .collect::<Vec<_>>();
        // Sort by id so receivers apply entries in a deterministic order.
        updated_entries.sort_unstable_by_key(|e| e.id);

        proto::UpdateWorktree {
            project_id,
            worktree_id,
            abs_path: self.abs_path().to_string_lossy().into_owned(),
            root_name: self.root_name().to_proto(),
            updated_entries,
            removed_entries: Vec::new(),
            scan_id: self.scan_id as u64,
            is_last_update: self.completed_scan_id == self.scan_id,
            // Sent in separate messages.
            updated_repositories: Vec::new(),
            removed_repositories: Vec::new(),
        }
    }
2254
    /// Resolves a repository work directory to an absolute path, whether it
    /// lies inside this worktree or above it.
    pub fn work_directory_abs_path(&self, work_directory: &WorkDirectory) -> PathBuf {
        match work_directory {
            WorkDirectory::InProject { relative_path } => self.absolutize(relative_path),
            WorkDirectory::AboveProject { absolute_path, .. } => absolute_path.as_ref().to_owned(),
        }
    }
2261
2262    pub fn absolutize(&self, path: &RelPath) -> PathBuf {
2263        if path.file_name().is_some() {
2264            let mut abs_path = self.abs_path.to_string();
2265            for component in path.components() {
2266                if !abs_path.ends_with(self.path_style.primary_separator()) {
2267                    abs_path.push_str(self.path_style.primary_separator());
2268                }
2269                abs_path.push_str(component);
2270            }
2271            PathBuf::from(abs_path)
2272        } else {
2273            self.abs_path.as_path().to_path_buf()
2274        }
2275    }
2276
    /// Whether an entry with the given id exists in this snapshot.
    pub fn contains_entry(&self, entry_id: ProjectEntryId) -> bool {
        self.entries_by_id.get(&entry_id, ()).is_some()
    }
2280
2281    fn insert_entry(
2282        &mut self,
2283        entry: proto::Entry,
2284        always_included_paths: &PathMatcher,
2285    ) -> Result<Entry> {
2286        let entry = Entry::try_from((&self.root_char_bag, always_included_paths, entry))?;
2287        let old_entry = self.entries_by_id.insert_or_replace(
2288            PathEntry {
2289                id: entry.id,
2290                path: entry.path.clone(),
2291                is_ignored: entry.is_ignored,
2292                scan_id: 0,
2293            },
2294            (),
2295        );
2296        if let Some(old_entry) = old_entry {
2297            self.entries_by_path.remove(&PathKey(old_entry.path), ());
2298        }
2299        self.entries_by_path.insert_or_replace(entry.clone(), ());
2300        Ok(entry)
2301    }
2302
    /// Removes the entry with the given id — and every entry underneath its
    /// path — from both trees. Returns the removed entry's path, or `None` if
    /// the id was not present.
    fn delete_entry(&mut self, entry_id: ProjectEntryId) -> Option<Arc<RelPath>> {
        let removed_entry = self.entries_by_id.remove(&entry_id, ())?;
        self.entries_by_path = {
            let mut cursor = self.entries_by_path.cursor::<TraversalProgress>(());
            // Keep everything strictly before the removed path...
            let mut new_entries_by_path =
                cursor.slice(&TraversalTarget::path(&removed_entry.path), Bias::Left);
            // ...skip the removed entry plus all of its descendants, removing
            // their by-id records along the way...
            while let Some(entry) = cursor.item() {
                if entry.path.starts_with(&removed_entry.path) {
                    self.entries_by_id.remove(&entry.id, ());
                    cursor.next();
                } else {
                    break;
                }
            }
            // ...and append the untouched remainder of the tree.
            new_entries_by_path.append(cursor.suffix(), ());
            new_entries_by_path
        };

        Some(removed_entry.path)
    }
2323
2324    fn update_abs_path(&mut self, abs_path: Arc<SanitizedPath>, root_name: Arc<RelPath>) {
2325        self.abs_path = abs_path;
2326        if root_name != self.root_name {
2327            self.root_char_bag = root_name
2328                .as_unix_str()
2329                .chars()
2330                .map(|c| c.to_ascii_lowercase())
2331                .collect();
2332            self.root_name = root_name;
2333        }
2334    }
2335
    /// Applies an `UpdateWorktree` message from the host to this snapshot:
    /// updates the root path/name, removes deleted entries, upserts updated
    /// ones, and advances the scan ids.
    pub fn apply_remote_update(
        &mut self,
        update: proto::UpdateWorktree,
        always_included_paths: &PathMatcher,
    ) {
        log::debug!(
            "applying remote worktree update. {} entries updated, {} removed",
            update.updated_entries.len(),
            update.removed_entries.len()
        );
        if let Some(root_name) = RelPath::from_proto(&update.root_name).log_err() {
            self.update_abs_path(
                SanitizedPath::new_arc(&Path::new(&update.abs_path)),
                root_name,
            );
        }

        // Accumulate edits and apply them in single batches per tree below.
        let mut entries_by_path_edits = Vec::new();
        let mut entries_by_id_edits = Vec::new();

        for entry_id in update.removed_entries {
            let entry_id = ProjectEntryId::from_proto(entry_id);
            entries_by_id_edits.push(Edit::Remove(entry_id));
            if let Some(entry) = self.entry_for_id(entry_id) {
                entries_by_path_edits.push(Edit::Remove(PathKey(entry.path.clone())));
            }
        }

        for entry in update.updated_entries {
            // Skip (but log) entries that fail to convert from the wire format.
            let Some(entry) =
                Entry::try_from((&self.root_char_bag, always_included_paths, entry)).log_err()
            else {
                continue;
            };
            // If this id currently maps to a different path, drop the old
            // by-path record; if this path belongs to a different id, drop the
            // old by-id record — keeping both trees in one-to-one correspondence.
            if let Some(PathEntry { path, .. }) = self.entries_by_id.get(&entry.id, ()) {
                entries_by_path_edits.push(Edit::Remove(PathKey(path.clone())));
            }
            if let Some(old_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
                && old_entry.id != entry.id
            {
                entries_by_id_edits.push(Edit::Remove(old_entry.id));
            }
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: 0,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.entries_by_path.edit(entries_by_path_edits, ());
        self.entries_by_id.edit(entries_by_id_edits, ());

        self.scan_id = update.scan_id as usize;
        if update.is_last_update {
            self.completed_scan_id = update.scan_id as usize;
        }
    }
2395
    /// Total number of entries in the snapshot, including ignored ones.
    pub fn entry_count(&self) -> usize {
        self.entries_by_path.summary().count
    }
2399
2400    pub fn visible_entry_count(&self) -> usize {
2401        self.entries_by_path.summary().non_ignored_count
2402    }
2403
2404    pub fn dir_count(&self) -> usize {
2405        let summary = self.entries_by_path.summary();
2406        summary.count - summary.file_count
2407    }
2408
2409    pub fn visible_dir_count(&self) -> usize {
2410        let summary = self.entries_by_path.summary();
2411        summary.non_ignored_count - summary.non_ignored_file_count
2412    }
2413
2414    pub fn file_count(&self) -> usize {
2415        self.entries_by_path.summary().file_count
2416    }
2417
2418    pub fn visible_file_count(&self) -> usize {
2419        self.entries_by_path.summary().non_ignored_file_count
2420    }
2421
2422    fn traverse_from_offset(
2423        &self,
2424        include_files: bool,
2425        include_dirs: bool,
2426        include_ignored: bool,
2427        start_offset: usize,
2428    ) -> Traversal<'_> {
2429        let mut cursor = self.entries_by_path.cursor(());
2430        cursor.seek(
2431            &TraversalTarget::Count {
2432                count: start_offset,
2433                include_files,
2434                include_dirs,
2435                include_ignored,
2436            },
2437            Bias::Right,
2438        );
2439        Traversal {
2440            snapshot: self,
2441            cursor,
2442            include_files,
2443            include_dirs,
2444            include_ignored,
2445        }
2446    }
2447
    /// Returns a traversal over the entries matching the given inclusion
    /// flags, starting at `path`.
    pub fn traverse_from_path(
        &self,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        path: &RelPath,
    ) -> Traversal<'_> {
        Traversal::new(self, include_files, include_dirs, include_ignored, path)
    }
2457
    /// Traversal over file entries only, starting at the `start`-th file.
    pub fn files(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, false, include_ignored, start)
    }
2461
    /// Traversal over directory entries only, starting at the `start`-th directory.
    pub fn directories(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(false, true, include_ignored, start)
    }
2465
    /// Traversal over both files and directories, starting at the `start`-th entry.
    pub fn entries(&self, include_ignored: bool, start: usize) -> Traversal<'_> {
        self.traverse_from_offset(true, true, include_ignored, start)
    }
2469
2470    pub fn paths(&self) -> impl Iterator<Item = &RelPath> {
2471        self.entries_by_path
2472            .cursor::<()>(())
2473            .filter(move |entry| !entry.path.is_empty())
2474            .map(|entry| entry.path.as_ref())
2475    }
2476
2477    pub fn child_entries<'a>(&'a self, parent_path: &'a RelPath) -> ChildEntriesIter<'a> {
2478        let options = ChildEntriesOptions {
2479            include_files: true,
2480            include_dirs: true,
2481            include_ignored: true,
2482        };
2483        self.child_entries_with_options(parent_path, options)
2484    }
2485
2486    pub fn child_entries_with_options<'a>(
2487        &'a self,
2488        parent_path: &'a RelPath,
2489        options: ChildEntriesOptions,
2490    ) -> ChildEntriesIter<'a> {
2491        let mut cursor = self.entries_by_path.cursor(());
2492        cursor.seek(&TraversalTarget::path(parent_path), Bias::Right);
2493        let traversal = Traversal {
2494            snapshot: self,
2495            cursor,
2496            include_files: options.include_files,
2497            include_dirs: options.include_dirs,
2498            include_ignored: options.include_ignored,
2499        };
2500        ChildEntriesIter {
2501            traversal,
2502            parent_path,
2503        }
2504    }
2505
    /// The entry for the worktree root itself (first in path order), if the
    /// snapshot is non-empty.
    pub fn root_entry(&self) -> Option<&Entry> {
        self.entries_by_path.first()
    }
2509
2510    /// Returns `None` for a single file worktree, or `Some(self.abs_path())` if
2511    /// it is a directory.
2512    pub fn root_dir(&self) -> Option<Arc<Path>> {
2513        self.root_entry()
2514            .filter(|entry| entry.is_dir())
2515            .map(|_| self.abs_path().clone())
2516    }
2517
    /// The worktree's root name, as a relative path.
    pub fn root_name(&self) -> &RelPath {
        &self.root_name
    }
2521
    /// The worktree's root name as a unix-style string slice.
    pub fn root_name_str(&self) -> &str {
        self.root_name.as_unix_str()
    }
2525
    /// The id of the current (possibly still in-progress) scan.
    pub fn scan_id(&self) -> usize {
        self.scan_id
    }
2529
2530    pub fn entry_for_path(&self, path: &RelPath) -> Option<&Entry> {
2531        self.traverse_from_path(true, true, true, path)
2532            .entry()
2533            .and_then(|entry| {
2534                if entry.path.as_ref() == path {
2535                    Some(entry)
2536                } else {
2537                    None
2538                }
2539            })
2540    }
2541
2542    /// Resolves a path to an executable using the following heuristics:
2543    ///
2544    /// 1. If the path starts with `~`, it is expanded to the user's home directory.
2545    /// 2. If the path is relative and contains more than one component,
2546    ///    it is joined to the worktree root path.
2547    /// 3. If the path is relative and exists in the worktree
2548    ///    (even if falls under an exclusion filter),
2549    ///    it is joined to the worktree root path.
2550    /// 4. Otherwise the path is returned unmodified.
2551    ///
2552    /// Relative paths that do not exist in the worktree may
2553    /// still be found using the `PATH` environment variable.
2554    pub fn resolve_relative_path(&self, path: PathBuf) -> PathBuf {
2555        if let Some(path_str) = path.to_str() {
2556            if let Some(remaining_path) = path_str.strip_prefix("~/") {
2557                return home_dir().join(remaining_path);
2558            } else if path_str == "~" {
2559                return home_dir().to_path_buf();
2560            }
2561        }
2562
2563        if let Ok(rel_path) = RelPath::new(&path, self.path_style)
2564            && (path.components().count() > 1 || self.entry_for_path(&rel_path).is_some())
2565        {
2566            self.abs_path().join(path)
2567        } else {
2568            path
2569        }
2570    }
2571
2572    pub fn entry_for_id(&self, id: ProjectEntryId) -> Option<&Entry> {
2573        let entry = self.entries_by_id.get(&id, ())?;
2574        self.entry_for_path(&entry.path)
2575    }
2576
    /// The path style used by this worktree's paths.
    pub fn path_style(&self) -> PathStyle {
        self.path_style
    }
2580}
2581
2582impl LocalSnapshot {
2583    fn local_repo_for_work_directory_path(&self, path: &RelPath) -> Option<&LocalRepositoryEntry> {
2584        self.git_repositories
2585            .iter()
2586            .map(|(_, entry)| entry)
2587            .find(|entry| entry.work_directory.path_key() == PathKey(path.into()))
2588    }
2589
2590    fn build_update(
2591        &self,
2592        project_id: u64,
2593        worktree_id: u64,
2594        entry_changes: UpdatedEntriesSet,
2595    ) -> proto::UpdateWorktree {
2596        let mut updated_entries = Vec::new();
2597        let mut removed_entries = Vec::new();
2598
2599        for (_, entry_id, path_change) in entry_changes.iter() {
2600            if let PathChange::Removed = path_change {
2601                removed_entries.push(entry_id.0 as u64);
2602            } else if let Some(entry) = self.entry_for_id(*entry_id) {
2603                updated_entries.push(proto::Entry::from(entry));
2604            }
2605        }
2606
2607        removed_entries.sort_unstable();
2608        updated_entries.sort_unstable_by_key(|e| e.id);
2609
2610        // TODO - optimize, knowing that removed_entries are sorted.
2611        removed_entries.retain(|id| updated_entries.binary_search_by_key(id, |e| e.id).is_err());
2612
2613        proto::UpdateWorktree {
2614            project_id,
2615            worktree_id,
2616            abs_path: self.abs_path().to_string_lossy().into_owned(),
2617            root_name: self.root_name().to_proto(),
2618            updated_entries,
2619            removed_entries,
2620            scan_id: self.scan_id as u64,
2621            is_last_update: self.completed_scan_id == self.scan_id,
2622            // Sent in separate messages.
2623            updated_repositories: Vec::new(),
2624            removed_repositories: Vec::new(),
2625        }
2626    }
2627
    /// Inserts `entry` into both entry trees, keeping them consistent, and
    /// refreshes the cached gitignore when the entry is a `.gitignore` file.
    async fn insert_entry(&mut self, mut entry: Entry, fs: &dyn Fs) -> Entry {
        log::trace!("insert entry {:?}", entry.path);
        // Keep the cached gitignore for the parent directory up to date.
        if entry.is_file() && entry.path.file_name() == Some(&GITIGNORE) {
            let abs_path = self.absolutize(&entry.path);
            match build_gitignore(&abs_path, fs).await {
                Ok(ignore) => {
                    self.ignores_by_parent_abs_path
                        .insert(abs_path.parent().unwrap().into(), (Arc::new(ignore), true));
                }
                Err(error) => {
                    // A broken .gitignore shouldn't abort the scan; log and continue.
                    log::error!(
                        "error loading .gitignore file {:?} - {:?}",
                        &entry.path,
                        error
                    );
                }
            }
        }

        // Don't downgrade a directory that was already loaded back to pending.
        if entry.kind == EntryKind::PendingDir
            && let Some(existing_entry) = self.entries_by_path.get(&PathKey(entry.path.clone()), ())
        {
            entry.kind = existing_entry.kind;
        }

        let scan_id = self.scan_id;
        let removed = self.entries_by_path.insert_or_replace(entry.clone(), ());
        // If the path was previously occupied by a *different* entry, drop that
        // entry's id record so the two trees stay consistent.
        if let Some(removed) = removed
            && removed.id != entry.id
        {
            self.entries_by_id.remove(&removed.id, ());
        }
        self.entries_by_id.insert_or_replace(
            PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id,
            },
            (),
        );

        entry
    }
2672
2673    fn ancestor_inodes_for_path(&self, path: &RelPath) -> TreeSet<u64> {
2674        let mut inodes = TreeSet::default();
2675        for ancestor in path.ancestors().skip(1) {
2676            if let Some(entry) = self.entry_for_path(ancestor) {
2677                inodes.insert(entry.inode);
2678            }
2679        }
2680        inodes
2681    }
2682
    /// Builds the stack of ignore rules applying to `abs_path`, walking up
    /// its ancestors to collect cached `.gitignore` files until the
    /// containing git repository root (if any) is reached.
    async fn ignore_stack_for_abs_path(
        &self,
        abs_path: &Path,
        is_dir: bool,
        fs: &dyn Fs,
    ) -> IgnoreStack {
        let mut new_ignores = Vec::new();
        let mut repo_root = None;
        for (index, ancestor) in abs_path.ancestors().enumerate() {
            // Skip index 0: that is `abs_path` itself, whose own gitignore
            // does not apply to it.
            if index > 0 {
                if let Some((ignore, _)) = self.ignores_by_parent_abs_path.get(ancestor) {
                    new_ignores.push((ancestor, Some(ignore.clone())));
                } else {
                    new_ignores.push((ancestor, None));
                }
            }

            // Stop at the first ancestor containing a `.git` entry: ignore
            // files above a repository root don't apply inside it.
            let metadata = fs.metadata(&ancestor.join(DOT_GIT)).await.ok().flatten();
            if metadata.is_some() {
                repo_root = Some(Arc::from(ancestor));
                break;
            }
        }

        // The global gitignore (if any) forms the base of the stack.
        let mut ignore_stack = if let Some(global_gitignore) = self.global_gitignore.clone() {
            IgnoreStack::global(global_gitignore)
        } else {
            IgnoreStack::none()
        };

        // Layer in the repository's cached exclude rules, when present.
        if let Some((repo_exclude, _)) = repo_root
            .as_ref()
            .and_then(|abs_path| self.repo_exclude_by_work_dir_abs_path.get(abs_path))
        {
            ignore_stack = ignore_stack.append(IgnoreKind::RepoExclude, repo_exclude.clone());
        }
        ignore_stack.repo_root = repo_root;
        // Apply gitignores from the outermost ancestor inward. If an ancestor
        // directory is itself ignored, everything below it is ignored too.
        for (parent_abs_path, ignore) in new_ignores.into_iter().rev() {
            if ignore_stack.is_abs_path_ignored(parent_abs_path, true) {
                ignore_stack = IgnoreStack::all();
                break;
            } else if let Some(ignore) = ignore {
                ignore_stack =
                    ignore_stack.append(IgnoreKind::Gitignore(parent_abs_path.into()), ignore);
            }
        }

        // Finally, check whether the path itself is ignored by the stack.
        if ignore_stack.is_abs_path_ignored(abs_path, is_dir) {
            ignore_stack = IgnoreStack::all();
        }

        ignore_stack
    }
2736
2737    #[cfg(feature = "test-support")]
2738    pub fn expanded_entries(&self) -> impl Iterator<Item = &Entry> {
2739        self.entries_by_path
2740            .cursor::<()>(())
2741            .filter(|entry| entry.kind == EntryKind::Dir && (entry.is_external || entry.is_ignored))
2742    }
2743
    /// Test-only: asserts the snapshot's internal invariants — the two entry
    /// trees agree, file traversals match the raw tree, and tracked gitignore
    /// files correspond to indexed entries.
    #[cfg(feature = "test-support")]
    pub fn check_invariants(&self, git_state: bool) {
        use pretty_assertions::assert_eq;

        // entries_by_path and entries_by_id must contain exactly the same
        // (path, id) pairs.
        assert_eq!(
            self.entries_by_path
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<Vec<_>>(),
            self.entries_by_id
                .cursor::<()>(())
                .map(|e| (&e.path, e.id))
                .collect::<collections::BTreeSet<_>>()
                .into_iter()
                .collect::<Vec<_>>(),
            "entries_by_path and entries_by_id are inconsistent"
        );

        // The file traversals must visit exactly the file entries of the raw
        // tree, in the same order.
        let mut files = self.files(true, 0);
        let mut visible_files = self.files(false, 0);
        for entry in self.entries_by_path.cursor::<()>(()) {
            if entry.is_file() {
                assert_eq!(files.next().unwrap().inode, entry.inode);
                if (!entry.is_ignored && !entry.is_external) || entry.is_always_included {
                    assert_eq!(visible_files.next().unwrap().inode, entry.inode);
                }
            }
        }

        assert!(files.next().is_none());
        assert!(visible_files.next().is_none());

        // NOTE: despite the name, this stack-based walk (children inserted at
        // the current stack position, popped from the end) produces a
        // depth-first preorder — which is why it is compared against the
        // in-order tree iteration below.
        let mut bfs_paths = Vec::new();
        let mut stack = self
            .root_entry()
            .map(|e| e.path.as_ref())
            .into_iter()
            .collect::<Vec<_>>();
        while let Some(path) = stack.pop() {
            bfs_paths.push(path);
            let ix = stack.len();
            for child_entry in self.child_entries(path) {
                stack.insert(ix, &child_entry.path);
            }
        }

        let dfs_paths_via_iter = self
            .entries_by_path
            .cursor::<()>(())
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();
        assert_eq!(bfs_paths, dfs_paths_via_iter);

        let dfs_paths_via_traversal = self
            .entries(true, 0)
            .map(|e| e.path.as_ref())
            .collect::<Vec<_>>();

        assert_eq!(dfs_paths_via_traversal, dfs_paths_via_iter);

        if git_state {
            // Every cached gitignore must correspond to an indexed directory
            // that still contains a `.gitignore` entry.
            for ignore_parent_abs_path in self.ignores_by_parent_abs_path.keys() {
                let ignore_parent_path = &RelPath::new(
                    ignore_parent_abs_path
                        .strip_prefix(self.abs_path.as_path())
                        .unwrap(),
                    PathStyle::local(),
                )
                .unwrap();
                assert!(self.entry_for_path(ignore_parent_path).is_some());
                assert!(
                    self.entry_for_path(
                        &ignore_parent_path.join(RelPath::unix(GITIGNORE).unwrap())
                    )
                    .is_some()
                );
            }
        }
    }
2823
2824    #[cfg(feature = "test-support")]
2825    pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&RelPath, u64, bool)> {
2826        let mut paths = Vec::new();
2827        for entry in self.entries_by_path.cursor::<()>(()) {
2828            if include_ignored || !entry.is_ignored {
2829                paths.push((entry.path.as_ref(), entry.inode, entry.is_ignored));
2830            }
2831        }
2832        paths.sort_by(|a, b| a.0.cmp(b.0));
2833        paths
2834    }
2835}
2836
2837impl BackgroundScannerState {
2838    fn should_scan_directory(&self, entry: &Entry) -> bool {
2839        (self.scanning_enabled && !entry.is_external && (!entry.is_ignored || entry.is_always_included))
2840            || entry.path.file_name() == Some(DOT_GIT)
2841            || entry.path.file_name() == Some(local_settings_folder_name())
2842            || entry.path.file_name() == Some(local_vscode_folder_name())
2843            || self.scanned_dirs.contains(&entry.id) // If we've ever scanned it, keep scanning
2844            || self
2845                .paths_to_scan
2846                .iter()
2847                .any(|p| p.starts_with(&entry.path))
2848            || self
2849                .path_prefixes_to_scan
2850                .iter()
2851                .any(|p| entry.path.starts_with(p))
2852    }
2853
2854    async fn enqueue_scan_dir(
2855        &self,
2856        abs_path: Arc<Path>,
2857        entry: &Entry,
2858        scan_job_tx: &Sender<ScanJob>,
2859        fs: &dyn Fs,
2860    ) {
2861        let path = entry.path.clone();
2862        let ignore_stack = self
2863            .snapshot
2864            .ignore_stack_for_abs_path(&abs_path, true, fs)
2865            .await;
2866        let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);
2867
2868        if !ancestor_inodes.contains(&entry.inode) {
2869            ancestor_inodes.insert(entry.inode);
2870            scan_job_tx
2871                .try_send(ScanJob {
2872                    abs_path,
2873                    path,
2874                    ignore_stack,
2875                    scan_queue: scan_job_tx.clone(),
2876                    ancestor_inodes,
2877                    is_external: entry.is_external,
2878                })
2879                .unwrap();
2880        }
2881    }
2882
2883    fn reuse_entry_id(&mut self, entry: &mut Entry) {
2884        if let Some(mtime) = entry.mtime {
2885            // If an entry with the same inode was removed from the worktree during this scan,
2886            // then it *might* represent the same file or directory. But the OS might also have
2887            // re-used the inode for a completely different file or directory.
2888            //
2889            // Conditionally reuse the old entry's id:
2890            // * if the mtime is the same, the file was probably been renamed.
2891            // * if the path is the same, the file may just have been updated
2892            if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) {
2893                if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path {
2894                    entry.id = removed_entry.id;
2895                }
2896            } else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) {
2897                entry.id = existing_entry.id;
2898            }
2899        }
2900    }
2901
2902    fn entry_id_for(
2903        &mut self,
2904        next_entry_id: &AtomicUsize,
2905        path: &RelPath,
2906        metadata: &fs::Metadata,
2907    ) -> ProjectEntryId {
2908        // If an entry with the same inode was removed from the worktree during this scan,
2909        // then it *might* represent the same file or directory. But the OS might also have
2910        // re-used the inode for a completely different file or directory.
2911        //
2912        // Conditionally reuse the old entry's id:
2913        // * if the mtime is the same, the file was probably been renamed.
2914        // * if the path is the same, the file may just have been updated
2915        if let Some(removed_entry) = self.removed_entries.remove(&metadata.inode) {
2916            if removed_entry.mtime == Some(metadata.mtime) || *removed_entry.path == *path {
2917                return removed_entry.id;
2918            }
2919        } else if let Some(existing_entry) = self.snapshot.entry_for_path(path) {
2920            return existing_entry.id;
2921        }
2922        ProjectEntryId::new(next_entry_id)
2923    }
2924
2925    async fn insert_entry(&mut self, entry: Entry, fs: &dyn Fs, watcher: &dyn Watcher) -> Entry {
2926        let entry = self.snapshot.insert_entry(entry, fs).await;
2927        if entry.path.file_name() == Some(&DOT_GIT) {
2928            self.insert_git_repository(entry.path.clone(), fs, watcher)
2929                .await;
2930        }
2931
2932        #[cfg(feature = "test-support")]
2933        self.snapshot.check_invariants(false);
2934
2935        entry
2936    }
2937
    /// Records the children of a freshly-scanned directory in the snapshot,
    /// marking the parent as a loaded `Dir` and caching its gitignore, if any.
    fn populate_dir(
        &mut self,
        parent_path: Arc<RelPath>,
        entries: impl IntoIterator<Item = Entry>,
        ignore: Option<Arc<Gitignore>>,
    ) {
        // The parent may have been removed while the scan job was in flight.
        let mut parent_entry = if let Some(parent_entry) = self
            .snapshot
            .entries_by_path
            .get(&PathKey(parent_path.clone()), ())
        {
            parent_entry.clone()
        } else {
            log::warn!(
                "populating a directory {:?} that has been removed",
                parent_path
            );
            return;
        };

        // Only directories can be populated; mark pending/unloaded ones loaded.
        match parent_entry.kind {
            EntryKind::PendingDir | EntryKind::UnloadedDir => parent_entry.kind = EntryKind::Dir,
            EntryKind::Dir => {}
            _ => return,
        }

        // Cache the directory's gitignore for later ignore-stack construction.
        if let Some(ignore) = ignore {
            let abs_parent_path = self
                .snapshot
                .abs_path
                .as_path()
                .join(parent_path.as_std_path())
                .into();
            self.snapshot
                .ignores_by_parent_abs_path
                .insert(abs_parent_path, (ignore, false));
        }

        let parent_entry_id = parent_entry.id;
        self.scanned_dirs.insert(parent_entry_id);
        // Re-insert the parent (its kind may have changed) plus all children.
        let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
        let mut entries_by_id_edits = Vec::new();

        for entry in entries {
            entries_by_id_edits.push(Edit::Insert(PathEntry {
                id: entry.id,
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                scan_id: self.snapshot.scan_id,
            }));
            entries_by_path_edits.push(Edit::Insert(entry));
        }

        self.snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        self.snapshot.entries_by_id.edit(entries_by_id_edits, ());

        // Track the parent as changed, keeping `changed_paths` sorted.
        if let Err(ix) = self.changed_paths.binary_search(&parent_path) {
            self.changed_paths.insert(ix, parent_path.clone());
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }
3003
    /// Removes `path` and everything beneath it from the snapshot, remembering
    /// the removed entries (by inode) so their ids can be reused if the same
    /// files reappear later in the scan.
    fn remove_path(&mut self, path: &RelPath, watcher: &dyn Watcher) {
        log::trace!("background scanner removing path {path:?}");
        let mut new_entries;
        let removed_entries;
        {
            // Split the tree into [before `path`] + [subtree at `path`] +
            // [after `path`]; keep the outer two, capture the middle.
            let mut cursor = self
                .snapshot
                .entries_by_path
                .cursor::<TraversalProgress>(());
            new_entries = cursor.slice(&TraversalTarget::path(path), Bias::Left);
            removed_entries = cursor.slice(&TraversalTarget::successor(path), Bias::Left);
            new_entries.append(cursor.suffix(), ());
        }
        self.snapshot.entries_by_path = new_entries;

        let mut removed_ids = Vec::with_capacity(removed_entries.summary().count);
        let mut removed_dir_abs_paths = Vec::new();
        for entry in removed_entries.cursor::<()>(()) {
            if entry.is_dir() {
                removed_dir_abs_paths.push(self.snapshot.absolutize(&entry.path));
            }

            // Remember the removed entry by inode for potential id reuse,
            // preferring the most recently assigned id per inode.
            match self.removed_entries.entry(entry.inode) {
                hash_map::Entry::Occupied(mut e) => {
                    let prev_removed_entry = e.get_mut();
                    if entry.id > prev_removed_entry.id {
                        *prev_removed_entry = entry.clone();
                    }
                }
                hash_map::Entry::Vacant(e) => {
                    e.insert(entry.clone());
                }
            }

            // A removed .gitignore invalidates the cached ignore rules for
            // its parent directory.
            if entry.path.file_name() == Some(GITIGNORE) {
                let abs_parent_path = self.snapshot.absolutize(&entry.path.parent().unwrap());
                if let Some((_, needs_update)) = self
                    .snapshot
                    .ignores_by_parent_abs_path
                    .get_mut(abs_parent_path.as_path())
                {
                    *needs_update = true;
                }
            }

            // Keep `removed_ids` sorted and de-duplicated.
            if let Err(ix) = removed_ids.binary_search(&entry.id) {
                removed_ids.insert(ix, entry.id);
            }
        }

        self.snapshot
            .entries_by_id
            .edit(removed_ids.iter().map(|&id| Edit::Remove(id)).collect(), ());
        // Drop git repositories whose work directory entry was removed.
        self.snapshot
            .git_repositories
            .retain(|id, _| removed_ids.binary_search(id).is_err());

        for removed_dir_abs_path in removed_dir_abs_paths {
            watcher.remove(&removed_dir_abs_path).log_err();
        }

        #[cfg(feature = "test-support")]
        self.snapshot.check_invariants(false);
    }
3068
3069    async fn insert_git_repository(
3070        &mut self,
3071        dot_git_path: Arc<RelPath>,
3072        fs: &dyn Fs,
3073        watcher: &dyn Watcher,
3074    ) {
3075        let work_dir_path: Arc<RelPath> = match dot_git_path.parent() {
3076            Some(parent_dir) => {
3077                // Guard against repositories inside the repository metadata
3078                if parent_dir
3079                    .components()
3080                    .any(|component| component == DOT_GIT)
3081                {
3082                    log::debug!(
3083                        "not building git repository for nested `.git` directory, `.git` path in the worktree: {dot_git_path:?}"
3084                    );
3085                    return;
3086                };
3087
3088                parent_dir.into()
3089            }
3090            None => {
3091                // `dot_git_path.parent().is_none()` means `.git` directory is the opened worktree itself,
3092                // no files inside that directory are tracked by git, so no need to build the repo around it
3093                log::debug!(
3094                    "not building git repository for the worktree itself, `.git` path in the worktree: {dot_git_path:?}"
3095                );
3096                return;
3097            }
3098        };
3099
3100        let dot_git_abs_path = Arc::from(self.snapshot.absolutize(&dot_git_path).as_ref());
3101
3102        self.insert_git_repository_for_path(
3103            WorkDirectory::InProject {
3104                relative_path: work_dir_path,
3105            },
3106            dot_git_abs_path,
3107            fs,
3108            watcher,
3109        )
3110        .await
3111        .log_err();
3112    }
3113
    /// Builds a `LocalRepositoryEntry` for the given work directory and `.git`
    /// path, registers its git directories with the file watcher, and records
    /// it in the snapshot.
    ///
    /// Fails if the work directory has not been indexed by the scanner yet.
    async fn insert_git_repository_for_path(
        &mut self,
        work_directory: WorkDirectory,
        dot_git_abs_path: Arc<Path>,
        fs: &dyn Fs,
        watcher: &dyn Watcher,
    ) -> Result<LocalRepositoryEntry> {
        let work_dir_entry = self
            .snapshot
            .entry_for_path(&work_directory.path_key().0)
            .with_context(|| {
                format!(
                    "working directory `{}` not indexed",
                    work_directory
                        .path_key()
                        .0
                        .display(self.snapshot.path_style)
                )
            })?;
        let work_directory_abs_path = self.snapshot.work_directory_abs_path(&work_directory);

        // Resolve the actual repository/common dirs (these differ from
        // `.git` for git worktrees and submodules).
        let (repository_dir_abs_path, common_dir_abs_path) =
            discover_git_paths(&dot_git_abs_path, fs).await;
        // Watch both git directories; failure to watch is non-fatal.
        watcher
            .add(&common_dir_abs_path)
            .context("failed to add common directory to watcher")
            .log_err();
        watcher
            .add(&repository_dir_abs_path)
            .context("failed to add repository directory to watcher")
            .log_err();

        let work_directory_id = work_dir_entry.id;

        let local_repository = LocalRepositoryEntry {
            work_directory_id,
            work_directory,
            work_directory_abs_path: work_directory_abs_path.as_path().into(),
            git_dir_scan_id: 0,
            dot_git_abs_path,
            common_dir_abs_path,
            repository_dir_abs_path,
        };

        self.snapshot
            .git_repositories
            .insert(work_directory_id, local_repository.clone());

        log::trace!("inserting new local git repository");
        Ok(local_repository)
    }
3165}
3166
3167async fn is_git_dir(path: &Path, fs: &dyn Fs) -> bool {
3168    if let Some(file_name) = path.file_name()
3169        && file_name == DOT_GIT
3170    {
3171        return true;
3172    }
3173
3174    // If we're in a bare repository, we are not inside a `.git` folder. In a
3175    // bare repository, the root folder contains what would normally be in the
3176    // `.git` folder.
3177    let head_metadata = fs.metadata(&path.join("HEAD")).await;
3178    if !matches!(head_metadata, Ok(Some(_))) {
3179        return false;
3180    }
3181    let config_metadata = fs.metadata(&path.join("config")).await;
3182    matches!(config_metadata, Ok(Some(_)))
3183}
3184
3185async fn build_gitignore(abs_path: &Path, fs: &dyn Fs) -> Result<Gitignore> {
3186    let contents = fs
3187        .load(abs_path)
3188        .await
3189        .with_context(|| format!("failed to load gitignore file at {}", abs_path.display()))?;
3190    let parent = abs_path.parent().unwrap_or_else(|| Path::new("/"));
3191    let mut builder = GitignoreBuilder::new(parent);
3192    for line in contents.lines() {
3193        builder.add_line(Some(abs_path.into()), line)?;
3194    }
3195    Ok(builder.build()?)
3196}
3197
3198impl Deref for Worktree {
3199    type Target = Snapshot;
3200
3201    fn deref(&self) -> &Self::Target {
3202        match self {
3203            Worktree::Local(worktree) => &worktree.snapshot,
3204            Worktree::Remote(worktree) => &worktree.snapshot,
3205        }
3206    }
3207}
3208
/// A `LocalWorktree` dereferences to its `LocalSnapshot`.
impl Deref for LocalWorktree {
    type Target = LocalSnapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3216
/// A `RemoteWorktree` dereferences to its `Snapshot`.
impl Deref for RemoteWorktree {
    type Target = Snapshot;

    fn deref(&self) -> &Self::Target {
        &self.snapshot
    }
}
3224
impl fmt::Debug for LocalWorktree {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug-print the underlying snapshot rather than the worktree's
        // own bookkeeping fields.
        self.snapshot.fmt(f)
    }
}
3230
3231impl fmt::Debug for Snapshot {
3232    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3233        struct EntriesById<'a>(&'a SumTree<PathEntry>);
3234        struct EntriesByPath<'a>(&'a SumTree<Entry>);
3235
3236        impl fmt::Debug for EntriesByPath<'_> {
3237            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3238                f.debug_map()
3239                    .entries(self.0.iter().map(|entry| (&entry.path, entry.id)))
3240                    .finish()
3241            }
3242        }
3243
3244        impl fmt::Debug for EntriesById<'_> {
3245            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3246                f.debug_list().entries(self.0.iter()).finish()
3247            }
3248        }
3249
3250        f.debug_struct("Snapshot")
3251            .field("id", &self.id)
3252            .field("root_name", &self.root_name)
3253            .field("entries_by_path", &EntriesByPath(&self.entries_by_path))
3254            .field("entries_by_id", &EntriesById(&self.entries_by_id))
3255            .finish()
3256    }
3257}
3258
/// A handle to a file within a [`Worktree`], backing the [`language::File`]
/// implementation for both local and remote worktrees.
#[derive(Debug, Clone, PartialEq)]
pub struct File {
    pub worktree: Entity<Worktree>,
    /// Path of the file, relative to the worktree root.
    pub path: Arc<RelPath>,
    /// What is known about the file's state on disk (present/new/deleted/...).
    pub disk_state: DiskState,
    /// The worktree entry backing this file, if one exists.
    pub entry_id: Option<ProjectEntryId>,
    /// Whether the owning worktree is local (enables `as_local`).
    pub is_local: bool,
    /// Whether this file is considered private (e.g. a `.env` file).
    pub is_private: bool,
}
3268
impl language::File for File {
    /// Returns this file as a [`language::LocalFile`] if it belongs to a
    /// local worktree.
    fn as_local(&self) -> Option<&dyn language::LocalFile> {
        if self.is_local { Some(self) } else { None }
    }

    fn disk_state(&self) -> DiskState {
        self.disk_state
    }

    /// The file's path, relative to its worktree root.
    fn path(&self) -> &Arc<RelPath> {
        &self.path
    }

    /// Delegates to the worktree's `full_path` for this file's relative path.
    fn full_path(&self, cx: &App) -> PathBuf {
        self.worktree.read(cx).full_path(&self.path)
    }

    /// Returns the last component of this handle's absolute path. If this handle refers to the root
    /// of its worktree, then this method will return the name of the worktree itself.
    fn file_name<'a>(&'a self, cx: &'a App) -> &'a str {
        self.path
            .file_name()
            .unwrap_or_else(|| self.worktree.read(cx).root_name_str())
    }

    fn worktree_id(&self, cx: &App) -> WorktreeId {
        self.worktree.read(cx).id()
    }

    /// Serializes this file for RPC. Note that the file's size is not part
    /// of the protobuf message.
    fn to_proto(&self, cx: &App) -> rpc::proto::File {
        rpc::proto::File {
            worktree_id: self.worktree.read(cx).id().to_proto(),
            entry_id: self.entry_id.map(|id| id.to_proto()),
            path: self.path.as_ref().to_proto(),
            mtime: self.disk_state.mtime().map(|time| time.into()),
            is_deleted: self.disk_state.is_deleted(),
            is_historic: matches!(self.disk_state, DiskState::Historic { .. }),
        }
    }

    fn is_private(&self) -> bool {
        self.is_private
    }

    fn path_style(&self, cx: &App) -> PathStyle {
        self.worktree.read(cx).path_style()
    }

    /// Worktree files can always be opened.
    fn can_open(&self) -> bool {
        true
    }
}
3321
impl language::LocalFile for File {
    /// The file's absolute path, obtained from the worktree.
    fn abs_path(&self, cx: &App) -> PathBuf {
        self.worktree.read(cx).absolutize(&self.path)
    }

    /// Loads the file's contents as a string on a background task.
    ///
    /// Panics if the worktree is not local (`as_local().unwrap()`); callers
    /// are expected to have checked `as_local` first.
    fn load(&self, cx: &App) -> Task<Result<String>> {
        let worktree = self.worktree.read(cx).as_local().unwrap();
        let abs_path = worktree.absolutize(&self.path);
        let fs = worktree.fs.clone();
        cx.background_spawn(async move { fs.load(&abs_path).await })
    }

    /// Like [`Self::load`], but returns the raw bytes. Also panics if the
    /// worktree is not local.
    fn load_bytes(&self, cx: &App) -> Task<Result<Vec<u8>>> {
        let worktree = self.worktree.read(cx).as_local().unwrap();
        let abs_path = worktree.absolutize(&self.path);
        let fs = worktree.fs.clone();
        cx.background_spawn(async move { fs.load_bytes(&abs_path).await })
    }
}
3341
3342impl File {
3343    pub fn for_entry(entry: Entry, worktree: Entity<Worktree>) -> Arc<Self> {
3344        Arc::new(Self {
3345            worktree,
3346            path: entry.path.clone(),
3347            disk_state: if let Some(mtime) = entry.mtime {
3348                DiskState::Present {
3349                    mtime,
3350                    size: entry.size,
3351                }
3352            } else {
3353                DiskState::New
3354            },
3355            entry_id: Some(entry.id),
3356            is_local: true,
3357            is_private: entry.is_private,
3358        })
3359    }
3360
3361    pub fn from_proto(
3362        proto: rpc::proto::File,
3363        worktree: Entity<Worktree>,
3364        cx: &App,
3365    ) -> Result<Self> {
3366        let worktree_id = worktree.read(cx).as_remote().context("not remote")?.id();
3367
3368        anyhow::ensure!(
3369            worktree_id.to_proto() == proto.worktree_id,
3370            "worktree id does not match file"
3371        );
3372
3373        let disk_state = if proto.is_historic {
3374            DiskState::Historic {
3375                was_deleted: proto.is_deleted,
3376            }
3377        } else if proto.is_deleted {
3378            DiskState::Deleted
3379        } else if let Some(mtime) = proto.mtime.map(&Into::into) {
3380            DiskState::Present { mtime, size: 0 }
3381        } else {
3382            DiskState::New
3383        };
3384
3385        Ok(Self {
3386            worktree,
3387            path: RelPath::from_proto(&proto.path).context("invalid path in file protobuf")?,
3388            disk_state,
3389            entry_id: proto.entry_id.map(ProjectEntryId::from_proto),
3390            is_local: false,
3391            is_private: false,
3392        })
3393    }
3394
3395    pub fn from_dyn(file: Option<&Arc<dyn language::File>>) -> Option<&Self> {
3396        file.and_then(|f| {
3397            let f: &dyn language::File = f.borrow();
3398            let f: &dyn Any = f;
3399            f.downcast_ref()
3400        })
3401    }
3402
3403    pub fn worktree_id(&self, cx: &App) -> WorktreeId {
3404        self.worktree.read(cx).id()
3405    }
3406
3407    pub fn project_entry_id(&self) -> Option<ProjectEntryId> {
3408        match self.disk_state {
3409            DiskState::Deleted => None,
3410            _ => self.entry_id,
3411        }
3412    }
3413}
3414
/// A single file or directory within a worktree snapshot.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
    /// Stable id of this entry within the project.
    pub id: ProjectEntryId,
    /// Whether this is a file or a (possibly not-yet-loaded) directory.
    pub kind: EntryKind,
    /// Path relative to the worktree root.
    pub path: Arc<RelPath>,
    /// Filesystem inode number.
    pub inode: u64,
    /// Modification time, or `None` if the entry has not been created on disk.
    pub mtime: Option<MTime>,

    /// The entry's canonical path, when it differs from its worktree path —
    /// see `is_external` (NOTE(review): populated for symlinks; confirm).
    pub canonical_path: Option<Arc<Path>>,
    /// Whether this entry is ignored by Git.
    ///
    /// We only scan ignored entries once the directory is expanded and
    /// exclude them from searches.
    pub is_ignored: bool,

    /// Whether this entry is hidden or inside hidden directory.
    ///
    /// We only scan hidden entries once the directory is expanded.
    pub is_hidden: bool,

    /// Whether this entry is always included in searches.
    ///
    /// This is used for entries that are always included in searches, even
    /// if they are ignored by git. Overridden by file_scan_exclusions.
    pub is_always_included: bool,

    /// Whether this entry's canonical path is outside of the worktree.
    /// This means the entry is only accessible from the worktree root via a
    /// symlink.
    ///
    /// We only scan entries outside of the worktree once the symlinked
    /// directory is expanded. External entries are treated like gitignored
    /// entries in that they are not included in searches.
    pub is_external: bool,

    /// Whether this entry is considered to be a `.env` file.
    pub is_private: bool,
    /// The entry's size on disk, in bytes.
    pub size: u64,
    /// Characters appearing in the path, used for fuzzy matching.
    pub char_bag: CharBag,
    /// Whether the underlying file is a FIFO (named pipe).
    pub is_fifo: bool,
}
3457
/// What kind of filesystem object an [`Entry`] represents.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EntryKind {
    /// A directory whose contents have not been scanned yet.
    UnloadedDir,
    /// A directory whose scan has been queued but not completed.
    PendingDir,
    /// A fully scanned directory.
    Dir,
    /// A regular file.
    File,
}

/// A change observed for a single path during scanning.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PathChange {
    /// A filesystem entry was created.
    Added,
    /// A filesystem entry was removed.
    Removed,
    /// A filesystem entry was updated.
    Updated,
    /// A filesystem entry was either updated or added. We don't know
    /// whether or not it already existed, because the path had not
    /// been loaded before the event.
    AddedOrUpdated,
    /// A filesystem entry was found during the initial scan of the worktree.
    Loaded,
}
3481
/// Describes a change to a git repository discovered within (or above) the
/// worktree.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UpdatedGitRepository {
    /// ID of the repository's working directory.
    ///
    /// For a repo that's above the worktree root, this is the ID of the worktree root, and hence not unique.
    /// It's included here to aid the GitStore in detecting when a repository's working directory is renamed.
    pub work_directory_id: ProjectEntryId,
    /// The working directory's previous absolute path, if it existed before.
    pub old_work_directory_abs_path: Option<Arc<Path>>,
    /// The working directory's new absolute path, if it still exists.
    pub new_work_directory_abs_path: Option<Arc<Path>>,
    /// For a normal git repository checkout, the absolute path to the .git directory.
    /// For a worktree, the absolute path to the worktree's subdirectory inside the .git directory.
    pub dot_git_abs_path: Option<Arc<Path>>,
    pub repository_dir_abs_path: Option<Arc<Path>>,
    pub common_dir_abs_path: Option<Arc<Path>>,
}

/// Batch of entry changes reported by a scan: `(path, id, kind of change)`.
pub type UpdatedEntriesSet = Arc<[(Arc<RelPath>, ProjectEntryId, PathChange)]>;
/// Batch of git repository changes reported by a scan.
pub type UpdatedGitRepositoriesSet = Arc<[UpdatedGitRepository]>;
3500
/// Borrowed traversal dimension tracking the greatest path seen so far.
#[derive(Clone, Debug)]
pub struct PathProgress<'a> {
    pub max_path: &'a RelPath,
}

/// A tree summary that pairs an item summary `S` with the subtree's greatest
/// path, allowing path-ordered trees to be seeked by path.
#[derive(Clone, Debug)]
pub struct PathSummary<S> {
    /// Greatest path in the summarized subtree.
    pub max_path: Arc<RelPath>,
    pub item_summary: S,
}
3511
impl<S: Summary> Summary for PathSummary<S> {
    type Context<'a> = S::Context<'a>;

    fn zero(cx: Self::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty().into(),
            item_summary: S::zero(cx),
        }
    }

    fn add_summary(&mut self, rhs: &Self, cx: Self::Context<'_>) {
        // Items are path-ordered, so the right-hand subtree always holds the
        // greater maximum path.
        self.max_path = rhs.max_path.clone();
        self.item_summary.add_summary(&rhs.item_summary, cx);
    }
}

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathProgress<'a> {
    fn zero(_: <PathSummary<S> as Summary>::Context<'_>) -> Self {
        Self {
            max_path: RelPath::empty(),
        }
    }

    fn add_summary(
        &mut self,
        summary: &'a PathSummary<S>,
        _: <PathSummary<S> as Summary>::Context<'_>,
    ) {
        self.max_path = summary.max_path.as_ref()
    }
}

/// Accumulates git status counts across a path-ordered tree.
impl<'a> sum_tree::Dimension<'a, PathSummary<GitSummary>> for GitSummary {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<GitSummary>, _: ()) {
        *self += summary.item_summary
    }
}

/// Lets a `PathTarget` seek through trees summarized by
/// `PathSummary<GitSummary>`, comparing only the path component.
impl<'a>
    sum_tree::SeekTarget<'a, PathSummary<GitSummary>, Dimensions<TraversalProgress<'a>, GitSummary>>
    for PathTarget<'_>
{
    fn cmp(
        &self,
        cursor_location: &Dimensions<TraversalProgress<'a>, GitSummary>,
        _: (),
    ) -> Ordering {
        self.cmp_path(cursor_location.0.max_path)
    }
}

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for PathKey {
    fn zero(_: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.0 = summary.max_path.clone();
    }
}

impl<'a, S: Summary> sum_tree::Dimension<'a, PathSummary<S>> for TraversalProgress<'a> {
    fn zero(_cx: S::Context<'_>) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathSummary<S>, _: S::Context<'_>) {
        self.max_path = summary.max_path.as_ref();
    }
}
3586
3587impl Entry {
3588    fn new(
3589        path: Arc<RelPath>,
3590        metadata: &fs::Metadata,
3591        id: ProjectEntryId,
3592        root_char_bag: CharBag,
3593        canonical_path: Option<Arc<Path>>,
3594    ) -> Self {
3595        let char_bag = char_bag_for_path(root_char_bag, &path);
3596        Self {
3597            id,
3598            kind: if metadata.is_dir {
3599                EntryKind::PendingDir
3600            } else {
3601                EntryKind::File
3602            },
3603            path,
3604            inode: metadata.inode,
3605            mtime: Some(metadata.mtime),
3606            size: metadata.len,
3607            canonical_path,
3608            is_ignored: false,
3609            is_hidden: false,
3610            is_always_included: false,
3611            is_external: false,
3612            is_private: false,
3613            char_bag,
3614            is_fifo: metadata.is_fifo,
3615        }
3616    }
3617
3618    pub fn is_created(&self) -> bool {
3619        self.mtime.is_some()
3620    }
3621
3622    pub fn is_dir(&self) -> bool {
3623        self.kind.is_dir()
3624    }
3625
3626    pub fn is_file(&self) -> bool {
3627        self.kind.is_file()
3628    }
3629}
3630
3631impl EntryKind {
3632    pub fn is_dir(&self) -> bool {
3633        matches!(
3634            self,
3635            EntryKind::Dir | EntryKind::PendingDir | EntryKind::UnloadedDir
3636        )
3637    }
3638
3639    pub fn is_unloaded(&self) -> bool {
3640        matches!(self, EntryKind::UnloadedDir)
3641    }
3642
3643    pub fn is_file(&self) -> bool {
3644        matches!(self, EntryKind::File)
3645    }
3646}
3647
3648impl sum_tree::Item for Entry {
3649    type Summary = EntrySummary;
3650
3651    fn summary(&self, _cx: ()) -> Self::Summary {
3652        let non_ignored_count = if (self.is_ignored || self.is_external) && !self.is_always_included
3653        {
3654            0
3655        } else {
3656            1
3657        };
3658        let file_count;
3659        let non_ignored_file_count;
3660        if self.is_file() {
3661            file_count = 1;
3662            non_ignored_file_count = non_ignored_count;
3663        } else {
3664            file_count = 0;
3665            non_ignored_file_count = 0;
3666        }
3667
3668        EntrySummary {
3669            max_path: self.path.clone(),
3670            count: 1,
3671            non_ignored_count,
3672            file_count,
3673            non_ignored_file_count,
3674        }
3675    }
3676}
3677
impl sum_tree::KeyedItem for Entry {
    type Key = PathKey;

    /// Entries are keyed (and therefore ordered) by worktree-relative path.
    fn key(&self) -> Self::Key {
        PathKey(self.path.clone())
    }
}

/// Aggregated statistics for a subtree of [`Entry`] values.
#[derive(Clone, Debug)]
pub struct EntrySummary {
    /// Greatest path in the subtree (paths are the tree's sort key).
    max_path: Arc<RelPath>,
    /// Total number of entries.
    count: usize,
    /// Entries that are neither ignored nor external (or are force-included).
    non_ignored_count: usize,
    /// Number of file entries.
    file_count: usize,
    /// File entries that also satisfy the `non_ignored_count` criteria.
    non_ignored_file_count: usize,
}

impl Default for EntrySummary {
    fn default() -> Self {
        Self {
            max_path: Arc::from(RelPath::empty()),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}

impl sum_tree::ContextLessSummary for EntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    fn add_summary(&mut self, rhs: &Self) {
        // `rhs` summarizes entries sorting after ours, so its max path wins.
        self.max_path = rhs.max_path.clone();
        self.count += rhs.count;
        self.non_ignored_count += rhs.non_ignored_count;
        self.file_count += rhs.file_count;
        self.non_ignored_file_count += rhs.non_ignored_file_count;
    }
}

/// An item in the `entries_by_id` tree: maps a project entry id back to the
/// entry's path.
#[derive(Clone, Debug)]
struct PathEntry {
    id: ProjectEntryId,
    path: Arc<RelPath>,
    is_ignored: bool,
    /// Presumably the id of the scan that last touched this mapping —
    /// TODO(review): confirm against the scanner's update sites.
    scan_id: usize,
}
3728
impl sum_tree::Item for PathEntry {
    type Summary = PathEntrySummary;

    fn summary(&self, _cx: ()) -> Self::Summary {
        PathEntrySummary { max_id: self.id }
    }
}

impl sum_tree::KeyedItem for PathEntry {
    type Key = ProjectEntryId;

    /// `PathEntry`s are keyed (and ordered) by project entry id.
    fn key(&self) -> Self::Key {
        self.id
    }
}

/// Summary for the `entries_by_id` tree: tracks the maximum entry id.
#[derive(Clone, Debug, Default)]
struct PathEntrySummary {
    max_id: ProjectEntryId,
}

impl sum_tree::ContextLessSummary for PathEntrySummary {
    fn zero() -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &Self) {
        // Items are id-ordered, so the right-hand summary holds the maximum.
        self.max_id = summary.max_id;
    }
}

/// Allows seeking the `entries_by_id` tree by entry id.
impl<'a> sum_tree::Dimension<'a, PathEntrySummary> for ProjectEntryId {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a PathEntrySummary, _: ()) {
        *self = summary.max_id;
    }
}

/// Sort key for path-ordered trees: a worktree-relative path.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PathKey(pub Arc<RelPath>);

impl Default for PathKey {
    fn default() -> Self {
        Self(RelPath::empty().into())
    }
}

impl<'a> sum_tree::Dimension<'a, EntrySummary> for PathKey {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.0 = summary.max_path.clone();
    }
}
3788
/// Scans a worktree's filesystem contents in the background, keeping the
/// shared scanner state's snapshot up to date and streaming status updates
/// back to the worktree.
struct BackgroundScanner {
    /// Shared, lock-protected scanner state (including the snapshot).
    state: async_lock::Mutex<BackgroundScannerState>,
    fs: Arc<dyn Fs>,
    /// Whether the underlying filesystem is case-sensitive.
    fs_case_sensitive: bool,
    /// Channel over which scan state changes are reported.
    status_updates_tx: UnboundedSender<ScanState>,
    executor: BackgroundExecutor,
    /// Requests to rescan specific paths.
    scan_requests_rx: channel::Receiver<ScanRequest>,
    /// Requests to eagerly load everything under a path prefix.
    path_prefixes_to_scan_rx: channel::Receiver<PathPrefixScanRequest>,
    /// Source of unique ids for newly discovered entries.
    next_entry_id: Arc<AtomicUsize>,
    /// Which stage of its lifecycle the scanner is in.
    phase: BackgroundScannerPhase,
    watcher: Arc<dyn Watcher>,
    settings: WorktreeSettings,
    share_private_files: bool,
}

/// Lifecycle stage of the [`BackgroundScanner`] (see `run`).
#[derive(Copy, Clone, PartialEq)]
enum BackgroundScannerPhase {
    /// Performing the initial recursive scan of the worktree.
    InitialScan,
    /// Processing FS events that arrived while the initial scan was running.
    EventsReceivedDuringInitialScan,
    /// Steady state: processing live FS events.
    Events,
}
3810
3811impl BackgroundScanner {
    /// Main loop of the background scanner: performs the initial scan of the
    /// worktree, then processes filesystem events, scan requests, and path
    /// prefix requests until the worktree is dropped (all request channels
    /// closed).
    async fn run(&mut self, mut fs_events_rx: Pin<Box<dyn Send + Stream<Item = Vec<PathEvent>>>>) {
        let root_abs_path;
        let scanning_enabled;
        {
            let state = self.state.lock().await;
            root_abs_path = state.snapshot.abs_path.clone();
            scanning_enabled = state.scanning_enabled;
        }

        // If the worktree root does not contain a git repository, then find
        // the git repository in an ancestor directory. Find any gitignore files
        // in ancestor directories.
        let repo = if scanning_enabled {
            let (ignores, exclude, repo) =
                discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await;
            self.state
                .lock()
                .await
                .snapshot
                .ignores_by_parent_abs_path
                .extend(ignores);
            if let Some(exclude) = exclude {
                self.state
                    .lock()
                    .await
                    .snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .insert(root_abs_path.as_path().into(), (exclude, false));
            }

            repo
        } else {
            None
        };

        // Register the ancestor repository (if any) so its metadata is loaded
        // and its dot-git directory is watched.
        let containing_git_repository = if let Some((ancestor_dot_git, work_directory)) = repo
            && scanning_enabled
        {
            maybe!(async {
                self.state
                    .lock()
                    .await
                    .insert_git_repository_for_path(
                        work_directory,
                        ancestor_dot_git.clone().into(),
                        self.fs.as_ref(),
                        self.watcher.as_ref(),
                    )
                    .await
                    .log_err()?;
                Some(ancestor_dot_git)
            })
            .await
        } else {
            None
        };

        log::trace!("containing git repository: {containing_git_repository:?}");

        // Load the user's global gitignore (if any) and watch it for changes;
        // otherwise install a stream that never yields.
        let global_gitignore_file = paths::global_gitignore_path();
        let mut global_gitignore_events = if let Some(global_gitignore_path) =
            &global_gitignore_file
            && scanning_enabled
        {
            let is_file = self.fs.is_file(&global_gitignore_path).await;
            self.state.lock().await.snapshot.global_gitignore = if is_file {
                build_gitignore(global_gitignore_path, self.fs.as_ref())
                    .await
                    .ok()
                    .map(Arc::new)
            } else {
                None
            };
            if is_file {
                self.fs
                    .watch(global_gitignore_path, FS_WATCH_LATENCY)
                    .await
                    .0
            } else {
                Box::pin(futures::stream::pending())
            }
        } else {
            self.state.lock().await.snapshot.global_gitignore = None;
            Box::pin(futures::stream::pending())
        };

        // Seed the scan queue with the root directory (if scanning is
        // enabled), marking the root ignored when the ignore stack says so.
        let (scan_job_tx, scan_job_rx) = channel::unbounded();
        {
            let mut state = self.state.lock().await;
            state.snapshot.scan_id += 1;
            if let Some(mut root_entry) = state.snapshot.root_entry().cloned() {
                let ignore_stack = state
                    .snapshot
                    .ignore_stack_for_abs_path(root_abs_path.as_path(), true, self.fs.as_ref())
                    .await;
                if ignore_stack.is_abs_path_ignored(root_abs_path.as_path(), true) {
                    root_entry.is_ignored = true;
                    let mut root_entry = root_entry.clone();
                    state.reuse_entry_id(&mut root_entry);
                    state
                        .insert_entry(root_entry, self.fs.as_ref(), self.watcher.as_ref())
                        .await;
                }
                if root_entry.is_dir() && state.scanning_enabled {
                    state
                        .enqueue_scan_dir(
                            root_abs_path.as_path().into(),
                            &root_entry,
                            &scan_job_tx,
                            self.fs.as_ref(),
                        )
                        .await;
                }
            }
        };

        // Perform an initial scan of the directory.
        drop(scan_job_tx);
        self.scan_dirs(true, scan_job_rx).await;
        {
            let mut state = self.state.lock().await;
            state.snapshot.completed_scan_id = state.snapshot.scan_id;
        }

        self.send_status_update(false, SmallVec::new(), &[]).await;

        // Process any FS events that occurred while performing the initial scan.
        // For these events, update events cannot be as precise, because we didn't
        // have the previous state loaded yet.
        self.phase = BackgroundScannerPhase::EventsReceivedDuringInitialScan;
        if let Poll::Ready(Some(mut paths)) = futures::poll!(fs_events_rx.next()) {
            // Drain every batch that is already pending so they are handled
            // as one coalesced set of events.
            while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                paths.extend(more_paths);
            }
            self.process_events(
                paths
                    .into_iter()
                    .filter(|event| event.kind.is_some())
                    .collect(),
            )
            .await;
        }
        if let Some(abs_path) = containing_git_repository {
            // Synthesize a change event for the ancestor repository so its
            // state is refreshed after the initial scan.
            self.process_events(vec![PathEvent {
                path: abs_path,
                kind: Some(fs::PathEventKind::Changed),
            }])
            .await;
        }

        // Continue processing events until the worktree is dropped.
        self.phase = BackgroundScannerPhase::Events;

        loop {
            select_biased! {
                // Process any path refresh requests from the worktree. Prioritize
                // these before handling changes reported by the filesystem.
                request = self.next_scan_request().fuse() => {
                    let Ok(request) = request else { break };
                    if !self.process_scan_request(request, false).await {
                        return;
                    }
                }

                path_prefix_request = self.path_prefixes_to_scan_rx.recv().fuse() => {
                    let Ok(request) = path_prefix_request else { break };
                    log::trace!("adding path prefix {:?}", request.path);

                    let did_scan = self.forcibly_load_paths(std::slice::from_ref(&request.path)).await;
                    if did_scan {
                        let abs_path =
                        {
                            let mut state = self.state.lock().await;
                            state.path_prefixes_to_scan.insert(request.path.clone());
                            state.snapshot.absolutize(&request.path)
                        };

                        if let Some(abs_path) = self.fs.canonicalize(&abs_path).await.log_err() {
                            self.process_events(vec![PathEvent {
                                path: abs_path,
                                kind: Some(fs::PathEventKind::Changed),
                            }])
                            .await;
                        }
                    }
                    self.send_status_update(false, request.done, &[]).await;
                }

                paths = fs_events_rx.next().fuse() => {
                    let Some(mut paths) = paths else { break };
                    // Coalesce all pending event batches before processing.
                    while let Poll::Ready(Some(more_paths)) = futures::poll!(fs_events_rx.next()) {
                        paths.extend(more_paths);
                    }
                    self.process_events(paths.into_iter().filter(|event| event.kind.is_some()).collect()).await;
                }

                _ = global_gitignore_events.next().fuse() => {
                    if let Some(path) = &global_gitignore_file {
                        self.update_global_gitignore(&path).await;
                    }
                }
            }
        }
    }
4016
    /// Reloads the entries for the requested paths and sends a status update.
    ///
    /// Returns the result of `send_status_update`; a `false` return signals
    /// the caller to stop (NOTE(review): presumably the status receiver was
    /// dropped — confirm against `send_status_update`).
    async fn process_scan_request(&self, mut request: ScanRequest, scanning: bool) -> bool {
        log::debug!("rescanning paths {:?}", request.relative_paths);

        request.relative_paths.sort_unstable();
        self.forcibly_load_paths(&request.relative_paths).await;

        let root_path = self.state.lock().await.snapshot.abs_path.clone();
        let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
        let root_canonical_path = match &root_canonical_path {
            Ok(path) => SanitizedPath::new(path),
            Err(err) => {
                // Keep running; the request is skipped but the scanner stays up.
                log::error!("failed to canonicalize root path {root_path:?}: {err:#}");
                return true;
            }
        };
        // Map each relative path to an absolute path under the canonicalized
        // root; a path with no file name denotes the worktree root itself.
        let abs_paths = request
            .relative_paths
            .iter()
            .map(|path| {
                if path.file_name().is_some() {
                    root_canonical_path.as_path().join(path.as_std_path())
                } else {
                    root_canonical_path.as_path().to_path_buf()
                }
            })
            .collect::<Vec<_>>();

        {
            // Bump the scan id; if no scan was in flight, keep the completed
            // id in sync so the snapshot doesn't appear to be mid-scan.
            let mut state = self.state.lock().await;
            let is_idle = state.snapshot.completed_scan_id == state.snapshot.scan_id;
            state.snapshot.scan_id += 1;
            if is_idle {
                state.snapshot.completed_scan_id = state.snapshot.scan_id;
            }
        }

        self.reload_entries_for_paths(
            &root_path,
            &root_canonical_path,
            &request.relative_paths,
            abs_paths,
            None,
        )
        .await;

        self.send_status_update(scanning, request.done, &[]).await
    }
4064
4065    async fn process_events(&self, mut events: Vec<PathEvent>) {
4066        let root_path = self.state.lock().await.snapshot.abs_path.clone();
4067        let root_canonical_path = self.fs.canonicalize(root_path.as_path()).await;
4068        let root_canonical_path = match &root_canonical_path {
4069            Ok(path) => SanitizedPath::new(path),
4070            Err(err) => {
4071                let new_path = self
4072                    .state
4073                    .lock()
4074                    .await
4075                    .snapshot
4076                    .root_file_handle
4077                    .clone()
4078                    .and_then(|handle| match handle.current_path(&self.fs) {
4079                        Ok(new_path) => Some(new_path),
4080                        Err(e) => {
4081                            log::error!("Failed to refresh worktree root path: {e:#}");
4082                            None
4083                        }
4084                    })
4085                    .map(|path| SanitizedPath::new_arc(&path))
4086                    .filter(|new_path| *new_path != root_path);
4087
4088                if let Some(new_path) = new_path {
4089                    log::info!(
4090                        "root renamed from {:?} to {:?}",
4091                        root_path.as_path(),
4092                        new_path.as_path(),
4093                    );
4094                    self.status_updates_tx
4095                        .unbounded_send(ScanState::RootUpdated { new_path })
4096                        .ok();
4097                } else {
4098                    log::error!("root path could not be canonicalized: {err:#}");
4099                }
4100                return;
4101            }
4102        };
4103
4104        // Certain directories may have FS changes, but do not lead to git data changes that Zed cares about.
4105        // Ignore these, to avoid Zed unnecessarily rescanning git metadata.
4106        let skipped_files_in_dot_git = [COMMIT_MESSAGE, INDEX_LOCK];
4107        let skipped_dirs_in_dot_git = [FSMONITOR_DAEMON, LFS_DIR];
4108
4109        let mut relative_paths = Vec::with_capacity(events.len());
4110        let mut dot_git_abs_paths = Vec::new();
4111        let mut work_dirs_needing_exclude_update = Vec::new();
4112        events.sort_unstable_by(|left, right| left.path.cmp(&right.path));
4113        events.dedup_by(|left, right| {
4114            if left.path == right.path {
4115                if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
4116                    right.kind = left.kind;
4117                }
4118                true
4119            } else if left.path.starts_with(&right.path) {
4120                if matches!(left.kind, Some(fs::PathEventKind::Rescan)) {
4121                    right.kind = left.kind;
4122                }
4123                true
4124            } else {
4125                false
4126            }
4127        });
4128        {
4129            let snapshot = &self.state.lock().await.snapshot;
4130
4131            let mut ranges_to_drop = SmallVec::<[Range<usize>; 4]>::new();
4132
4133            fn skip_ix(ranges: &mut SmallVec<[Range<usize>; 4]>, ix: usize) {
4134                if let Some(last_range) = ranges.last_mut()
4135                    && last_range.end == ix
4136                {
4137                    last_range.end += 1;
4138                } else {
4139                    ranges.push(ix..ix + 1);
4140                }
4141            }
4142
4143            for (ix, event) in events.iter().enumerate() {
4144                let abs_path = SanitizedPath::new(&event.path);
4145
4146                let mut is_git_related = false;
4147                let mut dot_git_paths = None;
4148
4149                for ancestor in abs_path.as_path().ancestors() {
4150                    if is_git_dir(ancestor, self.fs.as_ref()).await {
4151                        let path_in_git_dir = abs_path
4152                            .as_path()
4153                            .strip_prefix(ancestor)
4154                            .expect("stripping off the ancestor");
4155                        dot_git_paths = Some((ancestor.to_owned(), path_in_git_dir.to_owned()));
4156                        break;
4157                    }
4158                }
4159
4160                if let Some((dot_git_abs_path, path_in_git_dir)) = dot_git_paths {
4161                    // We ignore `""` as well, as that is going to be the
4162                    // `.git` folder itself. WE do not care about it, if
4163                    // there are changes within we will see them, we need
4164                    // this ignore to prevent us from accidentally observing
4165                    // the ignored created file due to the events not being
4166                    // empty after filtering.
4167
4168                    let is_dot_git_changed = {
4169                        path_in_git_dir == Path::new("")
4170                            && event.kind == Some(PathEventKind::Changed)
4171                            && abs_path
4172                                .strip_prefix(root_canonical_path)
4173                                .ok()
4174                                .and_then(|it| RelPath::new(it, PathStyle::local()).ok())
4175                                .is_some_and(|it| {
4176                                    snapshot
4177                                        .entry_for_path(&it)
4178                                        .is_some_and(|entry| entry.kind == EntryKind::Dir)
4179                                })
4180                    };
4181                    let condition = skipped_files_in_dot_git.iter().any(|skipped| {
4182                        OsStr::new(skipped) == path_in_git_dir.as_path().as_os_str()
4183                    }) || skipped_dirs_in_dot_git
4184                        .iter()
4185                        .any(|skipped_git_subdir| path_in_git_dir.starts_with(skipped_git_subdir))
4186                        || is_dot_git_changed;
4187                    if condition {
4188                        log::debug!(
4189                            "ignoring event {abs_path:?} as it's in the .git directory among skipped files or directories"
4190                        );
4191                        skip_ix(&mut ranges_to_drop, ix);
4192                        continue;
4193                    }
4194
4195                    is_git_related = true;
4196                    if !dot_git_abs_paths.contains(&dot_git_abs_path) {
4197                        dot_git_abs_paths.push(dot_git_abs_path);
4198                    }
4199                }
4200
4201                let relative_path = if let Ok(path) = abs_path.strip_prefix(&root_canonical_path)
4202                    && let Ok(path) = RelPath::new(path, PathStyle::local())
4203                {
4204                    path
4205                } else {
4206                    if is_git_related {
4207                        log::debug!(
4208                            "ignoring event {abs_path:?}, since it's in git dir outside of root path {root_canonical_path:?}",
4209                        );
4210                    } else {
4211                        log::error!(
4212                            "ignoring event {abs_path:?} outside of root path {root_canonical_path:?}",
4213                        );
4214                    }
4215                    skip_ix(&mut ranges_to_drop, ix);
4216                    continue;
4217                };
4218
4219                let absolute_path = abs_path.to_path_buf();
4220                if absolute_path.ends_with(Path::new(DOT_GIT).join(REPO_EXCLUDE)) {
4221                    if let Some(repository) = snapshot
4222                        .git_repositories
4223                        .values()
4224                        .find(|repo| repo.common_dir_abs_path.join(REPO_EXCLUDE) == absolute_path)
4225                    {
4226                        work_dirs_needing_exclude_update
4227                            .push(repository.work_directory_abs_path.clone());
4228                    }
4229                }
4230
4231                if abs_path.file_name() == Some(OsStr::new(GITIGNORE)) {
4232                    for (_, repo) in snapshot
4233                        .git_repositories
4234                        .iter()
4235                        .filter(|(_, repo)| repo.directory_contains(&relative_path))
4236                    {
4237                        if !dot_git_abs_paths.iter().any(|dot_git_abs_path| {
4238                            dot_git_abs_path == repo.common_dir_abs_path.as_ref()
4239                        }) {
4240                            dot_git_abs_paths.push(repo.common_dir_abs_path.to_path_buf());
4241                        }
4242                    }
4243                }
4244
4245                let parent_dir_is_loaded = relative_path.parent().is_none_or(|parent| {
4246                    snapshot
4247                        .entry_for_path(parent)
4248                        .is_some_and(|entry| entry.kind == EntryKind::Dir)
4249                });
4250                if !parent_dir_is_loaded {
4251                    log::debug!("ignoring event {relative_path:?} within unloaded directory");
4252                    skip_ix(&mut ranges_to_drop, ix);
4253                    continue;
4254                }
4255
4256                if self.settings.is_path_excluded(&relative_path) {
4257                    if !is_git_related {
4258                        log::debug!("ignoring FS event for excluded path {relative_path:?}");
4259                    }
4260                    skip_ix(&mut ranges_to_drop, ix);
4261                    continue;
4262                }
4263
4264                relative_paths.push(EventRoot {
4265                    path: relative_path.into_arc(),
4266                    was_rescanned: matches!(event.kind, Some(fs::PathEventKind::Rescan)),
4267                });
4268            }
4269
4270            for range_to_drop in ranges_to_drop.into_iter().rev() {
4271                events.drain(range_to_drop);
4272            }
4273        }
4274
4275        if relative_paths.is_empty() && dot_git_abs_paths.is_empty() {
4276            return;
4277        }
4278
4279        if !work_dirs_needing_exclude_update.is_empty() {
4280            let mut state = self.state.lock().await;
4281            for work_dir_abs_path in work_dirs_needing_exclude_update {
4282                if let Some((_, needs_update)) = state
4283                    .snapshot
4284                    .repo_exclude_by_work_dir_abs_path
4285                    .get_mut(&work_dir_abs_path)
4286                {
4287                    *needs_update = true;
4288                }
4289            }
4290        }
4291
4292        self.state.lock().await.snapshot.scan_id += 1;
4293
4294        let (scan_job_tx, scan_job_rx) = channel::unbounded();
4295        log::debug!(
4296            "received fs events {:?}",
4297            relative_paths
4298                .iter()
4299                .map(|event_root| &event_root.path)
4300                .collect::<Vec<_>>()
4301        );
4302        self.reload_entries_for_paths(
4303            &root_path,
4304            &root_canonical_path,
4305            &relative_paths
4306                .iter()
4307                .map(|event_root| event_root.path.clone())
4308                .collect::<Vec<_>>(),
4309            events
4310                .into_iter()
4311                .map(|event| event.path)
4312                .collect::<Vec<_>>(),
4313            Some(scan_job_tx.clone()),
4314        )
4315        .await;
4316
4317        let affected_repo_roots = if !dot_git_abs_paths.is_empty() {
4318            self.update_git_repositories(dot_git_abs_paths).await
4319        } else {
4320            Vec::new()
4321        };
4322
4323        {
4324            let mut ignores_to_update = self.ignores_needing_update().await;
4325            ignores_to_update.extend(affected_repo_roots);
4326            let ignores_to_update = self.order_ignores(ignores_to_update).await;
4327            let snapshot = self.state.lock().await.snapshot.clone();
4328            self.update_ignore_statuses_for_paths(scan_job_tx, snapshot, ignores_to_update)
4329                .await;
4330            self.scan_dirs(false, scan_job_rx).await;
4331        }
4332
4333        {
4334            let mut state = self.state.lock().await;
4335            state.snapshot.completed_scan_id = state.snapshot.scan_id;
4336            for (_, entry) in mem::take(&mut state.removed_entries) {
4337                state.scanned_dirs.remove(&entry.id);
4338            }
4339        }
4340        self.send_status_update(false, SmallVec::new(), &relative_paths)
4341            .await;
4342    }
4343
4344    async fn update_global_gitignore(&self, abs_path: &Path) {
4345        let ignore = build_gitignore(abs_path, self.fs.as_ref())
4346            .await
4347            .log_err()
4348            .map(Arc::new);
4349        let (prev_snapshot, ignore_stack, abs_path) = {
4350            let mut state = self.state.lock().await;
4351            state.snapshot.global_gitignore = ignore;
4352            let abs_path = state.snapshot.abs_path().clone();
4353            let ignore_stack = state
4354                .snapshot
4355                .ignore_stack_for_abs_path(&abs_path, true, self.fs.as_ref())
4356                .await;
4357            (state.snapshot.clone(), ignore_stack, abs_path)
4358        };
4359        let (scan_job_tx, scan_job_rx) = channel::unbounded();
4360        self.update_ignore_statuses_for_paths(
4361            scan_job_tx,
4362            prev_snapshot,
4363            vec![(abs_path, ignore_stack)],
4364        )
4365        .await;
4366        self.scan_dirs(false, scan_job_rx).await;
4367        self.send_status_update(false, SmallVec::new(), &[]).await;
4368    }
4369
4370    async fn forcibly_load_paths(&self, paths: &[Arc<RelPath>]) -> bool {
4371        let (scan_job_tx, scan_job_rx) = channel::unbounded();
4372        {
4373            let mut state = self.state.lock().await;
4374            let root_path = state.snapshot.abs_path.clone();
4375            for path in paths {
4376                for ancestor in path.ancestors() {
4377                    if let Some(entry) = state.snapshot.entry_for_path(ancestor)
4378                        && entry.kind == EntryKind::UnloadedDir
4379                    {
4380                        let abs_path = root_path.join(ancestor.as_std_path());
4381                        state
4382                            .enqueue_scan_dir(
4383                                abs_path.into(),
4384                                entry,
4385                                &scan_job_tx,
4386                                self.fs.as_ref(),
4387                            )
4388                            .await;
4389                        state.paths_to_scan.insert(path.clone());
4390                        break;
4391                    }
4392                }
4393            }
4394            drop(scan_job_tx);
4395        }
4396        while let Ok(job) = scan_job_rx.recv().await {
4397            self.scan_dir(&job).await.log_err();
4398        }
4399
4400        !mem::take(&mut self.state.lock().await.paths_to_scan).is_empty()
4401    }
4402
    /// Drains `scan_jobs_rx` on a pool of low-priority background tasks (one
    /// per CPU), recursively scanning directories until the queue is closed
    /// and empty. Each worker also services interleaved path-refresh
    /// requests, and when `enable_progress_updates` is set, the workers
    /// cooperate (via an atomic counter) to emit periodic progress updates.
    async fn scan_dirs(
        &self,
        enable_progress_updates: bool,
        scan_jobs_rx: channel::Receiver<ScanJob>,
    ) {
        // If the status receiver has been dropped, nobody is listening for
        // scan results anymore — skip the work entirely.
        if self
            .status_updates_tx
            .unbounded_send(ScanState::Started)
            .is_err()
        {
            return;
        }

        // Shared across all workers; used below to elect a single sender per
        // progress-update interval.
        let progress_update_count = AtomicUsize::new(0);
        self.executor
            .scoped_priority(Priority::Low, |scope| {
                for _ in 0..self.executor.num_cpus() {
                    scope.spawn(async {
                        let mut last_progress_update_count = 0;
                        let progress_update_timer = self.progress_timer(enable_progress_updates).fuse();
                        futures::pin_mut!(progress_update_timer);

                        loop {
                            select_biased! {
                                // Process any path refresh requests before moving on to process
                                // the scan queue, so that user operations are prioritized.
                                request = self.next_scan_request().fuse() => {
                                    let Ok(request) = request else { break };
                                    if !self.process_scan_request(request, true).await {
                                        return;
                                    }
                                }

                                // Send periodic progress updates to the worktree. Use an atomic counter
                                // to ensure that only one of the workers sends a progress update after
                                // the update interval elapses.
                                _ = progress_update_timer => {
                                    match progress_update_count.compare_exchange(
                                        last_progress_update_count,
                                        last_progress_update_count + 1,
                                        SeqCst,
                                        SeqCst
                                    ) {
                                        Ok(_) => {
                                            // This worker won the race for the current
                                            // interval: bump its local view and send.
                                            last_progress_update_count += 1;
                                            self.send_status_update(true, SmallVec::new(), &[])
                                                .await;
                                        }
                                        Err(count) => {
                                            // Another worker already sent this interval's
                                            // update; resynchronize with the shared counter.
                                            last_progress_update_count = count;
                                        }
                                    }
                                    // Re-arm the timer for the next interval.
                                    progress_update_timer.set(self.progress_timer(enable_progress_updates).fuse());
                                }

                                // Recursively load directories from the file system.
                                job = scan_jobs_rx.recv().fuse() => {
                                    let Ok(job) = job else { break };
                                    // NOTE(review): scan errors are only logged for the root
                                    // job (empty relative path); confirm that failures in
                                    // subdirectories are intentionally silent here.
                                    if let Err(err) = self.scan_dir(&job).await
                                        && job.path.is_empty() {
                                            log::error!("error scanning directory {:?}: {}", job.abs_path, err);
                                        }
                                }
                            }
                        }
                    });
                }
            })
            .await;
    }
4473
4474    async fn send_status_update(
4475        &self,
4476        scanning: bool,
4477        barrier: SmallVec<[barrier::Sender; 1]>,
4478        event_roots: &[EventRoot],
4479    ) -> bool {
4480        let mut state = self.state.lock().await;
4481        if state.changed_paths.is_empty() && event_roots.is_empty() && scanning {
4482            return true;
4483        }
4484
4485        let merged_event_roots = merge_event_roots(&state.changed_paths, event_roots);
4486
4487        let new_snapshot = state.snapshot.clone();
4488        let old_snapshot = mem::replace(&mut state.prev_snapshot, new_snapshot.snapshot.clone());
4489        let changes = build_diff(
4490            self.phase,
4491            &old_snapshot,
4492            &new_snapshot,
4493            &merged_event_roots,
4494        );
4495        state.changed_paths.clear();
4496
4497        self.status_updates_tx
4498            .unbounded_send(ScanState::Updated {
4499                snapshot: new_snapshot,
4500                changes,
4501                scanning,
4502                barrier,
4503            })
4504            .is_ok()
4505    }
4506
4507    async fn scan_dir(&self, job: &ScanJob) -> Result<()> {
4508        let root_abs_path;
4509        let root_char_bag;
4510        {
4511            let snapshot = &self.state.lock().await.snapshot;
4512            if self.settings.is_path_excluded(&job.path) {
4513                log::error!("skipping excluded directory {:?}", job.path);
4514                return Ok(());
4515            }
4516            log::trace!("scanning directory {:?}", job.path);
4517            root_abs_path = snapshot.abs_path().clone();
4518            root_char_bag = snapshot.root_char_bag;
4519        }
4520
4521        let next_entry_id = self.next_entry_id.clone();
4522        let mut ignore_stack = job.ignore_stack.clone();
4523        let mut new_ignore = None;
4524        let mut root_canonical_path = None;
4525        let mut new_entries: Vec<Entry> = Vec::new();
4526        let mut new_jobs: Vec<Option<ScanJob>> = Vec::new();
4527        let mut child_paths = self
4528            .fs
4529            .read_dir(&job.abs_path)
4530            .await?
4531            .filter_map(|entry| async {
4532                match entry {
4533                    Ok(entry) => Some(entry),
4534                    Err(error) => {
4535                        log::error!("error processing entry {:?}", error);
4536                        None
4537                    }
4538                }
4539            })
4540            .collect::<Vec<_>>()
4541            .await;
4542
4543        // Ensure that .git and .gitignore are processed first.
4544        swap_to_front(&mut child_paths, GITIGNORE);
4545        swap_to_front(&mut child_paths, DOT_GIT);
4546
4547        if let Some(path) = child_paths.first()
4548            && path.ends_with(DOT_GIT)
4549        {
4550            ignore_stack.repo_root = Some(job.abs_path.clone());
4551        }
4552
4553        for child_abs_path in child_paths {
4554            let child_abs_path: Arc<Path> = child_abs_path.into();
4555            let child_name = child_abs_path.file_name().unwrap();
4556            let Some(child_path) = child_name
4557                .to_str()
4558                .and_then(|name| Some(job.path.join(RelPath::unix(name).ok()?)))
4559            else {
4560                continue;
4561            };
4562
4563            if child_name == DOT_GIT {
4564                let mut state = self.state.lock().await;
4565                state
4566                    .insert_git_repository(
4567                        child_path.clone(),
4568                        self.fs.as_ref(),
4569                        self.watcher.as_ref(),
4570                    )
4571                    .await;
4572            } else if child_name == GITIGNORE {
4573                match build_gitignore(&child_abs_path, self.fs.as_ref()).await {
4574                    Ok(ignore) => {
4575                        let ignore = Arc::new(ignore);
4576                        ignore_stack = ignore_stack
4577                            .append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
4578                        new_ignore = Some(ignore);
4579                    }
4580                    Err(error) => {
4581                        log::error!(
4582                            "error loading .gitignore file {:?} - {:?}",
4583                            child_name,
4584                            error
4585                        );
4586                    }
4587                }
4588            }
4589
4590            if self.settings.is_path_excluded(&child_path) {
4591                log::debug!("skipping excluded child entry {child_path:?}");
4592                self.state
4593                    .lock()
4594                    .await
4595                    .remove_path(&child_path, self.watcher.as_ref());
4596                continue;
4597            }
4598
4599            let child_metadata = match self.fs.metadata(&child_abs_path).await {
4600                Ok(Some(metadata)) => metadata,
4601                Ok(None) => continue,
4602                Err(err) => {
4603                    log::error!("error processing {:?}: {err:#}", child_abs_path.display());
4604                    continue;
4605                }
4606            };
4607
4608            let mut child_entry = Entry::new(
4609                child_path.clone(),
4610                &child_metadata,
4611                ProjectEntryId::new(&next_entry_id),
4612                root_char_bag,
4613                None,
4614            );
4615
4616            if job.is_external {
4617                child_entry.is_external = true;
4618            } else if child_metadata.is_symlink {
4619                let canonical_path = match self.fs.canonicalize(&child_abs_path).await {
4620                    Ok(path) => path,
4621                    Err(err) => {
4622                        log::error!("error reading target of symlink {child_abs_path:?}: {err:#}",);
4623                        continue;
4624                    }
4625                };
4626
4627                // lazily canonicalize the root path in order to determine if
4628                // symlinks point outside of the worktree.
4629                let root_canonical_path = match &root_canonical_path {
4630                    Some(path) => path,
4631                    None => match self.fs.canonicalize(&root_abs_path).await {
4632                        Ok(path) => root_canonical_path.insert(path),
4633                        Err(err) => {
4634                            log::error!("error canonicalizing root {:?}: {:?}", root_abs_path, err);
4635                            continue;
4636                        }
4637                    },
4638                };
4639
4640                if !canonical_path.starts_with(root_canonical_path) {
4641                    child_entry.is_external = true;
4642                }
4643
4644                child_entry.canonical_path = Some(canonical_path.into());
4645            }
4646
4647            if child_entry.is_dir() {
4648                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, true);
4649                child_entry.is_always_included =
4650                    self.settings.is_path_always_included(&child_path, true);
4651
4652                // Avoid recursing until crash in the case of a recursive symlink
4653                if job.ancestor_inodes.contains(&child_entry.inode) {
4654                    new_jobs.push(None);
4655                } else {
4656                    let mut ancestor_inodes = job.ancestor_inodes.clone();
4657                    ancestor_inodes.insert(child_entry.inode);
4658
4659                    new_jobs.push(Some(ScanJob {
4660                        abs_path: child_abs_path.clone(),
4661                        path: child_path,
4662                        is_external: child_entry.is_external,
4663                        ignore_stack: if child_entry.is_ignored {
4664                            IgnoreStack::all()
4665                        } else {
4666                            ignore_stack.clone()
4667                        },
4668                        ancestor_inodes,
4669                        scan_queue: job.scan_queue.clone(),
4670                    }));
4671                }
4672            } else {
4673                child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
4674                child_entry.is_always_included =
4675                    self.settings.is_path_always_included(&child_path, false);
4676            }
4677
4678            {
4679                let relative_path = job
4680                    .path
4681                    .join(RelPath::unix(child_name.to_str().unwrap()).unwrap());
4682                if self.is_path_private(&relative_path) {
4683                    log::debug!("detected private file: {relative_path:?}");
4684                    child_entry.is_private = true;
4685                }
4686                if self.settings.is_path_hidden(&relative_path) {
4687                    log::debug!("detected hidden file: {relative_path:?}");
4688                    child_entry.is_hidden = true;
4689                }
4690            }
4691
4692            new_entries.push(child_entry);
4693        }
4694
4695        let mut state = self.state.lock().await;
4696
4697        // Identify any subdirectories that should not be scanned.
4698        let mut job_ix = 0;
4699        for entry in &mut new_entries {
4700            state.reuse_entry_id(entry);
4701            if entry.is_dir() {
4702                if state.should_scan_directory(entry) {
4703                    job_ix += 1;
4704                } else {
4705                    log::debug!("defer scanning directory {:?}", entry.path);
4706                    entry.kind = EntryKind::UnloadedDir;
4707                    new_jobs.remove(job_ix);
4708                }
4709            }
4710            if entry.is_always_included {
4711                state
4712                    .snapshot
4713                    .always_included_entries
4714                    .push(entry.path.clone());
4715            }
4716        }
4717
4718        state.populate_dir(job.path.clone(), new_entries, new_ignore);
4719        self.watcher.add(job.abs_path.as_ref()).log_err();
4720
4721        for new_job in new_jobs.into_iter().flatten() {
4722            job.scan_queue
4723                .try_send(new_job)
4724                .expect("channel is unbounded");
4725        }
4726
4727        Ok(())
4728    }
4729
4730    /// All list arguments should be sorted before calling this function
4731    async fn reload_entries_for_paths(
4732        &self,
4733        root_abs_path: &SanitizedPath,
4734        root_canonical_path: &SanitizedPath,
4735        relative_paths: &[Arc<RelPath>],
4736        abs_paths: Vec<PathBuf>,
4737        scan_queue_tx: Option<Sender<ScanJob>>,
4738    ) {
4739        // grab metadata for all requested paths
4740        let metadata = futures::future::join_all(
4741            abs_paths
4742                .iter()
4743                .map(|abs_path| async move {
4744                    let metadata = self.fs.metadata(abs_path).await?;
4745                    if let Some(metadata) = metadata {
4746                        let canonical_path = self.fs.canonicalize(abs_path).await?;
4747
4748                        // If we're on a case-insensitive filesystem (default on macOS), we want
4749                        // to only ignore metadata for non-symlink files if their absolute-path matches
4750                        // the canonical-path.
4751                        // Because if not, this might be a case-only-renaming (`mv test.txt TEST.TXT`)
4752                        // and we want to ignore the metadata for the old path (`test.txt`) so it's
4753                        // treated as removed.
4754                        if !self.fs_case_sensitive && !metadata.is_symlink {
4755                            let canonical_file_name = canonical_path.file_name();
4756                            let file_name = abs_path.file_name();
4757                            if canonical_file_name != file_name {
4758                                return Ok(None);
4759                            }
4760                        }
4761
4762                        anyhow::Ok(Some((metadata, SanitizedPath::new_arc(&canonical_path))))
4763                    } else {
4764                        Ok(None)
4765                    }
4766                })
4767                .collect::<Vec<_>>(),
4768        )
4769        .await;
4770
4771        let mut new_ancestor_repo = if relative_paths.iter().any(|path| path.is_empty()) {
4772            Some(discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await)
4773        } else {
4774            None
4775        };
4776
4777        let mut state = self.state.lock().await;
4778        let doing_recursive_update = scan_queue_tx.is_some();
4779
4780        // Remove any entries for paths that no longer exist or are being recursively
4781        // refreshed. Do this before adding any new entries, so that renames can be
4782        // detected regardless of the order of the paths.
4783        for (path, metadata) in relative_paths.iter().zip(metadata.iter()) {
4784            if matches!(metadata, Ok(None)) || doing_recursive_update {
4785                state.remove_path(path, self.watcher.as_ref());
4786            }
4787        }
4788
4789        for (path, metadata) in relative_paths.iter().zip(metadata.into_iter()) {
4790            let abs_path: Arc<Path> = root_abs_path.join(path.as_std_path()).into();
4791            match metadata {
4792                Ok(Some((metadata, canonical_path))) => {
4793                    let ignore_stack = state
4794                        .snapshot
4795                        .ignore_stack_for_abs_path(&abs_path, metadata.is_dir, self.fs.as_ref())
4796                        .await;
4797                    let is_external = !canonical_path.starts_with(&root_canonical_path);
4798                    let entry_id = state.entry_id_for(self.next_entry_id.as_ref(), path, &metadata);
4799                    let mut fs_entry = Entry::new(
4800                        path.clone(),
4801                        &metadata,
4802                        entry_id,
4803                        state.snapshot.root_char_bag,
4804                        if metadata.is_symlink {
4805                            Some(canonical_path.as_path().to_path_buf().into())
4806                        } else {
4807                            None
4808                        },
4809                    );
4810
4811                    let is_dir = fs_entry.is_dir();
4812                    fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir);
4813                    fs_entry.is_external = is_external;
4814                    fs_entry.is_private = self.is_path_private(path);
4815                    fs_entry.is_always_included =
4816                        self.settings.is_path_always_included(path, is_dir);
4817                    fs_entry.is_hidden = self.settings.is_path_hidden(path);
4818
4819                    if let (Some(scan_queue_tx), true) = (&scan_queue_tx, is_dir) {
4820                        if state.should_scan_directory(&fs_entry)
4821                            || (fs_entry.path.is_empty()
4822                                && abs_path.file_name() == Some(OsStr::new(DOT_GIT)))
4823                        {
4824                            state
4825                                .enqueue_scan_dir(
4826                                    abs_path,
4827                                    &fs_entry,
4828                                    scan_queue_tx,
4829                                    self.fs.as_ref(),
4830                                )
4831                                .await;
4832                        } else {
4833                            fs_entry.kind = EntryKind::UnloadedDir;
4834                        }
4835                    }
4836
4837                    state
4838                        .insert_entry(fs_entry.clone(), self.fs.as_ref(), self.watcher.as_ref())
4839                        .await;
4840
4841                    if path.is_empty()
4842                        && let Some((ignores, exclude, repo)) = new_ancestor_repo.take()
4843                    {
4844                        log::trace!("updating ancestor git repository");
4845                        state.snapshot.ignores_by_parent_abs_path.extend(ignores);
4846                        if let Some((ancestor_dot_git, work_directory)) = repo {
4847                            if let Some(exclude) = exclude {
4848                                let work_directory_abs_path = self
4849                                    .state
4850                                    .lock()
4851                                    .await
4852                                    .snapshot
4853                                    .work_directory_abs_path(&work_directory);
4854
4855                                state
4856                                    .snapshot
4857                                    .repo_exclude_by_work_dir_abs_path
4858                                    .insert(work_directory_abs_path.into(), (exclude, false));
4859                            }
4860                            state
4861                                .insert_git_repository_for_path(
4862                                    work_directory,
4863                                    ancestor_dot_git.into(),
4864                                    self.fs.as_ref(),
4865                                    self.watcher.as_ref(),
4866                                )
4867                                .await
4868                                .log_err();
4869                        }
4870                    }
4871                }
4872                Ok(None) => {
4873                    self.remove_repo_path(path.clone(), &mut state.snapshot);
4874                }
4875                Err(err) => {
4876                    log::error!("error reading file {abs_path:?} on event: {err:#}");
4877                }
4878            }
4879        }
4880
4881        util::extend_sorted(
4882            &mut state.changed_paths,
4883            relative_paths.iter().cloned(),
4884            usize::MAX,
4885            Ord::cmp,
4886        );
4887    }
4888
4889    fn remove_repo_path(&self, path: Arc<RelPath>, snapshot: &mut LocalSnapshot) -> Option<()> {
4890        if !path.components().any(|component| component == DOT_GIT)
4891            && let Some(local_repo) = snapshot.local_repo_for_work_directory_path(&path)
4892        {
4893            let id = local_repo.work_directory_id;
4894            log::debug!("remove repo path: {:?}", path);
4895            snapshot.git_repositories.remove(&id);
4896            return Some(());
4897        }
4898
4899        Some(())
4900    }
4901
4902    async fn update_ignore_statuses_for_paths(
4903        &self,
4904        scan_job_tx: Sender<ScanJob>,
4905        prev_snapshot: LocalSnapshot,
4906        ignores_to_update: Vec<(Arc<Path>, IgnoreStack)>,
4907    ) {
4908        let (ignore_queue_tx, ignore_queue_rx) = channel::unbounded();
4909        {
4910            for (parent_abs_path, ignore_stack) in ignores_to_update {
4911                ignore_queue_tx
4912                    .send_blocking(UpdateIgnoreStatusJob {
4913                        abs_path: parent_abs_path,
4914                        ignore_stack,
4915                        ignore_queue: ignore_queue_tx.clone(),
4916                        scan_queue: scan_job_tx.clone(),
4917                    })
4918                    .unwrap();
4919            }
4920        }
4921        drop(ignore_queue_tx);
4922
4923        self.executor
4924            .scoped(|scope| {
4925                for _ in 0..self.executor.num_cpus() {
4926                    scope.spawn(async {
4927                        loop {
4928                            select_biased! {
4929                                // Process any path refresh requests before moving on to process
4930                                // the queue of ignore statuses.
4931                                request = self.next_scan_request().fuse() => {
4932                                    let Ok(request) = request else { break };
4933                                    if !self.process_scan_request(request, true).await {
4934                                        return;
4935                                    }
4936                                }
4937
4938                                // Recursively process directories whose ignores have changed.
4939                                job = ignore_queue_rx.recv().fuse() => {
4940                                    let Ok(job) = job else { break };
4941                                    self.update_ignore_status(job, &prev_snapshot).await;
4942                                }
4943                            }
4944                        }
4945                    });
4946                }
4947            })
4948            .await;
4949    }
4950
    /// Determines which directories need their ignore statuses recomputed,
    /// returning the absolute paths whose ignore stacks must be rebuilt.
    ///
    /// Works in phases so the state lock is never held across an await:
    /// first collect flagged entries and prune stale ones under the lock,
    /// then load repository exclude files from disk without the lock, and
    /// finally re-acquire the lock to install the loaded excludes.
    async fn ignores_needing_update(&self) -> Vec<Arc<Path>> {
        let mut ignores_to_update = Vec::new();
        let mut excludes_to_load: Vec<(Arc<Path>, PathBuf)> = Vec::new();

        // First pass: collect updates and drop stale entries without awaiting.
        {
            let snapshot = &mut self.state.lock().await.snapshot;
            let abs_path = snapshot.abs_path.clone();
            let mut repo_exclude_keys_to_remove: Vec<Arc<Path>> = Vec::new();

            for (work_dir_abs_path, (_, needs_update)) in
                snapshot.repo_exclude_by_work_dir_abs_path.iter_mut()
            {
                // Find the registered repository (if any) for this work
                // directory; its common dir tells us where info/exclude lives.
                let repository = snapshot
                    .git_repositories
                    .iter()
                    .find(|(_, repo)| &repo.work_directory_abs_path == work_dir_abs_path);

                if *needs_update {
                    *needs_update = false;
                    ignores_to_update.push(work_dir_abs_path.clone());

                    if let Some((_, repository)) = repository {
                        let exclude_abs_path = repository.common_dir_abs_path.join(REPO_EXCLUDE);
                        excludes_to_load.push((work_dir_abs_path.clone(), exclude_abs_path));
                    }
                }

                // No backing repository means this exclude entry is stale.
                if repository.is_none() {
                    repo_exclude_keys_to_remove.push(work_dir_abs_path.clone());
                }
            }

            for key in repo_exclude_keys_to_remove {
                snapshot.repo_exclude_by_work_dir_abs_path.remove(&key);
            }

            // Handle `.gitignore` entries: queue flagged parents for update
            // and drop entries whose `.gitignore` file no longer exists.
            snapshot
                .ignores_by_parent_abs_path
                .retain(|parent_abs_path, (_, needs_update)| {
                    if let Ok(parent_path) = parent_abs_path.strip_prefix(abs_path.as_path())
                        && let Some(parent_path) =
                            RelPath::new(&parent_path, PathStyle::local()).log_err()
                    {
                        if *needs_update {
                            *needs_update = false;
                            if snapshot.snapshot.entry_for_path(&parent_path).is_some() {
                                ignores_to_update.push(parent_abs_path.clone());
                            }
                        }

                        let ignore_path = parent_path.join(RelPath::unix(GITIGNORE).unwrap());
                        if snapshot.snapshot.entry_for_path(&ignore_path).is_none() {
                            return false;
                        }
                    }
                    true
                });
        }

        // Load gitignores asynchronously (outside the lock)
        let mut loaded_excludes: Vec<(Arc<Path>, Arc<Gitignore>)> = Vec::new();
        for (work_dir_abs_path, exclude_abs_path) in excludes_to_load {
            if let Ok(current_exclude) = build_gitignore(&exclude_abs_path, self.fs.as_ref()).await
            {
                loaded_excludes.push((work_dir_abs_path, Arc::new(current_exclude)));
            }
        }

        // Second pass: apply updates.
        if !loaded_excludes.is_empty() {
            let snapshot = &mut self.state.lock().await.snapshot;

            for (work_dir_abs_path, exclude) in loaded_excludes {
                // The entry may have been pruned while the lock was released;
                // only update excludes that still exist.
                if let Some((existing_exclude, _)) = snapshot
                    .repo_exclude_by_work_dir_abs_path
                    .get_mut(&work_dir_abs_path)
                {
                    *existing_exclude = exclude;
                }
            }
        }

        ignores_to_update
    }
5036
5037    async fn order_ignores(&self, mut ignores: Vec<Arc<Path>>) -> Vec<(Arc<Path>, IgnoreStack)> {
5038        let fs = self.fs.clone();
5039        let snapshot = self.state.lock().await.snapshot.clone();
5040        ignores.sort_unstable();
5041        let mut ignores_to_update = ignores.into_iter().peekable();
5042
5043        let mut result = vec![];
5044        while let Some(parent_abs_path) = ignores_to_update.next() {
5045            while ignores_to_update
5046                .peek()
5047                .map_or(false, |p| p.starts_with(&parent_abs_path))
5048            {
5049                ignores_to_update.next().unwrap();
5050            }
5051            let ignore_stack = snapshot
5052                .ignore_stack_for_abs_path(&parent_abs_path, true, fs.as_ref())
5053                .await;
5054            result.push((parent_abs_path, ignore_stack));
5055        }
5056
5057        result
5058    }
5059
    /// Re-evaluates the `is_ignored` flag of every entry directly under
    /// `job.abs_path` against `job.ignore_stack`, applies the resulting
    /// edits to the live snapshot, and enqueues recursive jobs for child
    /// directories.
    async fn update_ignore_status(&self, job: UpdateIgnoreStatusJob, snapshot: &LocalSnapshot) {
        log::trace!("update ignore status {:?}", job.abs_path);

        // If this directory has its own .gitignore, push it onto the stack so
        // it applies to the children examined below.
        let mut ignore_stack = job.ignore_stack;
        if let Some((ignore, _)) = snapshot.ignores_by_parent_abs_path.get(&job.abs_path) {
            ignore_stack =
                ignore_stack.append(IgnoreKind::Gitignore(job.abs_path.clone()), ignore.clone());
        }

        let mut entries_by_id_edits = Vec::new();
        let mut entries_by_path_edits = Vec::new();
        // Translate the job's absolute path into a worktree-relative one;
        // bail out (logging) if it lies outside the worktree root.
        let Some(path) = job
            .abs_path
            .strip_prefix(snapshot.abs_path.as_path())
            .map_err(|_| {
                anyhow::anyhow!(
                    "Failed to strip prefix '{}' from path '{}'",
                    snapshot.abs_path.as_path().display(),
                    job.abs_path.display()
                )
            })
            .log_err()
        else {
            return;
        };

        let Some(path) = RelPath::new(&path, PathStyle::local()).log_err() else {
            return;
        };

        // Mark this directory as a repository root when it contains a `.git`
        // directory.
        if let Ok(Some(metadata)) = self.fs.metadata(&job.abs_path.join(DOT_GIT)).await
            && metadata.is_dir
        {
            ignore_stack.repo_root = Some(job.abs_path.clone());
        }

        for mut entry in snapshot.child_entries(&path).cloned() {
            let was_ignored = entry.is_ignored;
            let abs_path: Arc<Path> = snapshot.absolutize(&entry.path).into();
            entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, entry.is_dir());

            if entry.is_dir() {
                // An ignored directory blankets everything beneath it.
                let child_ignore_stack = if entry.is_ignored {
                    IgnoreStack::all()
                } else {
                    ignore_stack.clone()
                };

                // Scan any directories that were previously ignored and weren't previously scanned.
                if was_ignored && !entry.is_ignored && entry.kind.is_unloaded() {
                    let state = self.state.lock().await;
                    if state.should_scan_directory(&entry) {
                        state
                            .enqueue_scan_dir(
                                abs_path.clone(),
                                &entry,
                                &job.scan_queue,
                                self.fs.as_ref(),
                            )
                            .await;
                    }
                }

                // Recurse into this child directory with its own stack.
                job.ignore_queue
                    .send(UpdateIgnoreStatusJob {
                        abs_path: abs_path.clone(),
                        ignore_stack: child_ignore_stack,
                        ignore_queue: job.ignore_queue.clone(),
                        scan_queue: job.scan_queue.clone(),
                    })
                    .await
                    .unwrap();
            }

            // Only entries whose ignored status actually flipped produce edits.
            if entry.is_ignored != was_ignored {
                let mut path_entry = snapshot.entries_by_id.get(&entry.id, ()).unwrap().clone();
                path_entry.scan_id = snapshot.scan_id;
                path_entry.is_ignored = entry.is_ignored;
                entries_by_id_edits.push(Edit::Insert(path_entry));
                entries_by_path_edits.push(Edit::Insert(entry));
            }
        }

        // Record the changed paths (keeping the list sorted) and apply the
        // accumulated edits to the live snapshot.
        let state = &mut self.state.lock().await;
        for edit in &entries_by_path_edits {
            if let Edit::Insert(entry) = edit
                && let Err(ix) = state.changed_paths.binary_search(&entry.path)
            {
                state.changed_paths.insert(ix, entry.path.clone());
            }
        }

        state
            .snapshot
            .entries_by_path
            .edit(entries_by_path_edits, ());
        state.snapshot.entries_by_id.edit(entries_by_id_edits, ());
    }
5158
    /// Reconciles the snapshot's git repositories with the given `.git`
    /// paths: registers repositories that are new, bumps the git-dir scan id
    /// of known ones, and prunes repositories whose `.git` entry has
    /// disappeared. Returns the work-directory roots affected by the update.
    async fn update_git_repositories(&self, dot_git_paths: Vec<PathBuf>) -> Vec<Arc<Path>> {
        log::trace!("reloading repositories: {dot_git_paths:?}");
        let mut state = self.state.lock().await;
        let scan_id = state.snapshot.scan_id;
        let mut affected_repo_roots = Vec::new();
        for dot_git_dir in dot_git_paths {
            // Match the event path against either the common dir or the
            // repository dir of each known repository (they differ for git
            // worktrees).
            let existing_repository_entry =
                state
                    .snapshot
                    .git_repositories
                    .iter()
                    .find_map(|(_, repo)| {
                        let dot_git_dir = SanitizedPath::new(&dot_git_dir);
                        if SanitizedPath::new(repo.common_dir_abs_path.as_ref()) == dot_git_dir
                            || SanitizedPath::new(repo.repository_dir_abs_path.as_ref())
                                == dot_git_dir
                        {
                            Some(repo.clone())
                        } else {
                            None
                        }
                    });

            match existing_repository_entry {
                // Unknown repository: register it under its worktree-relative path.
                None => {
                    let Ok(relative) = dot_git_dir.strip_prefix(state.snapshot.abs_path()) else {
                        debug_panic!(
                            "update_git_repositories called with .git directory outside the worktree root"
                        );
                        return Vec::new();
                    };
                    affected_repo_roots.push(dot_git_dir.parent().unwrap().into());
                    state
                        .insert_git_repository(
                            RelPath::new(relative, PathStyle::local())
                                .unwrap()
                                .into_arc(),
                            self.fs.as_ref(),
                            self.watcher.as_ref(),
                        )
                        .await;
                }
                // Known repository: just record that its git dir was rescanned.
                Some(local_repository) => {
                    state.snapshot.git_repositories.update(
                        &local_repository.work_directory_id,
                        |entry| {
                            entry.git_dir_scan_id = scan_id;
                        },
                    );
                }
            };
        }

        // Remove any git repositories whose .git entry no longer exists.
        let snapshot = &mut state.snapshot;
        let mut ids_to_preserve = HashSet::default();
        for (&work_directory_id, entry) in snapshot.git_repositories.iter() {
            let exists_in_snapshot =
                snapshot
                    .entry_for_id(work_directory_id)
                    .is_some_and(|entry| {
                        snapshot
                            .entry_for_path(&entry.path.join(RelPath::unix(DOT_GIT).unwrap()))
                            .is_some()
                    });

            // Also preserve repositories whose common dir still exists on
            // disk (e.g. repositories located above the worktree root).
            if exists_in_snapshot
                || matches!(
                    self.fs.metadata(&entry.common_dir_abs_path).await,
                    Ok(Some(_))
                )
            {
                ids_to_preserve.insert(work_directory_id);
            }
        }

        snapshot
            .git_repositories
            .retain(|work_directory_id, entry| {
                let preserve = ids_to_preserve.contains(work_directory_id);
                if !preserve {
                    affected_repo_roots.push(entry.dot_git_abs_path.parent().unwrap().into());
                    // Drop the pruned repository's info/exclude rules as well.
                    snapshot
                        .repo_exclude_by_work_dir_abs_path
                        .remove(&entry.work_directory_abs_path);
                }
                preserve
            });

        affected_repo_roots
    }
5250
5251    async fn progress_timer(&self, running: bool) {
5252        if !running {
5253            return futures::future::pending().await;
5254        }
5255
5256        #[cfg(feature = "test-support")]
5257        if self.fs.is_fake() {
5258            return self.executor.simulate_random_delay().await;
5259        }
5260
5261        self.executor.timer(FS_WATCH_LATENCY).await
5262    }
5263
5264    fn is_path_private(&self, path: &RelPath) -> bool {
5265        !self.share_private_files && self.settings.is_path_private(path)
5266    }
5267
5268    async fn next_scan_request(&self) -> Result<ScanRequest> {
5269        let mut request = self.scan_requests_rx.recv().await?;
5270        while let Ok(next_request) = self.scan_requests_rx.try_recv() {
5271            request.relative_paths.extend(next_request.relative_paths);
5272            request.done.extend(next_request.done);
5273        }
5274        Ok(request)
5275    }
5276}
5277
/// Walks up from `root_abs_path` looking for a git repository that contains
/// the worktree root.
///
/// Returns the `.gitignore` files loaded from ancestor directories, the
/// repository's `info/exclude` rules if they could be read, and — when the
/// root is nested inside a repository — the canonicalized `.git` path
/// together with a `WorkDirectory` describing where the root sits within
/// that repository.
async fn discover_ancestor_git_repo(
    fs: Arc<dyn Fs>,
    root_abs_path: &SanitizedPath,
) -> (
    HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
    Option<Arc<Gitignore>>,
    Option<(PathBuf, WorkDirectory)>,
) {
    let mut exclude = None;
    let mut ignores = HashMap::default();
    // Index 0 is the worktree root itself; strictly greater indices are
    // true ancestors.
    for (index, ancestor) in root_abs_path.as_path().ancestors().enumerate() {
        if index != 0 {
            if ancestor == paths::home_dir() {
                // Unless $HOME is itself the worktree root, don't consider it as a
                // containing git repository---expensive and likely unwanted.
                break;
            } else if let Ok(ignore) = build_gitignore(&ancestor.join(GITIGNORE), fs.as_ref()).await
            {
                ignores.insert(ancestor.into(), (ignore.into(), false));
            }
        }

        let ancestor_dot_git = ancestor.join(DOT_GIT);
        log::trace!("considering ancestor: {ancestor_dot_git:?}");
        // Check whether the directory or file called `.git` exists (in the
        // case of worktrees it's a file.)
        if fs
            .metadata(&ancestor_dot_git)
            .await
            .is_ok_and(|metadata| metadata.is_some())
        {
            if index != 0 {
                // We canonicalize, since the FS events use the canonicalized path.
                if let Some(ancestor_dot_git) = fs.canonicalize(&ancestor_dot_git).await.log_err() {
                    let location_in_repo = root_abs_path
                        .as_path()
                        .strip_prefix(ancestor)
                        .unwrap()
                        .into();
                    log::info!("inserting parent git repo for this worktree: {location_in_repo:?}");
                    // We associate the external git repo with our root folder and
                    // also mark where in the git repo the root folder is located.
                    return (
                        ignores,
                        exclude,
                        Some((
                            ancestor_dot_git,
                            WorkDirectory::AboveProject {
                                absolute_path: ancestor.into(),
                                location_in_repo,
                            },
                        )),
                    );
                };
                // NOTE(review): if canonicalization fails we fall through and
                // treat this ancestor like a repository root below — presumably
                // a deliberate best-effort fallback; confirm.
            }

            // Reached the worktree root's own repository (or the fallback
            // above): load its info/exclude rules if readable.
            let repo_exclude_abs_path = ancestor_dot_git.join(REPO_EXCLUDE);
            if let Ok(repo_exclude) = build_gitignore(&repo_exclude_abs_path, fs.as_ref()).await {
                exclude = Some(Arc::new(repo_exclude));
            }

            // Reached root of git repository.
            break;
        }
    }

    (ignores, exclude, None)
}
5346
5347fn merge_event_roots(changed_paths: &[Arc<RelPath>], event_roots: &[EventRoot]) -> Vec<EventRoot> {
5348    let mut merged_event_roots = Vec::with_capacity(changed_paths.len() + event_roots.len());
5349    let mut changed_paths = changed_paths.iter().peekable();
5350    let mut event_roots = event_roots.iter().peekable();
5351    while let (Some(path), Some(event_root)) = (changed_paths.peek(), event_roots.peek()) {
5352        match path.cmp(&&event_root.path) {
5353            Ordering::Less => {
5354                merged_event_roots.push(EventRoot {
5355                    path: (*changed_paths.next().expect("peeked changed path")).clone(),
5356                    was_rescanned: false,
5357                });
5358            }
5359            Ordering::Equal => {
5360                merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5361                changed_paths.next();
5362            }
5363            Ordering::Greater => {
5364                merged_event_roots.push((*event_roots.next().expect("peeked event root")).clone());
5365            }
5366        }
5367    }
5368    merged_event_roots.extend(changed_paths.map(|path| EventRoot {
5369        path: path.clone(),
5370        was_rescanned: false,
5371    }));
5372    merged_event_roots.extend(event_roots.cloned());
5373    merged_event_roots
5374}
5375
/// Computes the set of entry changes between two snapshots, classifying each
/// as added, removed, updated, or loaded.
///
/// Walks both snapshots' path-ordered entry trees in lockstep, but only
/// within the subtrees rooted at the given `event_roots`, so the comparison
/// is bounded by what actually changed. The `phase` determines how an
/// unchanged-path-but-different-entry case is classified, since during the
/// initial scan an "added" entry may merely be newly loaded.
fn build_diff(
    phase: BackgroundScannerPhase,
    old_snapshot: &Snapshot,
    new_snapshot: &Snapshot,
    event_roots: &[EventRoot],
) -> UpdatedEntriesSet {
    use BackgroundScannerPhase::*;
    use PathChange::{Added, AddedOrUpdated, Loaded, Removed, Updated};

    // Identify which paths have changed. Use the known set of changed
    // parent paths to optimize the search.
    let mut changes = Vec::new();

    let mut old_paths = old_snapshot.entries_by_path.cursor::<PathKey>(());
    let mut new_paths = new_snapshot.entries_by_path.cursor::<PathKey>(());
    let mut last_newly_loaded_dir_path = None;
    old_paths.next();
    new_paths.next();
    for event_root in event_roots {
        // Advance both cursors to this event root (they only ever move
        // forward; the roots are processed in sorted order).
        let path = PathKey(event_root.path.clone());
        if old_paths.item().is_some_and(|e| e.path < path.0) {
            old_paths.seek_forward(&path, Bias::Left);
        }
        if new_paths.item().is_some_and(|e| e.path < path.0) {
            new_paths.seek_forward(&path, Bias::Left);
        }
        loop {
            match (old_paths.item(), new_paths.item()) {
                (Some(old_entry), Some(new_entry)) => {
                    // Both cursors have left this root's subtree: move on to
                    // the next event root.
                    if old_entry.path > path.0
                        && new_entry.path > path.0
                        && !old_entry.path.starts_with(&path.0)
                        && !new_entry.path.starts_with(&path.0)
                    {
                        break;
                    }

                    match Ord::cmp(&old_entry.path, &new_entry.path) {
                        // Path only in the old snapshot: it was removed.
                        Ordering::Less => {
                            changes.push((old_entry.path.clone(), old_entry.id, Removed));
                            old_paths.next();
                        }
                        // Path present in both snapshots.
                        Ordering::Equal => {
                            if phase == EventsReceivedDuringInitialScan {
                                if old_entry.id != new_entry.id {
                                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                }
                                // If the worktree was not fully initialized when this event was generated,
                                // we can't know whether this entry was added during the scan or whether
                                // it was merely updated.
                                changes.push((
                                    new_entry.path.clone(),
                                    new_entry.id,
                                    AddedOrUpdated,
                                ));
                            } else if old_entry.id != new_entry.id {
                                // Same path, different entry id: replaced.
                                changes.push((old_entry.path.clone(), old_entry.id, Removed));
                                changes.push((new_entry.path.clone(), new_entry.id, Added));
                            } else if old_entry != new_entry {
                                if old_entry.kind.is_unloaded() {
                                    // A previously unloaded dir was loaded; its
                                    // descendants will also count as Loaded.
                                    last_newly_loaded_dir_path = Some(&new_entry.path);
                                    changes.push((new_entry.path.clone(), new_entry.id, Loaded));
                                } else {
                                    changes.push((new_entry.path.clone(), new_entry.id, Updated));
                                }
                            } else if event_root.was_rescanned {
                                // Identical entries still report Updated when
                                // the root was explicitly rescanned.
                                changes.push((new_entry.path.clone(), new_entry.id, Updated));
                            }
                            old_paths.next();
                            new_paths.next();
                        }
                        // Path only in the new snapshot: added or newly loaded.
                        Ordering::Greater => {
                            let is_newly_loaded = phase == InitialScan
                                || last_newly_loaded_dir_path
                                    .as_ref()
                                    .is_some_and(|dir| new_entry.path.starts_with(dir));
                            changes.push((
                                new_entry.path.clone(),
                                new_entry.id,
                                if is_newly_loaded { Loaded } else { Added },
                            ));
                            new_paths.next();
                        }
                    }
                }
                // Old snapshot has trailing entries: all removed.
                (Some(old_entry), None) => {
                    changes.push((old_entry.path.clone(), old_entry.id, Removed));
                    old_paths.next();
                }
                // New snapshot has trailing entries: added or newly loaded.
                (None, Some(new_entry)) => {
                    let is_newly_loaded = phase == InitialScan
                        || last_newly_loaded_dir_path
                            .as_ref()
                            .is_some_and(|dir| new_entry.path.starts_with(dir));
                    changes.push((
                        new_entry.path.clone(),
                        new_entry.id,
                        if is_newly_loaded { Loaded } else { Added },
                    ));
                    new_paths.next();
                }
                (None, None) => break,
            }
        }
    }

    changes.into()
}
5484
/// Moves the entry whose file name equals `file` to the front of
/// `child_paths`, preserving the relative order of all other entries.
/// Entries with no matching name leave the vector untouched.
fn swap_to_front(child_paths: &mut Vec<PathBuf>, file: &str) {
    // Compare via `Option` rather than `file_name().unwrap()`: paths like
    // `..` or a root have no file name and would otherwise panic.
    let position = child_paths
        .iter()
        .position(|path| path.file_name() == Some(OsStr::new(file)));
    if let Some(position) = position {
        // Rotate instead of remove+insert: one in-place shift, same ordering.
        child_paths[..=position].rotate_right(1);
    }
}
5494
5495fn char_bag_for_path(root_char_bag: CharBag, path: &RelPath) -> CharBag {
5496    let mut result = root_char_bag;
5497    result.extend(path.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
5498    result
5499}
5500
/// A unit of work for the background scanner: scan the directory at
/// `abs_path`, inserting its entries and enqueueing further jobs for its
/// subdirectories.
#[derive(Debug)]
struct ScanJob {
    // Absolute path of the directory to scan.
    abs_path: Arc<Path>,
    // The same directory, relative to the worktree root.
    path: Arc<RelPath>,
    // Ignore rules in effect for this directory's contents.
    ignore_stack: IgnoreStack,
    // Queue onto which jobs for child directories are pushed.
    scan_queue: Sender<ScanJob>,
    // Inodes of ancestor directories — presumably used to detect symlink
    // cycles during the scan; confirm against the scan loop.
    ancestor_inodes: TreeSet<u64>,
    // Whether this directory lies outside the worktree's canonical root
    // (e.g. reached through a symlink).
    is_external: bool,
}
5510
/// A unit of work for recomputing ignore statuses: re-evaluate the entries
/// under `abs_path` against `ignore_stack` (see `update_ignore_status`).
struct UpdateIgnoreStatusJob {
    // Absolute path of the directory whose children are re-evaluated.
    abs_path: Arc<Path>,
    // Ignore rules in effect at `abs_path`.
    ignore_stack: IgnoreStack,
    // Queue used to recurse into child directories.
    ignore_queue: Sender<UpdateIgnoreStatusJob>,
    // Queue for scanning directories that become unignored.
    scan_queue: Sender<ScanJob>,
}
5517
/// Test-support helpers on a worktree entity for flushing pending
/// filesystem events, so tests can wait for event-driven state to settle
/// before making assertions.
pub trait WorktreeModelHandle {
    /// Mutates the worktree's directory and waits for the resulting FS
    /// events to be processed, ensuring any earlier (possibly redundant)
    /// events have already been handled.
    #[cfg(feature = "test-support")]
    fn flush_fs_events<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;

    /// Like [`WorktreeModelHandle::flush_fs_events`], but performs the
    /// sentinel mutation in the root git repository — presumably to flush
    /// events for watchers on the repository's `.git` directory; confirm
    /// against the implementation.
    #[cfg(feature = "test-support")]
    fn flush_fs_events_in_root_git_repository<'a>(
        &self,
        cx: &'a mut gpui::TestAppContext,
    ) -> futures::future::LocalBoxFuture<'a, ()>;
}
5531
5532impl WorktreeModelHandle for Entity<Worktree> {
    // The worktree's FS event stream sometimes delivers "redundant" events for FS changes that
5534    // occurred before the worktree was constructed. These events can cause the worktree to perform
5535    // extra directory scans, and emit extra scan-state notifications.
5536    //
5537    // This function mutates the worktree's directory and waits for those mutations to be picked up,
5538    // to ensure that all redundant FS events have already been processed.
5539    #[cfg(feature = "test-support")]
5540    fn flush_fs_events<'a>(
5541        &self,
5542        cx: &'a mut gpui::TestAppContext,
5543    ) -> futures::future::LocalBoxFuture<'a, ()> {
5544        let file_name = "fs-event-sentinel";
5545
5546        let tree = self.clone();
5547        let (fs, root_path) = self.read_with(cx, |tree, _| {
5548            let tree = tree.as_local().unwrap();
5549            (tree.fs.clone(), tree.abs_path.clone())
5550        });
5551
5552        async move {
5553            // Subscribe to events BEFORE creating the file to avoid race condition
5554            // where events fire before subscription is set up
5555            let mut events = cx.events(&tree);
5556
5557            fs.create_file(&root_path.join(file_name), Default::default())
5558                .await
5559                .unwrap();
5560
5561            // Check if condition is already met before waiting for events
5562            let file_exists = || {
5563                tree.read_with(cx, |tree, _| {
5564                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
5565                        .is_some()
5566                })
5567            };
5568
5569            // Use select to avoid blocking indefinitely if events are delayed
5570            while !file_exists() {
5571                futures::select_biased! {
5572                    _ = events.next() => {}
5573                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
5574                }
5575            }
5576
5577            fs.remove_file(&root_path.join(file_name), Default::default())
5578                .await
5579                .unwrap();
5580
5581            // Check if condition is already met before waiting for events
5582            let file_gone = || {
5583                tree.read_with(cx, |tree, _| {
5584                    tree.entry_for_path(RelPath::unix(file_name).unwrap())
5585                        .is_none()
5586                })
5587            };
5588
5589            // Use select to avoid blocking indefinitely if events are delayed
5590            while !file_gone() {
5591                futures::select_biased! {
5592                    _ = events.next() => {}
5593                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
5594                }
5595            }
5596
5597            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
5598                .await;
5599        }
5600        .boxed_local()
5601    }
5602
5603    // This function is similar to flush_fs_events, except that it waits for events to be flushed in
5604    // the .git folder of the root repository.
5605    // The reason for its existence is that a repository's .git folder might live *outside* of the
5606    // worktree and thus its FS events might go through a different path.
5607    // In order to flush those, we need to create artificial events in the .git folder and wait
5608    // for the repository to be reloaded.
5609    #[cfg(feature = "test-support")]
5610    fn flush_fs_events_in_root_git_repository<'a>(
5611        &self,
5612        cx: &'a mut gpui::TestAppContext,
5613    ) -> futures::future::LocalBoxFuture<'a, ()> {
5614        let file_name = "fs-event-sentinel";
5615
5616        let tree = self.clone();
5617        let (fs, root_path, mut git_dir_scan_id) = self.read_with(cx, |tree, _| {
5618            let tree = tree.as_local().unwrap();
5619            let local_repo_entry = tree
5620                .git_repositories
5621                .values()
5622                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
5623                .unwrap();
5624            (
5625                tree.fs.clone(),
5626                local_repo_entry.common_dir_abs_path.clone(),
5627                local_repo_entry.git_dir_scan_id,
5628            )
5629        });
5630
5631        let scan_id_increased = |tree: &mut Worktree, git_dir_scan_id: &mut usize| {
5632            let tree = tree.as_local().unwrap();
5633            // let repository = tree.repositories.first().unwrap();
5634            let local_repo_entry = tree
5635                .git_repositories
5636                .values()
5637                .min_by_key(|local_repo_entry| local_repo_entry.work_directory.clone())
5638                .unwrap();
5639
5640            if local_repo_entry.git_dir_scan_id > *git_dir_scan_id {
5641                *git_dir_scan_id = local_repo_entry.git_dir_scan_id;
5642                true
5643            } else {
5644                false
5645            }
5646        };
5647
5648        async move {
5649            // Subscribe to events BEFORE creating the file to avoid race condition
5650            // where events fire before subscription is set up
5651            let mut events = cx.events(&tree);
5652
5653            fs.create_file(&root_path.join(file_name), Default::default())
5654                .await
5655                .unwrap();
5656
5657            // Use select to avoid blocking indefinitely if events are delayed
5658            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
5659                futures::select_biased! {
5660                    _ = events.next() => {}
5661                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
5662                }
5663            }
5664
5665            fs.remove_file(&root_path.join(file_name), Default::default())
5666                .await
5667                .unwrap();
5668
5669            // Use select to avoid blocking indefinitely if events are delayed
5670            while !tree.update(cx, |tree, _| scan_id_increased(tree, &mut git_dir_scan_id)) {
5671                futures::select_biased! {
5672                    _ = events.next() => {}
5673                    _ = futures::FutureExt::fuse(cx.background_executor.timer(std::time::Duration::from_millis(10))) => {}
5674                }
5675            }
5676
5677            cx.update(|cx| tree.read(cx).as_local().unwrap().scan_complete())
5678                .await;
5679        }
5680        .boxed_local()
5681    }
5682}
5683
/// Running totals accumulated while a cursor seeks through the entry tree,
/// used to translate tree positions into filtered entry offsets.
#[derive(Clone, Debug)]
struct TraversalProgress<'a> {
    // The right-most entry path covered so far.
    max_path: &'a RelPath,
    // Total number of entries seen.
    count: usize,
    // Entries not marked ignored.
    non_ignored_count: usize,
    // Entries that are files.
    file_count: usize,
    // Files not marked ignored.
    non_ignored_file_count: usize,
}
5692
5693impl TraversalProgress<'_> {
5694    fn count(&self, include_files: bool, include_dirs: bool, include_ignored: bool) -> usize {
5695        match (include_files, include_dirs, include_ignored) {
5696            (true, true, true) => self.count,
5697            (true, true, false) => self.non_ignored_count,
5698            (true, false, true) => self.file_count,
5699            (true, false, false) => self.non_ignored_file_count,
5700            (false, true, true) => self.count - self.file_count,
5701            (false, true, false) => self.non_ignored_count - self.non_ignored_file_count,
5702            (false, false, _) => 0,
5703        }
5704    }
5705}
5706
// Lets a sum-tree cursor accumulate `EntrySummary` values into a
// `TraversalProgress` while seeking.
impl<'a> sum_tree::Dimension<'a, EntrySummary> for TraversalProgress<'a> {
    fn zero(_cx: ()) -> Self {
        Default::default()
    }

    fn add_summary(&mut self, summary: &'a EntrySummary, _: ()) {
        self.max_path = summary.max_path.as_ref();
        self.count += summary.count;
        self.non_ignored_count += summary.non_ignored_count;
        self.file_count += summary.file_count;
        self.non_ignored_file_count += summary.non_ignored_file_count;
    }
}
5720
impl Default for TraversalProgress<'_> {
    /// Zero progress: no entries counted, positioned at the empty path.
    fn default() -> Self {
        Self {
            max_path: RelPath::empty(),
            count: 0,
            non_ignored_count: 0,
            file_count: 0,
            non_ignored_file_count: 0,
        }
    }
}
5732
/// A filtered, path-ordered cursor over a snapshot's entries. The `include_*`
/// flags control which entry kinds the traversal yields.
#[derive(Debug)]
pub struct Traversal<'a> {
    snapshot: &'a Snapshot,
    cursor: sum_tree::Cursor<'a, 'static, Entry, TraversalProgress<'a>>,
    include_ignored: bool,
    include_files: bool,
    include_dirs: bool,
}
5741
impl<'a> Traversal<'a> {
    /// Creates a traversal over `snapshot`, positioned at the first visible
    /// entry at or after `start_path`.
    fn new(
        snapshot: &'a Snapshot,
        include_files: bool,
        include_dirs: bool,
        include_ignored: bool,
        start_path: &RelPath,
    ) -> Self {
        let mut cursor = snapshot.entries_by_path.cursor(());
        cursor.seek(&TraversalTarget::path(start_path), Bias::Left);
        let mut traversal = Self {
            snapshot,
            cursor,
            include_files,
            include_dirs,
            include_ignored,
        };
        // If the entry under the cursor is filtered out by the include flags
        // (it contributes nothing to the filtered count), skip ahead to the
        // first entry this traversal can actually yield.
        if traversal.end_offset() == traversal.start_offset() {
            traversal.next();
        }
        traversal
    }

    /// Advances past the current entry to the next visible one.
    pub fn advance(&mut self) -> bool {
        self.advance_by(1)
    }

    /// Advances by `count` visible entries, returning the result of the
    /// underlying cursor seek.
    pub fn advance_by(&mut self, count: usize) -> bool {
        self.cursor.seek_forward(
            &TraversalTarget::Count {
                count: self.end_offset() + count,
                include_dirs: self.include_dirs,
                include_files: self.include_files,
                include_ignored: self.include_ignored,
            },
            Bias::Left,
        )
    }

    /// Skips the current entry's entire subtree, stopping at the next entry
    /// that passes the include filters. Returns false when exhausted.
    pub fn advance_to_sibling(&mut self) -> bool {
        while let Some(entry) = self.cursor.item() {
            // Seek past everything under `entry.path`, then check whether the
            // landing entry is visible; if not, keep skipping subtrees.
            self.cursor
                .seek_forward(&TraversalTarget::successor(&entry.path), Bias::Left);
            if let Some(entry) = self.cursor.item()
                && (self.include_files || !entry.is_file())
                && (self.include_dirs || !entry.is_dir())
                && (self.include_ignored || !entry.is_ignored || entry.is_always_included)
            {
                return true;
            }
        }
        false
    }

    /// Repositions the cursor at the parent of the current entry. Returns
    /// false when there is no current entry or it has no parent.
    pub fn back_to_parent(&mut self) -> bool {
        let Some(parent_path) = self.cursor.item().and_then(|entry| entry.path.parent()) else {
            return false;
        };
        self.cursor
            .seek(&TraversalTarget::path(parent_path), Bias::Left)
    }

    /// The entry currently under the cursor, if any.
    pub fn entry(&self) -> Option<&'a Entry> {
        self.cursor.item()
    }

    /// The snapshot this traversal iterates over.
    pub fn snapshot(&self) -> &'a Snapshot {
        self.snapshot
    }

    /// Number of visible entries strictly before the current cursor position.
    pub fn start_offset(&self) -> usize {
        self.cursor
            .start()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }

    /// Number of visible entries up to and including the current entry.
    pub fn end_offset(&self) -> usize {
        self.cursor
            .end()
            .count(self.include_files, self.include_dirs, self.include_ignored)
    }
}
5824
5825impl<'a> Iterator for Traversal<'a> {
5826    type Item = &'a Entry;
5827
5828    fn next(&mut self) -> Option<Self::Item> {
5829        if let Some(item) = self.entry() {
5830            self.advance();
5831            Some(item)
5832        } else {
5833            None
5834        }
5835    }
5836}
5837
/// A path-based seek target: either an exact path, or the position just past
/// a path's entire subtree (its "successor").
#[derive(Debug, Clone, Copy)]
pub enum PathTarget<'a> {
    Path(&'a RelPath),
    Successor(&'a RelPath),
}
5843
5844impl PathTarget<'_> {
5845    fn cmp_path(&self, other: &RelPath) -> Ordering {
5846        match self {
5847            PathTarget::Path(path) => path.cmp(&other),
5848            PathTarget::Successor(path) => {
5849                if other.starts_with(path) {
5850                    Ordering::Greater
5851                } else {
5852                    Ordering::Equal
5853                }
5854            }
5855        }
5856    }
5857}
5858
// Let `PathTarget` drive seeks over trees summarized by `PathSummary`, for
// cursors tracking either `PathProgress` or `TraversalProgress`. Both impls
// delegate to `cmp_path` against the maximum path seen so far.
impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, PathProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &PathProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}

impl<'a, S: Summary> SeekTarget<'a, PathSummary<S>, TraversalProgress<'a>> for PathTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: S::Context<'_>) -> Ordering {
        self.cmp_path(cursor_location.max_path)
    }
}
5870
/// A seek target for `Traversal` cursors: either a path position, or an
/// absolute index into the filtered sequence of entries.
#[derive(Debug)]
enum TraversalTarget<'a> {
    Path(PathTarget<'a>),
    Count {
        count: usize,
        include_files: bool,
        include_ignored: bool,
        include_dirs: bool,
    },
}
5881
5882impl<'a> TraversalTarget<'a> {
5883    fn path(path: &'a RelPath) -> Self {
5884        Self::Path(PathTarget::Path(path))
5885    }
5886
5887    fn successor(path: &'a RelPath) -> Self {
5888        Self::Path(PathTarget::Successor(path))
5889    }
5890
5891    fn cmp_progress(&self, progress: &TraversalProgress) -> Ordering {
5892        match self {
5893            TraversalTarget::Path(path) => path.cmp_path(progress.max_path),
5894            TraversalTarget::Count {
5895                count,
5896                include_files,
5897                include_dirs,
5898                include_ignored,
5899            } => Ord::cmp(
5900                count,
5901                &progress.count(*include_files, *include_dirs, *include_ignored),
5902            ),
5903        }
5904    }
5905}
5906
// Let `TraversalTarget` drive seeks over entry trees, whether summarized by a
// full `EntrySummary` or a bare `PathSummary`.
impl<'a> SeekTarget<'a, EntrySummary, TraversalProgress<'a>> for TraversalTarget<'_> {
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}

impl<'a> SeekTarget<'a, PathSummary<sum_tree::NoSummary>, TraversalProgress<'a>>
    for TraversalTarget<'_>
{
    fn cmp(&self, cursor_location: &TraversalProgress<'a>, _: ()) -> Ordering {
        self.cmp_progress(cursor_location)
    }
}
5920
/// Filter flags for iterating the direct children of a directory entry.
pub struct ChildEntriesOptions {
    pub include_files: bool,
    pub include_dirs: bool,
    pub include_ignored: bool,
}
5926
/// Iterator over the direct children of `parent_path`, implemented by skipping
/// each child's subtree via `Traversal::advance_to_sibling`.
pub struct ChildEntriesIter<'a> {
    parent_path: &'a RelPath,
    traversal: Traversal<'a>,
}
5931
5932impl<'a> Iterator for ChildEntriesIter<'a> {
5933    type Item = &'a Entry;
5934
5935    fn next(&mut self) -> Option<Self::Item> {
5936        if let Some(item) = self.traversal.entry()
5937            && item.path.starts_with(self.parent_path)
5938        {
5939            self.traversal.advance_to_sibling();
5940            return Some(item);
5941        }
5942        None
5943    }
5944}
5945
impl<'a> From<&'a Entry> for proto::Entry {
    /// Serializes an entry into its protobuf representation. Note that
    /// `is_always_included`, `is_private`, and `char_bag` are local-only
    /// state and are not part of the wire format.
    fn from(entry: &'a Entry) -> Self {
        Self {
            id: entry.id.to_proto(),
            is_dir: entry.is_dir(),
            path: entry.path.as_ref().to_proto(),
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_external: entry.is_external,
            is_fifo: entry.is_fifo,
            size: Some(entry.size),
            canonical_path: entry
                .canonical_path
                .as_ref()
                .map(|path| path.to_string_lossy().into_owned()),
        }
    }
}
5966
impl TryFrom<(&CharBag, &PathMatcher, proto::Entry)> for Entry {
    type Error = anyhow::Error;

    /// Deserializes an entry from its protobuf representation.
    ///
    /// `root_char_bag` seeds the entry's fuzzy-matching char bag, and
    /// `always_included` recomputes `is_always_included` from the path.
    /// Fails if the message does not contain a valid relative path.
    fn try_from(
        (root_char_bag, always_included, entry): (&CharBag, &PathMatcher, proto::Entry),
    ) -> Result<Self> {
        let kind = if entry.is_dir {
            EntryKind::Dir
        } else {
            EntryKind::File
        };

        let path =
            RelPath::from_proto(&entry.path).context("invalid relative path in proto message")?;
        let char_bag = char_bag_for_path(*root_char_bag, &path);
        let is_always_included = always_included.is_match(&path);
        Ok(Entry {
            id: ProjectEntryId::from_proto(entry.id),
            kind,
            path,
            inode: entry.inode,
            mtime: entry.mtime.map(|time| time.into()),
            size: entry.size.unwrap_or(0),
            canonical_path: entry
                .canonical_path
                .map(|path_string| Arc::from(PathBuf::from(path_string))),
            is_ignored: entry.is_ignored,
            is_hidden: entry.is_hidden,
            is_always_included,
            is_external: entry.is_external,
            // NOTE(review): `is_private` is not transmitted over the wire;
            // presumably it is recomputed locally — confirm against callers.
            is_private: false,
            char_bag,
            is_fifo: entry.is_fifo,
        })
    }
}
6003
/// A worktree-entry identifier, allocated from a monotonically increasing
/// counter and convertible to/from its `u64` wire representation.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ProjectEntryId(usize);

impl ProjectEntryId {
    /// The largest representable id.
    pub const MAX: Self = Self(usize::MAX);
    /// The smallest representable id.
    pub const MIN: Self = Self(usize::MIN);

    /// Allocates the next id from `counter`.
    pub fn new(counter: &AtomicUsize) -> Self {
        let id = counter.fetch_add(1, SeqCst);
        Self(id)
    }

    /// Builds an id from its protobuf representation.
    pub fn from_proto(id: u64) -> Self {
        Self(id as usize)
    }

    /// Converts this id to its protobuf representation.
    pub fn to_proto(self) -> u64 {
        self.0 as u64
    }

    /// Wraps a raw `usize` as an id.
    pub fn from_usize(id: usize) -> Self {
        Self(id)
    }

    /// Unwraps the raw `usize` value.
    pub fn to_usize(self) -> usize {
        self.0
    }
}
6031
#[cfg(feature = "test-support")]
impl CreatedEntry {
    /// Returns the entry if it was included in the worktree, `None` if it
    /// was excluded.
    pub fn into_included(self) -> Option<Entry> {
        if let CreatedEntry::Included(entry) = self {
            Some(entry)
        } else {
            None
        }
    }
}
6041
6042fn parse_gitfile(content: &str) -> anyhow::Result<&Path> {
6043    let path = content
6044        .strip_prefix("gitdir:")
6045        .with_context(|| format!("parsing gitfile content {content:?}"))?;
6046    Ok(Path::new(path.trim()))
6047}
6048
/// Resolves the repository directory and the "common" directory for a `.git`
/// path, following gitfile indirection (as used by linked worktrees and
/// submodules).
///
/// If `dot_git_abs_path` is a regular file containing `gitdir: <path>`, the
/// repository dir becomes the canonicalized pointer target, and the common dir
/// additionally follows that target's `commondir` file when present.
/// Otherwise both returned paths are `dot_git_abs_path` itself.
///
/// Returns `(repository_dir_abs_path, common_dir_abs_path)`.
async fn discover_git_paths(dot_git_abs_path: &Arc<Path>, fs: &dyn Fs) -> (Arc<Path>, Arc<Path>) {
    let mut repository_dir_abs_path = dot_git_abs_path.clone();
    let mut common_dir_abs_path = dot_git_abs_path.clone();

    // A `.git` *file* (rather than a directory) points at the real git dir.
    if let Some(path) = fs
        .load(dot_git_abs_path)
        .await
        .ok()
        .as_ref()
        .and_then(|contents| parse_gitfile(contents).log_err())
    {
        // The gitdir pointer may be relative to the `.git` file's parent.
        let path = dot_git_abs_path
            .parent()
            .unwrap_or(Path::new(""))
            .join(path);
        if let Some(path) = fs.canonicalize(&path).await.log_err() {
            repository_dir_abs_path = Path::new(&path).into();
            common_dir_abs_path = repository_dir_abs_path.clone();

            // Linked worktrees record the shared git dir in a `commondir` file.
            if let Some(commondir_contents) = fs.load(&path.join("commondir")).await.ok()
                && let Some(commondir_path) = fs
                    .canonicalize(&path.join(commondir_contents.trim()))
                    .await
                    .log_err()
            {
                common_dir_abs_path = commondir_path.as_path().into();
            }
        }
    };
    (repository_dir_abs_path, common_dir_abs_path)
}
6080
/// A no-op file watcher: accepts `add`/`remove` requests and does nothing,
/// for contexts where file watching is not needed.
struct NullWatcher;

impl fs::Watcher for NullWatcher {
    fn add(&self, _path: &Path) -> Result<()> {
        Ok(())
    }

    fn remove(&self, _path: &Path) -> Result<()> {
        Ok(())
    }
}
6092
/// Number of leading bytes sampled when sniffing a file's encoding and
/// detecting binary content.
const FILE_ANALYSIS_BYTES: usize = 1024;
6094
/// Reads and decodes the text content of the file at `abs_path`.
///
/// Returns the decoded string, the detected encoding, and whether a BOM was
/// present. Fails if the leading bytes (or the full content) look binary.
async fn decode_file_text(
    fs: &dyn Fs,
    abs_path: &Path,
) -> Result<(String, &'static Encoding, bool)> {
    let mut file = fs
        .open_sync(&abs_path)
        .await
        .with_context(|| format!("opening file {abs_path:?}"))?;

    // First, read the beginning of the file to determine its kind and encoding.
    // We do not want to load an entire large blob into memory only to discard it.
    // NOTE: each read may return up to FILE_ANALYSIS_BYTES, so the header can
    // grow to just under 2 * FILE_ANALYSIS_BYTES before the loop exits; the
    // analysis functions only inspect the first FILE_ANALYSIS_BYTES anyway.
    let mut file_first_bytes = Vec::with_capacity(FILE_ANALYSIS_BYTES);
    let mut buf = [0u8; FILE_ANALYSIS_BYTES];
    let mut reached_eof = false;
    loop {
        if file_first_bytes.len() >= FILE_ANALYSIS_BYTES {
            break;
        }
        let n = file
            .read(&mut buf)
            .with_context(|| format!("reading bytes of the file {abs_path:?}"))?;
        if n == 0 {
            reached_eof = true;
            break;
        }
        file_first_bytes.extend_from_slice(&buf[..n]);
    }
    let (bom_encoding, byte_content) = decode_byte_header(&file_first_bytes);
    anyhow::ensure!(
        byte_content != ByteContent::Binary,
        "Binary files are not supported"
    );

    // If the file is eligible for opening, read the rest of the file.
    let mut content = file_first_bytes;
    if !reached_eof {
        let mut buf = [0u8; 8 * 1024];
        loop {
            let n = file
                .read(&mut buf)
                .with_context(|| format!("reading remaining bytes of the file {abs_path:?}"))?;
            if n == 0 {
                break;
            }
            content.extend_from_slice(&buf[..n]);
        }
    }
    decode_byte_full(content, bom_encoding, byte_content)
}
6144
6145fn decode_byte_header(prefix: &[u8]) -> (Option<&'static Encoding>, ByteContent) {
6146    if let Some((encoding, _bom_len)) = Encoding::for_bom(prefix) {
6147        return (Some(encoding), ByteContent::Unknown);
6148    }
6149    (None, analyze_byte_content(prefix))
6150}
6151
/// Decodes `bytes` into a string using, in priority order: the BOM encoding
/// (if one was found), the UTF-16 classification from the header analysis,
/// strict UTF-8, or finally heuristic encoding detection.
///
/// Returns the decoded text, the encoding used, and whether a BOM was present.
/// Fails if `byte_content` is `Binary`.
fn decode_byte_full(
    bytes: Vec<u8>,
    bom_encoding: Option<&'static Encoding>,
    byte_content: ByteContent,
) -> Result<(String, &'static Encoding, bool)> {
    if let Some(encoding) = bom_encoding {
        let (cow, _) = encoding.decode_with_bom_removal(&bytes);
        return Ok((cow.into_owned(), encoding, true));
    }

    match byte_content {
        ByteContent::Utf16Le => {
            let encoding = encoding_rs::UTF_16LE;
            let (cow, _, _) = encoding.decode(&bytes);
            return Ok((cow.into_owned(), encoding, false));
        }
        ByteContent::Utf16Be => {
            let encoding = encoding_rs::UTF_16BE;
            let (cow, _, _) = encoding.decode(&bytes);
            return Ok((cow.into_owned(), encoding, false));
        }
        ByteContent::Binary => {
            anyhow::bail!("Binary files are not supported");
        }
        ByteContent::Unknown => {}
    }

    // Fallback: guess the encoding from byte statistics and decode with it.
    fn detect_encoding(bytes: Vec<u8>) -> (String, &'static Encoding) {
        let mut detector = EncodingDetector::new();
        detector.feed(&bytes, true);

        let encoding = detector.guess(None, true); // Use None for TLD hint to ensure neutral detection logic.

        let (cow, _, _) = encoding.decode(&bytes);
        (cow.into_owned(), encoding)
    }

    match String::from_utf8(bytes) {
        Ok(text) => {
            // ISO-2022-JP (and other ISO-2022 variants) consists entirely of 7-bit ASCII bytes,
            // so it is valid UTF-8. However, it contains escape sequences starting with '\x1b'.
            // If we find an escape character, we double-check the encoding to prevent
            // displaying raw escape sequences instead of the correct characters.
            if text.contains('\x1b') {
                let (s, enc) = detect_encoding(text.into_bytes());
                Ok((s, enc, false))
            } else {
                Ok((text, encoding_rs::UTF_8, false))
            }
        }
        Err(e) => {
            let (s, enc) = detect_encoding(e.into_bytes());
            Ok((s, enc, false))
        }
    }
}
6208
/// Classification of a byte sample, produced by `analyze_byte_content`.
#[derive(Debug, PartialEq)]
enum ByteContent {
    // Likely UTF-16 little-endian text (NUL bytes skew to odd offsets).
    Utf16Le,
    // Likely UTF-16 big-endian text (NUL bytes skew to even offsets).
    Utf16Be,
    // Likely binary data; not eligible for text decoding.
    Binary,
    // No strong signal; decoded as UTF-8 or via detection downstream.
    Unknown,
}
6216
// Heuristic check using null byte distribution plus a generic text-likeness
// heuristic. This prefers UTF-16 when many bytes are NUL and otherwise
// distinguishes between text-like and binary-like content.
fn analyze_byte_content(bytes: &[u8]) -> ByteContent {
    // Too little data to classify.
    if bytes.len() < 2 {
        return ByteContent::Unknown;
    }

    // Cheap early-out for well-known binary formats (PDF, ZIP, media, ...).
    if is_known_binary_header(bytes) {
        return ByteContent::Binary;
    }

    let limit = bytes.len().min(FILE_ANALYSIS_BYTES);
    let mut even_null_count = 0usize;
    let mut odd_null_count = 0usize;
    let mut non_text_like_count = 0usize;

    for (i, &byte) in bytes[..limit].iter().enumerate() {
        if byte == 0 {
            if i % 2 == 0 {
                even_null_count += 1;
            } else {
                odd_null_count += 1;
            }
            non_text_like_count += 1;
            continue;
        }

        let is_text_like = match byte {
            b'\t' | b'\n' | b'\r' | 0x0C => true,
            0x20..=0x7E => true,
            // Treat bytes that are likely part of UTF-8 or single-byte encodings as text-like.
            0x80..=0xBF | 0xC2..=0xF4 => true,
            _ => false,
        };

        if !is_text_like {
            non_text_like_count += 1;
        }
    }

    let total_null_count = even_null_count + odd_null_count;

    // If there are no NUL bytes at all, this is overwhelmingly likely to be text.
    if total_null_count == 0 {
        return ByteContent::Unknown;
    }

    // "Significant" means NULs are at least 1/16 (~6%) of the sample.
    let has_significant_nulls = total_null_count >= limit / 16;
    let nulls_skew_to_even = even_null_count > odd_null_count * 4;
    let nulls_skew_to_odd = odd_null_count > even_null_count * 4;

    if has_significant_nulls {
        let sample = &bytes[..limit];

        // UTF-16BE ASCII: [0x00, char] — nulls at even positions (high byte first)
        // UTF-16LE ASCII: [char, 0x00] — nulls at odd positions (low byte first)

        if nulls_skew_to_even && is_plausible_utf16_text(sample, false) {
            return ByteContent::Utf16Be;
        }

        if nulls_skew_to_odd && is_plausible_utf16_text(sample, true) {
            return ByteContent::Utf16Le;
        }

        // Many NULs but no credible UTF-16 pattern: treat as binary.
        return ByteContent::Binary;
    }

    // A few scattered NULs: fall back to the text-likeness ratio — fewer
    // than 8% non-text bytes still counts as text.
    if non_text_like_count * 100 < limit * 8 {
        ByteContent::Unknown
    } else {
        ByteContent::Binary
    }
}
6292
/// Returns true when `bytes` starts with the magic number of a well-known
/// binary container or media format.
fn is_known_binary_header(bytes: &[u8]) -> bool {
    const MAGIC_PREFIXES: &[&[u8]] = &[
        b"%PDF-",             // PDF
        b"PK\x03\x04",        // ZIP local header
        b"PK\x05\x06",        // ZIP end of central directory
        b"PK\x07\x08",        // ZIP spanning/splitting
        b"\x89PNG\r\n\x1a\n", // PNG
        b"\xFF\xD8\xFF",      // JPEG
        b"GIF87a",            // GIF87a
        b"GIF89a",            // GIF89a
        b"IWAD",              // Doom IWAD archive
        b"PWAD",              // Doom PWAD archive
        b"RIFF",              // WAV, AVI, WebP
        b"OggS",              // OGG (Vorbis, Opus, FLAC)
        b"fLaC",              // FLAC
        b"ID3",               // MP3 with ID3v2 tag
        b"\xFF\xFB",          // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xFA",          // MP3 frame sync (MPEG1 Layer3)
        b"\xFF\xF3",          // MP3 frame sync (MPEG2 Layer3)
        b"\xFF\xF2",          // MP3 frame sync (MPEG2 Layer3)
    ];
    MAGIC_PREFIXES
        .iter()
        .any(|prefix| bytes.starts_with(prefix))
}
6313
6314// Null byte skew alone is not enough to identify UTF-16 -- binary formats with
6315// small 16-bit values (like PCM audio) produce the same pattern. Decode the
6316// bytes as UTF-16 and reject if too many code units land in control character
6317// ranges or form unpaired surrogates, which real text almost never contains.
6318fn is_plausible_utf16_text(bytes: &[u8], little_endian: bool) -> bool {
6319    let mut suspicious_count = 0usize;
6320    let mut total = 0usize;
6321
6322    let mut i = 0;
6323    while let Some(code_unit) = read_u16(bytes, i, little_endian) {
6324        total += 1;
6325
6326        match code_unit {
6327            0x0009 | 0x000A | 0x000C | 0x000D => {}
6328            // C0/C1 control characters and non-characters
6329            0x0000..=0x001F | 0x007F..=0x009F | 0xFFFE | 0xFFFF => suspicious_count += 1,
6330            0xD800..=0xDBFF => {
6331                let next_offset = i + 2;
6332                let has_low_surrogate = read_u16(bytes, next_offset, little_endian)
6333                    .is_some_and(|next| (0xDC00..=0xDFFF).contains(&next));
6334                if has_low_surrogate {
6335                    total += 1;
6336                    i += 2;
6337                } else {
6338                    suspicious_count += 1;
6339                }
6340            }
6341            // Lone low surrogate without a preceding high surrogate
6342            0xDC00..=0xDFFF => suspicious_count += 1,
6343            _ => {}
6344        }
6345
6346        i += 2;
6347    }
6348
6349    if total == 0 {
6350        return false;
6351    }
6352
6353    // Real UTF-16 text has near-zero control characters; binary data with
6354    // small 16-bit values typically exceeds 5%. 2% provides a safe margin.
6355    suspicious_count * 100 < total * 2
6356}
6357
/// Reads the 16-bit code unit at `offset` with the requested endianness,
/// returning `None` when fewer than two bytes remain.
fn read_u16(bytes: &[u8], offset: usize, little_endian: bool) -> Option<u16> {
    let first = *bytes.get(offset)?;
    let second = *bytes.get(offset + 1)?;
    let pair = [first, second];
    if little_endian {
        Some(u16::from_le_bytes(pair))
    } else {
        Some(u16::from_be_bytes(pair))
    }
}
6365
6366#[cfg(test)]
6367mod tests {
6368    use super::*;
6369
    /// Builds exactly `FILE_ANALYSIS_BYTES` bytes of a PCM 16-bit mono WAV
    /// file: a genuine RIFF header followed by synthetic low-amplitude
    /// little-endian samples whose high bytes are zero — mimicking the NUL
    /// distribution of UTF-16LE text (reproduction of issue #50785).
    fn build_pcm16_wav_bytes() -> Vec<u8> {
        let header: Vec<u8> = vec![
            /*  RIFF header  */
            0x52, 0x49, 0x46, 0x46, // "RIFF"
            0xc6, 0xcf, 0x00, 0x00, // file size: 8
            0x57, 0x41, 0x56, 0x45, // "WAVE"
            /*  fmt chunk  */
            0x66, 0x6d, 0x74, 0x20, // "fmt "
            0x10, 0x00, 0x00, 0x00, // chunk size: 16
            0x01, 0x00, // format: PCM (1)
            0x01, 0x00, // channels: 1 (mono)
            0x80, 0x3e, 0x00, 0x00, // sample rate: 16000
            0x00, 0x7d, 0x00, 0x00, // byte rate: 32000
            0x02, 0x00, // block align: 2
            0x10, 0x00, // bits per sample: 16
            /*  LIST chunk  */
            0x4c, 0x49, 0x53, 0x54, // "LIST"
            0x1a, 0x00, 0x00, 0x00, // chunk size: 26
            0x49, 0x4e, 0x46, 0x4f, // "INFO"
            0x49, 0x53, 0x46, 0x54, // "ISFT"
            0x0d, 0x00, 0x00, 0x00, // sub-chunk size: 13
            0x4c, 0x61, 0x76, 0x66, 0x36, 0x32, 0x2e, 0x33, // "Lavf62.3"
            0x2e, 0x31, 0x30, 0x30, 0x00, // ".100\0"
            /* padding byte for word alignment */
            0x00, // data chunk header
            0x64, 0x61, 0x74, 0x61, // "data"
            0x80, 0xcf, 0x00, 0x00, // chunk size
        ];

        let mut bytes = header;

        // fill remaining space up to `FILE_ANALYSIS_BYTES` with synthetic PCM
        let audio_bytes_needed = FILE_ANALYSIS_BYTES - bytes.len();
        for i in 0..(audio_bytes_needed / 2) {
            let sample = (i & 0xFF) as u8;
            bytes.push(sample); // low byte: varies
            bytes.push(0x00); // high byte: zero for small values
        }

        bytes
    }
6412
    /// A PCM-16 WAV sample must be classified Binary via its RIFF magic,
    /// despite having a UTF-16LE-like NUL distribution.
    #[test]
    fn test_pcm16_wav_detected_as_binary() {
        let wav_bytes = build_pcm16_wav_bytes();
        assert_eq!(wav_bytes.len(), FILE_ANALYSIS_BYTES);

        let result = analyze_byte_content(&wav_bytes);
        assert_eq!(
            result,
            ByteContent::Binary,
            "PCM 16-bit WAV should be detected as Binary via RIFF header"
        );
    }
6425
6426    #[test]
6427    fn test_le16_binary_not_misdetected_as_utf16le() {
6428        let mut bytes = b"FAKE".to_vec();
6429        while bytes.len() < FILE_ANALYSIS_BYTES {
6430            let sample = (bytes.len() & 0xFF) as u8;
6431            bytes.push(sample);
6432            bytes.push(0x00);
6433        }
6434        bytes.truncate(FILE_ANALYSIS_BYTES);
6435
6436        let result = analyze_byte_content(&bytes);
6437        assert_eq!(
6438            result,
6439            ByteContent::Binary,
6440            "LE 16-bit binary with control characters should be detected as Binary"
6441        );
6442    }
6443
6444    #[test]
6445    fn test_be16_binary_not_misdetected_as_utf16be() {
6446        let mut bytes = b"FAKE".to_vec();
6447        while bytes.len() < FILE_ANALYSIS_BYTES {
6448            bytes.push(0x00);
6449            let sample = (bytes.len() & 0xFF) as u8;
6450            bytes.push(sample);
6451        }
6452        bytes.truncate(FILE_ANALYSIS_BYTES);
6453
6454        let result = analyze_byte_content(&bytes);
6455        assert_eq!(
6456            result,
6457            ByteContent::Binary,
6458            "BE 16-bit binary with control characters should be detected as Binary"
6459        );
6460    }
6461
6462    #[test]
6463    fn test_utf16le_text_detected_as_utf16le() {
6464        let text = "Hello, world! This is a UTF-16 test string. ";
6465        let mut bytes = Vec::new();
6466        while bytes.len() < FILE_ANALYSIS_BYTES {
6467            bytes.extend(text.encode_utf16().flat_map(|u| u.to_le_bytes()));
6468        }
6469        bytes.truncate(FILE_ANALYSIS_BYTES);
6470
6471        assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Le);
6472    }
6473
6474    #[test]
6475    fn test_utf16be_text_detected_as_utf16be() {
6476        let text = "Hello, world! This is a UTF-16 test string. ";
6477        let mut bytes = Vec::new();
6478        while bytes.len() < FILE_ANALYSIS_BYTES {
6479            bytes.extend(text.encode_utf16().flat_map(|u| u.to_be_bytes()));
6480        }
6481        bytes.truncate(FILE_ANALYSIS_BYTES);
6482
6483        assert_eq!(analyze_byte_content(&bytes), ByteContent::Utf16Be);
6484    }
6485
6486    #[test]
6487    fn test_known_binary_headers() {
6488        let cases: &[(&[u8], &str)] = &[
6489            (b"RIFF\x00\x00\x00\x00WAVE", "WAV"),
6490            (b"RIFF\x00\x00\x00\x00AVI ", "AVI"),
6491            (b"OggS\x00\x02", "OGG"),
6492            (b"fLaC\x00\x00", "FLAC"),
6493            (b"ID3\x03\x00", "MP3 ID3v2"),
6494            (b"\xFF\xFB\x90\x00", "MP3 MPEG1 Layer3"),
6495            (b"\xFF\xF3\x90\x00", "MP3 MPEG2 Layer3"),
6496        ];
6497
6498        for (header, label) in cases {
6499            let mut bytes = header.to_vec();
6500            bytes.resize(FILE_ANALYSIS_BYTES, 0x41); // pad with 'A'
6501            assert_eq!(
6502                analyze_byte_content(&bytes),
6503                ByteContent::Binary,
6504                "{label} should be detected as Binary"
6505            );
6506        }
6507    }
6508}