edit prediction cli: Skip worktree scan (#44658)

Created by Agus Zubiaga and Max Brunsfeld

Release Notes:

- N/A

Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>

Change summary

crates/edit_prediction/src/license_detection.rs    |  3 
crates/edit_prediction/src/udiff.rs                | 53 +++++++++------
crates/edit_prediction_cli/src/load_project.rs     | 24 ++++++
crates/edit_prediction_cli/src/main.rs             | 10 ++
crates/edit_prediction_cli/src/retrieve_context.rs | 15 ----
crates/project/src/project.rs                      |  6 +
crates/project/src/worktree_store.rs               |  9 ++
crates/remote_server/src/headless_project.rs       |  1 
crates/worktree/src/worktree.rs                    | 54 ++++++++++++---
crates/worktree/src/worktree_tests.rs              | 25 +++++++
10 files changed, 149 insertions(+), 51 deletions(-)

Detailed changes

crates/edit_prediction/src/license_detection.rs 🔗

@@ -735,6 +735,7 @@ mod tests {
             true,
             fs.clone(),
             Default::default(),
+            true,
             &mut cx.to_async(),
         )
         .await
@@ -758,6 +759,7 @@ mod tests {
             true,
             fs.clone(),
             Default::default(),
+            true,
             &mut cx.to_async(),
         )
         .await
@@ -816,6 +818,7 @@ mod tests {
             true,
             fs.clone(),
             Default::default(),
+            true,
             &mut cx.to_async(),
         )
         .await

crates/edit_prediction/src/udiff.rs 🔗

@@ -15,7 +15,9 @@ use collections::HashMap;
 use gpui::AsyncApp;
 use gpui::Entity;
 use language::{Anchor, Buffer, OffsetRangeExt as _, TextBufferSnapshot};
-use project::Project;
+use project::{Project, ProjectPath};
+use util::paths::PathStyle;
+use util::rel_path::RelPath;
 
 #[derive(Clone, Debug)]
 pub struct OpenedBuffers(#[allow(unused)] HashMap<String, Entity<Buffer>>);
@@ -28,18 +30,27 @@ pub async fn apply_diff(
 ) -> Result<OpenedBuffers> {
     let mut included_files = HashMap::default();
 
+    let worktree_id = project.read_with(cx, |project, cx| {
+        anyhow::Ok(
+            project
+                .visible_worktrees(cx)
+                .next()
+                .context("no worktrees")?
+                .read(cx)
+                .id(),
+        )
+    })??;
+
     for line in diff_str.lines() {
         let diff_line = DiffLine::parse(line);
 
         if let DiffLine::OldPath { path } = diff_line {
             let buffer = project
                 .update(cx, |project, cx| {
-                    let project_path =
-                        project
-                            .find_project_path(path.as_ref(), cx)
-                            .with_context(|| {
-                                format!("Failed to find worktree for new path: {}", path)
-                            })?;
+                    let project_path = ProjectPath {
+                        worktree_id,
+                        path: RelPath::new(Path::new(path.as_ref()), PathStyle::Posix)?.into_arc(),
+                    };
                     anyhow::Ok(project.open_buffer(project_path, cx))
                 })??
                 .await?;
@@ -726,38 +737,38 @@ mod tests {
         let project = Project::test(fs, [path!("/root").as_ref()], cx).await;
 
         let diff = indoc! {r#"
-            --- a/root/file1
-            +++ b/root/file1
+            --- a/file1
+            +++ b/file1
              one
              two
             -three
             +3
              four
              five
-            --- a/root/file1
-            +++ b/root/file1
+            --- a/file1
+            +++ b/file1
              3
             -four
             -five
             +4
             +5
-            --- a/root/file1
-            +++ b/root/file1
+            --- a/file1
+            +++ b/file1
             -one
             -two
              3
              4
-            --- a/root/file2
-            +++ b/root/file2
+            --- a/file2
+            +++ b/file2
             +5
              six
-            --- a/root/file2
-            +++ b/root/file2
+            --- a/file2
+            +++ b/file2
              seven
             +7.5
              eight
-            --- a/root/file2
-            +++ b/root/file2
+            --- a/file2
+            +++ b/file2
              ten
             +11
         "#};
@@ -826,8 +837,8 @@ mod tests {
         let project = Project::test(fs, [path!("/root").as_ref()], cx).await;
 
         let diff = indoc! {r#"
-            --- a/root/file1
-            +++ b/root/file1
+            --- a/file1
+            +++ b/file1
              one
              two
             -three

crates/edit_prediction_cli/src/load_project.rs 🔗

@@ -11,7 +11,7 @@ use futures::{
     lock::{Mutex, OwnedMutexGuard},
 };
 use gpui::{AsyncApp, Entity};
-use language::{Anchor, Buffer, ToOffset, ToPoint};
+use language::{Anchor, Buffer, LanguageNotFound, ToOffset, ToPoint};
 use project::buffer_store::BufferStoreEvent;
 use project::{Project, ProjectPath};
 use std::{
@@ -77,6 +77,19 @@ async fn cursor_position(
     project: &Entity<Project>,
     cx: &mut AsyncApp,
 ) -> (Entity<Buffer>, Anchor) {
+    let language_registry = project
+        .read_with(cx, |project, _| project.languages().clone())
+        .unwrap();
+    let result = language_registry
+        .load_language_for_file_path(&example.cursor_path)
+        .await;
+
+    if let Err(error) = result
+        && !error.is::<LanguageNotFound>()
+    {
+        panic!("Failed to load language for file path: {}", error);
+    }
+
     let worktree = project
         .read_with(cx, |project, cx| {
             project.visible_worktrees(cx).next().unwrap()
@@ -115,7 +128,8 @@ async fn cursor_position(
         let mut matches = text.match_indices(&cursor_excerpt);
         let (excerpt_offset, _) = matches.next().unwrap_or_else(|| {
             panic!(
-                "\nExcerpt:\n\n{cursor_excerpt}\nBuffer text:\n{text}\n.Cursor excerpt did not exist in buffer."
+                "\nExcerpt:\n\n{cursor_excerpt}\nBuffer text:\n{text}\n.Example: {}\nCursor excerpt did not exist in buffer.",
+                example.name
             );
         });
         assert!(matches.next().is_none(), "More than one cursor position match found for {}", &example.name);
@@ -151,6 +165,12 @@ async fn setup_project(
         })
         .unwrap();
 
+    project
+        .update(cx, |project, cx| {
+            project.disable_worktree_scanner(cx);
+        })
+        .unwrap();
+
     let worktree = project
         .update(cx, |project, cx| {
             project.create_worktree(&example.worktree_path(), true, cx)

crates/edit_prediction_cli/src/main.rs 🔗

@@ -45,7 +45,7 @@ enum Command {
     /// Parse markdown examples and output a combined .jsonl file
     ParseExample,
     /// Create git worktrees for each example and load file contents
-    LoadBuffer,
+    LoadProject,
     /// Retrieve context for input examples.
     Context,
     /// Generate a prompt string for a specific model
@@ -144,15 +144,19 @@ fn main() {
                 _ => (),
             };
 
-            for data in examples.chunks_mut(args.max_parallelism) {
+            let chunks = examples.chunks_mut(args.max_parallelism);
+            let total_chunks = chunks.len();
+            for (batch_ix, data) in chunks.enumerate() {
                 let mut futures = Vec::new();
+                eprintln!("Processing batch: {}/{}", batch_ix + 1, total_chunks);
+
                 for example in data.iter_mut() {
                     let cx = cx.clone();
                     let app_state = app_state.clone();
                     futures.push(async {
                         match &command {
                             Command::ParseExample => {}
-                            Command::LoadBuffer => {
+                            Command::LoadProject => {
                                 run_load_project(example, app_state.clone(), cx).await;
                             }
                             Command::Context => {

crates/edit_prediction_cli/src/retrieve_context.rs 🔗

@@ -8,7 +8,7 @@ use collections::HashSet;
 use edit_prediction::{DebugEvent, EditPredictionStore};
 use futures::{FutureExt as _, StreamExt as _, channel::mpsc};
 use gpui::{AsyncApp, Entity, Task};
-use language::{Buffer, LanguageNotFound};
+use language::Buffer;
 use project::Project;
 use std::{sync::Arc, time::Duration};
 
@@ -71,19 +71,6 @@ async fn wait_for_language_server_to_start(
     buffer: &Entity<Buffer>,
     cx: &mut AsyncApp,
 ) {
-    let language_registry = project
-        .read_with(cx, |project, _| project.languages().clone())
-        .unwrap();
-    let result = language_registry
-        .load_language_for_file_path(&example.cursor_path)
-        .await;
-
-    if let Err(error) = result
-        && !error.is::<LanguageNotFound>()
-    {
-        panic!("Failed to load language for file path: {}", error);
-    }
-
     let Some(language_id) = buffer
         .read_with(cx, |buffer, _cx| {
             buffer.language().map(|language| language.id())

crates/project/src/project.rs 🔗

@@ -2622,6 +2622,12 @@ impl Project {
         !self.is_local()
     }
 
+    pub fn disable_worktree_scanner(&mut self, cx: &mut Context<Self>) {
+        self.worktree_store.update(cx, |worktree_store, _cx| {
+            worktree_store.disable_scanner();
+        });
+    }
+
     #[inline]
     pub fn create_buffer(
         &mut self,

crates/project/src/worktree_store.rs 🔗

@@ -57,6 +57,7 @@ pub struct WorktreeStore {
     retain_worktrees: bool,
     worktrees: Vec<WorktreeHandle>,
     worktrees_reordered: bool,
+    scanning_enabled: bool,
     #[allow(clippy::type_complexity)]
     loading_worktrees:
         HashMap<Arc<SanitizedPath>, Shared<Task<Result<Entity<Worktree>, Arc<anyhow::Error>>>>>,
@@ -93,6 +94,7 @@ impl WorktreeStore {
             downstream_client: None,
             worktrees: Vec::new(),
             worktrees_reordered: false,
+            scanning_enabled: true,
             retain_worktrees,
             state: WorktreeStoreState::Local { fs },
         }
@@ -110,6 +112,7 @@ impl WorktreeStore {
             downstream_client: None,
             worktrees: Vec::new(),
             worktrees_reordered: false,
+            scanning_enabled: true,
             retain_worktrees,
             state: WorktreeStoreState::Remote {
                 upstream_client,
@@ -119,6 +122,10 @@ impl WorktreeStore {
         }
     }
 
+    pub fn disable_scanner(&mut self) {
+        self.scanning_enabled = false;
+    }
+
     /// Iterates through all worktrees, including ones that don't appear in the project panel
     pub fn worktrees(&self) -> impl '_ + DoubleEndedIterator<Item = Entity<Worktree>> {
         self.worktrees
@@ -576,6 +583,7 @@ impl WorktreeStore {
         cx: &mut Context<Self>,
     ) -> Task<Result<Entity<Worktree>, Arc<anyhow::Error>>> {
         let next_entry_id = self.next_entry_id.clone();
+        let scanning_enabled = self.scanning_enabled;
 
         cx.spawn(async move |this, cx| {
             let worktree = Worktree::local(
@@ -583,6 +591,7 @@ impl WorktreeStore {
                 visible,
                 fs,
                 next_entry_id,
+                scanning_enabled,
                 cx,
             )
             .await;

crates/worktree/src/worktree.rs 🔗

@@ -14,7 +14,7 @@ use futures::{
         mpsc::{self, UnboundedSender},
         oneshot,
     },
-    select_biased,
+    select_biased, stream,
     task::Poll,
 };
 use fuzzy::CharBag;
@@ -129,6 +129,7 @@ pub struct LocalWorktree {
     next_entry_id: Arc<AtomicUsize>,
     settings: WorktreeSettings,
     share_private_files: bool,
+    scanning_enabled: bool,
 }
 
 pub struct PathPrefixScanRequest {
@@ -356,6 +357,7 @@ impl Worktree {
         visible: bool,
         fs: Arc<dyn Fs>,
         next_entry_id: Arc<AtomicUsize>,
+        scanning_enabled: bool,
         cx: &mut AsyncApp,
     ) -> Result<Entity<Self>> {
         let abs_path = path.into();
@@ -459,6 +461,7 @@ impl Worktree {
                 fs_case_sensitive,
                 visible,
                 settings,
+                scanning_enabled,
             };
             worktree.start_background_scanner(scan_requests_rx, path_prefixes_to_scan_rx, cx);
             Worktree::Local(worktree)
@@ -1049,13 +1052,18 @@ impl LocalWorktree {
         let share_private_files = self.share_private_files;
         let next_entry_id = self.next_entry_id.clone();
         let fs = self.fs.clone();
+        let scanning_enabled = self.scanning_enabled;
         let settings = self.settings.clone();
         let (scan_states_tx, mut scan_states_rx) = mpsc::unbounded();
         let background_scanner = cx.background_spawn({
             let abs_path = snapshot.abs_path.as_path().to_path_buf();
             let background = cx.background_executor().clone();
             async move {
-                let (events, watcher) = fs.watch(&abs_path, FS_WATCH_LATENCY).await;
+                let (events, watcher) = if scanning_enabled {
+                    fs.watch(&abs_path, FS_WATCH_LATENCY).await
+                } else {
+                    (Box::pin(stream::pending()) as _, Arc::new(NullWatcher) as _)
+                };
                 let fs_case_sensitive = fs.is_case_sensitive().await.unwrap_or_else(|e| {
                     log::error!("Failed to determine whether filesystem is case sensitive: {e:#}");
                     true
@@ -1080,6 +1088,7 @@ impl LocalWorktree {
                     }),
                     phase: BackgroundScannerPhase::InitialScan,
                     share_private_files,
+                    scanning_enabled,
                     settings,
                     watcher,
                 };
@@ -3617,6 +3626,7 @@ struct BackgroundScanner {
     watcher: Arc<dyn Watcher>,
     settings: WorktreeSettings,
     share_private_files: bool,
+    scanning_enabled: bool,
 }
 
 #[derive(Copy, Clone, PartialEq)]
@@ -3632,14 +3642,23 @@ impl BackgroundScanner {
         // the git repository in an ancestor directory. Find any gitignore files
         // in ancestor directories.
         let root_abs_path = self.state.lock().await.snapshot.abs_path.clone();
-        let (ignores, repo) = discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await;
-        self.state
-            .lock()
-            .await
-            .snapshot
-            .ignores_by_parent_abs_path
-            .extend(ignores);
-        let containing_git_repository = if let Some((ancestor_dot_git, work_directory)) = repo {
+
+        let repo = if self.scanning_enabled {
+            let (ignores, repo) = discover_ancestor_git_repo(self.fs.clone(), &root_abs_path).await;
+            self.state
+                .lock()
+                .await
+                .snapshot
+                .ignores_by_parent_abs_path
+                .extend(ignores);
+            repo
+        } else {
+            None
+        };
+
+        let containing_git_repository = if let Some((ancestor_dot_git, work_directory)) = repo
+            && self.scanning_enabled
+        {
             maybe!(async {
                 self.state
                     .lock()
@@ -3663,6 +3682,7 @@ impl BackgroundScanner {
 
         let mut global_gitignore_events = if let Some(global_gitignore_path) =
             &paths::global_gitignore_path()
+            && self.scanning_enabled
         {
             let is_file = self.fs.is_file(&global_gitignore_path).await;
             self.state.lock().await.snapshot.global_gitignore = if is_file {
@@ -3705,7 +3725,7 @@ impl BackgroundScanner {
                         .insert_entry(root_entry, self.fs.as_ref(), self.watcher.as_ref())
                         .await;
                 }
-                if root_entry.is_dir() {
+                if root_entry.is_dir() && self.scanning_enabled {
                     state
                         .enqueue_scan_dir(
                             root_abs_path.as_path().into(),
@@ -5641,3 +5661,15 @@ async fn discover_git_paths(dot_git_abs_path: &Arc<Path>, fs: &dyn Fs) -> (Arc<P
     };
     (repository_dir_abs_path, common_dir_abs_path)
 }
+
+struct NullWatcher;
+
+impl fs::Watcher for NullWatcher {
+    fn add(&self, _path: &Path) -> Result<()> {
+        Ok(())
+    }
+
+    fn remove(&self, _path: &Path) -> Result<()> {
+        Ok(())
+    }
+}

crates/worktree/src/worktree_tests.rs 🔗

@@ -44,6 +44,7 @@ async fn test_traversal(cx: &mut TestAppContext) {
         true,
         fs,
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -108,6 +109,7 @@ async fn test_circular_symlinks(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -207,6 +209,7 @@ async fn test_symlinks_pointing_outside(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -357,6 +360,7 @@ async fn test_renaming_case_only(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -434,6 +438,7 @@ async fn test_open_gitignored_files(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -598,6 +603,7 @@ async fn test_dirs_no_longer_ignored(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -698,6 +704,7 @@ async fn test_write_file(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -791,6 +798,7 @@ async fn test_file_scan_inclusions(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -856,6 +864,7 @@ async fn test_file_scan_exclusions_overrules_inclusions(cx: &mut TestAppContext)
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -914,6 +923,7 @@ async fn test_file_scan_inclusions_reindexes_on_setting_change(cx: &mut TestAppC
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -999,6 +1009,7 @@ async fn test_file_scan_exclusions(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1080,6 +1091,7 @@ async fn test_hidden_files(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1190,6 +1202,7 @@ async fn test_fs_events_in_exclusions(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1301,6 +1314,7 @@ async fn test_fs_events_in_dot_git_worktree(cx: &mut TestAppContext) {
         true,
         Arc::new(RealFs::new(None, cx.executor())),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1339,6 +1353,7 @@ async fn test_create_directory_during_initial_scan(cx: &mut TestAppContext) {
         true,
         fs,
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1407,6 +1422,7 @@ async fn test_create_dir_all_on_create_entry(cx: &mut TestAppContext) {
         true,
         fs_fake,
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1448,6 +1464,7 @@ async fn test_create_dir_all_on_create_entry(cx: &mut TestAppContext) {
         true,
         fs_real,
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1556,6 +1573,7 @@ async fn test_create_file_in_expanded_gitignored_dir(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1651,6 +1669,7 @@ async fn test_fs_event_for_gitignored_dir_does_not_lose_contents(cx: &mut TestAp
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1728,6 +1747,7 @@ async fn test_random_worktree_operations_during_initial_scan(
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1818,6 +1838,7 @@ async fn test_random_worktree_changes(cx: &mut TestAppContext, mut rng: StdRng)
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -1890,6 +1911,7 @@ async fn test_random_worktree_changes(cx: &mut TestAppContext, mut rng: StdRng)
             true,
             fs.clone(),
             Default::default(),
+            true,
             &mut cx.to_async(),
         )
         .await
@@ -2203,6 +2225,7 @@ async fn test_private_single_file_worktree(cx: &mut TestAppContext) {
         true,
         fs.clone(),
         Default::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -2235,6 +2258,7 @@ async fn test_repository_above_root(executor: BackgroundExecutor, cx: &mut TestA
         true,
         fs.clone(),
         Arc::default(),
+        true,
         &mut cx.to_async(),
     )
     .await
@@ -2312,6 +2336,7 @@ async fn test_global_gitignore(executor: BackgroundExecutor, cx: &mut TestAppCon
         true,
         fs.clone(),
         Arc::default(),
+        true,
         &mut cx.to_async(),
     )
     .await