git: Use the CLI for loading commit SHAs and details (#29351)

Cole Miller created

Since #28065 merged we've seen deadlocks inside iconv when opening Zed
in a repository containing many submodules. These calls to iconv happen
inside libgit2, in our implementations of the methods `head_sha`,
`merge_head_shas`, and `show` on `RealGitRepository`. This PR moves
those methods to use the git CLI instead, sidestepping the issue. For
the sake of efficiency, a new `revparse_batch` method is added that uses
`git cat-file` to resolve several ref names in one invocation. I
originally intended to make `show` operate in batch mode as well (or
instead), but I can't see a good way to do that with the git CLI; `git
show` always bails on the first ref that it can't resolve, and
`for-each-ref` doesn't support symbolic refs like `HEAD`.

Separately, I removed the calls to `show` in `MergeDetails::load`, going
back to only loading the SHAs of the various merge heads. Loading full
commit details was intended to support the inlays feature that ended up
being cut from #28065, and we can add it back in when we need it.

Release Notes:

- N/A

Change summary

crates/agent/src/thread.rs                   |   2 
crates/fs/src/fake_git_repo.rs               |  25 +--
crates/git/src/repository.rs                 | 145 ++++++++++++++-------
crates/project/src/git_store.rs              |  72 +++-------
crates/project/src/git_store/conflict_set.rs |   2 
5 files changed, 129 insertions(+), 117 deletions(-)

Detailed changes

crates/agent/src/thread.rs 🔗

@@ -1964,7 +1964,7 @@ impl Thread {
                             };
 
                             let remote_url = backend.remote_url("origin");
-                            let head_sha = backend.head_sha();
+                            let head_sha = backend.head_sha().await;
                             let diff = backend.diff(DiffType::HeadToWorktree).await.ok();
 
                             GitState {

crates/fs/src/fake_git_repo.rs 🔗

@@ -34,8 +34,8 @@ pub struct FakeGitRepositoryState {
     pub blames: HashMap<RepoPath, Blame>,
     pub current_branch_name: Option<String>,
     pub branches: HashSet<String>,
-    pub merge_head_shas: Vec<String>,
     pub simulated_index_write_error_message: Option<String>,
+    pub refs: HashMap<String, String>,
 }
 
 impl FakeGitRepositoryState {
@@ -48,20 +48,13 @@ impl FakeGitRepositoryState {
             blames: Default::default(),
             current_branch_name: Default::default(),
             branches: Default::default(),
-            merge_head_shas: Default::default(),
             simulated_index_write_error_message: Default::default(),
+            refs: HashMap::from_iter([("HEAD".into(), "abc".into())]),
         }
     }
 }
 
 impl FakeGitRepository {
-    fn with_state<F, T>(&self, write: bool, f: F) -> Result<T>
-    where
-        F: FnOnce(&mut FakeGitRepositoryState) -> T,
-    {
-        self.fs.with_git_state(&self.dot_git_path, write, f)
-    }
-
     fn with_state_async<F, T>(&self, write: bool, f: F) -> BoxFuture<'static, Result<T>>
     where
         F: 'static + Send + FnOnce(&mut FakeGitRepositoryState) -> Result<T>,
@@ -141,13 +134,13 @@ impl GitRepository for FakeGitRepository {
         None
     }
 
-    fn head_sha(&self) -> Option<String> {
-        None
-    }
-
-    fn merge_head_shas(&self) -> Vec<String> {
-        self.with_state(false, |state| state.merge_head_shas.clone())
-            .unwrap()
+    fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
+        self.with_state_async(false, |state| {
+            Ok(revs
+                .into_iter()
+                .map(|rev| state.refs.get(&rev).cloned())
+                .collect())
+        })
     }
 
     fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {

crates/git/src/repository.rs 🔗

@@ -1,7 +1,7 @@
 use crate::commit::parse_git_diff_name_status;
 use crate::status::{GitStatus, StatusCode};
 use crate::{Oid, SHORT_SHA_LENGTH};
-use anyhow::{Context as _, Result, anyhow};
+use anyhow::{Context as _, Result, anyhow, bail};
 use collections::HashMap;
 use futures::future::BoxFuture;
 use futures::{AsyncWriteExt, FutureExt as _, select_biased};
@@ -13,17 +13,16 @@ use schemars::JsonSchema;
 use serde::Deserialize;
 use std::borrow::{Borrow, Cow};
 use std::ffi::{OsStr, OsString};
+use std::io::prelude::*;
 use std::path::Component;
 use std::process::{ExitStatus, Stdio};
 use std::sync::LazyLock;
 use std::{
     cmp::Ordering,
-    path::{Path, PathBuf},
-    sync::Arc,
-};
-use std::{
     future,
     io::{BufRead, BufReader, BufWriter, Read},
+    path::{Path, PathBuf},
+    sync::Arc,
 };
 use sum_tree::MapSeekTarget;
 use thiserror::Error;
@@ -197,10 +196,20 @@ pub trait GitRepository: Send + Sync {
     /// Returns the URL of the remote with the given name.
     fn remote_url(&self, name: &str) -> Option<String>;
 
-    /// Returns the SHA of the current HEAD.
-    fn head_sha(&self) -> Option<String>;
+    /// Resolve a list of refs to SHAs.
+    fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>>;
 
-    fn merge_head_shas(&self) -> Vec<String>;
+    fn head_sha(&self) -> BoxFuture<Option<String>> {
+        async move {
+            self.revparse_batch(vec!["HEAD".into()])
+                .await
+                .unwrap_or_default()
+                .into_iter()
+                .next()
+                .flatten()
+        }
+        .boxed()
+    }
 
     fn merge_message(&self) -> BoxFuture<Option<String>>;
 
@@ -392,27 +401,37 @@ impl GitRepository for RealGitRepository {
     }
 
     fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {
-        let repo = self.repository.clone();
+        let working_directory = self.working_directory();
         self.executor
             .spawn(async move {
-                let repo = repo.lock();
-                let Ok(commit) = repo.revparse_single(&commit)?.into_commit() else {
-                    anyhow::bail!("{} is not a commit", commit);
-                };
-                let details = CommitDetails {
-                    sha: commit.id().to_string().into(),
-                    message: String::from_utf8_lossy(commit.message_raw_bytes())
-                        .to_string()
-                        .into(),
-                    commit_timestamp: commit.time().seconds(),
-                    author_email: String::from_utf8_lossy(commit.author().email_bytes())
-                        .to_string()
-                        .into(),
-                    author_name: String::from_utf8_lossy(commit.author().name_bytes())
-                        .to_string()
-                        .into(),
-                };
-                Ok(details)
+                let working_directory = working_directory?;
+                let output = new_std_command("git")
+                    .current_dir(&working_directory)
+                    .args([
+                        "--no-optional-locks",
+                        "show",
+                        "--no-patch",
+                        "--format=%H%x00%B%x00%at%x00%ae%x00%an",
+                        &commit,
+                    ])
+                    .output()?;
+                let output = std::str::from_utf8(&output.stdout)?;
+                let fields = output.split('\0').collect::<Vec<_>>();
+                if fields.len() != 5 {
+                    bail!("unexpected git-show output for {commit:?}: {output:?}")
+                }
+                let sha = fields[0].to_string().into();
+                let message = fields[1].to_string().into();
+                let commit_timestamp = fields[2].parse()?;
+                let author_email = fields[3].to_string().into();
+                let author_name = fields[4].to_string().into();
+                Ok(CommitDetails {
+                    sha,
+                    message,
+                    commit_timestamp,
+                    author_email,
+                    author_name,
+                })
             })
             .boxed()
     }
@@ -702,34 +721,62 @@ impl GitRepository for RealGitRepository {
         remote.url().map(|url| url.to_string())
     }
 
-    fn head_sha(&self) -> Option<String> {
-        Some(self.repository.lock().head().ok()?.target()?.to_string())
-    }
+    fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
+        let working_directory = self.working_directory();
+        self.executor
+            .spawn(async move {
+                let working_directory = working_directory?;
+                let mut process = new_std_command("git")
+                    .current_dir(&working_directory)
+                    .args([
+                        "--no-optional-locks",
+                        "cat-file",
+                        "--batch-check=%(objectname)",
+                        "-z",
+                    ])
+                    .stdin(Stdio::piped())
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .spawn()?;
+
+                let stdin = process
+                    .stdin
+                    .take()
+                    .ok_or_else(|| anyhow!("no stdin for git cat-file subprocess"))?;
+                let mut stdin = BufWriter::new(stdin);
+                for rev in &revs {
+                    write!(&mut stdin, "{rev}\0")?;
+                }
+                drop(stdin);
+
+                let output = process.wait_with_output()?;
+                let output = std::str::from_utf8(&output.stdout)?;
+                let shas = output
+                    .lines()
+                    .map(|line| {
+                        if line.ends_with("missing") {
+                            None
+                        } else {
+                            Some(line.to_string())
+                        }
+                    })
+                    .collect::<Vec<_>>();
 
-    fn merge_head_shas(&self) -> Vec<String> {
-        let mut shas = Vec::default();
-        self.repository
-            .lock()
-            .mergehead_foreach(|oid| {
-                shas.push(oid.to_string());
-                true
+                if shas.len() != revs.len() {
+                    // In an octopus merge, git cat-file still only outputs the first sha from MERGE_HEAD.
+                    bail!("unexpected number of shas")
+                }
+
+                Ok(shas)
             })
-            .ok();
-        if let Some(oid) = self
-            .repository
-            .lock()
-            .find_reference("CHERRY_PICK_HEAD")
-            .ok()
-            .and_then(|reference| reference.target())
-        {
-            shas.push(oid.to_string())
-        }
-        shas
+            .boxed()
     }
 
     fn merge_message(&self) -> BoxFuture<Option<String>> {
         let path = self.path().join("MERGE_MSG");
-        async move { std::fs::read_to_string(&path).ok() }.boxed()
+        self.executor
+            .spawn(async move { std::fs::read_to_string(&path).ok() })
+            .boxed()
     }
 
     fn status(&self, path_prefixes: &[RepoPath]) -> BoxFuture<Result<GitStatus>> {

crates/project/src/git_store.rs 🔗

@@ -16,7 +16,7 @@ use fs::Fs;
 use futures::{
     FutureExt, StreamExt as _,
     channel::{mpsc, oneshot},
-    future::{self, Shared, try_join_all},
+    future::{self, Shared},
 };
 use git::{
     BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH,
@@ -233,11 +233,7 @@ pub struct RepositoryId(pub u64);
 pub struct MergeDetails {
     pub conflicted_paths: TreeSet<RepoPath>,
     pub message: Option<SharedString>,
-    pub apply_head: Option<CommitDetails>,
-    pub cherry_pick_head: Option<CommitDetails>,
-    pub merge_heads: Vec<CommitDetails>,
-    pub rebase_head: Option<CommitDetails>,
-    pub revert_head: Option<CommitDetails>,
+    pub heads: Vec<Option<SharedString>>,
 }
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -1005,6 +1001,7 @@ impl GitStore {
 
                         let sha = backend
                             .head_sha()
+                            .await
                             .ok_or_else(|| anyhow!("failed to read HEAD SHA"))?;
 
                         let provider_registry =
@@ -2695,43 +2692,22 @@ impl MergeDetails {
         status: &SumTree<StatusEntry>,
         prev_snapshot: &RepositorySnapshot,
     ) -> Result<(MergeDetails, bool)> {
-        fn sha_eq<'a>(
-            l: impl IntoIterator<Item = &'a CommitDetails>,
-            r: impl IntoIterator<Item = &'a CommitDetails>,
-        ) -> bool {
-            l.into_iter()
-                .map(|commit| &commit.sha)
-                .eq(r.into_iter().map(|commit| &commit.sha))
-        }
-
-        let merge_heads = try_join_all(
-            backend
-                .merge_head_shas()
-                .into_iter()
-                .map(|sha| backend.show(sha)),
-        )
-        .await?;
-        let cherry_pick_head = backend.show("CHERRY_PICK_HEAD".into()).await.ok();
-        let rebase_head = backend.show("REBASE_HEAD".into()).await.ok();
-        let revert_head = backend.show("REVERT_HEAD".into()).await.ok();
-        let apply_head = backend.show("APPLY_HEAD".into()).await.ok();
-        let message = backend.merge_message().await.map(SharedString::from);
-        let merge_heads_changed = !sha_eq(
-            merge_heads.as_slice(),
-            prev_snapshot.merge.merge_heads.as_slice(),
-        ) || !sha_eq(
-            cherry_pick_head.as_ref(),
-            prev_snapshot.merge.cherry_pick_head.as_ref(),
-        ) || !sha_eq(
-            apply_head.as_ref(),
-            prev_snapshot.merge.apply_head.as_ref(),
-        ) || !sha_eq(
-            rebase_head.as_ref(),
-            prev_snapshot.merge.rebase_head.as_ref(),
-        ) || !sha_eq(
-            revert_head.as_ref(),
-            prev_snapshot.merge.revert_head.as_ref(),
-        );
+        let message = backend.merge_message().await;
+        let heads = backend
+            .revparse_batch(vec![
+                "MERGE_HEAD".into(),
+                "CHERRY_PICK_HEAD".into(),
+                "REBASE_HEAD".into(),
+                "REVERT_HEAD".into(),
+                "APPLY_HEAD".into(),
+            ])
+            .await
+            .log_err()
+            .unwrap_or_default()
+            .into_iter()
+            .map(|opt| opt.map(SharedString::from))
+            .collect::<Vec<_>>();
+        let merge_heads_changed = heads != prev_snapshot.merge.heads;
         let conflicted_paths = if merge_heads_changed {
             TreeSet::from_ordered_entries(
                 status
@@ -2744,12 +2720,8 @@ impl MergeDetails {
         };
         let details = MergeDetails {
             conflicted_paths,
-            message,
-            apply_head,
-            cherry_pick_head,
-            merge_heads,
-            rebase_head,
-            revert_head,
+            message: message.map(SharedString::from),
+            heads,
         };
         Ok((details, merge_heads_changed))
     }
@@ -4578,7 +4550,7 @@ async fn compute_snapshot(
     }
 
     // Useful when branch is None in detached head state
-    let head_commit = match backend.head_sha() {
+    let head_commit = match backend.head_sha().await {
         Some(head_sha) => backend.show(head_sha).await.log_err(),
         None => None,
     };

crates/project/src/git_store/conflict_set.rs 🔗

@@ -532,7 +532,7 @@ mod tests {
                 },
             );
             // Cause the repository to emit MergeHeadsChanged.
-            state.merge_head_shas = vec!["abc".into(), "def".into()]
+            state.refs.insert("MERGE_HEAD".into(), "123".into())
         })
         .unwrap();