git: Fix stage/unstage failure with a large number of files (#47800)

Marco Mihai Condrache created

Git commands can fail when staging or unstaging a very large number of
files because each file path is passed as a separate CLI argument. Once
the argument list grows beyond the OS limit, the git command fails with
"Argument list too long (os error 7)".

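Since git 2.26, pathspecs can be read from stdin (`--pathspec-from-file=-`
with `--pathspec-file-nul`), which lets us avoid passing thousands of paths
as arguments and bypass this limitation. Staging feeds NUL-separated paths
to `git update-index -z --stdin` instead. For checkout and reset, if git's
error output mentions `pathspec-from-file` (older git versions), we fall
back to passing the paths as command-line arguments.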

I looked for existing issues related to this but couldn’t find any.

Repro:

- Clone the rust-lang repo
- Delete the root tests folder
- Try to stage or unstage the resulting changes (40,000+ files)
- Git fails with "Argument list too long (os error 7)"

Release Notes:

- Fixed an issue where staging or unstaging could fail when operating on a
very large number of files.

Change summary

crates/git/src/repository.rs | 137 ++++++++++++++++++++++++++++++-------
1 file changed, 110 insertions(+), 27 deletions(-)

Detailed changes

crates/git/src/repository.rs 🔗

@@ -1162,8 +1162,42 @@ impl GitRepository for RealGitRepository {
                 return Ok(());
             }
 
+            let working_directory = working_directory?;
+            let mut child = new_smol_command(&git_binary_path)
+                .current_dir(&working_directory)
+                .envs(env.iter())
+                .args([
+                    "checkout",
+                    &commit,
+                    "--pathspec-from-file=-",
+                    "--pathspec-file-nul",
+                ])
+                .stdin(Stdio::piped())
+                .stdout(Stdio::null())
+                .stderr(Stdio::piped())
+                .spawn()
+                .context("failed to spawn git checkout")?;
+
+            let mut stdin = child.stdin.take().context("failed to get stdin")?;
+            for path in &paths {
+                stdin.write_all(path.as_unix_str().as_bytes()).await?;
+                stdin.write_all(b"\0").await?;
+            }
+            drop(stdin);
+
+            let output = child.output().await?;
+            if output.status.success() {
+                return Ok(());
+            }
+
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            if !stderr.contains("pathspec-from-file") {
+                anyhow::bail!("Failed to checkout files:\n{}", stderr);
+            }
+
+            // Fallback for older git versions: pass paths as command-line arguments
             let output = new_smol_command(&git_binary_path)
-                .current_dir(&working_directory?)
+                .current_dir(&working_directory)
                 .envs(env.iter())
                 .args(["checkout", &commit, "--"])
                 .args(paths.iter().map(|path| path.as_unix_str()))
@@ -1850,20 +1884,33 @@ impl GitRepository for RealGitRepository {
         let git_binary_path = self.any_git_binary_path.clone();
         self.executor
             .spawn(async move {
-                if !paths.is_empty() {
-                    let output = new_smol_command(&git_binary_path)
-                        .current_dir(&working_directory?)
-                        .envs(env.iter())
-                        .args(["update-index", "--add", "--remove", "--"])
-                        .args(paths.iter().map(|p| p.as_unix_str()))
-                        .output()
-                        .await?;
-                    anyhow::ensure!(
-                        output.status.success(),
-                        "Failed to stage paths:\n{}",
-                        String::from_utf8_lossy(&output.stderr),
-                    );
+                if paths.is_empty() {
+                    return Ok(());
+                }
+
+                let mut child = new_smol_command(&git_binary_path)
+                    .current_dir(&working_directory?)
+                    .envs(env.iter())
+                    .args(["update-index", "--add", "--remove", "-z", "--stdin"])
+                    .stdin(Stdio::piped())
+                    .stdout(Stdio::null())
+                    .stderr(Stdio::piped())
+                    .spawn()
+                    .context("failed to spawn git update-index")?;
+
+                let mut stdin = child.stdin.take().context("failed to get stdin")?;
+                for path in &paths {
+                    stdin.write_all(path.as_unix_str().as_bytes()).await?;
+                    stdin.write_all(b"\0").await?;
                 }
+                drop(stdin);
+
+                let output = child.output().await?;
+                anyhow::ensure!(
+                    output.status.success(),
+                    "Failed to stage paths:\n{}",
+                    String::from_utf8_lossy(&output.stderr),
+                );
                 Ok(())
             })
             .boxed()
@@ -1879,21 +1926,57 @@ impl GitRepository for RealGitRepository {
 
         self.executor
             .spawn(async move {
-                if !paths.is_empty() {
-                    let output = new_smol_command(&git_binary_path)
-                        .current_dir(&working_directory?)
-                        .envs(env.iter())
-                        .args(["reset", "--quiet", "--"])
-                        .args(paths.iter().map(|p| p.as_std_path()))
-                        .output()
-                        .await?;
+                if paths.is_empty() {
+                    return Ok(());
+                }
 
-                    anyhow::ensure!(
-                        output.status.success(),
-                        "Failed to unstage:\n{}",
-                        String::from_utf8_lossy(&output.stderr),
-                    );
+                let working_directory = working_directory?;
+                let mut child = new_smol_command(&git_binary_path)
+                    .current_dir(&working_directory)
+                    .envs(env.iter())
+                    .args([
+                        "reset",
+                        "--quiet",
+                        "--pathspec-from-file=-",
+                        "--pathspec-file-nul",
+                    ])
+                    .stdin(Stdio::piped())
+                    .stdout(Stdio::null())
+                    .stderr(Stdio::piped())
+                    .spawn()
+                    .context("failed to spawn git reset")?;
+
+                let mut stdin = child.stdin.take().context("failed to get stdin")?;
+                for path in &paths {
+                    stdin.write_all(path.as_unix_str().as_bytes()).await?;
+                    stdin.write_all(b"\0").await?;
+                }
+                drop(stdin);
+
+                let output = child.output().await?;
+                if output.status.success() {
+                    return Ok(());
                 }
+
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                if !stderr.contains("pathspec-from-file") {
+                    anyhow::bail!("Failed to unstage:\n{}", stderr);
+                }
+
+                // Fallback for older git versions: pass paths as command-line arguments
+                let output = new_smol_command(&git_binary_path)
+                    .current_dir(&working_directory)
+                    .envs(env.iter())
+                    .args(["reset", "--quiet", "--"])
+                    .args(paths.iter().map(|p| p.as_std_path()))
+                    .output()
+                    .await?;
+
+                anyhow::ensure!(
+                    output.status.success(),
+                    "Failed to unstage:\n{}",
+                    String::from_utf8_lossy(&output.stderr),
+                );
                 Ok(())
             })
             .boxed()