Chunk git status entries (#25627)

Conrad Irwin created

Prevents us trying to write 5M untracked files to postgres in one commit

Release Notes:

- N/A *or* Added/Fixed/Improved ...

Change summary

crates/proto/Cargo.toml   |  1 
crates/proto/src/proto.rs | 56 ++++++++++++++++++++++++----------------
2 files changed, 33 insertions(+), 24 deletions(-)

Detailed changes

crates/proto/Cargo.toml 🔗

@@ -18,7 +18,6 @@ doctest = false
 
 [dependencies]
 anyhow.workspace = true
-collections.workspace = true
 prost.workspace = true
 serde.workspace = true
 

crates/proto/src/proto.rs 🔗

@@ -7,7 +7,6 @@ mod typed_envelope;
 pub use error::*;
 pub use typed_envelope::*;
 
-use collections::HashMap;
 pub use prost::{DecodeError, Message};
 use serde::Serialize;
 use std::{
@@ -746,16 +745,10 @@ pub const MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE: usize = 2;
 pub const MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE: usize = 256;
 
 pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item = UpdateWorktree> {
-    let mut done_files = false;
-
-    let mut repository_map = message
-        .updated_repositories
-        .into_iter()
-        .map(|repo| (repo.work_directory_id, repo))
-        .collect::<HashMap<_, _>>();
+    let mut done = false;
 
     iter::from_fn(move || {
-        if done_files {
+        if done {
             return None;
         }
 
@@ -777,28 +770,45 @@ pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item
             .drain(..removed_entries_chunk_size)
             .collect();
 
-        done_files = message.updated_entries.is_empty() && message.removed_entries.is_empty();
-
         let mut updated_repositories = Vec::new();
-
-        if !repository_map.is_empty() {
-            for entry in &updated_entries {
-                if let Some(repo) = repository_map.remove(&entry.id) {
-                    updated_repositories.push(repo);
-                }
+        let mut limit = MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE;
+        while let Some(repo) = message.updated_repositories.first_mut() {
+            let updated_statuses_limit = cmp::min(repo.updated_statuses.len(), limit);
+            let removed_statuses_limit = cmp::min(repo.removed_statuses.len(), limit);
+
+            updated_repositories.push(RepositoryEntry {
+                work_directory_id: repo.work_directory_id,
+                branch: repo.branch.clone(),
+                branch_summary: repo.branch_summary.clone(),
+                updated_statuses: repo
+                    .updated_statuses
+                    .drain(..updated_statuses_limit)
+                    .collect(),
+                removed_statuses: repo
+                    .removed_statuses
+                    .drain(..removed_statuses_limit)
+                    .collect(),
+                current_merge_conflicts: repo.current_merge_conflicts.clone(),
+            });
+            if repo.removed_statuses.is_empty() && repo.updated_statuses.is_empty() {
+                message.updated_repositories.remove(0);
+            }
+            limit = limit.saturating_sub(removed_statuses_limit + updated_statuses_limit);
+            if limit == 0 {
+                break;
             }
         }
 
-        let removed_repositories = if done_files {
+        done = message.updated_entries.is_empty()
+            && message.removed_entries.is_empty()
+            && message.updated_repositories.is_empty();
+
+        let removed_repositories = if done {
             mem::take(&mut message.removed_repositories)
         } else {
             Default::default()
         };
 
-        if done_files {
-            updated_repositories.extend(mem::take(&mut repository_map).into_values());
-        }
-
         Some(UpdateWorktree {
             project_id: message.project_id,
             worktree_id: message.worktree_id,
@@ -807,7 +817,7 @@ pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item
             updated_entries,
             removed_entries,
             scan_id: message.scan_id,
-            is_last_update: done_files && message.is_last_update,
+            is_last_update: done && message.is_last_update,
             updated_repositories,
             removed_repositories,
         })