git: Don't show binary files in past commit view (#46312)

Cole Miller created

Also improves the commit view to use the same status-based buffer header
visual overrides as the project diff as a driveby.

Fixes https://github.com/zed-industries/zed/issues/45735

Release Notes:

- git: Binary files are no longer shown in garbled form when viewing an
old commit.

Change summary

crates/editor/src/editor.rs      |   9 --
crates/editor/src/element.rs     |   2 
crates/git/src/repository.rs     |  35 +++++++-
crates/git_ui/src/commit_view.rs | 121 ++++++++++++++++++++++++++++++---
crates/language/src/buffer.rs    |   4 +
crates/project/src/git_store.rs  |   2 
crates/proto/proto/git.proto     |   1 
crates/worktree/src/worktree.rs  |   4 +
8 files changed, 151 insertions(+), 27 deletions(-)

Detailed changes

crates/editor/src/editor.rs 🔗

@@ -22489,7 +22489,7 @@ impl Editor {
         }
 
         new_selections_by_buffer
-            .retain(|buffer, _| Self::can_open_excerpts_in_file(buffer.read(cx).file()));
+            .retain(|buffer, _| buffer.read(cx).file().is_none_or(|file| file.can_open()));
 
         if new_selections_by_buffer.is_empty() {
             return;
@@ -22597,13 +22597,6 @@ impl Editor {
         });
     }
 
-    // Allow opening excerpts for buffers that either belong to the current project
-    // or represent synthetic/non-local files (e.g., git blobs). File-less buffers
-    // are also supported so tests and other in-memory views keep working.
-    fn can_open_excerpts_in_file(file: Option<&Arc<dyn language::File>>) -> bool {
-        file.is_none_or(|file| project::File::from_dyn(Some(file)).is_some() || !file.is_local())
-    }
-
     fn marked_text_ranges(&self, cx: &App) -> Option<Vec<Range<MultiBufferOffsetUtf16>>> {
         let snapshot = self.buffer.read(cx).read(cx);
         let (_, ranges) = self.text_highlights::<InputComposition>(cx)?;

crates/editor/src/element.rs 🔗

@@ -3913,7 +3913,7 @@ impl EditorElement {
             .map(|project| project.read(cx).visible_worktrees(cx).count() > 1)
             .unwrap_or_default();
         let file = for_excerpt.buffer.file();
-        let can_open_excerpts = Editor::can_open_excerpts_in_file(file);
+        let can_open_excerpts = file.is_none_or(|file| file.can_open());
         let path_style = file.map(|file| file.path_style(cx));
         let relative_path = for_excerpt.buffer.resolve_file_path(include_root, cx);
         let (parent_path, filename) = if let Some(path) = &relative_path {

crates/git/src/repository.rs 🔗

@@ -248,6 +248,7 @@ pub struct CommitFile {
     pub path: RepoPath,
     pub old_text: Option<String>,
     pub new_text: Option<String>,
+    pub is_binary: bool,
 }
 
 impl CommitDetails {
@@ -256,6 +257,13 @@ impl CommitDetails {
     }
 }
 
+/// Detects if content is binary by checking for NUL bytes in the first 8000 bytes.
+/// This matches git's binary detection heuristic.
+pub fn is_binary_content(content: &[u8]) -> bool {
+    let check_len = content.len().min(8000);
+    content[..check_len].contains(&0)
+}
+
 #[derive(Debug, Clone, Hash, PartialEq, Eq)]
 pub struct Remote {
     pub name: SharedString,
@@ -903,13 +911,19 @@ impl GitRepository for RealGitRepository {
                 let len = info_line.trim_end().parse().with_context(|| {
                     format!("invalid object size output from cat-file {info_line}")
                 })?;
-                let mut text = vec![0; len];
-                stdout.read_exact(&mut text).await?;
+                let mut text_bytes = vec![0; len];
+                stdout.read_exact(&mut text_bytes).await?;
                 stdout.read_exact(&mut newline).await?;
-                let text = String::from_utf8_lossy(&text).to_string();
 
                 let mut old_text = None;
                 let mut new_text = None;
+                let mut is_binary = is_binary_content(&text_bytes);
+                let text = if is_binary {
+                    String::new()
+                } else {
+                    String::from_utf8_lossy(&text_bytes).to_string()
+                };
+
                 match status_code {
                     StatusCode::Modified => {
                         info_line.clear();
@@ -917,11 +931,17 @@ impl GitRepository for RealGitRepository {
                         let len = info_line.trim_end().parse().with_context(|| {
                             format!("invalid object size output from cat-file {}", info_line)
                         })?;
-                        let mut parent_text = vec![0; len];
-                        stdout.read_exact(&mut parent_text).await?;
+                        let mut parent_bytes = vec![0; len];
+                        stdout.read_exact(&mut parent_bytes).await?;
                         stdout.read_exact(&mut newline).await?;
-                        old_text = Some(String::from_utf8_lossy(&parent_text).to_string());
-                        new_text = Some(text);
+                        is_binary = is_binary || is_binary_content(&parent_bytes);
+                        if is_binary {
+                            old_text = Some(String::new());
+                            new_text = Some(String::new());
+                        } else {
+                            old_text = Some(String::from_utf8_lossy(&parent_bytes).to_string());
+                            new_text = Some(text);
+                        }
                     }
                     StatusCode::Added => new_text = Some(text),
                     StatusCode::Deleted => old_text = Some(text),
@@ -932,6 +952,7 @@ impl GitRepository for RealGitRepository {
                     path: RepoPath(Arc::from(rel_path)),
                     old_text,
                     new_text,
+                    is_binary,
                 })
             }
 

crates/git_ui/src/commit_view.rs 🔗

@@ -1,8 +1,10 @@
 use anyhow::{Context as _, Result};
 use buffer_diff::BufferDiff;
+use collections::HashMap;
 use editor::display_map::{BlockPlacement, BlockProperties, BlockStyle};
-use editor::{Editor, EditorEvent, ExcerptRange, MultiBuffer, multibuffer_context_lines};
-use git::repository::{CommitDetails, CommitDiff, RepoPath};
+use editor::{Addon, Editor, EditorEvent, ExcerptRange, MultiBuffer, multibuffer_context_lines};
+use git::repository::{CommitDetails, CommitDiff, RepoPath, is_binary_content};
+use git::status::{FileStatus, StatusCode, TrackedStatus};
 use git::{
     BuildCommitPermalinkParams, GitHostingProviderRegistry, GitRemote, ParsedGitRemote,
     parse_git_remote_url,
@@ -20,6 +22,7 @@ use multi_buffer::PathKey;
 use project::{Project, WorktreeId, git_store::Repository};
 use std::{
     any::{Any, TypeId},
+    collections::HashSet,
     path::PathBuf,
     sync::Arc,
 };
@@ -69,9 +72,28 @@ struct GitBlob {
     path: RepoPath,
     worktree_id: WorktreeId,
     is_deleted: bool,
+    is_binary: bool,
     display_name: String,
 }
 
+struct CommitDiffAddon {
+    file_statuses: HashMap<language::BufferId, FileStatus>,
+}
+
+impl Addon for CommitDiffAddon {
+    fn to_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn override_status_for_buffer_id(
+        &self,
+        buffer_id: language::BufferId,
+        _cx: &App,
+    ) -> Option<FileStatus> {
+        self.file_statuses.get(&buffer_id).copied()
+    }
+}
+
 const COMMIT_MESSAGE_SORT_PREFIX: u64 = 0;
 const FILE_NAMESPACE_SORT_PREFIX: u64 = 1;
 
@@ -226,10 +248,27 @@ impl CommitView {
         let repository_clone = repository.clone();
 
         cx.spawn(async move |this, cx| {
+            let mut binary_buffer_ids: HashSet<language::BufferId> = HashSet::default();
+            let mut file_statuses: HashMap<language::BufferId, FileStatus> = HashMap::default();
+
             for file in commit_diff.files {
+                let is_created = file.old_text.is_none();
                 let is_deleted = file.new_text.is_none();
-                let new_text = file.new_text.unwrap_or_default();
-                let old_text = file.old_text;
+                let raw_new_text = file.new_text.unwrap_or_default();
+                let raw_old_text = file.old_text;
+
+                let is_binary = file.is_binary
+                    || is_binary_content(raw_new_text.as_bytes())
+                    || raw_old_text
+                        .as_ref()
+                        .is_some_and(|text| is_binary_content(text.as_bytes()));
+
+                let new_text = if is_binary {
+                    "(binary file not shown)".to_string()
+                } else {
+                    raw_new_text
+                };
+                let old_text = if is_binary { None } else { raw_old_text };
                 let worktree_id = repository_clone
                     .update(cx, |repository, cx| {
                         repository
@@ -249,19 +288,46 @@ impl CommitView {
                 let file = Arc::new(GitBlob {
                     path: file.path.clone(),
                     is_deleted,
+                    is_binary,
                     worktree_id,
                     display_name,
                 }) as Arc<dyn language::File>;
 
                 let buffer = build_buffer(new_text, file, &language_registry, cx).await?;
-                let buffer_diff =
-                    build_buffer_diff(old_text, &buffer, &language_registry, cx).await?;
+                let buffer_id = cx.update(|cx| buffer.read(cx).remote_id());
+
+                let status_code = if is_created {
+                    StatusCode::Added
+                } else if is_deleted {
+                    StatusCode::Deleted
+                } else {
+                    StatusCode::Modified
+                };
+                file_statuses.insert(
+                    buffer_id,
+                    FileStatus::Tracked(TrackedStatus {
+                        index_status: status_code,
+                        worktree_status: StatusCode::Unmodified,
+                    }),
+                );
+
+                if is_binary {
+                    binary_buffer_ids.insert(buffer_id);
+                }
+
+                let buffer_diff = if is_binary {
+                    None
+                } else {
+                    Some(build_buffer_diff(old_text, &buffer, &language_registry, cx).await?)
+                };
 
                 this.update(cx, |this, cx| {
                     this.multibuffer.update(cx, |multibuffer, cx| {
                         let snapshot = buffer.read(cx).snapshot();
                         let path = snapshot.file().unwrap().path().clone();
-                        let excerpt_ranges = {
+                        let excerpt_ranges = if is_binary {
+                            vec![language::Point::zero()..snapshot.max_point()]
+                        } else if let Some(buffer_diff) = &buffer_diff {
                             let diff_snapshot = buffer_diff.read(cx).snapshot(cx);
                             let mut hunks = diff_snapshot.hunks(&snapshot).peekable();
                             if hunks.peek().is_none() {
@@ -271,6 +337,8 @@ impl CommitView {
                                     .map(|hunk| hunk.buffer_range.to_point(&snapshot))
                                     .collect::<Vec<_>>()
                             }
+                        } else {
+                            vec![language::Point::zero()..snapshot.max_point()]
                         };
 
                         let _is_newly_added = multibuffer.set_excerpts_for_path(
@@ -280,11 +348,26 @@ impl CommitView {
                             multibuffer_context_lines(cx),
                             cx,
                         );
-                        multibuffer.add_diff(buffer_diff, cx);
+                        if let Some(buffer_diff) = buffer_diff {
+                            multibuffer.add_diff(buffer_diff, cx);
+                        }
                     });
                 })?;
             }
 
+            this.update(cx, |this, cx| {
+                this.editor.update(cx, |editor, _cx| {
+                    editor.register_addon(CommitDiffAddon { file_statuses });
+                });
+                if !binary_buffer_ids.is_empty() {
+                    this.editor.update(cx, |editor, cx| {
+                        for buffer_id in binary_buffer_ids {
+                            editor.fold_buffer(buffer_id, cx);
+                        }
+                    });
+                }
+            })?;
+
             anyhow::Ok(())
         })
         .detach();
@@ -741,6 +824,10 @@ impl language::File for GitBlob {
     fn is_private(&self) -> bool {
         false
     }
+
+    fn can_open(&self) -> bool {
+        !self.is_binary
+    }
 }
 
 async fn build_buffer(
@@ -949,10 +1036,22 @@ impl Item for CommitView {
     where
         Self: Sized,
     {
+        let file_statuses = self
+            .editor
+            .read(cx)
+            .addon::<CommitDiffAddon>()
+            .map(|addon| addon.file_statuses.clone())
+            .unwrap_or_default();
         Task::ready(Some(cx.new(|cx| {
-            let editor = cx.new(|cx| {
-                self.editor
-                    .update(cx, |editor, cx| editor.clone(window, cx))
+            let editor = cx.new({
+                let file_statuses = file_statuses.clone();
+                |cx| {
+                    let mut editor = self
+                        .editor
+                        .update(cx, |editor, cx| editor.clone(window, cx));
+                    editor.register_addon(CommitDiffAddon { file_statuses });
+                    editor
+                }
             });
             let multibuffer = editor.read(cx).buffer().clone();
             Self {

crates/language/src/buffer.rs 🔗

@@ -423,6 +423,10 @@ pub trait File: Send + Sync + Any {
 
     /// Return whether Zed considers this to be a private file.
     fn is_private(&self) -> bool;
+
+    fn can_open(&self) -> bool {
+        !self.is_local()
+    }
 }
 
 /// The file's storage status - whether it's stored (`Present`), and if so when it was last

crates/project/src/git_store.rs 🔗

@@ -2389,6 +2389,7 @@ impl GitStore {
                     path: file.path.to_proto(),
                     old_text: file.old_text,
                     new_text: file.new_text,
+                    is_binary: file.is_binary,
                 })
                 .collect(),
         })
@@ -4126,6 +4127,7 @@ impl Repository {
                                     path: RepoPath::from_proto(&file.path)?,
                                     old_text: file.old_text,
                                     new_text: file.new_text,
+                                    is_binary: file.is_binary,
                                 })
                             })
                             .collect::<Result<Vec<_>>>()?,

crates/proto/proto/git.proto 🔗

@@ -283,6 +283,7 @@ message CommitFile {
     string path = 1;
     optional string old_text = 2;
     optional string new_text = 3;
+    bool is_binary = 4;
 }
 
 message GitReset {

crates/worktree/src/worktree.rs 🔗

@@ -3252,6 +3252,10 @@ impl language::File for File {
     fn path_style(&self, cx: &App) -> PathStyle {
         self.worktree.read(cx).path_style()
     }
+
+    fn can_open(&self) -> bool {
+        true
+    }
 }
 
 impl language::LocalFile for File {