1use crate::commit::get_messages;
2use crate::{GitRemote, Oid};
3use anyhow::{Context as _, Result, anyhow};
4use collections::{HashMap, HashSet};
5use futures::AsyncWriteExt;
6use gpui::SharedString;
7use serde::{Deserialize, Serialize};
8use std::process::Stdio;
9use std::{ops::Range, path::Path};
10use text::Rope;
11use time::OffsetDateTime;
12use time::UtcOffset;
13use time::macros::format_description;
14
15pub use git2 as libgit;
16
/// Blame information for a single file: the parsed blame entries together
/// with the commit messages and remote URL that accompany them.
#[derive(Debug, Clone, Default)]
pub struct Blame {
    /// Blame entries for the file. [`Blame::for_path`] returns them sorted by
    /// the start of their line range.
    pub entries: Vec<BlameEntry>,
    /// Commit messages keyed by commit SHA. [`Blame::for_path`] requests one
    /// for every distinct SHA that appears in `entries`.
    pub messages: HashMap<Oid, String>,
    /// Remote URL as handed to [`Blame::for_path`] by its caller; stored
    /// unchanged.
    pub remote_url: Option<String>,
}
23
/// A commit message bundled with optional permalink, pull-request and remote
/// details.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type, so the field notes below are inferred from names and types only
/// — confirm against the code that builds it.
#[derive(Clone, Debug, Default)]
pub struct ParsedCommitMessage {
    /// The commit message text.
    pub message: SharedString,
    /// Presumably a link to the commit on its hosting provider — TODO confirm.
    pub permalink: Option<url::Url>,
    /// Presumably the pull request associated with the commit — TODO confirm.
    pub pull_request: Option<crate::hosting_provider::PullRequest>,
    /// Presumably the git remote the commit belongs to — TODO confirm.
    pub remote: Option<GitRemote>,
}
31
32impl Blame {
33 pub async fn for_path(
34 git_binary: &Path,
35 working_directory: &Path,
36 path: &Path,
37 content: &Rope,
38 remote_url: Option<String>,
39 ) -> Result<Self> {
40 let output = run_git_blame(git_binary, working_directory, path, content).await?;
41 let mut entries = parse_git_blame(&output)?;
42 entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start));
43
44 let mut unique_shas = HashSet::default();
45
46 for entry in entries.iter_mut() {
47 unique_shas.insert(entry.sha);
48 }
49
50 let shas = unique_shas.into_iter().collect::<Vec<_>>();
51 let messages = get_messages(working_directory, &shas)
52 .await
53 .context("failed to get commit messages")?;
54
55 Ok(Self {
56 entries,
57 messages,
58 remote_url,
59 })
60 }
61}
62
// Error-message fragments for two `git blame` failure modes: a repository
// whose HEAD does not resolve to a commit, and a path git does not know.
// NOTE(review): neither constant is referenced in the visible part of this
// file; presumably the code that runs `git blame` matches them against the
// command's stderr — confirm.
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
65
/// One entry of `git blame --incremental` output: a run of consecutive lines
/// attributed to a single commit, plus the metadata reported for that commit.
///
/// The field notes describe how `parse_git_blame` fills the entry in.
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
    /// SHA parsed from the entry's header line.
    pub sha: Oid,

    /// Lines covered by this entry as a zero-based, end-exclusive range:
    /// it starts at `<resultline> - 1` and spans `<num-lines>` lines.
    pub range: Range<u32>,

    /// The `<sourceline>` value from the entry's header line.
    pub original_line_number: u32,

    /// Value of the `author` line.
    pub author: Option<String>,
    /// Value of the `author-mail` line, stored verbatim.
    pub author_mail: Option<String>,
    /// Value of the `author-time` line, parsed as an integer.
    pub author_time: Option<i64>,
    /// Value of the `author-tz` line (e.g. `+0100`), stored verbatim.
    pub author_tz: Option<String>,

    /// Value of the `committer` line.
    pub committer_name: Option<String>,
    /// Value of the `committer-mail` line, stored verbatim.
    pub committer_email: Option<String>,
    /// Value of the `committer-time` line, parsed as an integer.
    pub committer_time: Option<i64>,
    /// Value of the `committer-tz` line, stored verbatim.
    pub committer_tz: Option<String>,

    /// Value of the `summary` line.
    pub summary: Option<String>,

    /// Value of the `previous` line, stored verbatim.
    pub previous: Option<String>,
    /// Value of the `filename` line, which closes the entry.
    pub filename: String,
}
89
90impl BlameEntry {
91 // Returns a BlameEntry by parsing the first line of a `git blame --incremental`
92 // entry. The line MUST have this format:
93 //
94 // <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
95 fn new_from_blame_line(line: &str) -> Result<BlameEntry> {
96 let mut parts = line.split_whitespace();
97
98 let sha = parts
99 .next()
100 .and_then(|line| line.parse::<Oid>().ok())
101 .with_context(|| format!("parsing sha from {line}"))?;
102
103 let original_line_number = parts
104 .next()
105 .and_then(|line| line.parse::<u32>().ok())
106 .with_context(|| format!("parsing original line number from {line}"))?;
107 let final_line_number = parts
108 .next()
109 .and_then(|line| line.parse::<u32>().ok())
110 .with_context(|| format!("parsing final line number from {line}"))?;
111
112 let line_count = parts
113 .next()
114 .and_then(|line| line.parse::<u32>().ok())
115 .with_context(|| format!("parsing line count from {line}"))?;
116
117 let start_line = final_line_number.saturating_sub(1);
118 let end_line = start_line + line_count;
119 let range = start_line..end_line;
120
121 Ok(Self {
122 sha,
123 range,
124 original_line_number,
125 ..Default::default()
126 })
127 }
128
129 pub fn author_offset_date_time(&self) -> Result<time::OffsetDateTime> {
130 if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) {
131 let format = format_description!("[offset_hour][offset_minute]");
132 let offset = UtcOffset::parse(author_tz, &format)?;
133 let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?;
134
135 Ok(date_time_utc.to_offset(offset))
136 } else {
137 // Directly return current time in UTC if there's no committer time or timezone
138 Ok(time::OffsetDateTime::now_utc())
139 }
140 }
141}
142
143// parse_git_blame parses the output of `git blame --incremental`, which returns
144// all the blame-entries for a given path incrementally, as it finds them.
145//
146// Each entry *always* starts with:
147//
148// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
149//
150// Each entry *always* ends with:
151//
152// filename <whitespace-quoted-filename-goes-here>
153//
154// Line numbers are 1-indexed.
155//
156// A `git blame --incremental` entry looks like this:
157//
158// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
159// author Joe Schmoe
160// author-mail <joe.schmoe@example.com>
161// author-time 1709741400
162// author-tz +0100
163// committer Joe Schmoe
164// committer-mail <joe.schmoe@example.com>
165// committer-time 1709741400
166// committer-tz +0100
167// summary Joe's cool commit
168// previous 486c2409237a2c627230589e567024a96751d475 index.js
169// filename index.js
170//
171// If the entry has the same SHA as an entry that was already printed then no
172// signature information is printed:
173//
174// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
175// previous 486c2409237a2c627230589e567024a96751d475 index.js
176// filename index.js
177//
178// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html
179fn parse_git_blame(output: &str) -> Result<Vec<BlameEntry>> {
180 let mut entries: Vec<BlameEntry> = Vec::new();
181 let mut index: HashMap<Oid, usize> = HashMap::default();
182
183 let mut current_entry: Option<BlameEntry> = None;
184
185 for line in output.lines() {
186 let mut done = false;
187
188 match &mut current_entry {
189 None => {
190 let mut new_entry = BlameEntry::new_from_blame_line(line)?;
191
192 if let Some(existing_entry) = index
193 .get(&new_entry.sha)
194 .and_then(|slot| entries.get(*slot))
195 {
196 new_entry.author.clone_from(&existing_entry.author);
197 new_entry
198 .author_mail
199 .clone_from(&existing_entry.author_mail);
200 new_entry.author_time = existing_entry.author_time;
201 new_entry.author_tz.clone_from(&existing_entry.author_tz);
202 new_entry
203 .committer_name
204 .clone_from(&existing_entry.committer_name);
205 new_entry
206 .committer_email
207 .clone_from(&existing_entry.committer_email);
208 new_entry.committer_time = existing_entry.committer_time;
209 new_entry
210 .committer_tz
211 .clone_from(&existing_entry.committer_tz);
212 new_entry.summary.clone_from(&existing_entry.summary);
213 }
214
215 current_entry.replace(new_entry);
216 }
217 Some(entry) => {
218 let Some((key, value)) = line.split_once(' ') else {
219 continue;
220 };
221 let is_committed = !entry.sha.is_zero();
222 match key {
223 "filename" => {
224 entry.filename = value.into();
225 done = true;
226 }
227 "previous" => entry.previous = Some(value.into()),
228
229 "summary" if is_committed => entry.summary = Some(value.into()),
230 "author" if is_committed => entry.author = Some(value.into()),
231 "author-mail" if is_committed => entry.author_mail = Some(value.into()),
232 "author-time" if is_committed => {
233 entry.author_time = Some(value.parse::<i64>()?)
234 }
235 "author-tz" if is_committed => entry.author_tz = Some(value.into()),
236
237 "committer" if is_committed => entry.committer_name = Some(value.into()),
238 "committer-mail" if is_committed => entry.committer_email = Some(value.into()),
239 "committer-time" if is_committed => {
240 entry.committer_time = Some(value.parse::<i64>()?)
241 }
242 "committer-tz" if is_committed => entry.committer_tz = Some(value.into()),
243 _ => {}
244 }
245 }
246 };
247
248 if done {
249 if let Some(entry) = current_entry.take() {
250 index.insert(entry.sha, entries.len());
251
252 // We only want annotations that have a commit.
253 if !entry.sha.is_zero() {
254 entries.push(entry);
255 }
256 }
257 }
258 }
259
260 Ok(entries)
261}
262
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::BlameEntry;
    use super::parse_git_blame;

    /// Returns this crate's `test_data` directory.
    fn test_data_dir() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test_data")
    }

    /// Reads `test_data/<filename>` into a string, panicking if it is missing.
    fn read_test_data(filename: &str) -> String {
        let path = test_data_dir().join(filename);

        std::fs::read_to_string(&path)
            .unwrap_or_else(|_| panic!("Could not read test data at {path:?}. Is it generated?"))
    }

    /// Compares `entries`, serialized as pretty JSON, with the golden file
    /// `test_data/golden/<golden_filename>.json`.
    ///
    /// When the `UPDATE_GOLDEN` environment variable is `true` (case
    /// insensitive) the golden file is rewritten instead of compared.
    fn assert_eq_golden(entries: &[BlameEntry], golden_filename: &str) {
        let path = test_data_dir()
            .join("golden")
            .join(format!("{golden_filename}.json"));

        let mut have_json =
            serde_json::to_string_pretty(entries).expect("could not serialize entries to JSON");
        // We always want to save with a trailing newline.
        have_json.push('\n');

        let update = std::env::var("UPDATE_GOLDEN")
            .map(|val| val.eq_ignore_ascii_case("true"))
            .unwrap_or(false);

        if update {
            let golden_dir = path
                .parent()
                .expect("golden file path always has a parent directory");
            std::fs::create_dir_all(golden_dir)
                .expect("could not create golden test data directory");
            std::fs::write(&path, have_json).expect("could not write out golden data");
        } else {
            // Normalize line endings so checkouts with CRLF conversion still match.
            let want_json = std::fs::read_to_string(&path)
                .unwrap_or_else(|_| {
                    panic!("could not read golden test data file at {path:?}. Did you run the test with UPDATE_GOLDEN=true before?");
                })
                .replace("\r\n", "\n");

            pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries");
        }
    }

    /// Parses the fixture `test_data/<name>` and checks the result against the
    /// golden file of the same name.
    fn assert_parses_to_golden(name: &str) {
        let output = read_test_data(name);
        let entries = parse_git_blame(&output).unwrap();
        assert_eq_golden(&entries, name);
    }

    #[test]
    fn test_parse_git_blame_not_committed() {
        assert_parses_to_golden("blame_incremental_not_committed");
    }

    #[test]
    fn test_parse_git_blame_simple() {
        assert_parses_to_golden("blame_incremental_simple");
    }

    #[test]
    fn test_parse_git_blame_complex() {
        assert_parses_to_golden("blame_incremental_complex");
    }
}