1use std::borrow::Cow;
2
/// One classified line of a unified diff, as produced by [`DiffLine::parse`].
///
/// String payloads borrow from the parsed line wherever possible; header
/// paths only allocate when they had to be unquoted or unescaped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffLine<'a> {
    /// A `--- <path>` file header; a leading `a/` has been removed from the path.
    OldPath { path: Cow<'a, str> },
    /// A `+++ <path>` file header; a leading `b/` has been removed from the path.
    NewPath { path: Cow<'a, str> },
    /// An `@@ ... @@` hunk header. `None` when the header carries a literal
    /// `...` placeholder instead of line ranges.
    HunkHeader(Option<HunkLocation>),
    /// An unchanged line, without its leading space. A completely empty line
    /// is reported as `Context("")`.
    Context(&'a str),
    /// A removed line, without its leading `-`.
    Deletion(&'a str),
    /// An added line, without its leading `+`.
    Addition(&'a str),
    /// Any line that matches none of the shapes above.
    Garbage,
}

/// The line ranges named by a hunk header.
///
/// As filled in by [`DiffLine::parse`], the start lines are the header's
/// numbers minus one (saturating at zero); the counts are taken verbatim.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HunkLocation {
    /// Start of the hunk in the old file (header value minus one).
    start_line_old: u32,
    /// Number of old-file lines covered by the hunk.
    count_old: u32,
    /// Start of the hunk in the new file (header value minus one).
    start_line_new: u32,
    /// Number of new-file lines covered by the hunk.
    count_new: u32,
}
21
22impl<'a> DiffLine<'a> {
23 pub fn parse(line: &'a str) -> Self {
24 Self::try_parse(line).unwrap_or(Self::Garbage)
25 }
26
27 fn try_parse(line: &'a str) -> Option<Self> {
28 if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
29 let path = parse_header_path("a/", header);
30 Some(Self::OldPath { path })
31 } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
32 Some(Self::NewPath {
33 path: parse_header_path("b/", header),
34 })
35 } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
36 if header.starts_with("...") {
37 return Some(Self::HunkHeader(None));
38 }
39
40 let (start_line_old, header) = header.strip_prefix('-')?.split_once(',')?;
41 let mut parts = header.split_ascii_whitespace();
42 let count_old = parts.next()?;
43 let (start_line_new, count_new) = parts.next()?.strip_prefix('+')?.split_once(',')?;
44
45 Some(Self::HunkHeader(Some(HunkLocation {
46 start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
47 count_old: count_old.parse().ok()?,
48 start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
49 count_new: count_new.parse().ok()?,
50 })))
51 } else if let Some(deleted_header) = line.strip_prefix("-") {
52 Some(Self::Deletion(deleted_header))
53 } else if line.is_empty() {
54 Some(Self::Context(""))
55 } else if let Some(context) = line.strip_prefix(" ") {
56 Some(Self::Context(context))
57 } else {
58 Some(Self::Addition(line.strip_prefix("+")?))
59 }
60 }
61}
62
/// Extracts the file path from the text that follows `---`/`+++` in a file
/// header.
///
/// * `strip_prefix` is removed once from the start of the path when present
///   (`"a/"` or `"b/"` for the usual headers).
/// * The path ends at the first ASCII whitespace outside double quotes, so
///   trailing data such as a timestamp is dropped.
/// * Double quotes are removed and may wrap the whole path or part of it.
/// * A backslash escapes the next character. Inside quotes the C-style
///   escapes `\a \b \f \n \r \t \v` and octal `\NNN` byte escapes are decoded;
///   any other escaped character, and every escape outside quotes, stands
///   for itself.
///
/// Returns a borrowed slice of `header` when no quote or backslash is
/// present, and an owned string otherwise. Bytes from octal escapes that do
/// not form valid UTF-8 are replaced with U+FFFD.
fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
    // Fast path: nothing to unquote or unescape, so borrow from the input.
    if !header.contains(['"', '\\']) {
        let path = header.split_ascii_whitespace().next().unwrap_or(header);
        return Cow::Borrowed(path.strip_prefix(strip_prefix).unwrap_or(path));
    }

    // Collected as bytes because octal escapes describe raw UTF-8 bytes that
    // only become valid text once the whole sequence has been read.
    let mut path: Vec<u8> = Vec::with_capacity(header.len());
    let mut in_quote = false;
    let mut chars = header.chars().peekable();
    let mut pending_prefix = Some(strip_prefix);

    while let Some(ch) = chars.next() {
        match ch {
            '"' => in_quote = !in_quote,
            '\\' => {
                // A trailing lone backslash has nothing to escape; stop there.
                let Some(escaped) = chars.next() else {
                    break;
                };
                match (in_quote, escaped) {
                    (true, '0'..='7') => {
                        // `escaped` is an ASCII digit, so the cast is lossless.
                        let mut byte = escaped as u8 - b'0';
                        // Up to two more octal digits, as long as the value
                        // still fits in one byte.
                        for _ in 0..2 {
                            let Some(digit) = chars.peek().and_then(|c| c.to_digit(8)) else {
                                break;
                            };
                            // `digit` is below 8, so the cast is lossless.
                            let Some(widened) =
                                byte.checked_mul(8).and_then(|b| b.checked_add(digit as u8))
                            else {
                                break;
                            };
                            byte = widened;
                            chars.next();
                        }
                        path.push(byte);
                    }
                    (true, 'a') => path.push(0x07),
                    (true, 'b') => path.push(0x08),
                    (true, 'f') => path.push(0x0c),
                    (true, 'n') => path.push(b'\n'),
                    (true, 'r') => path.push(b'\r'),
                    (true, 't') => path.push(b'\t'),
                    (true, 'v') => path.push(0x0b),
                    (_, literal) => {
                        path.extend_from_slice(literal.encode_utf8(&mut [0u8; 4]).as_bytes());
                    }
                }
            }
            _ if ch.is_ascii_whitespace() && !in_quote => break,
            _ => path.extend_from_slice(ch.encode_utf8(&mut [0u8; 4]).as_bytes()),
        }

        // Drop the prefix the moment the collected text equals it. The buffer
        // only grows, so this can only match at the very start of the path.
        if let Some(prefix) = pending_prefix {
            if path.as_slice() == prefix.as_bytes() {
                pending_prefix = None;
                path.clear();
            }
        }
    }

    Cow::Owned(String::from_utf8_lossy(&path).into_owned())
}
99
/// Skips the leading ASCII whitespace of `header`, insisting that there is some.
///
/// Returns the remainder after the whitespace, or `None` when `header` does
/// not start with at least one ASCII whitespace character.
fn eat_required_whitespace(header: &str) -> Option<&str> {
    let rest = header.trim_ascii_start();
    // Trimming shortens the string exactly when whitespace was present.
    let consumed_any = rest.len() < header.len();
    consumed_any.then_some(rest)
}
109
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Runs every line kind through `DiffLine::parse` in one pass.
    #[test]
    fn parse_lines_simple() {
        // The `context` line keeps one leading space after `indoc!` removes
        // the common indentation; that space is what marks it as context.
        let input = indoc! {"
            diff --git a/text.txt b/text.txt
            index 86c770d..a1fd855 100644
            --- a/file.txt
            +++ b/file.txt
            @@ -1,2 +1,3 @@
             context
            -deleted
            +inserted
            garbage

            --- b/file.txt
            +++ a/file.txt
        "};

        let lines = input.lines().map(DiffLine::parse).collect::<Vec<_>>();

        pretty_assertions::assert_eq!(
            lines,
            &[
                DiffLine::Garbage,
                DiffLine::Garbage,
                DiffLine::OldPath {
                    path: "file.txt".into()
                },
                DiffLine::NewPath {
                    path: "file.txt".into()
                },
                DiffLine::HunkHeader(Some(HunkLocation {
                    start_line_old: 0,
                    count_old: 2,
                    start_line_new: 0,
                    count_new: 3
                })),
                DiffLine::Context("context"),
                DiffLine::Deletion("deleted"),
                DiffLine::Addition("inserted"),
                DiffLine::Garbage,
                DiffLine::Context(""),
                // Only the side-specific prefix is stripped: `a/` from old
                // paths and `b/` from new ones.
                DiffLine::OldPath {
                    path: "b/file.txt".into()
                },
                DiffLine::NewPath {
                    path: "a/file.txt".into()
                },
            ]
        );
    }

    /// Any run of spaces or tabs may separate `---` from the path.
    #[test]
    fn file_header_extra_space() {
        let options = ["--- file", "---   file", "---\tfile"];

        for option in options {
            pretty_assertions::assert_eq!(
                DiffLine::parse(option),
                DiffLine::OldPath {
                    path: "file".into()
                },
                "{option}",
            );
        }
    }

    /// Extra whitespace between the parts of a hunk header, and trailing text
    /// after it, do not affect the parsed location.
    #[test]
    fn hunk_header_extra_space() {
        let options = [
            "@@ -1,2 +1,3 @@",
            "@@  -1,2  +1,3 @@",
            "@@\t-1,2\t+1,3\t@@",
            "@@ -1,2  +1,3 @@",
            "@@ -1,2   +1,3 @@",
            "@@ -1,2 +1,3   @@",
            "@@ -1,2 +1,3 @@ garbage",
        ];

        for option in options {
            pretty_assertions::assert_eq!(
                DiffLine::parse(option),
                DiffLine::HunkHeader(Some(HunkLocation {
                    start_line_old: 0,
                    count_old: 2,
                    start_line_new: 0,
                    count_new: 3
                })),
                "{option}",
            );
        }
    }

    #[test]
    fn hunk_header_without_location() {
        pretty_assertions::assert_eq!(DiffLine::parse("@@ ... @@"), DiffLine::HunkHeader(None));
    }

    #[test]
    fn test_parse_path() {
        assert_eq!(parse_header_path("a/", "foo.txt"), "foo.txt");
        assert_eq!(
            parse_header_path("a/", "foo/bar/baz.txt"),
            "foo/bar/baz.txt"
        );
        assert_eq!(parse_header_path("a/", "a/foo.txt"), "foo.txt");
        assert_eq!(
            parse_header_path("a/", "a/foo/bar/baz.txt"),
            "foo/bar/baz.txt"
        );

        // Extra
        assert_eq!(
            parse_header_path("a/", "a/foo/bar/baz.txt  2025"),
            "foo/bar/baz.txt"
        );
        assert_eq!(
            parse_header_path("a/", "a/foo/bar/baz.txt\t2025"),
            "foo/bar/baz.txt"
        );
        assert_eq!(
            parse_header_path("a/", "a/foo/bar/baz.txt \""),
            "foo/bar/baz.txt"
        );

        // Quoted
        assert_eq!(
            parse_header_path("a/", "a/foo/bar/\"baz quox.txt\""),
            "foo/bar/baz quox.txt"
        );
        assert_eq!(
            parse_header_path("a/", "\"a/foo/bar/baz quox.txt\""),
            "foo/bar/baz quox.txt"
        );
        assert_eq!(
            parse_header_path("a/", "\"foo/bar/baz quox.txt\""),
            "foo/bar/baz quox.txt"
        );
        assert_eq!(parse_header_path("a/", "\"whatever 🤷\""), "whatever 🤷");
        assert_eq!(
            parse_header_path("a/", "\"foo/bar/baz quox.txt\" 2025"),
            "foo/bar/baz quox.txt"
        );
        // unescaped quotes are dropped
        assert_eq!(parse_header_path("a/", "foo/\"bar\""), "foo/bar");

        // Escaped
        assert_eq!(
            parse_header_path("a/", "\"foo/\\\"bar\\\"/baz.txt\""),
            "foo/\"bar\"/baz.txt"
        );
        assert_eq!(
            parse_header_path("a/", "\"C:\\\\Projects\\\\My App\\\\old file.txt\""),
            "C:\\Projects\\My App\\old file.txt"
        );
    }
}