udiff.rs

  1use std::borrow::Cow;
  2
  3#[derive(Debug, PartialEq)]
  4pub enum DiffLine<'a> {
  5    OldPath { path: Cow<'a, str> },
  6    NewPath { path: Cow<'a, str> },
  7    HunkHeader(Option<HunkLocation>),
  8    Context(&'a str),
  9    Deletion(&'a str),
 10    Addition(&'a str),
 11    Garbage,
 12}
 13
 14#[derive(Debug, PartialEq)]
 15pub struct HunkLocation {
 16    start_line_old: u32,
 17    count_old: u32,
 18    start_line_new: u32,
 19    count_new: u32,
 20}
 21
 22impl<'a> DiffLine<'a> {
 23    pub fn parse(line: &'a str) -> Self {
 24        Self::try_parse(line).unwrap_or(Self::Garbage)
 25    }
 26
 27    fn try_parse(line: &'a str) -> Option<Self> {
 28        if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
 29            let path = parse_header_path("a/", header);
 30            Some(Self::OldPath { path })
 31        } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
 32            Some(Self::NewPath {
 33                path: parse_header_path("b/", header),
 34            })
 35        } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
 36            if header.starts_with("...") {
 37                return Some(Self::HunkHeader(None));
 38            }
 39
 40            let (start_line_old, header) = header.strip_prefix('-')?.split_once(',')?;
 41            let mut parts = header.split_ascii_whitespace();
 42            let count_old = parts.next()?;
 43            let (start_line_new, count_new) = parts.next()?.strip_prefix('+')?.split_once(',')?;
 44
 45            Some(Self::HunkHeader(Some(HunkLocation {
 46                start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
 47                count_old: count_old.parse().ok()?,
 48                start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
 49                count_new: count_new.parse().ok()?,
 50            })))
 51        } else if let Some(deleted_header) = line.strip_prefix("-") {
 52            Some(Self::Deletion(deleted_header))
 53        } else if line.is_empty() {
 54            Some(Self::Context(""))
 55        } else if let Some(context) = line.strip_prefix(" ") {
 56            Some(Self::Context(context))
 57        } else {
 58            Some(Self::Addition(line.strip_prefix("+")?))
 59        }
 60    }
 61}
 62
 63fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
 64    if !header.contains(['"', '\\']) {
 65        let path = header.split_ascii_whitespace().next().unwrap_or(header);
 66        return Cow::Borrowed(path.strip_prefix(strip_prefix).unwrap_or(path));
 67    }
 68
 69    let mut path = String::with_capacity(header.len());
 70    let mut in_quote = false;
 71    let mut chars = header.chars().peekable();
 72    let mut strip_prefix = Some(strip_prefix);
 73
 74    while let Some(char) = chars.next() {
 75        if char == '"' {
 76            in_quote = !in_quote;
 77        } else if char == '\\' {
 78            let Some(&next_char) = chars.peek() else {
 79                break;
 80            };
 81            chars.next();
 82            path.push(next_char);
 83        } else if char.is_ascii_whitespace() && !in_quote {
 84            break;
 85        } else {
 86            path.push(char);
 87        }
 88
 89        if let Some(prefix) = strip_prefix
 90            && path == prefix
 91        {
 92            strip_prefix.take();
 93            path.clear();
 94        }
 95    }
 96
 97    Cow::Owned(path)
 98}
 99
/// Removes leading ASCII whitespace from `header`.
///
/// Returns `None` when `header` does not start with whitespace, which lets callers require a
/// separator after a marker such as `---` or `@@`.
fn eat_required_whitespace(header: &str) -> Option<&str> {
    let rest = header.trim_ascii_start();
    // Trimming can only shorten the string, so a shorter result means whitespace was present.
    (rest.len() < header.len()).then_some(rest)
}
109
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// The value every `-1,2 +1,3` hunk header in these tests is expected to parse to.
    fn expected_hunk_header() -> DiffLine<'static> {
        DiffLine::HunkHeader(Some(HunkLocation {
            start_line_old: 0,
            count_old: 2,
            start_line_new: 0,
            count_new: 3,
        }))
    }

    #[test]
    fn parse_lines_simple() {
        let diff = indoc! {"
            diff --git a/text.txt b/text.txt
            index 86c770d..a1fd855 100644
            --- a/file.txt
            +++ b/file.txt
            @@ -1,2 +1,3 @@
             context
            -deleted
            +inserted
            garbage

            --- b/file.txt
            +++ a/file.txt
        "};

        let expected = vec![
            // `diff --git` and `index` lines are not interpreted.
            DiffLine::Garbage,
            DiffLine::Garbage,
            DiffLine::OldPath {
                path: "file.txt".into(),
            },
            DiffLine::NewPath {
                path: "file.txt".into(),
            },
            expected_hunk_header(),
            DiffLine::Context("context"),
            DiffLine::Deletion("deleted"),
            DiffLine::Addition("inserted"),
            DiffLine::Garbage,
            DiffLine::Context(""),
            // Only the prefix belonging to the respective side is stripped.
            DiffLine::OldPath {
                path: "b/file.txt".into(),
            },
            DiffLine::NewPath {
                path: "a/file.txt".into(),
            },
        ];

        let parsed: Vec<_> = diff.lines().map(DiffLine::parse).collect();

        pretty_assertions::assert_eq!(parsed, expected);
    }

    #[test]
    fn file_header_extra_space() {
        for option in ["--- file", "---   file", "---\tfile"] {
            let expected = DiffLine::OldPath {
                path: "file".into(),
            };
            pretty_assertions::assert_eq!(DiffLine::parse(option), expected, "{option}");
        }
    }

    #[test]
    fn hunk_header_extra_space() {
        let options = [
            "@@ -1,2 +1,3 @@",
            "@@  -1,2  +1,3 @@",
            "@@\t-1,2\t+1,3\t@@",
            "@@ -1,2  +1,3 @@",
            "@@ -1,2   +1,3 @@",
            "@@ -1,2 +1,3   @@",
            "@@ -1,2 +1,3 @@ garbage",
        ];

        for option in options {
            pretty_assertions::assert_eq!(
                DiffLine::parse(option),
                expected_hunk_header(),
                "{option}",
            );
        }
    }

    #[test]
    fn hunk_header_without_location() {
        let parsed = DiffLine::parse("@@ ... @@");
        pretty_assertions::assert_eq!(parsed, DiffLine::HunkHeader(None));
    }

    #[test]
    fn test_parse_path() {
        // (header text, expected path) with `a/` as the prefix to strip.
        let cases: &[(&str, &str)] = &[
            ("foo.txt", "foo.txt"),
            ("foo/bar/baz.txt", "foo/bar/baz.txt"),
            ("a/foo.txt", "foo.txt"),
            ("a/foo/bar/baz.txt", "foo/bar/baz.txt"),
            // Extra
            ("a/foo/bar/baz.txt  2025", "foo/bar/baz.txt"),
            ("a/foo/bar/baz.txt\t2025", "foo/bar/baz.txt"),
            ("a/foo/bar/baz.txt \"", "foo/bar/baz.txt"),
            // Quoted
            ("a/foo/bar/\"baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"a/foo/bar/baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"foo/bar/baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"whatever 🤷\"", "whatever 🤷"),
            ("\"foo/bar/baz quox.txt\"  2025", "foo/bar/baz quox.txt"),
            // unescaped quotes are dropped
            ("foo/\"bar\"", "foo/bar"),
            // Escaped
            ("\"foo/\\\"bar\\\"/baz.txt\"", "foo/\"bar\"/baz.txt"),
            (
                "\"C:\\\\Projects\\\\My App\\\\old file.txt\"",
                "C:\\Projects\\My App\\old file.txt",
            ),
        ];

        for &(header, expected) in cases {
            assert_eq!(parse_header_path("a/", header), expected, "{header}");
        }
    }
}