udiff.rs

  1use std::{borrow::Cow, fmt::Display};
  2
  3#[derive(Debug, PartialEq)]
  4pub enum DiffLine<'a> {
  5    OldPath { path: Cow<'a, str> },
  6    NewPath { path: Cow<'a, str> },
  7    HunkHeader(Option<HunkLocation>),
  8    Context(&'a str),
  9    Deletion(&'a str),
 10    Addition(&'a str),
 11    Garbage(&'a str),
 12}
 13
 14#[derive(Debug, PartialEq)]
 15pub struct HunkLocation {
 16    start_line_old: u32,
 17    count_old: u32,
 18    start_line_new: u32,
 19    count_new: u32,
 20}
 21
 22impl<'a> DiffLine<'a> {
 23    pub fn parse(line: &'a str) -> Self {
 24        Self::try_parse(line).unwrap_or(Self::Garbage(line))
 25    }
 26
 27    fn try_parse(line: &'a str) -> Option<Self> {
 28        if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
 29            let path = parse_header_path("a/", header);
 30            Some(Self::OldPath { path })
 31        } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
 32            Some(Self::NewPath {
 33                path: parse_header_path("b/", header),
 34            })
 35        } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
 36            if header.starts_with("...") {
 37                return Some(Self::HunkHeader(None));
 38            }
 39
 40            let (start_line_old, header) = header.strip_prefix('-')?.split_once(',')?;
 41            let mut parts = header.split_ascii_whitespace();
 42            let count_old = parts.next()?;
 43            let (start_line_new, count_new) = parts.next()?.strip_prefix('+')?.split_once(',')?;
 44
 45            Some(Self::HunkHeader(Some(HunkLocation {
 46                start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
 47                count_old: count_old.parse().ok()?,
 48                start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
 49                count_new: count_new.parse().ok()?,
 50            })))
 51        } else if let Some(deleted_header) = line.strip_prefix("-") {
 52            Some(Self::Deletion(deleted_header))
 53        } else if line.is_empty() {
 54            Some(Self::Context(""))
 55        } else if let Some(context) = line.strip_prefix(" ") {
 56            Some(Self::Context(context))
 57        } else {
 58            Some(Self::Addition(line.strip_prefix("+")?))
 59        }
 60    }
 61}
 62
 63impl<'a> Display for DiffLine<'a> {
 64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 65        match self {
 66            DiffLine::OldPath { path } => write!(f, "--- {path}"),
 67            DiffLine::NewPath { path } => write!(f, "+++ {path}"),
 68            DiffLine::HunkHeader(Some(hunk_location)) => {
 69                write!(
 70                    f,
 71                    "@@ -{},{} +{},{} @@",
 72                    hunk_location.start_line_old + 1,
 73                    hunk_location.count_old,
 74                    hunk_location.start_line_new + 1,
 75                    hunk_location.count_new
 76                )
 77            }
 78            DiffLine::HunkHeader(None) => write!(f, "@@ ... @@"),
 79            DiffLine::Context(content) => write!(f, " {content}"),
 80            DiffLine::Deletion(content) => write!(f, "-{content}"),
 81            DiffLine::Addition(content) => write!(f, "+{content}"),
 82            DiffLine::Garbage(line) => write!(f, "{line}"),
 83        }
 84    }
 85}
 86
 87fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
 88    if !header.contains(['"', '\\']) {
 89        let path = header.split_ascii_whitespace().next().unwrap_or(header);
 90        return Cow::Borrowed(path.strip_prefix(strip_prefix).unwrap_or(path));
 91    }
 92
 93    let mut path = String::with_capacity(header.len());
 94    let mut in_quote = false;
 95    let mut chars = header.chars().peekable();
 96    let mut strip_prefix = Some(strip_prefix);
 97
 98    while let Some(char) = chars.next() {
 99        if char == '"' {
100            in_quote = !in_quote;
101        } else if char == '\\' {
102            let Some(&next_char) = chars.peek() else {
103                break;
104            };
105            chars.next();
106            path.push(next_char);
107        } else if char.is_ascii_whitespace() && !in_quote {
108            break;
109        } else {
110            path.push(char);
111        }
112
113        if let Some(prefix) = strip_prefix
114            && path == prefix
115        {
116            strip_prefix.take();
117            path.clear();
118        }
119    }
120
121    Cow::Owned(path)
122}
123
/// Skips the leading ASCII whitespace of `header`, insisting that there is some.
///
/// Returns `None` when `header` does not start with whitespace (this includes
/// the empty string); otherwise returns the remainder, which may be empty.
fn eat_required_whitespace(header: &str) -> Option<&str> {
    let rest = header.trim_ascii_start();
    // Trimming can only shorten the string, so "shorter" means "something was eaten".
    (rest.len() < header.len()).then_some(rest)
}
133
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// The location every `-1,2 +1,3` header used in these tests must parse to.
    fn one_two_one_three() -> HunkLocation {
        HunkLocation {
            start_line_old: 0,
            count_old: 2,
            start_line_new: 0,
            count_new: 3,
        }
    }

    /// A small but complete diff, including non-diff noise, parses line by line.
    #[test]
    fn parse_lines_simple() {
        let input = indoc! {"
            diff --git a/text.txt b/text.txt
            index 86c770d..a1fd855 100644
            --- a/file.txt
            +++ b/file.txt
            @@ -1,2 +1,3 @@
             context
            -deleted
            +inserted
            garbage

            --- b/file.txt
            +++ a/file.txt
        "};

        let parsed: Vec<_> = input.lines().map(DiffLine::parse).collect();

        pretty_assertions::assert_eq!(
            parsed,
            &[
                DiffLine::Garbage("diff --git a/text.txt b/text.txt"),
                DiffLine::Garbage("index 86c770d..a1fd855 100644"),
                DiffLine::OldPath {
                    path: "file.txt".into()
                },
                DiffLine::NewPath {
                    path: "file.txt".into()
                },
                DiffLine::HunkHeader(Some(one_two_one_three())),
                DiffLine::Context("context"),
                DiffLine::Deletion("deleted"),
                DiffLine::Addition("inserted"),
                DiffLine::Garbage("garbage"),
                DiffLine::Context(""),
                // Only the expected side's prefix is stripped: `a/` for old, `b/` for new.
                DiffLine::OldPath {
                    path: "b/file.txt".into()
                },
                DiffLine::NewPath {
                    path: "a/file.txt".into()
                },
            ]
        );
    }

    /// Any amount of ASCII whitespace may separate `---` from the path.
    #[test]
    fn file_header_extra_space() {
        for header in ["--- file", "---   file", "---\tfile"] {
            let expected = DiffLine::OldPath {
                path: "file".into(),
            };
            pretty_assertions::assert_eq!(DiffLine::parse(header), expected, "{header}");
        }
    }

    /// Hunk headers tolerate extra whitespace and trailing text after the ranges.
    #[test]
    fn hunk_header_extra_space() {
        let headers = [
            "@@ -1,2 +1,3 @@",
            "@@  -1,2  +1,3 @@",
            "@@\t-1,2\t+1,3\t@@",
            "@@ -1,2  +1,3 @@",
            "@@ -1,2   +1,3 @@",
            "@@ -1,2 +1,3   @@",
            "@@ -1,2 +1,3 @@ garbage",
        ];

        for header in headers {
            pretty_assertions::assert_eq!(
                DiffLine::parse(header),
                DiffLine::HunkHeader(Some(one_two_one_three())),
                "{header}",
            );
        }
    }

    /// `@@ ... @@` is a valid header that carries no location.
    #[test]
    fn hunk_header_without_location() {
        let parsed = DiffLine::parse("@@ ... @@");
        pretty_assertions::assert_eq!(parsed, DiffLine::HunkHeader(None));
    }

    /// Table of `(header text, expected path)` pairs, all parsed with the `a/` prefix.
    #[test]
    fn test_parse_path() {
        let cases = [
            ("foo.txt", "foo.txt"),
            ("foo/bar/baz.txt", "foo/bar/baz.txt"),
            ("a/foo.txt", "foo.txt"),
            ("a/foo/bar/baz.txt", "foo/bar/baz.txt"),
            // Extra
            ("a/foo/bar/baz.txt  2025", "foo/bar/baz.txt"),
            ("a/foo/bar/baz.txt\t2025", "foo/bar/baz.txt"),
            ("a/foo/bar/baz.txt \"", "foo/bar/baz.txt"),
            // Quoted
            ("a/foo/bar/\"baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"a/foo/bar/baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"foo/bar/baz quox.txt\"", "foo/bar/baz quox.txt"),
            ("\"whatever 🤷\"", "whatever 🤷"),
            ("\"foo/bar/baz quox.txt\"  2025", "foo/bar/baz quox.txt"),
            // unescaped quotes are dropped
            ("foo/\"bar\"", "foo/bar"),
            // Escaped
            ("\"foo/\\\"bar\\\"/baz.txt\"", "foo/\"bar\"/baz.txt"),
            (
                "\"C:\\\\Projects\\\\My App\\\\old file.txt\"",
                "C:\\Projects\\My App\\old file.txt",
            ),
        ];

        for (header, expected) in cases {
            assert_eq!(parse_header_path("a/", header), expected, "{header}");
        }
    }
}