udiff.rs

   1use std::{
   2    borrow::Cow,
   3    fmt::{Display, Write},
   4    mem,
   5    ops::Range,
   6};
   7
   8use anyhow::{Context as _, Result, anyhow};
   9
  10pub fn strip_diff_path_prefix<'a>(diff: &'a str, prefix: &str) -> Cow<'a, str> {
  11    if prefix.is_empty() {
  12        return Cow::Borrowed(diff);
  13    }
  14
  15    let prefix_with_slash = format!("{}/", prefix);
  16    let mut needs_rewrite = false;
  17
  18    for line in diff.lines() {
  19        match DiffLine::parse(line) {
  20            DiffLine::OldPath { path } | DiffLine::NewPath { path } => {
  21                if path.starts_with(&prefix_with_slash) {
  22                    needs_rewrite = true;
  23                    break;
  24                }
  25            }
  26            _ => {}
  27        }
  28    }
  29
  30    if !needs_rewrite {
  31        return Cow::Borrowed(diff);
  32    }
  33
  34    let mut result = String::with_capacity(diff.len());
  35    for line in diff.lines() {
  36        match DiffLine::parse(line) {
  37            DiffLine::OldPath { path } => {
  38                let stripped = path
  39                    .strip_prefix(&prefix_with_slash)
  40                    .unwrap_or(path.as_ref());
  41                result.push_str(&format!("--- a/{}\n", stripped));
  42            }
  43            DiffLine::NewPath { path } => {
  44                let stripped = path
  45                    .strip_prefix(&prefix_with_slash)
  46                    .unwrap_or(path.as_ref());
  47                result.push_str(&format!("+++ b/{}\n", stripped));
  48            }
  49            _ => {
  50                result.push_str(line);
  51                result.push('\n');
  52            }
  53        }
  54    }
  55
  56    Cow::Owned(result)
  57}
  58
  59/// Strip unnecessary git metadata lines from a diff, keeping only the lines
  60/// needed for patch application: path headers (--- and +++), hunk headers (@@),
  61/// and content lines (+, -, space).
  62pub fn strip_diff_metadata(diff: &str) -> String {
  63    let mut result = String::new();
  64
  65    for line in diff.lines() {
  66        let dominated = DiffLine::parse(line);
  67        match dominated {
  68            // Keep path headers, hunk headers, and content lines
  69            DiffLine::OldPath { .. }
  70            | DiffLine::NewPath { .. }
  71            | DiffLine::HunkHeader(_)
  72            | DiffLine::Context(_)
  73            | DiffLine::Deletion(_)
  74            | DiffLine::Addition(_)
  75            | DiffLine::NoNewlineAtEOF => {
  76                result.push_str(line);
  77                result.push('\n');
  78            }
  79            // Skip garbage lines (diff --git, index, etc.)
  80            DiffLine::Garbage(_) => {}
  81        }
  82    }
  83
  84    result
  85}
  86
/// Marker text that identifies a cursor-position comment line in a patch.
///
/// [`extract_cursor_from_patch`] treats a line as a cursor annotation when it
/// starts with `#` and contains this text.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
  89
  90/// Extract cursor offset from a patch and return `(clean_patch, cursor_offset)`.
  91///
  92/// Cursor position is encoded as a comment line (starting with `#`) containing
  93/// `[CURSOR_POSITION]`. A `^` in the line indicates the cursor column; a `<`
  94/// indicates column 0. The offset is computed relative to addition (`+`) and
  95/// context (` `) lines accumulated so far in the hunk, which represent the
  96/// cursor position within the new text contributed by the hunk.
  97pub fn extract_cursor_from_patch(patch: &str) -> (String, Option<usize>) {
  98    let mut clean_patch = String::new();
  99    let mut cursor_offset: Option<usize> = None;
 100    let mut line_start_offset = 0usize;
 101    let mut prev_line_start_offset = 0usize;
 102
 103    for line in patch.lines() {
 104        let diff_line = DiffLine::parse(line);
 105
 106        match &diff_line {
 107            DiffLine::Garbage(content)
 108                if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER) =>
 109            {
 110                let caret_column = if let Some(caret_pos) = content.find('^') {
 111                    caret_pos
 112                } else if content.find('<').is_some() {
 113                    0
 114                } else {
 115                    continue;
 116                };
 117                let cursor_column = caret_column.saturating_sub('#'.len_utf8());
 118                cursor_offset = Some(prev_line_start_offset + cursor_column);
 119            }
 120            _ => {
 121                if !clean_patch.is_empty() {
 122                    clean_patch.push('\n');
 123                }
 124                clean_patch.push_str(line);
 125
 126                match diff_line {
 127                    DiffLine::Addition(content) | DiffLine::Context(content) => {
 128                        prev_line_start_offset = line_start_offset;
 129                        line_start_offset += content.len() + 1;
 130                    }
 131                    _ => {}
 132                }
 133            }
 134        }
 135    }
 136
 137    if patch.ends_with('\n') && !clean_patch.is_empty() {
 138        clean_patch.push('\n');
 139    }
 140
 141    (clean_patch, cursor_offset)
 142}
 143
 144/// Find all byte offsets where `hunk.context` occurs as a substring of `text`.
 145///
 146/// If no exact matches are found and the context ends with `'\n'` but `text`
 147/// does not, retries without the trailing newline, accepting only a match at
 148/// the very end of `text`. When this fallback fires, the hunk's context is
 149/// trimmed and its edit ranges are clamped so that downstream code doesn't
 150/// index past the end of the matched region. This handles diffs that are
 151/// missing a `\ No newline at end of file` marker: the parser always appends
 152/// `'\n'` via `writeln!`, so the context can have a trailing newline that
 153/// doesn't exist in the source text.
 154pub fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec<usize> {
 155    let candidates: Vec<usize> = text
 156        .match_indices(&hunk.context)
 157        .map(|(offset, _)| offset)
 158        .collect();
 159
 160    if !candidates.is_empty() {
 161        return candidates;
 162    }
 163
 164    if hunk.context.ends_with('\n') && !hunk.context.is_empty() {
 165        let old_len = hunk.context.len();
 166        hunk.context.pop();
 167        let new_len = hunk.context.len();
 168
 169        if !hunk.context.is_empty() {
 170            let candidates: Vec<usize> = text
 171                .match_indices(&hunk.context)
 172                .filter(|(offset, _)| offset + new_len == text.len())
 173                .map(|(offset, _)| offset)
 174                .collect();
 175
 176            if !candidates.is_empty() {
 177                for edit in &mut hunk.edits {
 178                    let touched_phantom = edit.range.end > new_len;
 179                    edit.range.start = edit.range.start.min(new_len);
 180                    edit.range.end = edit.range.end.min(new_len);
 181                    if touched_phantom {
 182                        // The replacement text was also written with a
 183                        // trailing '\n' that corresponds to the phantom
 184                        // newline we just removed from the context.
 185                        if edit.text.ends_with('\n') {
 186                            edit.text.pop();
 187                        }
 188                    }
 189                }
 190                return candidates;
 191            }
 192
 193            // Restore if fallback didn't help either.
 194            hunk.context.push('\n');
 195            debug_assert_eq!(hunk.context.len(), old_len);
 196        } else {
 197            hunk.context.push('\n');
 198        }
 199    }
 200
 201    Vec::new()
 202}
 203
 204/// Given multiple candidate offsets where context matches, use line numbers to disambiguate.
 205/// Returns the offset that matches the expected line, or None if no match or no line number available.
 206pub fn disambiguate_by_line_number(
 207    candidates: &[usize],
 208    expected_line: Option<u32>,
 209    offset_to_line: &dyn Fn(usize) -> u32,
 210) -> Option<usize> {
 211    match candidates.len() {
 212        0 => None,
 213        1 => Some(candidates[0]),
 214        _ => {
 215            let expected = expected_line?;
 216            candidates
 217                .iter()
 218                .copied()
 219                .find(|&offset| offset_to_line(offset) == expected)
 220        }
 221    }
 222}
 223
 224pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
 225    apply_diff_to_string_with_hunk_offset(diff_str, text).map(|(text, _)| text)
 226}
 227
 228/// Applies a diff to a string and returns the result along with the offset where
 229/// the first hunk's context matched in the original text. This offset can be used
 230/// to adjust cursor positions that are relative to the hunk's content.
 231pub fn apply_diff_to_string_with_hunk_offset(
 232    diff_str: &str,
 233    text: &str,
 234) -> Result<(String, Option<usize>)> {
 235    let mut diff = DiffParser::new(diff_str);
 236
 237    let mut text = text.to_string();
 238    let mut first_hunk_offset = None;
 239
 240    while let Some(event) = diff.next().context("Failed to parse diff")? {
 241        match event {
 242            DiffEvent::Hunk {
 243                mut hunk,
 244                path: _,
 245                status: _,
 246            } => {
 247                let candidates = find_context_candidates(&text, &mut hunk);
 248
 249                let hunk_offset =
 250                    disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| {
 251                        text[..offset].matches('\n').count() as u32
 252                    })
 253                    .ok_or_else(|| anyhow!("couldn't resolve hunk"))?;
 254
 255                if first_hunk_offset.is_none() {
 256                    first_hunk_offset = Some(hunk_offset);
 257                }
 258
 259                for edit in hunk.edits.iter().rev() {
 260                    let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end);
 261                    text.replace_range(range, &edit.text);
 262                }
 263            }
 264            DiffEvent::FileEnd { .. } => {}
 265        }
 266    }
 267
 268    Ok((text, first_hunk_offset))
 269}
 270
/// Paths taken from the `---` / `+++` headers of the file section currently
/// being parsed.
struct PatchFile<'a> {
    /// Path from the `---` header.
    old_path: Cow<'a, str>,
    /// Path from the `+++` header; stays empty until that header is parsed.
    new_path: Cow<'a, str>,
}
 275
/// Incremental parser that turns unified-diff text into [`DiffEvent`]s.
///
/// Call [`DiffParser::next`] repeatedly until it returns `Ok(None)`.
pub struct DiffParser<'a> {
    /// Paths of the file section being parsed, once a `---` header was seen.
    current_file: Option<PatchFile<'a>>,
    /// Look-ahead line (raw text plus parsed form); `None` once input is exhausted.
    current_line: Option<(&'a str, DiffLine<'a>)>,
    /// Hunk accumulated so far; handed out when the hunk ends.
    hunk: Hunk,
    /// Remaining unread lines of the diff.
    diff: std::str::Lines<'a>,
    /// Old-file start line from a `@@` header, waiting to be attached to the
    /// next emitted hunk.
    pending_start_line: Option<u32>,
    /// Whether a `\ No newline at end of file` marker was already applied to
    /// the hunk being accumulated.
    processed_no_newline: bool,
    /// Kind of the most recent content line; decides what a no-newline marker
    /// applies to.
    last_diff_op: LastDiffOp,
}
 285
/// Kind of the most recently parsed content line of the hunk being accumulated.
#[derive(Clone, Copy, Default)]
enum LastDiffOp {
    /// No content line recorded (initial state, and after a hunk is emitted).
    #[default]
    None,
    /// The last content line was a context line.
    Context,
    /// The last content line was a deletion (`-`).
    Deletion,
    /// The last content line was an addition (`+`).
    Addition,
}
 294
/// Event produced by [`DiffParser::next`].
#[derive(Debug, PartialEq)]
pub enum DiffEvent<'a> {
    /// A complete hunk belonging to the current file section.
    Hunk {
        /// The new path for created files, the old path otherwise.
        path: Cow<'a, str>,
        /// The parsed hunk: context text plus edits.
        hunk: Hunk,
        /// Whether the file is created, modified or deleted.
        status: FileStatus,
    },
    /// The current file section has ended.
    FileEnd {
        /// The new path, when it differs from the old path and the old path is
        /// not `/dev/null`; `None` otherwise.
        renamed_to: Option<Cow<'a, str>>,
    },
}
 306
/// How a file section changes the file, derived from its path headers.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileStatus {
    /// The old path is `/dev/null`.
    Created,
    /// Neither path is `/dev/null`.
    Modified,
    /// The new path is `/dev/null` (and the old path is not).
    Deleted,
}
 313
/// One hunk of a diff: the old text it must match plus the edits to apply to it.
#[derive(Debug, Default, PartialEq)]
pub struct Hunk {
    /// Context and deleted lines of the hunk, concatenated in order; this is
    /// the text searched for in the file being patched.
    pub context: String,
    /// Replacements to perform; their ranges are byte offsets into `context`.
    pub edits: Vec<Edit>,
    /// Zero-based old-file start line from the `@@` header, when one was given.
    pub start_line: Option<u32>,
}
 320
 321impl Hunk {
 322    pub fn is_empty(&self) -> bool {
 323        self.context.is_empty() && self.edits.is_empty()
 324    }
 325}
 326
/// A single replacement inside a [`Hunk`].
#[derive(Debug, PartialEq)]
pub struct Edit {
    /// Byte range within the hunk's context to replace; empty for a pure insertion.
    pub range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    pub text: String,
}
 332
 333impl<'a> DiffParser<'a> {
 334    pub fn new(diff: &'a str) -> Self {
 335        let mut diff = diff.lines();
 336        let current_line = diff.next().map(|line| (line, DiffLine::parse(line)));
 337        DiffParser {
 338            current_file: None,
 339            hunk: Hunk::default(),
 340            current_line,
 341            diff,
 342            pending_start_line: None,
 343            processed_no_newline: false,
 344            last_diff_op: LastDiffOp::None,
 345        }
 346    }
 347
 348    pub fn next(&mut self) -> Result<Option<DiffEvent<'a>>> {
 349        loop {
 350            let (hunk_done, file_done) = match self.current_line.as_ref().map(|e| &e.1) {
 351                Some(DiffLine::OldPath { .. }) | Some(DiffLine::Garbage(_)) | None => (true, true),
 352                Some(DiffLine::HunkHeader(_)) => (true, false),
 353                _ => (false, false),
 354            };
 355
 356            if hunk_done {
 357                if let Some(file) = &self.current_file
 358                    && !self.hunk.is_empty()
 359                {
 360                    let status = if file.old_path == "/dev/null" {
 361                        FileStatus::Created
 362                    } else if file.new_path == "/dev/null" {
 363                        FileStatus::Deleted
 364                    } else {
 365                        FileStatus::Modified
 366                    };
 367                    let path = if status == FileStatus::Created {
 368                        file.new_path.clone()
 369                    } else {
 370                        file.old_path.clone()
 371                    };
 372                    let mut hunk = mem::take(&mut self.hunk);
 373                    hunk.start_line = self.pending_start_line.take();
 374                    self.processed_no_newline = false;
 375                    self.last_diff_op = LastDiffOp::None;
 376                    return Ok(Some(DiffEvent::Hunk { path, hunk, status }));
 377                }
 378            }
 379
 380            if file_done {
 381                if let Some(PatchFile { old_path, new_path }) = self.current_file.take() {
 382                    return Ok(Some(DiffEvent::FileEnd {
 383                        renamed_to: if old_path != new_path && old_path != "/dev/null" {
 384                            Some(new_path)
 385                        } else {
 386                            None
 387                        },
 388                    }));
 389                }
 390            }
 391
 392            let Some((line, parsed_line)) = self.current_line.take() else {
 393                break;
 394            };
 395
 396            (|| {
 397                match parsed_line {
 398                    DiffLine::OldPath { path } => {
 399                        self.current_file = Some(PatchFile {
 400                            old_path: path,
 401                            new_path: "".into(),
 402                        });
 403                    }
 404                    DiffLine::NewPath { path } => {
 405                        if let Some(current_file) = &mut self.current_file {
 406                            current_file.new_path = path
 407                        }
 408                    }
 409                    DiffLine::HunkHeader(location) => {
 410                        if let Some(loc) = location {
 411                            self.pending_start_line = Some(loc.start_line_old);
 412                        }
 413                    }
 414                    DiffLine::Context(ctx) => {
 415                        if self.current_file.is_some() {
 416                            writeln!(&mut self.hunk.context, "{ctx}")?;
 417                            self.last_diff_op = LastDiffOp::Context;
 418                        }
 419                    }
 420                    DiffLine::Deletion(del) => {
 421                        if self.current_file.is_some() {
 422                            let range = self.hunk.context.len()
 423                                ..self.hunk.context.len() + del.len() + '\n'.len_utf8();
 424                            if let Some(last_edit) = self.hunk.edits.last_mut()
 425                                && last_edit.range.end == range.start
 426                            {
 427                                last_edit.range.end = range.end;
 428                            } else {
 429                                self.hunk.edits.push(Edit {
 430                                    range,
 431                                    text: String::new(),
 432                                });
 433                            }
 434                            writeln!(&mut self.hunk.context, "{del}")?;
 435                            self.last_diff_op = LastDiffOp::Deletion;
 436                        }
 437                    }
 438                    DiffLine::Addition(add) => {
 439                        if self.current_file.is_some() {
 440                            let range = self.hunk.context.len()..self.hunk.context.len();
 441                            if let Some(last_edit) = self.hunk.edits.last_mut()
 442                                && last_edit.range.end == range.start
 443                            {
 444                                writeln!(&mut last_edit.text, "{add}").unwrap();
 445                            } else {
 446                                self.hunk.edits.push(Edit {
 447                                    range,
 448                                    text: format!("{add}\n"),
 449                                });
 450                            }
 451                            self.last_diff_op = LastDiffOp::Addition;
 452                        }
 453                    }
 454                    DiffLine::NoNewlineAtEOF => {
 455                        if !self.processed_no_newline {
 456                            self.processed_no_newline = true;
 457                            match self.last_diff_op {
 458                                LastDiffOp::Addition => {
 459                                    // Remove trailing newline from the last addition
 460                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
 461                                        last_edit.text.pop();
 462                                    }
 463                                }
 464                                LastDiffOp::Deletion => {
 465                                    // Remove trailing newline from context (which includes the deletion)
 466                                    self.hunk.context.pop();
 467                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
 468                                        last_edit.range.end -= 1;
 469                                    }
 470                                }
 471                                LastDiffOp::Context | LastDiffOp::None => {
 472                                    // Remove trailing newline from context
 473                                    self.hunk.context.pop();
 474                                }
 475                            }
 476                        }
 477                    }
 478                    DiffLine::Garbage(_) => {}
 479                }
 480
 481                anyhow::Ok(())
 482            })()
 483            .with_context(|| format!("on line:\n\n```\n{}```", line))?;
 484
 485            self.current_line = self.diff.next().map(|line| (line, DiffLine::parse(line)));
 486        }
 487
 488        anyhow::Ok(None)
 489    }
 490}
 491
/// Classification of a single line of diff text, as produced by [`DiffLine::parse`].
#[derive(Debug, PartialEq)]
pub enum DiffLine<'a> {
    /// `--- <path>` header; `path` has a leading `a/` removed.
    OldPath { path: Cow<'a, str> },
    /// `+++ <path>` header; `path` has a leading `b/` removed.
    NewPath { path: Cow<'a, str> },
    /// `@@ ... @@` header; `None` when the header is the literal `@@ ...` form
    /// without line numbers.
    HunkHeader(Option<HunkLocation>),
    /// Unchanged line, without its leading space. An empty line is also
    /// treated as empty context.
    Context(&'a str),
    /// Removed line, without its leading `-`.
    Deletion(&'a str),
    /// Added line, without its leading `+`.
    Addition(&'a str),
    /// A line starting with `\ No newline`.
    NoNewlineAtEOF,
    /// Anything else; holds the whole line.
    Garbage(&'a str),
}
 503
/// Line information parsed from a `@@ -a,b +c,d @@` hunk header.
#[derive(Debug, PartialEq)]
pub struct HunkLocation {
    /// Zero-based start line in the old file (header value minus one, saturating at 0).
    pub start_line_old: u32,
    /// Line count in the old file; 1 when the header omits it.
    pub count_old: u32,
    /// Zero-based start line in the new file (header value minus one, saturating at 0).
    pub start_line_new: u32,
    /// Line count in the new file; 1 when the header omits it.
    pub count_new: u32,
}
 511
 512impl<'a> DiffLine<'a> {
 513    pub fn parse(line: &'a str) -> Self {
 514        Self::try_parse(line).unwrap_or(Self::Garbage(line))
 515    }
 516
 517    fn try_parse(line: &'a str) -> Option<Self> {
 518        if line.starts_with("\\ No newline") {
 519            return Some(Self::NoNewlineAtEOF);
 520        }
 521        if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
 522            let path = parse_header_path("a/", header);
 523            Some(Self::OldPath { path })
 524        } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
 525            Some(Self::NewPath {
 526                path: parse_header_path("b/", header),
 527            })
 528        } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
 529            if header.starts_with("...") {
 530                return Some(Self::HunkHeader(None));
 531            }
 532
 533            let mut tokens = header.split_whitespace();
 534            let old_range = tokens.next()?.strip_prefix('-')?;
 535            let new_range = tokens.next()?.strip_prefix('+')?;
 536
 537            let (start_line_old, count_old) = old_range.split_once(',').unwrap_or((old_range, "1"));
 538            let (start_line_new, count_new) = new_range.split_once(',').unwrap_or((new_range, "1"));
 539
 540            Some(Self::HunkHeader(Some(HunkLocation {
 541                start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
 542                count_old: count_old.parse().ok()?,
 543                start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
 544                count_new: count_new.parse().ok()?,
 545            })))
 546        } else if let Some(deleted_header) = line.strip_prefix("-") {
 547            Some(Self::Deletion(deleted_header))
 548        } else if line.is_empty() {
 549            Some(Self::Context(""))
 550        } else if let Some(context) = line.strip_prefix(" ") {
 551            Some(Self::Context(context))
 552        } else {
 553            Some(Self::Addition(line.strip_prefix("+")?))
 554        }
 555    }
 556}
 557
 558impl<'a> Display for DiffLine<'a> {
 559    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 560        match self {
 561            DiffLine::OldPath { path } => write!(f, "--- {path}"),
 562            DiffLine::NewPath { path } => write!(f, "+++ {path}"),
 563            DiffLine::HunkHeader(Some(hunk_location)) => {
 564                write!(
 565                    f,
 566                    "@@ -{},{} +{},{} @@",
 567                    hunk_location.start_line_old + 1,
 568                    hunk_location.count_old,
 569                    hunk_location.start_line_new + 1,
 570                    hunk_location.count_new
 571                )
 572            }
 573            DiffLine::HunkHeader(None) => write!(f, "@@ ... @@"),
 574            DiffLine::Context(content) => write!(f, " {content}"),
 575            DiffLine::Deletion(content) => write!(f, "-{content}"),
 576            DiffLine::Addition(content) => write!(f, "+{content}"),
 577            DiffLine::NoNewlineAtEOF => write!(f, "\\ No newline at end of file"),
 578            DiffLine::Garbage(line) => write!(f, "{line}"),
 579        }
 580    }
 581}
 582
 583fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
 584    if !header.contains(['"', '\\']) {
 585        let path = header.split_ascii_whitespace().next().unwrap_or(header);
 586        return Cow::Borrowed(path.strip_prefix(strip_prefix).unwrap_or(path));
 587    }
 588
 589    let mut path = String::with_capacity(header.len());
 590    let mut in_quote = false;
 591    let mut chars = header.chars().peekable();
 592    let mut strip_prefix = Some(strip_prefix);
 593
 594    while let Some(char) = chars.next() {
 595        if char == '"' {
 596            in_quote = !in_quote;
 597        } else if char == '\\' {
 598            let Some(&next_char) = chars.peek() else {
 599                break;
 600            };
 601            chars.next();
 602            path.push(next_char);
 603        } else if char.is_ascii_whitespace() && !in_quote {
 604            break;
 605        } else {
 606            path.push(char);
 607        }
 608
 609        if let Some(prefix) = strip_prefix
 610            && path == prefix
 611        {
 612            strip_prefix.take();
 613            path.clear();
 614        }
 615    }
 616
 617    Cow::Owned(path)
 618}
 619
 620fn eat_required_whitespace(header: &str) -> Option<&str> {
 621    let trimmed = header.trim_ascii_start();
 622
 623    if trimmed.len() == header.len() {
 624        None
 625    } else {
 626        Some(trimmed)
 627    }
 628}
 629
 630#[cfg(test)]
 631mod tests {
 632    use super::*;
 633    use indoc::indoc;
 634
 635    #[test]
 636    fn parse_lines_simple() {
 637        let input = indoc! {"
 638            diff --git a/text.txt b/text.txt
 639            index 86c770d..a1fd855 100644
 640            --- a/file.txt
 641            +++ b/file.txt
 642            @@ -1,2 +1,3 @@
 643             context
 644            -deleted
 645            +inserted
 646            garbage
 647
 648            --- b/file.txt
 649            +++ a/file.txt
 650        "};
 651
 652        let lines = input.lines().map(DiffLine::parse).collect::<Vec<_>>();
 653
 654        assert_eq!(
 655            lines,
 656            &[
 657                DiffLine::Garbage("diff --git a/text.txt b/text.txt"),
 658                DiffLine::Garbage("index 86c770d..a1fd855 100644"),
 659                DiffLine::OldPath {
 660                    path: "file.txt".into()
 661                },
 662                DiffLine::NewPath {
 663                    path: "file.txt".into()
 664                },
 665                DiffLine::HunkHeader(Some(HunkLocation {
 666                    start_line_old: 0,
 667                    count_old: 2,
 668                    start_line_new: 0,
 669                    count_new: 3
 670                })),
 671                DiffLine::Context("context"),
 672                DiffLine::Deletion("deleted"),
 673                DiffLine::Addition("inserted"),
 674                DiffLine::Garbage("garbage"),
 675                DiffLine::Context(""),
 676                DiffLine::OldPath {
 677                    path: "b/file.txt".into()
 678                },
 679                DiffLine::NewPath {
 680                    path: "a/file.txt".into()
 681                },
 682            ]
 683        );
 684    }
 685
 686    #[test]
 687    fn file_header_extra_space() {
 688        let options = ["--- file", "---   file", "---\tfile"];
 689
 690        for option in options {
 691            assert_eq!(
 692                DiffLine::parse(option),
 693                DiffLine::OldPath {
 694                    path: "file".into()
 695                },
 696                "{option}",
 697            );
 698        }
 699    }
 700
 701    #[test]
 702    fn hunk_header_extra_space() {
 703        let options = [
 704            "@@ -1,2 +1,3 @@",
 705            "@@  -1,2  +1,3 @@",
 706            "@@\t-1,2\t+1,3\t@@",
 707            "@@ -1,2  +1,3 @@",
 708            "@@ -1,2   +1,3 @@",
 709            "@@ -1,2 +1,3   @@",
 710            "@@ -1,2 +1,3 @@ garbage",
 711        ];
 712
 713        for option in options {
 714            assert_eq!(
 715                DiffLine::parse(option),
 716                DiffLine::HunkHeader(Some(HunkLocation {
 717                    start_line_old: 0,
 718                    count_old: 2,
 719                    start_line_new: 0,
 720                    count_new: 3
 721                })),
 722                "{option}",
 723            );
 724        }
 725    }
 726
 727    #[test]
 728    fn hunk_header_without_location() {
 729        assert_eq!(DiffLine::parse("@@ ... @@"), DiffLine::HunkHeader(None));
 730    }
 731
 732    #[test]
 733    fn test_parse_path() {
 734        assert_eq!(parse_header_path("a/", "foo.txt"), "foo.txt");
 735        assert_eq!(
 736            parse_header_path("a/", "foo/bar/baz.txt"),
 737            "foo/bar/baz.txt"
 738        );
 739        assert_eq!(parse_header_path("a/", "a/foo.txt"), "foo.txt");
 740        assert_eq!(
 741            parse_header_path("a/", "a/foo/bar/baz.txt"),
 742            "foo/bar/baz.txt"
 743        );
 744
 745        // Extra
 746        assert_eq!(
 747            parse_header_path("a/", "a/foo/bar/baz.txt  2025"),
 748            "foo/bar/baz.txt"
 749        );
 750        assert_eq!(
 751            parse_header_path("a/", "a/foo/bar/baz.txt\t2025"),
 752            "foo/bar/baz.txt"
 753        );
 754        assert_eq!(
 755            parse_header_path("a/", "a/foo/bar/baz.txt \""),
 756            "foo/bar/baz.txt"
 757        );
 758
 759        // Quoted
 760        assert_eq!(
 761            parse_header_path("a/", "a/foo/bar/\"baz quox.txt\""),
 762            "foo/bar/baz quox.txt"
 763        );
 764        assert_eq!(
 765            parse_header_path("a/", "\"a/foo/bar/baz quox.txt\""),
 766            "foo/bar/baz quox.txt"
 767        );
 768        assert_eq!(
 769            parse_header_path("a/", "\"foo/bar/baz quox.txt\""),
 770            "foo/bar/baz quox.txt"
 771        );
 772        assert_eq!(parse_header_path("a/", "\"whatever 🤷\""), "whatever 🤷");
 773        assert_eq!(
 774            parse_header_path("a/", "\"foo/bar/baz quox.txt\"  2025"),
 775            "foo/bar/baz quox.txt"
 776        );
 777        // unescaped quotes are dropped
 778        assert_eq!(parse_header_path("a/", "foo/\"bar\""), "foo/bar");
 779
 780        // Escaped
 781        assert_eq!(
 782            parse_header_path("a/", "\"foo/\\\"bar\\\"/baz.txt\""),
 783            "foo/\"bar\"/baz.txt"
 784        );
 785        assert_eq!(
 786            parse_header_path("a/", "\"C:\\\\Projects\\\\My App\\\\old file.txt\""),
 787            "C:\\Projects\\My App\\old file.txt"
 788        );
 789    }
 790
 791    #[test]
 792    fn test_parse_diff_with_leading_and_trailing_garbage() {
 793        let diff = indoc! {"
 794            I need to make some changes.
 795
 796            I'll change the following things:
 797            - one
 798              - two
 799            - three
 800
 801            ```
 802            --- a/file.txt
 803            +++ b/file.txt
 804             one
 805            +AND
 806             two
 807            ```
 808
 809            Summary of what I did:
 810            - one
 811              - two
 812            - three
 813
 814            That's about it.
 815        "};
 816
 817        let mut events = Vec::new();
 818        let mut parser = DiffParser::new(diff);
 819        while let Some(event) = parser.next().unwrap() {
 820            events.push(event);
 821        }
 822
 823        assert_eq!(
 824            events,
 825            &[
 826                DiffEvent::Hunk {
 827                    path: "file.txt".into(),
 828                    hunk: Hunk {
 829                        context: "one\ntwo\n".into(),
 830                        edits: vec![Edit {
 831                            range: 4..4,
 832                            text: "AND\n".into()
 833                        }],
 834                        start_line: None,
 835                    },
 836                    status: FileStatus::Modified,
 837                },
 838                DiffEvent::FileEnd { renamed_to: None }
 839            ],
 840        )
 841    }
 842
 843    #[test]
 844    fn test_no_newline_at_eof() {
 845        let diff = indoc! {"
 846            --- a/file.py
 847            +++ b/file.py
 848            @@ -55,7 +55,3 @@ class CustomDataset(Dataset):
 849                         torch.set_rng_state(state)
 850                         mask = self.transform(mask)
 851
 852            -        if self.mode == 'Training':
 853            -            return (img, mask, name)
 854            -        else:
 855            -            return (img, mask, name)
 856            \\ No newline at end of file
 857        "};
 858
 859        let mut events = Vec::new();
 860        let mut parser = DiffParser::new(diff);
 861        while let Some(event) = parser.next().unwrap() {
 862            events.push(event);
 863        }
 864
 865        assert_eq!(
 866            events,
 867            &[
 868                DiffEvent::Hunk {
 869                    path: "file.py".into(),
 870                    hunk: Hunk {
 871                        context: concat!(
 872                            "            torch.set_rng_state(state)\n",
 873                            "            mask = self.transform(mask)\n",
 874                            "\n",
 875                            "        if self.mode == 'Training':\n",
 876                            "            return (img, mask, name)\n",
 877                            "        else:\n",
 878                            "            return (img, mask, name)",
 879                        )
 880                        .into(),
 881                        edits: vec![Edit {
 882                            range: 80..203,
 883                            text: "".into()
 884                        }],
 885                        start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
 886                    },
 887                    status: FileStatus::Modified,
 888                },
 889                DiffEvent::FileEnd { renamed_to: None }
 890            ],
 891        );
 892    }
 893
 894    #[test]
 895    fn test_no_newline_at_eof_addition() {
 896        let diff = indoc! {"
 897            --- a/file.txt
 898            +++ b/file.txt
 899            @@ -1,2 +1,3 @@
 900             context
 901            -deleted
 902            +added line
 903            \\ No newline at end of file
 904        "};
 905
 906        let mut events = Vec::new();
 907        let mut parser = DiffParser::new(diff);
 908        while let Some(event) = parser.next().unwrap() {
 909            events.push(event);
 910        }
 911
 912        assert_eq!(
 913            events,
 914            &[
 915                DiffEvent::Hunk {
 916                    path: "file.txt".into(),
 917                    hunk: Hunk {
 918                        context: "context\ndeleted\n".into(),
 919                        edits: vec![Edit {
 920                            range: 8..16,
 921                            text: "added line".into()
 922                        }],
 923                        start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
 924                    },
 925                    status: FileStatus::Modified,
 926                },
 927                DiffEvent::FileEnd { renamed_to: None }
 928            ],
 929        );
 930    }
 931
 932    #[test]
 933    fn test_double_no_newline_at_eof() {
 934        // Two consecutive "no newline" markers - the second should be ignored
 935        let diff = indoc! {"
 936            --- a/file.txt
 937            +++ b/file.txt
 938            @@ -1,3 +1,3 @@
 939             line1
 940            -old
 941            +new
 942             line3
 943            \\ No newline at end of file
 944            \\ No newline at end of file
 945        "};
 946
 947        let mut events = Vec::new();
 948        let mut parser = DiffParser::new(diff);
 949        while let Some(event) = parser.next().unwrap() {
 950            events.push(event);
 951        }
 952
 953        assert_eq!(
 954            events,
 955            &[
 956                DiffEvent::Hunk {
 957                    path: "file.txt".into(),
 958                    hunk: Hunk {
 959                        context: "line1\nold\nline3".into(), // Only one newline removed
 960                        edits: vec![Edit {
 961                            range: 6..10, // "old\n" is 4 bytes
 962                            text: "new\n".into()
 963                        }],
 964                        start_line: Some(0),
 965                    },
 966                    status: FileStatus::Modified,
 967                },
 968                DiffEvent::FileEnd { renamed_to: None }
 969            ],
 970        );
 971    }
 972
 973    #[test]
 974    fn test_no_newline_after_context_not_addition() {
 975        // "No newline" after context lines should remove newline from context,
 976        // not from an earlier addition
 977        let diff = indoc! {"
 978            --- a/file.txt
 979            +++ b/file.txt
 980            @@ -1,4 +1,4 @@
 981             line1
 982            -old
 983            +new
 984             line3
 985             line4
 986            \\ No newline at end of file
 987        "};
 988
 989        let mut events = Vec::new();
 990        let mut parser = DiffParser::new(diff);
 991        while let Some(event) = parser.next().unwrap() {
 992            events.push(event);
 993        }
 994
 995        assert_eq!(
 996            events,
 997            &[
 998                DiffEvent::Hunk {
 999                    path: "file.txt".into(),
1000                    hunk: Hunk {
1001                        // newline removed from line4 (context), not from "new" (addition)
1002                        context: "line1\nold\nline3\nline4".into(),
1003                        edits: vec![Edit {
1004                            range: 6..10,         // "old\n" is 4 bytes
1005                            text: "new\n".into()  // Still has newline
1006                        }],
1007                        start_line: Some(0),
1008                    },
1009                    status: FileStatus::Modified,
1010                },
1011                DiffEvent::FileEnd { renamed_to: None }
1012            ],
1013        );
1014    }
1015
1016    #[test]
1017    fn test_strip_diff_metadata() {
1018        let diff_with_metadata = indoc! {r#"
1019            diff --git a/file.txt b/file.txt
1020            index 1234567..abcdefg 100644
1021            --- a/file.txt
1022            +++ b/file.txt
1023            @@ -1,3 +1,4 @@
1024             context line
1025            -removed line
1026            +added line
1027             more context
1028        "#};
1029
1030        let stripped = strip_diff_metadata(diff_with_metadata);
1031
1032        assert_eq!(
1033            stripped,
1034            indoc! {r#"
1035                --- a/file.txt
1036                +++ b/file.txt
1037                @@ -1,3 +1,4 @@
1038                 context line
1039                -removed line
1040                +added line
1041                 more context
1042            "#}
1043        );
1044    }
1045
1046    #[test]
1047    fn test_apply_diff_to_string_no_trailing_newline() {
1048        // Text without trailing newline; diff generated without
1049        // `\ No newline at end of file` marker.
1050        let text = "line1\nline2\nline3";
1051        let diff = indoc! {"
1052            --- a/file.txt
1053            +++ b/file.txt
1054            @@ -1,3 +1,3 @@
1055             line1
1056            -line2
1057            +replaced
1058             line3
1059        "};
1060
1061        let result = apply_diff_to_string(diff, text).unwrap();
1062        assert_eq!(result, "line1\nreplaced\nline3");
1063    }
1064
1065    #[test]
1066    fn test_apply_diff_to_string_trailing_newline_present() {
1067        // When text has a trailing newline, exact matching still works and
1068        // the fallback is never needed.
1069        let text = "line1\nline2\nline3\n";
1070        let diff = indoc! {"
1071            --- a/file.txt
1072            +++ b/file.txt
1073            @@ -1,3 +1,3 @@
1074             line1
1075            -line2
1076            +replaced
1077             line3
1078        "};
1079
1080        let result = apply_diff_to_string(diff, text).unwrap();
1081        assert_eq!(result, "line1\nreplaced\nline3\n");
1082    }
1083
1084    #[test]
1085    fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() {
1086        // Deletion of the last line when text has no trailing newline.
1087        // The edit range must be clamped so it doesn't index past the
1088        // end of the text.
1089        let text = "line1\nline2\nline3";
1090        let diff = indoc! {"
1091            --- a/file.txt
1092            +++ b/file.txt
1093            @@ -1,3 +1,2 @@
1094             line1
1095             line2
1096            -line3
1097        "};
1098
1099        let result = apply_diff_to_string(diff, text).unwrap();
1100        assert_eq!(result, "line1\nline2\n");
1101    }
1102
1103    #[test]
1104    fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() {
1105        // Replace the last line when text has no trailing newline.
1106        let text = "aaa\nbbb\nccc";
1107        let diff = indoc! {"
1108            --- a/file.txt
1109            +++ b/file.txt
1110            @@ -1,3 +1,3 @@
1111             aaa
1112             bbb
1113            -ccc
1114            +ddd
1115        "};
1116
1117        let result = apply_diff_to_string(diff, text).unwrap();
1118        assert_eq!(result, "aaa\nbbb\nddd");
1119    }
1120
1121    #[test]
1122    fn test_apply_diff_to_string_multibyte_no_trailing_newline() {
1123        // Multi-byte UTF-8 characters near the end; ensures char boundary
1124        // safety when the fallback clamps edit ranges.
1125        let text = "hello\n세계";
1126        let diff = indoc! {"
1127            --- a/file.txt
1128            +++ b/file.txt
1129            @@ -1,2 +1,2 @@
1130             hello
1131            -세계
1132            +world
1133        "};
1134
1135        let result = apply_diff_to_string(diff, text).unwrap();
1136        assert_eq!(result, "hello\nworld");
1137    }
1138
1139    #[test]
1140    fn test_find_context_candidates_no_false_positive_mid_text() {
1141        // The stripped fallback must only match at the end of text, not in
1142        // the middle where a real newline exists.
1143        let text = "aaa\nbbb\nccc\n";
1144        let mut hunk = Hunk {
1145            context: "bbb\n".into(),
1146            edits: vec![],
1147            start_line: None,
1148        };
1149
1150        let candidates = find_context_candidates(text, &mut hunk);
1151        // Exact match at offset 4 — the fallback is not used.
1152        assert_eq!(candidates, vec![4]);
1153    }
1154
1155    #[test]
1156    fn test_find_context_candidates_fallback_at_end() {
1157        let text = "aaa\nbbb";
1158        let mut hunk = Hunk {
1159            context: "bbb\n".into(),
1160            edits: vec![],
1161            start_line: None,
1162        };
1163
1164        let candidates = find_context_candidates(text, &mut hunk);
1165        assert_eq!(candidates, vec![4]);
1166        // Context should be stripped.
1167        assert_eq!(hunk.context, "bbb");
1168    }
1169
1170    #[test]
1171    fn test_find_context_candidates_no_fallback_mid_text() {
1172        // "bbb" appears mid-text followed by a newline, so the exact
1173        // match succeeds. Verify the stripped fallback doesn't produce a
1174        // second, spurious candidate.
1175        let text = "aaa\nbbb\nccc";
1176        let mut hunk = Hunk {
1177            context: "bbb\nccc\n".into(),
1178            edits: vec![],
1179            start_line: None,
1180        };
1181
1182        let candidates = find_context_candidates(text, &mut hunk);
1183        // No exact match (text ends without newline after "ccc"), but the
1184        // stripped context "bbb\nccc" matches at offset 4, which is the end.
1185        assert_eq!(candidates, vec![4]);
1186        assert_eq!(hunk.context, "bbb\nccc");
1187    }
1188
1189    #[test]
1190    fn test_find_context_candidates_clamps_edit_ranges() {
1191        let text = "aaa\nbbb";
1192        let mut hunk = Hunk {
1193            context: "aaa\nbbb\n".into(),
1194            edits: vec![Edit {
1195                range: 4..8, // "bbb\n" — end points at the trailing \n
1196                text: "ccc\n".into(),
1197            }],
1198            start_line: None,
1199        };
1200
1201        let candidates = find_context_candidates(text, &mut hunk);
1202        assert_eq!(candidates, vec![0]);
1203        // Edit range end should be clamped to 7 (new context length).
1204        assert_eq!(hunk.edits[0].range, 4..7);
1205    }
1206}