udiff.rs

   1use std::{
   2    borrow::Cow,
   3    fmt::{Display, Write},
   4    mem,
   5    ops::Range,
   6};
   7
   8use anyhow::{Context as _, Result, anyhow};
   9use imara_diff::{
  10    Algorithm, Sink, diff,
  11    intern::{InternedInput, Interner, Token},
  12};
  13
  14pub fn strip_diff_path_prefix<'a>(diff: &'a str, prefix: &str) -> Cow<'a, str> {
  15    if prefix.is_empty() {
  16        return Cow::Borrowed(diff);
  17    }
  18
  19    let prefix_with_slash = format!("{}/", prefix);
  20    let mut needs_rewrite = false;
  21
  22    for line in diff.lines() {
  23        match DiffLine::parse(line) {
  24            DiffLine::OldPath { path } | DiffLine::NewPath { path } => {
  25                if path.starts_with(&prefix_with_slash) {
  26                    needs_rewrite = true;
  27                    break;
  28                }
  29            }
  30            _ => {}
  31        }
  32    }
  33
  34    if !needs_rewrite {
  35        return Cow::Borrowed(diff);
  36    }
  37
  38    let mut result = String::with_capacity(diff.len());
  39    for line in diff.lines() {
  40        match DiffLine::parse(line) {
  41            DiffLine::OldPath { path } => {
  42                let stripped = path
  43                    .strip_prefix(&prefix_with_slash)
  44                    .unwrap_or(path.as_ref());
  45                result.push_str(&format!("--- a/{}\n", stripped));
  46            }
  47            DiffLine::NewPath { path } => {
  48                let stripped = path
  49                    .strip_prefix(&prefix_with_slash)
  50                    .unwrap_or(path.as_ref());
  51                result.push_str(&format!("+++ b/{}\n", stripped));
  52            }
  53            _ => {
  54                result.push_str(line);
  55                result.push('\n');
  56            }
  57        }
  58    }
  59
  60    Cow::Owned(result)
  61}
  62
  63/// Strip unnecessary git metadata lines from a diff, keeping only the lines
  64/// needed for patch application: path headers (--- and +++), hunk headers (@@),
  65/// and content lines (+, -, space).
  66pub fn strip_diff_metadata(diff: &str) -> String {
  67    let mut result = String::new();
  68
  69    for line in diff.lines() {
  70        let dominated = DiffLine::parse(line);
  71        match dominated {
  72            // Keep path headers, hunk headers, and content lines
  73            DiffLine::OldPath { .. }
  74            | DiffLine::NewPath { .. }
  75            | DiffLine::HunkHeader(_)
  76            | DiffLine::Context(_)
  77            | DiffLine::Deletion(_)
  78            | DiffLine::Addition(_)
  79            | DiffLine::NoNewlineAtEOF => {
  80                result.push_str(line);
  81                result.push('\n');
  82            }
  83            // Skip garbage lines (diff --git, index, etc.)
  84            DiffLine::Garbage(_) => {}
  85        }
  86    }
  87
  88    result
  89}
  90
/// Marker text identifying a cursor-position comment line inside a patch.
///
/// [`encode_cursor_in_patch`] writes it right after the caret on a
/// `#`-prefixed line, and [`extract_cursor_from_patch`] treats any otherwise
/// unparseable line that starts with `#` and contains this marker as a cursor
/// annotation.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
  93
  94/// Extract cursor offset from a patch and return `(clean_patch, cursor_offset)`.
  95///
  96/// Cursor position is encoded as a comment line (starting with `#`) containing
  97/// `[CURSOR_POSITION]`. A `^` in the line indicates the cursor column; a `<`
  98/// indicates column 0. The offset is computed relative to addition (`+`) and
  99/// context (` `) lines accumulated so far in the hunk, which represent the
 100/// cursor position within the new text contributed by the hunk.
 101pub fn extract_cursor_from_patch(patch: &str) -> (String, Option<usize>) {
 102    let mut clean_patch = String::new();
 103    let mut cursor_offset: Option<usize> = None;
 104    let mut line_start_offset = 0usize;
 105    let mut prev_line_start_offset = 0usize;
 106
 107    for line in patch.lines() {
 108        let diff_line = DiffLine::parse(line);
 109
 110        match &diff_line {
 111            DiffLine::Garbage(content)
 112                if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER) =>
 113            {
 114                let caret_column = if let Some(caret_pos) = content.find('^') {
 115                    caret_pos
 116                } else if content.find('<').is_some() {
 117                    0
 118                } else {
 119                    continue;
 120                };
 121                let cursor_column = caret_column.saturating_sub('#'.len_utf8());
 122                cursor_offset = Some(prev_line_start_offset + cursor_column);
 123            }
 124            _ => {
 125                if !clean_patch.is_empty() {
 126                    clean_patch.push('\n');
 127                }
 128                clean_patch.push_str(line);
 129
 130                match diff_line {
 131                    DiffLine::Addition(content) | DiffLine::Context(content) => {
 132                        prev_line_start_offset = line_start_offset;
 133                        line_start_offset += content.len() + 1;
 134                    }
 135                    _ => {}
 136                }
 137            }
 138        }
 139    }
 140
 141    if patch.ends_with('\n') && !clean_patch.is_empty() {
 142        clean_patch.push('\n');
 143    }
 144
 145    (clean_patch, cursor_offset)
 146}
 147
 148/// Find all byte offsets where `hunk.context` occurs as a substring of `text`.
 149///
 150/// If no exact matches are found and the context ends with `'\n'` but `text`
 151/// does not, retries without the trailing newline, accepting only a match at
 152/// the very end of `text`. When this fallback fires, the hunk's context is
 153/// trimmed and its edit ranges are clamped so that downstream code doesn't
 154/// index past the end of the matched region. This handles diffs that are
 155/// missing a `\ No newline at end of file` marker: the parser always appends
 156/// `'\n'` via `writeln!`, so the context can have a trailing newline that
 157/// doesn't exist in the source text.
 158pub fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec<usize> {
 159    let candidates: Vec<usize> = text
 160        .match_indices(&hunk.context)
 161        .map(|(offset, _)| offset)
 162        .collect();
 163
 164    if !candidates.is_empty() {
 165        return candidates;
 166    }
 167
 168    if hunk.context.ends_with('\n') && !hunk.context.is_empty() {
 169        let old_len = hunk.context.len();
 170        hunk.context.pop();
 171        let new_len = hunk.context.len();
 172
 173        if !hunk.context.is_empty() {
 174            let candidates: Vec<usize> = text
 175                .match_indices(&hunk.context)
 176                .filter(|(offset, _)| offset + new_len == text.len())
 177                .map(|(offset, _)| offset)
 178                .collect();
 179
 180            if !candidates.is_empty() {
 181                for edit in &mut hunk.edits {
 182                    let touched_phantom = edit.range.end > new_len;
 183                    edit.range.start = edit.range.start.min(new_len);
 184                    edit.range.end = edit.range.end.min(new_len);
 185                    if touched_phantom {
 186                        // The replacement text was also written with a
 187                        // trailing '\n' that corresponds to the phantom
 188                        // newline we just removed from the context.
 189                        if edit.text.ends_with('\n') {
 190                            edit.text.pop();
 191                        }
 192                    }
 193                }
 194                return candidates;
 195            }
 196
 197            // Restore if fallback didn't help either.
 198            hunk.context.push('\n');
 199            debug_assert_eq!(hunk.context.len(), old_len);
 200        } else {
 201            hunk.context.push('\n');
 202        }
 203    }
 204
 205    Vec::new()
 206}
 207
 208/// Given multiple candidate offsets where context matches, use line numbers to disambiguate.
 209/// Returns the offset that matches the expected line, or None if no match or no line number available.
 210pub fn disambiguate_by_line_number(
 211    candidates: &[usize],
 212    expected_line: Option<u32>,
 213    offset_to_line: &dyn Fn(usize) -> u32,
 214) -> Option<usize> {
 215    match candidates.len() {
 216        0 => None,
 217        1 => Some(candidates[0]),
 218        _ => {
 219            let expected = expected_line?;
 220            candidates
 221                .iter()
 222                .copied()
 223                .find(|&offset| offset_to_line(offset) == expected)
 224        }
 225    }
 226}
 227
 228pub fn unified_diff_with_context(
 229    old_text: &str,
 230    new_text: &str,
 231    old_start_line: u32,
 232    new_start_line: u32,
 233    context_lines: u32,
 234) -> String {
 235    let input = InternedInput::new(old_text, new_text);
 236    diff(
 237        Algorithm::Histogram,
 238        &input,
 239        OffsetUnifiedDiffBuilder::new(&input, old_start_line, new_start_line, context_lines),
 240    )
 241}
 242
/// Diff sink that renders changes as unified-diff hunks whose header line
/// numbers are shifted by caller-supplied offsets.
struct OffsetUnifiedDiffBuilder<'a> {
    /// Tokens of the old input.
    before: &'a [Token],
    /// Tokens of the new input.
    after: &'a [Token],
    /// Resolves tokens back to the text they stand for.
    interner: &'a Interner<&'a str>,
    /// Index into `before` up to which tokens have already been consumed.
    pos: u32,
    /// Zero-based index in `before` at which the pending hunk starts.
    before_hunk_start: u32,
    /// Zero-based index in `after` at which the pending hunk starts.
    after_hunk_start: u32,
    /// Number of old-side lines in the pending hunk.
    before_hunk_len: u32,
    /// Number of new-side lines in the pending hunk. When both lengths are
    /// zero there is no pending hunk.
    after_hunk_len: u32,
    /// Added to the old-side start line written in hunk headers.
    old_line_offset: u32,
    /// Added to the new-side start line written in hunk headers.
    new_line_offset: u32,
    /// Number of unchanged lines to show around each change.
    context_lines: u32,
    /// Body lines of the pending hunk, written out by `flush`.
    buffer: String,
    /// Finished output: all hunks flushed so far.
    dst: String,
}
 258
impl<'a> OffsetUnifiedDiffBuilder<'a> {
    /// Creates a builder over `input` with no pending hunk and empty output.
    ///
    /// `old_line_offset` / `new_line_offset` are added to the start lines in
    /// every hunk header; `context_lines` is the amount of unchanged context
    /// emitted around changes.
    fn new(
        input: &'a InternedInput<&'a str>,
        old_line_offset: u32,
        new_line_offset: u32,
        context_lines: u32,
    ) -> Self {
        Self {
            before_hunk_start: 0,
            after_hunk_start: 0,
            before_hunk_len: 0,
            after_hunk_len: 0,
            old_line_offset,
            new_line_offset,
            context_lines,
            buffer: String::with_capacity(8),
            dst: String::new(),
            interner: &input.interner,
            before: &input.before,
            after: &input.after,
            pos: 0,
        }
    }

    /// Appends one line per token to the pending hunk body, each prefixed with
    /// `prefix` (`' '`, `'-'` or `'+'` at the call sites in this file).
    fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
        for &token in tokens {
            // Writing into a `String` cannot fail.
            writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
        }
    }

    /// Finishes the pending hunk, if any, and appends it to `dst`.
    ///
    /// Emits up to `context_lines` lines of trailing context (clamped to the
    /// end of the old input), writes the `@@ -start,len +start,len @@` header
    /// with one-based start lines shifted by the configured offsets, then the
    /// buffered body, and resets the hunk lengths.
    fn flush(&mut self) {
        // Both lengths zero means nothing has been accumulated.
        if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
            return;
        }

        // Trailing context after the last change of this hunk.
        let end = (self.pos + self.context_lines).min(self.before.len() as u32);
        self.update_pos(end, end);

        writeln!(
            &mut self.dst,
            "@@ -{},{} +{},{} @@",
            self.before_hunk_start + 1 + self.old_line_offset,
            self.before_hunk_len,
            self.after_hunk_start + 1 + self.new_line_offset,
            self.after_hunk_len,
        )
        .unwrap();
        write!(&mut self.dst, "{}", &self.buffer).unwrap();
        self.buffer.clear();
        self.before_hunk_len = 0;
        self.after_hunk_len = 0;
    }

    /// Emits the old-side tokens from `pos` up to `print_to` as context lines,
    /// counts them towards both sides of the pending hunk, and then sets `pos`
    /// to `move_to`.
    fn update_pos(&mut self, print_to: u32, move_to: u32) {
        self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
        let len = print_to - self.pos;
        // Context lines exist on both the old and the new side.
        self.before_hunk_len += len;
        self.after_hunk_len += len;
        self.pos = move_to;
    }
}
 320
impl Sink for OffsetUnifiedDiffBuilder<'_> {
    type Out = String;

    /// Records one changed region, where `before` and `after` are token ranges
    /// in the old and new inputs respectively.
    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
        // If the gap since the last consumed position is wider than the
        // trailing context of the previous change plus the leading context of
        // this one, close the pending hunk instead of merging.
        if before.start - self.pos > self.context_lines * 2 {
            self.flush();
        }
        // No pending hunk: start a new one `context_lines` before the change
        // (clamped at the start of the input).
        if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
            self.pos = before.start.saturating_sub(self.context_lines);
            self.before_hunk_start = self.pos;
            self.after_hunk_start = after.start.saturating_sub(self.context_lines);
        }

        // Emit unchanged lines up to the change, then skip past the removed
        // old-side tokens.
        self.update_pos(before.start, before.end);
        self.before_hunk_len += before.end - before.start;
        self.after_hunk_len += after.end - after.start;
        // Removed lines first, then added lines.
        self.print_tokens(
            &self.before[before.start as usize..before.end as usize],
            '-',
        );
        self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
    }

    /// Flushes the last pending hunk and returns the accumulated diff text.
    fn finish(mut self) -> Self::Out {
        self.flush();
        self.dst
    }
}
 349
 350pub fn encode_cursor_in_patch(patch: &str, cursor_offset: Option<usize>) -> String {
 351    let Some(cursor_offset) = cursor_offset else {
 352        return patch.to_string();
 353    };
 354
 355    let mut result = String::new();
 356    let mut line_start_offset = 0usize;
 357
 358    for line in patch.lines() {
 359        if matches!(
 360            DiffLine::parse(line),
 361            DiffLine::Garbage(content)
 362                if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER)
 363        ) {
 364            continue;
 365        }
 366
 367        if !result.is_empty() {
 368            result.push('\n');
 369        }
 370        result.push_str(line);
 371
 372        match DiffLine::parse(line) {
 373            DiffLine::Addition(content) => {
 374                let line_end_offset = line_start_offset + content.len();
 375
 376                if cursor_offset >= line_start_offset && cursor_offset <= line_end_offset {
 377                    let cursor_column = cursor_offset - line_start_offset;
 378
 379                    result.push('\n');
 380                    result.push('#');
 381                    for _ in 0..cursor_column {
 382                        result.push(' ');
 383                    }
 384                    write!(result, "^{}", CURSOR_POSITION_MARKER).unwrap();
 385                }
 386
 387                line_start_offset = line_end_offset + 1;
 388            }
 389            DiffLine::Context(content) => {
 390                line_start_offset += content.len() + 1;
 391            }
 392            _ => {}
 393        }
 394    }
 395
 396    if patch.ends_with('\n') {
 397        result.push('\n');
 398    }
 399
 400    result
 401}
 402
 403pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
 404    apply_diff_to_string_with_hunk_offset(diff_str, text).map(|(text, _)| text)
 405}
 406
 407/// Applies a diff to a string and returns the result along with the offset where
 408/// the first hunk's context matched in the original text. This offset can be used
 409/// to adjust cursor positions that are relative to the hunk's content.
 410pub fn apply_diff_to_string_with_hunk_offset(
 411    diff_str: &str,
 412    text: &str,
 413) -> Result<(String, Option<usize>)> {
 414    let mut diff = DiffParser::new(diff_str);
 415
 416    let mut text = text.to_string();
 417    let mut first_hunk_offset = None;
 418
 419    while let Some(event) = diff.next().context("Failed to parse diff")? {
 420        match event {
 421            DiffEvent::Hunk {
 422                mut hunk,
 423                path: _,
 424                status: _,
 425            } => {
 426                let candidates = find_context_candidates(&text, &mut hunk);
 427
 428                let hunk_offset =
 429                    disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| {
 430                        text[..offset].matches('\n').count() as u32
 431                    })
 432                    .ok_or_else(|| anyhow!("couldn't resolve hunk"))?;
 433
 434                if first_hunk_offset.is_none() {
 435                    first_hunk_offset = Some(hunk_offset);
 436                }
 437
 438                for edit in hunk.edits.iter().rev() {
 439                    let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end);
 440                    text.replace_range(range, &edit.text);
 441                }
 442            }
 443            DiffEvent::FileEnd { .. } => {}
 444        }
 445    }
 446
 447    Ok((text, first_hunk_offset))
 448}
 449
/// Paths of the file section currently being parsed by [`DiffParser`].
struct PatchFile<'a> {
    /// Path taken from the `---` header line.
    old_path: Cow<'a, str>,
    /// Path taken from the `+++` header line; empty until that line is seen.
    new_path: Cow<'a, str>,
}
 454
/// Pull-style parser that turns unified-diff text into [`DiffEvent`]s, one per
/// call to [`DiffParser::next`].
pub struct DiffParser<'a> {
    /// File section in progress; `None` before the first `---` header and
    /// after a `FileEnd` event has been emitted.
    current_file: Option<PatchFile<'a>>,
    /// Lookahead: the next unconsumed line as raw text and parsed form.
    current_line: Option<(&'a str, DiffLine<'a>)>,
    /// Hunk being accumulated from content lines.
    hunk: Hunk,
    /// Remaining input lines after `current_line`.
    diff: std::str::Lines<'a>,
    /// Old-side start line from the most recent hunk header that carried a
    /// location; moved into the hunk when it is emitted.
    pending_start_line: Option<u32>,
    /// Whether a `\ No newline` marker has already been applied to the
    /// current hunk.
    processed_no_newline: bool,
    /// Kind of the most recent content line in the current hunk.
    last_diff_op: LastDiffOp,
}
 464
/// Kind of the most recently parsed content line within the current hunk.
/// Used to decide what a `\ No newline` marker applies to.
#[derive(Clone, Copy, Default)]
enum LastDiffOp {
    /// No content line has been seen since the hunk started.
    #[default]
    None,
    /// The last content line was unchanged context.
    Context,
    /// The last content line was a `-` line.
    Deletion,
    /// The last content line was a `+` line.
    Addition,
}
 473
/// Event produced by [`DiffParser::next`].
#[derive(Debug, PartialEq)]
pub enum DiffEvent<'a> {
    /// A completed hunk of the current file.
    Hunk {
        /// Path the hunk applies to: the new path for created files,
        /// otherwise the old path.
        path: Cow<'a, str>,
        /// Context and edits of the hunk.
        hunk: Hunk,
        /// Whether the file is created, modified or deleted, derived from
        /// which header path is `/dev/null`.
        status: FileStatus,
    },
    /// The current file section has ended.
    FileEnd {
        /// The new path when it differs from the old path and the old path is
        /// not `/dev/null`; `None` otherwise.
        renamed_to: Option<Cow<'a, str>>,
    },
}
 485
/// What a file section of a diff does to the file.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileStatus {
    /// The old path is `/dev/null`.
    Created,
    /// Neither path is `/dev/null`.
    Modified,
    /// The new path is `/dev/null` (and the old path is not).
    Deleted,
}
 492
/// One hunk of a diff, expressed as the old text it expects plus the edits to
/// apply to that text.
#[derive(Debug, Default, PartialEq)]
pub struct Hunk {
    /// Old-side text of the hunk: context and deleted lines in order, each
    /// followed by `'\n'` (the final newline may be removed by a
    /// `\ No newline` marker).
    pub context: String,
    /// Replacements to apply, with ranges given as byte offsets into
    /// `context`.
    pub edits: Vec<Edit>,
    /// Zero-based old-side start line from the hunk header, if the header
    /// carried a location.
    pub start_line: Option<u32>,
}

impl Hunk {
    /// Returns `true` when the hunk has neither context text nor edits.
    pub fn is_empty(&self) -> bool {
        self.context.is_empty() && self.edits.is_empty()
    }
}
 505
/// A single replacement within a [`Hunk`].
#[derive(Debug, PartialEq)]
pub struct Edit {
    /// Byte range within the hunk's `context` that is replaced; empty for a
    /// pure insertion.
    pub range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    pub text: String,
}
 511
impl<'a> DiffParser<'a> {
    /// Creates a parser over `diff`, pre-loading the first line as lookahead.
    pub fn new(diff: &'a str) -> Self {
        let mut diff = diff.lines();
        let current_line = diff.next().map(|line| (line, DiffLine::parse(line)));
        DiffParser {
            current_file: None,
            hunk: Hunk::default(),
            current_line,
            diff,
            pending_start_line: None,
            processed_no_newline: false,
            last_diff_op: LastDiffOp::None,
        }
    }

    /// Advances the parser and returns the next event, or `Ok(None)` once the
    /// input is exhausted and all pending events have been emitted.
    ///
    /// Content lines seen before any `---` header are ignored, because hunks
    /// are only accumulated while a file section is open.
    ///
    /// # Errors
    ///
    /// Returns an error, annotated with the offending line, if writing into
    /// the hunk buffers fails.
    pub fn next(&mut self) -> Result<Option<DiffEvent<'a>>> {
        loop {
            // Decide, from the lookahead line alone, whether the pending hunk
            // and/or the current file section end before that line.
            let (hunk_done, file_done) = match self.current_line.as_ref().map(|e| &e.1) {
                Some(DiffLine::OldPath { .. }) | Some(DiffLine::Garbage(_)) | None => (true, true),
                Some(DiffLine::HunkHeader(_)) => (true, false),
                _ => (false, false),
            };

            if hunk_done {
                if let Some(file) = &self.current_file
                    && !self.hunk.is_empty()
                {
                    let status = if file.old_path == "/dev/null" {
                        FileStatus::Created
                    } else if file.new_path == "/dev/null" {
                        FileStatus::Deleted
                    } else {
                        FileStatus::Modified
                    };
                    // A created file has no meaningful old path.
                    let path = if status == FileStatus::Created {
                        file.new_path.clone()
                    } else {
                        file.old_path.clone()
                    };
                    // Hand out the accumulated hunk and reset per-hunk state.
                    // The lookahead line is left in place, so the next call
                    // re-evaluates it with an empty hunk.
                    let mut hunk = mem::take(&mut self.hunk);
                    hunk.start_line = self.pending_start_line.take();
                    self.processed_no_newline = false;
                    self.last_diff_op = LastDiffOp::None;
                    return Ok(Some(DiffEvent::Hunk { path, hunk, status }));
                }
            }

            if file_done {
                if let Some(PatchFile { old_path, new_path }) = self.current_file.take() {
                    return Ok(Some(DiffEvent::FileEnd {
                        renamed_to: if old_path != new_path && old_path != "/dev/null" {
                            Some(new_path)
                        } else {
                            None
                        },
                    }));
                }
            }

            // Consume the lookahead line; none left means the input is done.
            let Some((line, parsed_line)) = self.current_line.take() else {
                break;
            };

            // The closure lets `?` be used inside while still attaching the
            // offending line to any error afterwards.
            (|| {
                match parsed_line {
                    DiffLine::OldPath { path } => {
                        self.current_file = Some(PatchFile {
                            old_path: path,
                            new_path: "".into(),
                        });
                    }
                    DiffLine::NewPath { path } => {
                        if let Some(current_file) = &mut self.current_file {
                            current_file.new_path = path
                        }
                    }
                    DiffLine::HunkHeader(location) => {
                        if let Some(loc) = location {
                            self.pending_start_line = Some(loc.start_line_old);
                        }
                    }
                    DiffLine::Context(ctx) => {
                        if self.current_file.is_some() {
                            writeln!(&mut self.hunk.context, "{ctx}")?;
                            self.last_diff_op = LastDiffOp::Context;
                        }
                    }
                    DiffLine::Deletion(del) => {
                        if self.current_file.is_some() {
                            // The deleted line (plus its newline) occupies this
                            // range once appended to the context below.
                            let range = self.hunk.context.len()
                                ..self.hunk.context.len() + del.len() + '\n'.len_utf8();
                            // Merge with the previous edit when adjacent.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                last_edit.range.end = range.end;
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: String::new(),
                                });
                            }
                            writeln!(&mut self.hunk.context, "{del}")?;
                            self.last_diff_op = LastDiffOp::Deletion;
                        }
                    }
                    DiffLine::Addition(add) => {
                        if self.current_file.is_some() {
                            // Insertions happen at the current end of the
                            // context and consume none of it.
                            let range = self.hunk.context.len()..self.hunk.context.len();
                            // Merge with the previous edit when adjacent.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                writeln!(&mut last_edit.text, "{add}").unwrap();
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: format!("{add}\n"),
                                });
                            }
                            self.last_diff_op = LastDiffOp::Addition;
                        }
                    }
                    DiffLine::NoNewlineAtEOF => {
                        // Only the first marker within a hunk is applied.
                        if !self.processed_no_newline {
                            self.processed_no_newline = true;
                            match self.last_diff_op {
                                LastDiffOp::Addition => {
                                    // Remove trailing newline from the last addition
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.text.pop();
                                    }
                                }
                                LastDiffOp::Deletion => {
                                    // Remove trailing newline from context (which includes the deletion)
                                    self.hunk.context.pop();
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.range.end -= 1;
                                    }
                                }
                                LastDiffOp::Context | LastDiffOp::None => {
                                    // Remove trailing newline from context
                                    self.hunk.context.pop();
                                }
                            }
                        }
                    }
                    DiffLine::Garbage(_) => {}
                }

                anyhow::Ok(())
            })()
            .with_context(|| format!("on line:\n\n```\n{}```", line))?;

            // Load the next lookahead line.
            self.current_line = self.diff.next().map(|line| (line, DiffLine::parse(line)));
        }

        anyhow::Ok(None)
    }
}
 670
/// Classification of a single line of unified-diff text.
#[derive(Debug, PartialEq)]
pub enum DiffLine<'a> {
    /// `--- <path>` header; `path` has a leading `a/` removed.
    OldPath { path: Cow<'a, str> },
    /// `+++ <path>` header; `path` has a leading `b/` removed.
    NewPath { path: Cow<'a, str> },
    /// `@@ ... @@` header; `None` when written literally as `@@ ...` without
    /// line numbers.
    HunkHeader(Option<HunkLocation>),
    /// Unchanged line (leading space removed), or an empty line.
    Context(&'a str),
    /// Removed line, without the leading `-`.
    Deletion(&'a str),
    /// Added line, without the leading `+`.
    Addition(&'a str),
    /// A line starting with `\ No newline`.
    NoNewlineAtEOF,
    /// Any line that fits none of the other categories, kept verbatim.
    Garbage(&'a str),
}
 682
/// Line information from an `@@ -a,b +c,d @@` hunk header.
///
/// Start lines are stored zero-based: parsing subtracts one (saturating) from
/// the header value and `Display` adds it back.
#[derive(Debug, PartialEq)]
pub struct HunkLocation {
    /// Zero-based first line of the hunk on the old side.
    pub start_line_old: u32,
    /// Number of old-side lines; 1 when the header omits the count.
    pub count_old: u32,
    /// Zero-based first line of the hunk on the new side.
    pub start_line_new: u32,
    /// Number of new-side lines; 1 when the header omits the count.
    pub count_new: u32,
}
 690
 691impl<'a> DiffLine<'a> {
 692    pub fn parse(line: &'a str) -> Self {
 693        Self::try_parse(line).unwrap_or(Self::Garbage(line))
 694    }
 695
 696    fn try_parse(line: &'a str) -> Option<Self> {
 697        if line.starts_with("\\ No newline") {
 698            return Some(Self::NoNewlineAtEOF);
 699        }
 700        if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
 701            let path = parse_header_path("a/", header);
 702            Some(Self::OldPath { path })
 703        } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
 704            Some(Self::NewPath {
 705                path: parse_header_path("b/", header),
 706            })
 707        } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
 708            if header.starts_with("...") {
 709                return Some(Self::HunkHeader(None));
 710            }
 711
 712            let mut tokens = header.split_whitespace();
 713            let old_range = tokens.next()?.strip_prefix('-')?;
 714            let new_range = tokens.next()?.strip_prefix('+')?;
 715
 716            let (start_line_old, count_old) = old_range.split_once(',').unwrap_or((old_range, "1"));
 717            let (start_line_new, count_new) = new_range.split_once(',').unwrap_or((new_range, "1"));
 718
 719            Some(Self::HunkHeader(Some(HunkLocation {
 720                start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
 721                count_old: count_old.parse().ok()?,
 722                start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
 723                count_new: count_new.parse().ok()?,
 724            })))
 725        } else if let Some(deleted_header) = line.strip_prefix("-") {
 726            Some(Self::Deletion(deleted_header))
 727        } else if line.is_empty() {
 728            Some(Self::Context(""))
 729        } else if let Some(context) = line.strip_prefix(" ") {
 730            Some(Self::Context(context))
 731        } else {
 732            Some(Self::Addition(line.strip_prefix("+")?))
 733        }
 734    }
 735}
 736
 737impl<'a> Display for DiffLine<'a> {
 738    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 739        match self {
 740            DiffLine::OldPath { path } => write!(f, "--- {path}"),
 741            DiffLine::NewPath { path } => write!(f, "+++ {path}"),
 742            DiffLine::HunkHeader(Some(hunk_location)) => {
 743                write!(
 744                    f,
 745                    "@@ -{},{} +{},{} @@",
 746                    hunk_location.start_line_old + 1,
 747                    hunk_location.count_old,
 748                    hunk_location.start_line_new + 1,
 749                    hunk_location.count_new
 750                )
 751            }
 752            DiffLine::HunkHeader(None) => write!(f, "@@ ... @@"),
 753            DiffLine::Context(content) => write!(f, " {content}"),
 754            DiffLine::Deletion(content) => write!(f, "-{content}"),
 755            DiffLine::Addition(content) => write!(f, "+{content}"),
 756            DiffLine::NoNewlineAtEOF => write!(f, "\\ No newline at end of file"),
 757            DiffLine::Garbage(line) => write!(f, "{line}"),
 758        }
 759    }
 760}
 761
 762fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
 763    if !header.contains(['"', '\\']) {
 764        let path = header.split_ascii_whitespace().next().unwrap_or(header);
 765        return Cow::Borrowed(path.strip_prefix(strip_prefix).unwrap_or(path));
 766    }
 767
 768    let mut path = String::with_capacity(header.len());
 769    let mut in_quote = false;
 770    let mut chars = header.chars().peekable();
 771    let mut strip_prefix = Some(strip_prefix);
 772
 773    while let Some(char) = chars.next() {
 774        if char == '"' {
 775            in_quote = !in_quote;
 776        } else if char == '\\' {
 777            let Some(&next_char) = chars.peek() else {
 778                break;
 779            };
 780            chars.next();
 781            path.push(next_char);
 782        } else if char.is_ascii_whitespace() && !in_quote {
 783            break;
 784        } else {
 785            path.push(char);
 786        }
 787
 788        if let Some(prefix) = strip_prefix
 789            && path == prefix
 790        {
 791            strip_prefix.take();
 792            path.clear();
 793        }
 794    }
 795
 796    Cow::Owned(path)
 797}
 798
 799fn eat_required_whitespace(header: &str) -> Option<&str> {
 800    let trimmed = header.trim_ascii_start();
 801
 802    if trimmed.len() == header.len() {
 803        None
 804    } else {
 805        Some(trimmed)
 806    }
 807}
 808
 809#[cfg(test)]
 810mod tests {
 811    use super::*;
 812    use indoc::indoc;
 813
 814    #[test]
 815    fn parse_lines_simple() {
 816        let input = indoc! {"
 817            diff --git a/text.txt b/text.txt
 818            index 86c770d..a1fd855 100644
 819            --- a/file.txt
 820            +++ b/file.txt
 821            @@ -1,2 +1,3 @@
 822             context
 823            -deleted
 824            +inserted
 825            garbage
 826
 827            --- b/file.txt
 828            +++ a/file.txt
 829        "};
 830
 831        let lines = input.lines().map(DiffLine::parse).collect::<Vec<_>>();
 832
 833        assert_eq!(
 834            lines,
 835            &[
 836                DiffLine::Garbage("diff --git a/text.txt b/text.txt"),
 837                DiffLine::Garbage("index 86c770d..a1fd855 100644"),
 838                DiffLine::OldPath {
 839                    path: "file.txt".into()
 840                },
 841                DiffLine::NewPath {
 842                    path: "file.txt".into()
 843                },
 844                DiffLine::HunkHeader(Some(HunkLocation {
 845                    start_line_old: 0,
 846                    count_old: 2,
 847                    start_line_new: 0,
 848                    count_new: 3
 849                })),
 850                DiffLine::Context("context"),
 851                DiffLine::Deletion("deleted"),
 852                DiffLine::Addition("inserted"),
 853                DiffLine::Garbage("garbage"),
 854                DiffLine::Context(""),
 855                DiffLine::OldPath {
 856                    path: "b/file.txt".into()
 857                },
 858                DiffLine::NewPath {
 859                    path: "a/file.txt".into()
 860                },
 861            ]
 862        );
 863    }
 864
 865    #[test]
 866    fn file_header_extra_space() {
 867        let options = ["--- file", "---   file", "---\tfile"];
 868
 869        for option in options {
 870            assert_eq!(
 871                DiffLine::parse(option),
 872                DiffLine::OldPath {
 873                    path: "file".into()
 874                },
 875                "{option}",
 876            );
 877        }
 878    }
 879
 880    #[test]
 881    fn hunk_header_extra_space() {
 882        let options = [
 883            "@@ -1,2 +1,3 @@",
 884            "@@  -1,2  +1,3 @@",
 885            "@@\t-1,2\t+1,3\t@@",
 886            "@@ -1,2  +1,3 @@",
 887            "@@ -1,2   +1,3 @@",
 888            "@@ -1,2 +1,3   @@",
 889            "@@ -1,2 +1,3 @@ garbage",
 890        ];
 891
 892        for option in options {
 893            assert_eq!(
 894                DiffLine::parse(option),
 895                DiffLine::HunkHeader(Some(HunkLocation {
 896                    start_line_old: 0,
 897                    count_old: 2,
 898                    start_line_new: 0,
 899                    count_new: 3
 900                })),
 901                "{option}",
 902            );
 903        }
 904    }
 905
 906    #[test]
 907    fn hunk_header_without_location() {
 908        assert_eq!(DiffLine::parse("@@ ... @@"), DiffLine::HunkHeader(None));
 909    }
 910
 911    #[test]
 912    fn test_parse_path() {
 913        assert_eq!(parse_header_path("a/", "foo.txt"), "foo.txt");
 914        assert_eq!(
 915            parse_header_path("a/", "foo/bar/baz.txt"),
 916            "foo/bar/baz.txt"
 917        );
 918        assert_eq!(parse_header_path("a/", "a/foo.txt"), "foo.txt");
 919        assert_eq!(
 920            parse_header_path("a/", "a/foo/bar/baz.txt"),
 921            "foo/bar/baz.txt"
 922        );
 923
 924        // Extra
 925        assert_eq!(
 926            parse_header_path("a/", "a/foo/bar/baz.txt  2025"),
 927            "foo/bar/baz.txt"
 928        );
 929        assert_eq!(
 930            parse_header_path("a/", "a/foo/bar/baz.txt\t2025"),
 931            "foo/bar/baz.txt"
 932        );
 933        assert_eq!(
 934            parse_header_path("a/", "a/foo/bar/baz.txt \""),
 935            "foo/bar/baz.txt"
 936        );
 937
 938        // Quoted
 939        assert_eq!(
 940            parse_header_path("a/", "a/foo/bar/\"baz quox.txt\""),
 941            "foo/bar/baz quox.txt"
 942        );
 943        assert_eq!(
 944            parse_header_path("a/", "\"a/foo/bar/baz quox.txt\""),
 945            "foo/bar/baz quox.txt"
 946        );
 947        assert_eq!(
 948            parse_header_path("a/", "\"foo/bar/baz quox.txt\""),
 949            "foo/bar/baz quox.txt"
 950        );
 951        assert_eq!(parse_header_path("a/", "\"whatever 🤷\""), "whatever 🤷");
 952        assert_eq!(
 953            parse_header_path("a/", "\"foo/bar/baz quox.txt\"  2025"),
 954            "foo/bar/baz quox.txt"
 955        );
 956        // unescaped quotes are dropped
 957        assert_eq!(parse_header_path("a/", "foo/\"bar\""), "foo/bar");
 958
 959        // Escaped
 960        assert_eq!(
 961            parse_header_path("a/", "\"foo/\\\"bar\\\"/baz.txt\""),
 962            "foo/\"bar\"/baz.txt"
 963        );
 964        assert_eq!(
 965            parse_header_path("a/", "\"C:\\\\Projects\\\\My App\\\\old file.txt\""),
 966            "C:\\Projects\\My App\\old file.txt"
 967        );
 968    }
 969
 970    #[test]
 971    fn test_parse_diff_with_leading_and_trailing_garbage() {
 972        let diff = indoc! {"
 973            I need to make some changes.
 974
 975            I'll change the following things:
 976            - one
 977              - two
 978            - three
 979
 980            ```
 981            --- a/file.txt
 982            +++ b/file.txt
 983             one
 984            +AND
 985             two
 986            ```
 987
 988            Summary of what I did:
 989            - one
 990              - two
 991            - three
 992
 993            That's about it.
 994        "};
 995
 996        let mut events = Vec::new();
 997        let mut parser = DiffParser::new(diff);
 998        while let Some(event) = parser.next().unwrap() {
 999            events.push(event);
1000        }
1001
1002        assert_eq!(
1003            events,
1004            &[
1005                DiffEvent::Hunk {
1006                    path: "file.txt".into(),
1007                    hunk: Hunk {
1008                        context: "one\ntwo\n".into(),
1009                        edits: vec![Edit {
1010                            range: 4..4,
1011                            text: "AND\n".into()
1012                        }],
1013                        start_line: None,
1014                    },
1015                    status: FileStatus::Modified,
1016                },
1017                DiffEvent::FileEnd { renamed_to: None }
1018            ],
1019        )
1020    }
1021
1022    #[test]
1023    fn test_no_newline_at_eof() {
1024        let diff = indoc! {"
1025            --- a/file.py
1026            +++ b/file.py
1027            @@ -55,7 +55,3 @@ class CustomDataset(Dataset):
1028                         torch.set_rng_state(state)
1029                         mask = self.transform(mask)
1030
1031            -        if self.mode == 'Training':
1032            -            return (img, mask, name)
1033            -        else:
1034            -            return (img, mask, name)
1035            \\ No newline at end of file
1036        "};
1037
1038        let mut events = Vec::new();
1039        let mut parser = DiffParser::new(diff);
1040        while let Some(event) = parser.next().unwrap() {
1041            events.push(event);
1042        }
1043
1044        assert_eq!(
1045            events,
1046            &[
1047                DiffEvent::Hunk {
1048                    path: "file.py".into(),
1049                    hunk: Hunk {
1050                        context: concat!(
1051                            "            torch.set_rng_state(state)\n",
1052                            "            mask = self.transform(mask)\n",
1053                            "\n",
1054                            "        if self.mode == 'Training':\n",
1055                            "            return (img, mask, name)\n",
1056                            "        else:\n",
1057                            "            return (img, mask, name)",
1058                        )
1059                        .into(),
1060                        edits: vec![Edit {
1061                            range: 80..203,
1062                            text: "".into()
1063                        }],
1064                        start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
1065                    },
1066                    status: FileStatus::Modified,
1067                },
1068                DiffEvent::FileEnd { renamed_to: None }
1069            ],
1070        );
1071    }
1072
1073    #[test]
1074    fn test_no_newline_at_eof_addition() {
1075        let diff = indoc! {"
1076            --- a/file.txt
1077            +++ b/file.txt
1078            @@ -1,2 +1,3 @@
1079             context
1080            -deleted
1081            +added line
1082            \\ No newline at end of file
1083        "};
1084
1085        let mut events = Vec::new();
1086        let mut parser = DiffParser::new(diff);
1087        while let Some(event) = parser.next().unwrap() {
1088            events.push(event);
1089        }
1090
1091        assert_eq!(
1092            events,
1093            &[
1094                DiffEvent::Hunk {
1095                    path: "file.txt".into(),
1096                    hunk: Hunk {
1097                        context: "context\ndeleted\n".into(),
1098                        edits: vec![Edit {
1099                            range: 8..16,
1100                            text: "added line".into()
1101                        }],
1102                        start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
1103                    },
1104                    status: FileStatus::Modified,
1105                },
1106                DiffEvent::FileEnd { renamed_to: None }
1107            ],
1108        );
1109    }
1110
1111    #[test]
1112    fn test_double_no_newline_at_eof() {
1113        // Two consecutive "no newline" markers - the second should be ignored
1114        let diff = indoc! {"
1115            --- a/file.txt
1116            +++ b/file.txt
1117            @@ -1,3 +1,3 @@
1118             line1
1119            -old
1120            +new
1121             line3
1122            \\ No newline at end of file
1123            \\ No newline at end of file
1124        "};
1125
1126        let mut events = Vec::new();
1127        let mut parser = DiffParser::new(diff);
1128        while let Some(event) = parser.next().unwrap() {
1129            events.push(event);
1130        }
1131
1132        assert_eq!(
1133            events,
1134            &[
1135                DiffEvent::Hunk {
1136                    path: "file.txt".into(),
1137                    hunk: Hunk {
1138                        context: "line1\nold\nline3".into(), // Only one newline removed
1139                        edits: vec![Edit {
1140                            range: 6..10, // "old\n" is 4 bytes
1141                            text: "new\n".into()
1142                        }],
1143                        start_line: Some(0),
1144                    },
1145                    status: FileStatus::Modified,
1146                },
1147                DiffEvent::FileEnd { renamed_to: None }
1148            ],
1149        );
1150    }
1151
1152    #[test]
1153    fn test_no_newline_after_context_not_addition() {
1154        // "No newline" after context lines should remove newline from context,
1155        // not from an earlier addition
1156        let diff = indoc! {"
1157            --- a/file.txt
1158            +++ b/file.txt
1159            @@ -1,4 +1,4 @@
1160             line1
1161            -old
1162            +new
1163             line3
1164             line4
1165            \\ No newline at end of file
1166        "};
1167
1168        let mut events = Vec::new();
1169        let mut parser = DiffParser::new(diff);
1170        while let Some(event) = parser.next().unwrap() {
1171            events.push(event);
1172        }
1173
1174        assert_eq!(
1175            events,
1176            &[
1177                DiffEvent::Hunk {
1178                    path: "file.txt".into(),
1179                    hunk: Hunk {
1180                        // newline removed from line4 (context), not from "new" (addition)
1181                        context: "line1\nold\nline3\nline4".into(),
1182                        edits: vec![Edit {
1183                            range: 6..10,         // "old\n" is 4 bytes
1184                            text: "new\n".into()  // Still has newline
1185                        }],
1186                        start_line: Some(0),
1187                    },
1188                    status: FileStatus::Modified,
1189                },
1190                DiffEvent::FileEnd { renamed_to: None }
1191            ],
1192        );
1193    }
1194
1195    #[test]
1196    fn test_strip_diff_metadata() {
1197        let diff_with_metadata = indoc! {r#"
1198            diff --git a/file.txt b/file.txt
1199            index 1234567..abcdefg 100644
1200            --- a/file.txt
1201            +++ b/file.txt
1202            @@ -1,3 +1,4 @@
1203             context line
1204            -removed line
1205            +added line
1206             more context
1207        "#};
1208
1209        let stripped = strip_diff_metadata(diff_with_metadata);
1210
1211        assert_eq!(
1212            stripped,
1213            indoc! {r#"
1214                --- a/file.txt
1215                +++ b/file.txt
1216                @@ -1,3 +1,4 @@
1217                 context line
1218                -removed line
1219                +added line
1220                 more context
1221            "#}
1222        );
1223    }
1224
1225    #[test]
1226    fn test_apply_diff_to_string_no_trailing_newline() {
1227        // Text without trailing newline; diff generated without
1228        // `\ No newline at end of file` marker.
1229        let text = "line1\nline2\nline3";
1230        let diff = indoc! {"
1231            --- a/file.txt
1232            +++ b/file.txt
1233            @@ -1,3 +1,3 @@
1234             line1
1235            -line2
1236            +replaced
1237             line3
1238        "};
1239
1240        let result = apply_diff_to_string(diff, text).unwrap();
1241        assert_eq!(result, "line1\nreplaced\nline3");
1242    }
1243
1244    #[test]
1245    fn test_apply_diff_to_string_trailing_newline_present() {
1246        // When text has a trailing newline, exact matching still works and
1247        // the fallback is never needed.
1248        let text = "line1\nline2\nline3\n";
1249        let diff = indoc! {"
1250            --- a/file.txt
1251            +++ b/file.txt
1252            @@ -1,3 +1,3 @@
1253             line1
1254            -line2
1255            +replaced
1256             line3
1257        "};
1258
1259        let result = apply_diff_to_string(diff, text).unwrap();
1260        assert_eq!(result, "line1\nreplaced\nline3\n");
1261    }
1262
1263    #[test]
1264    fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() {
1265        // Deletion of the last line when text has no trailing newline.
1266        // The edit range must be clamped so it doesn't index past the
1267        // end of the text.
1268        let text = "line1\nline2\nline3";
1269        let diff = indoc! {"
1270            --- a/file.txt
1271            +++ b/file.txt
1272            @@ -1,3 +1,2 @@
1273             line1
1274             line2
1275            -line3
1276        "};
1277
1278        let result = apply_diff_to_string(diff, text).unwrap();
1279        assert_eq!(result, "line1\nline2\n");
1280    }
1281
1282    #[test]
1283    fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() {
1284        // Replace the last line when text has no trailing newline.
1285        let text = "aaa\nbbb\nccc";
1286        let diff = indoc! {"
1287            --- a/file.txt
1288            +++ b/file.txt
1289            @@ -1,3 +1,3 @@
1290             aaa
1291             bbb
1292            -ccc
1293            +ddd
1294        "};
1295
1296        let result = apply_diff_to_string(diff, text).unwrap();
1297        assert_eq!(result, "aaa\nbbb\nddd");
1298    }
1299
1300    #[test]
1301    fn test_apply_diff_to_string_multibyte_no_trailing_newline() {
1302        // Multi-byte UTF-8 characters near the end; ensures char boundary
1303        // safety when the fallback clamps edit ranges.
1304        let text = "hello\n세계";
1305        let diff = indoc! {"
1306            --- a/file.txt
1307            +++ b/file.txt
1308            @@ -1,2 +1,2 @@
1309             hello
1310            -세계
1311            +world
1312        "};
1313
1314        let result = apply_diff_to_string(diff, text).unwrap();
1315        assert_eq!(result, "hello\nworld");
1316    }
1317
1318    #[test]
1319    fn test_find_context_candidates_no_false_positive_mid_text() {
1320        // The stripped fallback must only match at the end of text, not in
1321        // the middle where a real newline exists.
1322        let text = "aaa\nbbb\nccc\n";
1323        let mut hunk = Hunk {
1324            context: "bbb\n".into(),
1325            edits: vec![],
1326            start_line: None,
1327        };
1328
1329        let candidates = find_context_candidates(text, &mut hunk);
1330        // Exact match at offset 4 — the fallback is not used.
1331        assert_eq!(candidates, vec![4]);
1332    }
1333
1334    #[test]
1335    fn test_find_context_candidates_fallback_at_end() {
1336        let text = "aaa\nbbb";
1337        let mut hunk = Hunk {
1338            context: "bbb\n".into(),
1339            edits: vec![],
1340            start_line: None,
1341        };
1342
1343        let candidates = find_context_candidates(text, &mut hunk);
1344        assert_eq!(candidates, vec![4]);
1345        // Context should be stripped.
1346        assert_eq!(hunk.context, "bbb");
1347    }
1348
    #[test]
    fn test_find_context_candidates_no_fallback_mid_text() {
        // The text ends with "ccc" and no trailing newline, so the hunk
        // context "bbb\nccc\n" cannot match exactly. The test checks that
        // exactly one candidate is reported (offset 4, where the context
        // without its trailing newline reaches the end of the text) and that
        // the hunk's context is left in its stripped form.
        let text = "aaa\nbbb\nccc";
        let mut hunk = Hunk {
            context: "bbb\nccc\n".into(),
            edits: vec![],
            start_line: None,
        };

        let candidates = find_context_candidates(text, &mut hunk);
        // No exact match (text ends without newline after "ccc"), but the
        // stripped context "bbb\nccc" matches at offset 4, which is the end.
        assert_eq!(candidates, vec![4]);
        assert_eq!(hunk.context, "bbb\nccc");
    }
1367
1368    #[test]
1369    fn test_find_context_candidates_clamps_edit_ranges() {
1370        let text = "aaa\nbbb";
1371        let mut hunk = Hunk {
1372            context: "aaa\nbbb\n".into(),
1373            edits: vec![Edit {
1374                range: 4..8, // "bbb\n" — end points at the trailing \n
1375                text: "ccc\n".into(),
1376            }],
1377            start_line: None,
1378        };
1379
1380        let candidates = find_context_candidates(text, &mut hunk);
1381        assert_eq!(candidates, vec![0]);
1382        // Edit range end should be clamped to 7 (new context length).
1383        assert_eq!(hunk.edits[0].range, 4..7);
1384    }
1385
1386    #[test]
1387    fn test_unified_diff_with_context_matches_expected_context_window() {
1388        let old_text = "line1\nline2\nline3\nline4\nline5\nCHANGE_ME\nline7\nline8\n";
1389        let new_text = "line1\nline2\nline3\nline4\nline5\nCHANGED\nline7\nline8\n";
1390
1391        let diff_default = unified_diff_with_context(old_text, new_text, 0, 0, 3);
1392        assert_eq!(
1393            diff_default,
1394            "@@ -3,6 +3,6 @@\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n"
1395        );
1396
1397        let diff_full_context = unified_diff_with_context(old_text, new_text, 0, 0, 8);
1398        assert_eq!(
1399            diff_full_context,
1400            "@@ -1,8 +1,8 @@\n line1\n line2\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n"
1401        );
1402
1403        let diff_no_context = unified_diff_with_context(old_text, new_text, 0, 0, 0);
1404        assert_eq!(diff_no_context, "@@ -6,1 +6,1 @@\n-CHANGE_ME\n+CHANGED\n");
1405    }
1406}