1use std::{
2 borrow::Cow,
3 fmt::{Display, Write},
4 mem,
5 ops::Range,
6};
7
8use anyhow::{Context as _, Result, anyhow};
9use imara_diff::{
10 Algorithm, Sink, diff,
11 intern::{InternedInput, Interner, Token},
12};
13
14pub fn strip_diff_path_prefix<'a>(diff: &'a str, prefix: &str) -> Cow<'a, str> {
15 if prefix.is_empty() {
16 return Cow::Borrowed(diff);
17 }
18
19 let prefix_with_slash = format!("{}/", prefix);
20 let mut needs_rewrite = false;
21
22 for line in diff.lines() {
23 match DiffLine::parse(line) {
24 DiffLine::OldPath { path } | DiffLine::NewPath { path } => {
25 if path.starts_with(&prefix_with_slash) {
26 needs_rewrite = true;
27 break;
28 }
29 }
30 _ => {}
31 }
32 }
33
34 if !needs_rewrite {
35 return Cow::Borrowed(diff);
36 }
37
38 let mut result = String::with_capacity(diff.len());
39 for line in diff.lines() {
40 match DiffLine::parse(line) {
41 DiffLine::OldPath { path } => {
42 let stripped = path
43 .strip_prefix(&prefix_with_slash)
44 .unwrap_or(path.as_ref());
45 result.push_str(&format!("--- a/{}\n", stripped));
46 }
47 DiffLine::NewPath { path } => {
48 let stripped = path
49 .strip_prefix(&prefix_with_slash)
50 .unwrap_or(path.as_ref());
51 result.push_str(&format!("+++ b/{}\n", stripped));
52 }
53 _ => {
54 result.push_str(line);
55 result.push('\n');
56 }
57 }
58 }
59
60 Cow::Owned(result)
61}
62
63/// Strip unnecessary git metadata lines from a diff, keeping only the lines
64/// needed for patch application: path headers (--- and +++), hunk headers (@@),
65/// and content lines (+, -, space).
66pub fn strip_diff_metadata(diff: &str) -> String {
67 let mut result = String::new();
68
69 for line in diff.lines() {
70 let dominated = DiffLine::parse(line);
71 match dominated {
72 // Keep path headers, hunk headers, and content lines
73 DiffLine::OldPath { .. }
74 | DiffLine::NewPath { .. }
75 | DiffLine::HunkHeader(_)
76 | DiffLine::Context(_)
77 | DiffLine::Deletion(_)
78 | DiffLine::Addition(_)
79 | DiffLine::NoNewlineAtEOF => {
80 result.push_str(line);
81 result.push('\n');
82 }
83 // Skip garbage lines (diff --git, index, etc.)
84 DiffLine::Garbage(_) => {}
85 }
86 }
87
88 result
89}
90
/// Marker used to encode cursor position in patch comment lines.
///
/// A patch line that starts with `#` and contains this marker is treated as a
/// cursor annotation rather than as part of the diff.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
93
94/// Extract cursor offset from a patch and return `(clean_patch, cursor_offset)`.
95///
96/// Cursor position is encoded as a comment line (starting with `#`) containing
97/// `[CURSOR_POSITION]`. A `^` in the line indicates the cursor column; a `<`
98/// indicates column 0. The offset is computed relative to addition (`+`) and
99/// context (` `) lines accumulated so far in the hunk, which represent the
100/// cursor position within the new text contributed by the hunk.
101pub fn extract_cursor_from_patch(patch: &str) -> (String, Option<usize>) {
102 let mut clean_patch = String::new();
103 let mut cursor_offset: Option<usize> = None;
104 let mut line_start_offset = 0usize;
105 let mut prev_line_start_offset = 0usize;
106
107 for line in patch.lines() {
108 let diff_line = DiffLine::parse(line);
109
110 match &diff_line {
111 DiffLine::Garbage(content)
112 if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER) =>
113 {
114 let caret_column = if let Some(caret_pos) = content.find('^') {
115 caret_pos
116 } else if content.find('<').is_some() {
117 0
118 } else {
119 continue;
120 };
121 let cursor_column = caret_column.saturating_sub('#'.len_utf8());
122 cursor_offset = Some(prev_line_start_offset + cursor_column);
123 }
124 _ => {
125 if !clean_patch.is_empty() {
126 clean_patch.push('\n');
127 }
128 clean_patch.push_str(line);
129
130 match diff_line {
131 DiffLine::Addition(content) | DiffLine::Context(content) => {
132 prev_line_start_offset = line_start_offset;
133 line_start_offset += content.len() + 1;
134 }
135 _ => {}
136 }
137 }
138 }
139 }
140
141 if patch.ends_with('\n') && !clean_patch.is_empty() {
142 clean_patch.push('\n');
143 }
144
145 (clean_patch, cursor_offset)
146}
147
148/// Find all byte offsets where `hunk.context` occurs as a substring of `text`.
149///
150/// If no exact matches are found and the context ends with `'\n'` but `text`
151/// does not, retries without the trailing newline, accepting only a match at
152/// the very end of `text`. When this fallback fires, the hunk's context is
153/// trimmed and its edit ranges are clamped so that downstream code doesn't
154/// index past the end of the matched region. This handles diffs that are
155/// missing a `\ No newline at end of file` marker: the parser always appends
156/// `'\n'` via `writeln!`, so the context can have a trailing newline that
157/// doesn't exist in the source text.
158pub fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec<usize> {
159 let candidates: Vec<usize> = text
160 .match_indices(&hunk.context)
161 .map(|(offset, _)| offset)
162 .collect();
163
164 if !candidates.is_empty() {
165 return candidates;
166 }
167
168 if hunk.context.ends_with('\n') && !hunk.context.is_empty() {
169 let old_len = hunk.context.len();
170 hunk.context.pop();
171 let new_len = hunk.context.len();
172
173 if !hunk.context.is_empty() {
174 let candidates: Vec<usize> = text
175 .match_indices(&hunk.context)
176 .filter(|(offset, _)| offset + new_len == text.len())
177 .map(|(offset, _)| offset)
178 .collect();
179
180 if !candidates.is_empty() {
181 for edit in &mut hunk.edits {
182 let touched_phantom = edit.range.end > new_len;
183 edit.range.start = edit.range.start.min(new_len);
184 edit.range.end = edit.range.end.min(new_len);
185 if touched_phantom {
186 // The replacement text was also written with a
187 // trailing '\n' that corresponds to the phantom
188 // newline we just removed from the context.
189 if edit.text.ends_with('\n') {
190 edit.text.pop();
191 }
192 }
193 }
194 return candidates;
195 }
196
197 // Restore if fallback didn't help either.
198 hunk.context.push('\n');
199 debug_assert_eq!(hunk.context.len(), old_len);
200 } else {
201 hunk.context.push('\n');
202 }
203 }
204
205 Vec::new()
206}
207
/// Picks one offset out of `candidates`, using the expected line number to
/// break ties.
///
/// * No candidates: returns `None`.
/// * Exactly one candidate: returns it without consulting `offset_to_line`.
/// * Several candidates: returns the first one whose line (as reported by
///   `offset_to_line`) equals `expected_line`, or `None` if `expected_line`
///   is absent or no candidate lies on that line.
pub fn disambiguate_by_line_number(
    candidates: &[usize],
    expected_line: Option<u32>,
    offset_to_line: &dyn Fn(usize) -> u32,
) -> Option<usize> {
    if let [only] = candidates {
        return Some(*only);
    }

    // Zero or many candidates from here on; an empty slice simply finds nothing.
    let wanted_line = expected_line?;
    for &offset in candidates {
        if offset_to_line(offset) == wanted_line {
            return Some(offset);
        }
    }
    None
}
227
228pub fn unified_diff_with_context(
229 old_text: &str,
230 new_text: &str,
231 old_start_line: u32,
232 new_start_line: u32,
233 context_lines: u32,
234) -> String {
235 let input = InternedInput::new(old_text, new_text);
236 diff(
237 Algorithm::Histogram,
238 &input,
239 OffsetUnifiedDiffBuilder::new(&input, old_start_line, new_start_line, context_lines),
240 )
241}
242
/// Diff sink that renders changes as unified-diff hunks whose header line
/// numbers are shifted by caller-supplied offsets.
struct OffsetUnifiedDiffBuilder<'a> {
    /// Tokens of the old text.
    before: &'a [Token],
    /// Tokens of the new text.
    after: &'a [Token],
    /// Maps a token back to the text it stands for.
    interner: &'a Interner<&'a str>,
    /// Index into `before` up to which lines have been written or skipped.
    pos: u32,
    /// Index in `before` of the first line of the hunk being built.
    before_hunk_start: u32,
    /// Index in `after` of the first line of the hunk being built.
    after_hunk_start: u32,
    /// Number of old-side lines in the hunk being built.
    before_hunk_len: u32,
    /// Number of new-side lines in the hunk being built.
    after_hunk_len: u32,
    /// Added to the old-side start line printed in hunk headers.
    old_line_offset: u32,
    /// Added to the new-side start line printed in hunk headers.
    new_line_offset: u32,
    /// Number of unchanged lines kept around each change.
    context_lines: u32,
    /// Body lines of the hunk being built; moved into `dst` on flush.
    buffer: String,
    /// Finished output.
    dst: String,
}
258
259impl<'a> OffsetUnifiedDiffBuilder<'a> {
260 fn new(
261 input: &'a InternedInput<&'a str>,
262 old_line_offset: u32,
263 new_line_offset: u32,
264 context_lines: u32,
265 ) -> Self {
266 Self {
267 before_hunk_start: 0,
268 after_hunk_start: 0,
269 before_hunk_len: 0,
270 after_hunk_len: 0,
271 old_line_offset,
272 new_line_offset,
273 context_lines,
274 buffer: String::with_capacity(8),
275 dst: String::new(),
276 interner: &input.interner,
277 before: &input.before,
278 after: &input.after,
279 pos: 0,
280 }
281 }
282
283 fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
284 for &token in tokens {
285 writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
286 }
287 }
288
289 fn flush(&mut self) {
290 if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
291 return;
292 }
293
294 let end = (self.pos + self.context_lines).min(self.before.len() as u32);
295 self.update_pos(end, end);
296
297 writeln!(
298 &mut self.dst,
299 "@@ -{},{} +{},{} @@",
300 self.before_hunk_start + 1 + self.old_line_offset,
301 self.before_hunk_len,
302 self.after_hunk_start + 1 + self.new_line_offset,
303 self.after_hunk_len,
304 )
305 .unwrap();
306 write!(&mut self.dst, "{}", &self.buffer).unwrap();
307 self.buffer.clear();
308 self.before_hunk_len = 0;
309 self.after_hunk_len = 0;
310 }
311
312 fn update_pos(&mut self, print_to: u32, move_to: u32) {
313 self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
314 let len = print_to - self.pos;
315 self.before_hunk_len += len;
316 self.after_hunk_len += len;
317 self.pos = move_to;
318 }
319}
320
321impl Sink for OffsetUnifiedDiffBuilder<'_> {
322 type Out = String;
323
324 fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
325 if before.start - self.pos > self.context_lines * 2 {
326 self.flush();
327 }
328 if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
329 self.pos = before.start.saturating_sub(self.context_lines);
330 self.before_hunk_start = self.pos;
331 self.after_hunk_start = after.start.saturating_sub(self.context_lines);
332 }
333
334 self.update_pos(before.start, before.end);
335 self.before_hunk_len += before.end - before.start;
336 self.after_hunk_len += after.end - after.start;
337 self.print_tokens(
338 &self.before[before.start as usize..before.end as usize],
339 '-',
340 );
341 self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
342 }
343
344 fn finish(mut self) -> Self::Out {
345 self.flush();
346 self.dst
347 }
348}
349
350pub fn encode_cursor_in_patch(patch: &str, cursor_offset: Option<usize>) -> String {
351 let Some(cursor_offset) = cursor_offset else {
352 return patch.to_string();
353 };
354
355 let mut result = String::new();
356 let mut line_start_offset = 0usize;
357
358 for line in patch.lines() {
359 if matches!(
360 DiffLine::parse(line),
361 DiffLine::Garbage(content)
362 if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER)
363 ) {
364 continue;
365 }
366
367 if !result.is_empty() {
368 result.push('\n');
369 }
370 result.push_str(line);
371
372 match DiffLine::parse(line) {
373 DiffLine::Addition(content) => {
374 let line_end_offset = line_start_offset + content.len();
375
376 if cursor_offset >= line_start_offset && cursor_offset <= line_end_offset {
377 let cursor_column = cursor_offset - line_start_offset;
378
379 result.push('\n');
380 result.push('#');
381 for _ in 0..cursor_column {
382 result.push(' ');
383 }
384 write!(result, "^{}", CURSOR_POSITION_MARKER).unwrap();
385 }
386
387 line_start_offset = line_end_offset + 1;
388 }
389 DiffLine::Context(content) => {
390 line_start_offset += content.len() + 1;
391 }
392 _ => {}
393 }
394 }
395
396 if patch.ends_with('\n') {
397 result.push('\n');
398 }
399
400 result
401}
402
403pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
404 apply_diff_to_string_with_hunk_offset(diff_str, text).map(|(text, _)| text)
405}
406
407/// Applies a diff to a string and returns the result along with the offset where
408/// the first hunk's context matched in the original text. This offset can be used
409/// to adjust cursor positions that are relative to the hunk's content.
410pub fn apply_diff_to_string_with_hunk_offset(
411 diff_str: &str,
412 text: &str,
413) -> Result<(String, Option<usize>)> {
414 let mut diff = DiffParser::new(diff_str);
415
416 let mut text = text.to_string();
417 let mut first_hunk_offset = None;
418
419 while let Some(event) = diff.next().context("Failed to parse diff")? {
420 match event {
421 DiffEvent::Hunk {
422 mut hunk,
423 path: _,
424 status: _,
425 } => {
426 let candidates = find_context_candidates(&text, &mut hunk);
427
428 let hunk_offset =
429 disambiguate_by_line_number(&candidates, hunk.start_line, &|offset| {
430 text[..offset].matches('\n').count() as u32
431 })
432 .ok_or_else(|| anyhow!("couldn't resolve hunk"))?;
433
434 if first_hunk_offset.is_none() {
435 first_hunk_offset = Some(hunk_offset);
436 }
437
438 for edit in hunk.edits.iter().rev() {
439 let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end);
440 text.replace_range(range, &edit.text);
441 }
442 }
443 DiffEvent::FileEnd { .. } => {}
444 }
445 }
446
447 Ok((text, first_hunk_offset))
448}
449
/// The pair of paths taken from a file's `---` / `+++` header lines.
struct PatchFile<'a> {
    /// Path from the `---` header.
    old_path: Cow<'a, str>,
    /// Path from the `+++` header; empty until that header has been seen.
    new_path: Cow<'a, str>,
}
454
/// Pull parser that turns unified-diff text into a sequence of [`DiffEvent`]s.
pub struct DiffParser<'a> {
    /// File whose headers were most recently parsed, if one is open.
    current_file: Option<PatchFile<'a>>,
    /// One line of lookahead: the raw line and its parsed form.
    current_line: Option<(&'a str, DiffLine<'a>)>,
    /// Hunk being accumulated.
    hunk: Hunk,
    /// Remaining, not yet parsed lines.
    diff: std::str::Lines<'a>,
    /// Start line from the most recent located hunk header, attached to the
    /// next hunk that is emitted.
    pending_start_line: Option<u32>,
    /// Whether a "no newline at end of file" marker was already handled for
    /// the hunk being accumulated.
    processed_no_newline: bool,
    /// Kind of the last content line, used to decide what a "no newline"
    /// marker applies to.
    last_diff_op: LastDiffOp,
}
464
/// Kind of the most recently parsed content line within the current hunk.
#[derive(Clone, Copy, Default)]
enum LastDiffOp {
    /// No content line has been seen yet.
    #[default]
    None,
    /// The last content line was an unchanged (context) line.
    Context,
    /// The last content line was a `-` line.
    Deletion,
    /// The last content line was a `+` line.
    Addition,
}
473
/// An event produced by [`DiffParser::next`].
#[derive(Debug, PartialEq)]
pub enum DiffEvent<'a> {
    /// A complete hunk belonging to the current file.
    Hunk {
        /// The new path for created files, the old path otherwise.
        path: Cow<'a, str>,
        /// The parsed hunk.
        hunk: Hunk,
        /// Whether the file is created, modified or deleted.
        status: FileStatus,
    },
    /// The current file's section of the diff has ended.
    FileEnd {
        /// The new path, when it differs from the old path and the old path
        /// is not `/dev/null`.
        renamed_to: Option<Cow<'a, str>>,
    },
}
485
/// What a diff does to a file, derived from its header paths.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileStatus {
    /// The old path is `/dev/null`.
    Created,
    /// Neither path is `/dev/null`.
    Modified,
    /// The new path is `/dev/null` (and the old path is not).
    Deleted,
}
492
/// One hunk of a diff, expressed as the old text it must match plus the edits
/// to apply to that text.
#[derive(Debug, Default, PartialEq)]
pub struct Hunk {
    /// The hunk's context and deleted lines, in order, each followed by `'\n'`
    /// (the final newline is removed by a "no newline at end of file" marker).
    pub context: String,
    /// Edits to apply; their ranges are byte ranges within `context`.
    pub edits: Vec<Edit>,
    /// Zero-based old-file start line from the hunk header, when the header
    /// carried a location.
    pub start_line: Option<u32>,
}
499
500impl Hunk {
501 pub fn is_empty(&self) -> bool {
502 self.context.is_empty() && self.edits.is_empty()
503 }
504}
505
/// A single replacement within a [`Hunk`].
#[derive(Debug, PartialEq)]
pub struct Edit {
    /// Byte range within the hunk's context that is replaced; empty for a
    /// pure insertion.
    pub range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    pub text: String,
}
511
impl<'a> DiffParser<'a> {
    /// Creates a parser over `diff`, parsing the first line immediately so
    /// that [`next`](Self::next) always has one line of lookahead.
    pub fn new(diff: &'a str) -> Self {
        let mut diff = diff.lines();
        let current_line = diff.next().map(|line| (line, DiffLine::parse(line)));
        DiffParser {
            current_file: None,
            hunk: Hunk::default(),
            current_line,
            diff,
            pending_start_line: None,
            processed_no_newline: false,
            last_diff_op: LastDiffOp::None,
        }
    }

    /// Advances the parser and returns the next event, or `Ok(None)` once the
    /// input is exhausted.
    ///
    /// A hunk is emitted when the lookahead line is a hunk header, a `---`
    /// header, a garbage line, or the end of input, provided a file is open
    /// and the accumulated hunk is non-empty. A `FileEnd` is emitted when the
    /// lookahead is a `---` header, a garbage line, or the end of input while
    /// a file is open. Content lines seen while no file is open are ignored.
    ///
    /// # Errors
    ///
    /// Returns an error, annotated with the offending line, if writing a line
    /// into the hunk's context buffer fails.
    pub fn next(&mut self) -> Result<Option<DiffEvent<'a>>> {
        loop {
            // Decide from the lookahead line whether the current hunk and/or
            // the current file are complete.
            let (hunk_done, file_done) = match self.current_line.as_ref().map(|e| &e.1) {
                Some(DiffLine::OldPath { .. }) | Some(DiffLine::Garbage(_)) | None => (true, true),
                Some(DiffLine::HunkHeader(_)) => (true, false),
                _ => (false, false),
            };

            if hunk_done {
                if let Some(file) = &self.current_file
                    && !self.hunk.is_empty()
                {
                    let status = if file.old_path == "/dev/null" {
                        FileStatus::Created
                    } else if file.new_path == "/dev/null" {
                        FileStatus::Deleted
                    } else {
                        FileStatus::Modified
                    };
                    // A created file has no meaningful old path.
                    let path = if status == FileStatus::Created {
                        file.new_path.clone()
                    } else {
                        file.old_path.clone()
                    };
                    let mut hunk = mem::take(&mut self.hunk);
                    hunk.start_line = self.pending_start_line.take();
                    // Per-hunk state starts fresh for the next hunk.
                    self.processed_no_newline = false;
                    self.last_diff_op = LastDiffOp::None;
                    // The lookahead line is left in place; the next call sees
                    // an empty hunk and moves on.
                    return Ok(Some(DiffEvent::Hunk { path, hunk, status }));
                }
            }

            if file_done {
                if let Some(PatchFile { old_path, new_path }) = self.current_file.take() {
                    // NOTE(review): for a deleted file `new_path` is
                    // "/dev/null", so this reports `renamed_to:
                    // Some("/dev/null")`; likewise an unset `new_path` yields
                    // `Some("")`. Confirm callers expect that.
                    return Ok(Some(DiffEvent::FileEnd {
                        renamed_to: if old_path != new_path && old_path != "/dev/null" {
                            Some(new_path)
                        } else {
                            None
                        },
                    }));
                }
            }

            let Some((line, parsed_line)) = self.current_line.take() else {
                break;
            };

            // Run the line handling in a closure so that `?` failures can be
            // annotated with the line being processed.
            (|| {
                match parsed_line {
                    DiffLine::OldPath { path } => {
                        self.current_file = Some(PatchFile {
                            old_path: path,
                            new_path: "".into(),
                        });
                    }
                    DiffLine::NewPath { path } => {
                        if let Some(current_file) = &mut self.current_file {
                            current_file.new_path = path
                        }
                    }
                    DiffLine::HunkHeader(location) => {
                        if let Some(loc) = location {
                            self.pending_start_line = Some(loc.start_line_old);
                        }
                    }
                    DiffLine::Context(ctx) => {
                        if self.current_file.is_some() {
                            writeln!(&mut self.hunk.context, "{ctx}")?;
                            self.last_diff_op = LastDiffOp::Context;
                        }
                    }
                    DiffLine::Deletion(del) => {
                        if self.current_file.is_some() {
                            // The deleted line (with its newline) becomes part
                            // of the context and is covered by an edit range.
                            let range = self.hunk.context.len()
                                ..self.hunk.context.len() + del.len() + '\n'.len_utf8();
                            // Extend the previous edit when it ends exactly
                            // where this deletion starts.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                last_edit.range.end = range.end;
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: String::new(),
                                });
                            }
                            writeln!(&mut self.hunk.context, "{del}")?;
                            self.last_diff_op = LastDiffOp::Deletion;
                        }
                    }
                    DiffLine::Addition(add) => {
                        if self.current_file.is_some() {
                            // An addition is an empty range at the current end
                            // of the context.
                            let range = self.hunk.context.len()..self.hunk.context.len();
                            // Append to the previous edit when it ends at the
                            // insertion point.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                writeln!(&mut last_edit.text, "{add}").unwrap();
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: format!("{add}\n"),
                                });
                            }
                            self.last_diff_op = LastDiffOp::Addition;
                        }
                    }
                    DiffLine::NoNewlineAtEOF => {
                        // Only the first marker in a hunk has an effect.
                        if !self.processed_no_newline {
                            self.processed_no_newline = true;
                            match self.last_diff_op {
                                LastDiffOp::Addition => {
                                    // Remove trailing newline from the last addition
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.text.pop();
                                    }
                                }
                                LastDiffOp::Deletion => {
                                    // Remove trailing newline from context (which includes the deletion)
                                    self.hunk.context.pop();
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.range.end -= 1;
                                    }
                                }
                                LastDiffOp::Context | LastDiffOp::None => {
                                    // Remove trailing newline from context
                                    self.hunk.context.pop();
                                }
                            }
                        }
                    }
                    DiffLine::Garbage(_) => {}
                }

                anyhow::Ok(())
            })()
            .with_context(|| format!("on line:\n\n```\n{}```", line))?;

            // Refill the lookahead.
            self.current_line = self.diff.next().map(|line| (line, DiffLine::parse(line)));
        }

        anyhow::Ok(None)
    }
}
670
/// A single line of diff text, classified by [`DiffLine::parse`].
#[derive(Debug, PartialEq)]
pub enum DiffLine<'a> {
    /// `--- <path>` header; a leading `a/` is removed from the path.
    OldPath { path: Cow<'a, str> },
    /// `+++ <path>` header; a leading `b/` is removed from the path.
    NewPath { path: Cow<'a, str> },
    /// `@@ ... @@` header; `None` when the header starts with `...` instead of
    /// line ranges.
    HunkHeader(Option<HunkLocation>),
    /// Unchanged line, without its leading space. An empty line is also
    /// treated as an empty context line.
    Context(&'a str),
    /// Removed line, without its leading `-`.
    Deletion(&'a str),
    /// Added line, without its leading `+`.
    Addition(&'a str),
    /// A line starting with `\ No newline`.
    NoNewlineAtEOF,
    /// Any line that matches none of the above; holds the whole line.
    Garbage(&'a str),
}
682
/// Line ranges taken from a `@@ -a,b +c,d @@` hunk header.
#[derive(Debug, PartialEq)]
pub struct HunkLocation {
    /// Old-side start line, stored zero-based (the header value minus one,
    /// saturating at zero).
    pub start_line_old: u32,
    /// Old-side line count; `1` when the header omits it.
    pub count_old: u32,
    /// New-side start line, stored zero-based (the header value minus one,
    /// saturating at zero).
    pub start_line_new: u32,
    /// New-side line count; `1` when the header omits it.
    pub count_new: u32,
}
690
691impl<'a> DiffLine<'a> {
692 pub fn parse(line: &'a str) -> Self {
693 Self::try_parse(line).unwrap_or(Self::Garbage(line))
694 }
695
696 fn try_parse(line: &'a str) -> Option<Self> {
697 if line.starts_with("\\ No newline") {
698 return Some(Self::NoNewlineAtEOF);
699 }
700 if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
701 let path = parse_header_path("a/", header);
702 Some(Self::OldPath { path })
703 } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
704 Some(Self::NewPath {
705 path: parse_header_path("b/", header),
706 })
707 } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
708 if header.starts_with("...") {
709 return Some(Self::HunkHeader(None));
710 }
711
712 let mut tokens = header.split_whitespace();
713 let old_range = tokens.next()?.strip_prefix('-')?;
714 let new_range = tokens.next()?.strip_prefix('+')?;
715
716 let (start_line_old, count_old) = old_range.split_once(',').unwrap_or((old_range, "1"));
717 let (start_line_new, count_new) = new_range.split_once(',').unwrap_or((new_range, "1"));
718
719 Some(Self::HunkHeader(Some(HunkLocation {
720 start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
721 count_old: count_old.parse().ok()?,
722 start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
723 count_new: count_new.parse().ok()?,
724 })))
725 } else if let Some(deleted_header) = line.strip_prefix("-") {
726 Some(Self::Deletion(deleted_header))
727 } else if line.is_empty() {
728 Some(Self::Context(""))
729 } else if let Some(context) = line.strip_prefix(" ") {
730 Some(Self::Context(context))
731 } else {
732 Some(Self::Addition(line.strip_prefix("+")?))
733 }
734 }
735}
736
737impl<'a> Display for DiffLine<'a> {
738 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
739 match self {
740 DiffLine::OldPath { path } => write!(f, "--- {path}"),
741 DiffLine::NewPath { path } => write!(f, "+++ {path}"),
742 DiffLine::HunkHeader(Some(hunk_location)) => {
743 write!(
744 f,
745 "@@ -{},{} +{},{} @@",
746 hunk_location.start_line_old + 1,
747 hunk_location.count_old,
748 hunk_location.start_line_new + 1,
749 hunk_location.count_new
750 )
751 }
752 DiffLine::HunkHeader(None) => write!(f, "@@ ... @@"),
753 DiffLine::Context(content) => write!(f, " {content}"),
754 DiffLine::Deletion(content) => write!(f, "-{content}"),
755 DiffLine::Addition(content) => write!(f, "+{content}"),
756 DiffLine::NoNewlineAtEOF => write!(f, "\\ No newline at end of file"),
757 DiffLine::Garbage(line) => write!(f, "{line}"),
758 }
759 }
760}
761
/// Extracts the file path from the text following a `---` / `+++` marker.
///
/// The path ends at the first ASCII whitespace outside double quotes. Double
/// quotes are removed, and a backslash makes the following character literal.
/// A leading `strip_prefix` (such as `a/`) is removed once. Headers with no
/// quotes or backslashes are returned as a borrowed slice of `header`.
fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
    let needs_unquoting = header.contains(['"', '\\']);
    if !needs_unquoting {
        // Fast path: the first whitespace-delimited token is the path.
        let token = header.split_ascii_whitespace().next().unwrap_or(header);
        let trimmed = token.strip_prefix(strip_prefix).unwrap_or(token);
        return Cow::Borrowed(trimmed);
    }

    let mut unquoted = String::with_capacity(header.len());
    let mut inside_quotes = false;
    let mut prefix_pending = true;
    let mut remaining = header.chars();

    while let Some(current) = remaining.next() {
        match current {
            '"' => inside_quotes = !inside_quotes,
            '\\' => match remaining.next() {
                Some(escaped) => unquoted.push(escaped),
                // A trailing backslash ends the path.
                None => break,
            },
            other if other.is_ascii_whitespace() && !inside_quotes => break,
            other => unquoted.push(other),
        }

        // Drop the prefix the moment the accumulated text equals it.
        if prefix_pending && unquoted == strip_prefix {
            prefix_pending = false;
            unquoted.clear();
        }
    }

    Cow::Owned(unquoted)
}
798
/// Strips leading ASCII whitespace from `header`, returning `None` when there
/// was none to strip.
fn eat_required_whitespace(header: &str) -> Option<&str> {
    let rest = header.trim_ascii_start();
    let consumed_any = rest.len() != header.len();
    consumed_any.then_some(rest)
}
808
809#[cfg(test)]
810mod tests {
811 use super::*;
812 use indoc::indoc;
813
814 #[test]
815 fn parse_lines_simple() {
816 let input = indoc! {"
817 diff --git a/text.txt b/text.txt
818 index 86c770d..a1fd855 100644
819 --- a/file.txt
820 +++ b/file.txt
821 @@ -1,2 +1,3 @@
822 context
823 -deleted
824 +inserted
825 garbage
826
827 --- b/file.txt
828 +++ a/file.txt
829 "};
830
831 let lines = input.lines().map(DiffLine::parse).collect::<Vec<_>>();
832
833 assert_eq!(
834 lines,
835 &[
836 DiffLine::Garbage("diff --git a/text.txt b/text.txt"),
837 DiffLine::Garbage("index 86c770d..a1fd855 100644"),
838 DiffLine::OldPath {
839 path: "file.txt".into()
840 },
841 DiffLine::NewPath {
842 path: "file.txt".into()
843 },
844 DiffLine::HunkHeader(Some(HunkLocation {
845 start_line_old: 0,
846 count_old: 2,
847 start_line_new: 0,
848 count_new: 3
849 })),
850 DiffLine::Context("context"),
851 DiffLine::Deletion("deleted"),
852 DiffLine::Addition("inserted"),
853 DiffLine::Garbage("garbage"),
854 DiffLine::Context(""),
855 DiffLine::OldPath {
856 path: "b/file.txt".into()
857 },
858 DiffLine::NewPath {
859 path: "a/file.txt".into()
860 },
861 ]
862 );
863 }
864
865 #[test]
866 fn file_header_extra_space() {
867 let options = ["--- file", "--- file", "---\tfile"];
868
869 for option in options {
870 assert_eq!(
871 DiffLine::parse(option),
872 DiffLine::OldPath {
873 path: "file".into()
874 },
875 "{option}",
876 );
877 }
878 }
879
880 #[test]
881 fn hunk_header_extra_space() {
882 let options = [
883 "@@ -1,2 +1,3 @@",
884 "@@ -1,2 +1,3 @@",
885 "@@\t-1,2\t+1,3\t@@",
886 "@@ -1,2 +1,3 @@",
887 "@@ -1,2 +1,3 @@",
888 "@@ -1,2 +1,3 @@",
889 "@@ -1,2 +1,3 @@ garbage",
890 ];
891
892 for option in options {
893 assert_eq!(
894 DiffLine::parse(option),
895 DiffLine::HunkHeader(Some(HunkLocation {
896 start_line_old: 0,
897 count_old: 2,
898 start_line_new: 0,
899 count_new: 3
900 })),
901 "{option}",
902 );
903 }
904 }
905
906 #[test]
907 fn hunk_header_without_location() {
908 assert_eq!(DiffLine::parse("@@ ... @@"), DiffLine::HunkHeader(None));
909 }
910
911 #[test]
912 fn test_parse_path() {
913 assert_eq!(parse_header_path("a/", "foo.txt"), "foo.txt");
914 assert_eq!(
915 parse_header_path("a/", "foo/bar/baz.txt"),
916 "foo/bar/baz.txt"
917 );
918 assert_eq!(parse_header_path("a/", "a/foo.txt"), "foo.txt");
919 assert_eq!(
920 parse_header_path("a/", "a/foo/bar/baz.txt"),
921 "foo/bar/baz.txt"
922 );
923
924 // Extra
925 assert_eq!(
926 parse_header_path("a/", "a/foo/bar/baz.txt 2025"),
927 "foo/bar/baz.txt"
928 );
929 assert_eq!(
930 parse_header_path("a/", "a/foo/bar/baz.txt\t2025"),
931 "foo/bar/baz.txt"
932 );
933 assert_eq!(
934 parse_header_path("a/", "a/foo/bar/baz.txt \""),
935 "foo/bar/baz.txt"
936 );
937
938 // Quoted
939 assert_eq!(
940 parse_header_path("a/", "a/foo/bar/\"baz quox.txt\""),
941 "foo/bar/baz quox.txt"
942 );
943 assert_eq!(
944 parse_header_path("a/", "\"a/foo/bar/baz quox.txt\""),
945 "foo/bar/baz quox.txt"
946 );
947 assert_eq!(
948 parse_header_path("a/", "\"foo/bar/baz quox.txt\""),
949 "foo/bar/baz quox.txt"
950 );
951 assert_eq!(parse_header_path("a/", "\"whatever π€·\""), "whatever π€·");
952 assert_eq!(
953 parse_header_path("a/", "\"foo/bar/baz quox.txt\" 2025"),
954 "foo/bar/baz quox.txt"
955 );
956 // unescaped quotes are dropped
957 assert_eq!(parse_header_path("a/", "foo/\"bar\""), "foo/bar");
958
959 // Escaped
960 assert_eq!(
961 parse_header_path("a/", "\"foo/\\\"bar\\\"/baz.txt\""),
962 "foo/\"bar\"/baz.txt"
963 );
964 assert_eq!(
965 parse_header_path("a/", "\"C:\\\\Projects\\\\My App\\\\old file.txt\""),
966 "C:\\Projects\\My App\\old file.txt"
967 );
968 }
969
970 #[test]
971 fn test_parse_diff_with_leading_and_trailing_garbage() {
972 let diff = indoc! {"
973 I need to make some changes.
974
975 I'll change the following things:
976 - one
977 - two
978 - three
979
980 ```
981 --- a/file.txt
982 +++ b/file.txt
983 one
984 +AND
985 two
986 ```
987
988 Summary of what I did:
989 - one
990 - two
991 - three
992
993 That's about it.
994 "};
995
996 let mut events = Vec::new();
997 let mut parser = DiffParser::new(diff);
998 while let Some(event) = parser.next().unwrap() {
999 events.push(event);
1000 }
1001
1002 assert_eq!(
1003 events,
1004 &[
1005 DiffEvent::Hunk {
1006 path: "file.txt".into(),
1007 hunk: Hunk {
1008 context: "one\ntwo\n".into(),
1009 edits: vec![Edit {
1010 range: 4..4,
1011 text: "AND\n".into()
1012 }],
1013 start_line: None,
1014 },
1015 status: FileStatus::Modified,
1016 },
1017 DiffEvent::FileEnd { renamed_to: None }
1018 ],
1019 )
1020 }
1021
1022 #[test]
1023 fn test_no_newline_at_eof() {
1024 let diff = indoc! {"
1025 --- a/file.py
1026 +++ b/file.py
1027 @@ -55,7 +55,3 @@ class CustomDataset(Dataset):
1028 torch.set_rng_state(state)
1029 mask = self.transform(mask)
1030
1031 - if self.mode == 'Training':
1032 - return (img, mask, name)
1033 - else:
1034 - return (img, mask, name)
1035 \\ No newline at end of file
1036 "};
1037
1038 let mut events = Vec::new();
1039 let mut parser = DiffParser::new(diff);
1040 while let Some(event) = parser.next().unwrap() {
1041 events.push(event);
1042 }
1043
1044 assert_eq!(
1045 events,
1046 &[
1047 DiffEvent::Hunk {
1048 path: "file.py".into(),
1049 hunk: Hunk {
1050 context: concat!(
1051 " torch.set_rng_state(state)\n",
1052 " mask = self.transform(mask)\n",
1053 "\n",
1054 " if self.mode == 'Training':\n",
1055 " return (img, mask, name)\n",
1056 " else:\n",
1057 " return (img, mask, name)",
1058 )
1059 .into(),
1060 edits: vec![Edit {
1061 range: 80..203,
1062 text: "".into()
1063 }],
1064 start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
1065 },
1066 status: FileStatus::Modified,
1067 },
1068 DiffEvent::FileEnd { renamed_to: None }
1069 ],
1070 );
1071 }
1072
1073 #[test]
1074 fn test_no_newline_at_eof_addition() {
1075 let diff = indoc! {"
1076 --- a/file.txt
1077 +++ b/file.txt
1078 @@ -1,2 +1,3 @@
1079 context
1080 -deleted
1081 +added line
1082 \\ No newline at end of file
1083 "};
1084
1085 let mut events = Vec::new();
1086 let mut parser = DiffParser::new(diff);
1087 while let Some(event) = parser.next().unwrap() {
1088 events.push(event);
1089 }
1090
1091 assert_eq!(
1092 events,
1093 &[
1094 DiffEvent::Hunk {
1095 path: "file.txt".into(),
1096 hunk: Hunk {
1097 context: "context\ndeleted\n".into(),
1098 edits: vec![Edit {
1099 range: 8..16,
1100 text: "added line".into()
1101 }],
1102 start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
1103 },
1104 status: FileStatus::Modified,
1105 },
1106 DiffEvent::FileEnd { renamed_to: None }
1107 ],
1108 );
1109 }
1110
1111 #[test]
1112 fn test_double_no_newline_at_eof() {
1113 // Two consecutive "no newline" markers - the second should be ignored
1114 let diff = indoc! {"
1115 --- a/file.txt
1116 +++ b/file.txt
1117 @@ -1,3 +1,3 @@
1118 line1
1119 -old
1120 +new
1121 line3
1122 \\ No newline at end of file
1123 \\ No newline at end of file
1124 "};
1125
1126 let mut events = Vec::new();
1127 let mut parser = DiffParser::new(diff);
1128 while let Some(event) = parser.next().unwrap() {
1129 events.push(event);
1130 }
1131
1132 assert_eq!(
1133 events,
1134 &[
1135 DiffEvent::Hunk {
1136 path: "file.txt".into(),
1137 hunk: Hunk {
1138 context: "line1\nold\nline3".into(), // Only one newline removed
1139 edits: vec![Edit {
1140 range: 6..10, // "old\n" is 4 bytes
1141 text: "new\n".into()
1142 }],
1143 start_line: Some(0),
1144 },
1145 status: FileStatus::Modified,
1146 },
1147 DiffEvent::FileEnd { renamed_to: None }
1148 ],
1149 );
1150 }
1151
1152 #[test]
1153 fn test_no_newline_after_context_not_addition() {
1154 // "No newline" after context lines should remove newline from context,
1155 // not from an earlier addition
1156 let diff = indoc! {"
1157 --- a/file.txt
1158 +++ b/file.txt
1159 @@ -1,4 +1,4 @@
1160 line1
1161 -old
1162 +new
1163 line3
1164 line4
1165 \\ No newline at end of file
1166 "};
1167
1168 let mut events = Vec::new();
1169 let mut parser = DiffParser::new(diff);
1170 while let Some(event) = parser.next().unwrap() {
1171 events.push(event);
1172 }
1173
1174 assert_eq!(
1175 events,
1176 &[
1177 DiffEvent::Hunk {
1178 path: "file.txt".into(),
1179 hunk: Hunk {
1180 // newline removed from line4 (context), not from "new" (addition)
1181 context: "line1\nold\nline3\nline4".into(),
1182 edits: vec![Edit {
1183 range: 6..10, // "old\n" is 4 bytes
1184 text: "new\n".into() // Still has newline
1185 }],
1186 start_line: Some(0),
1187 },
1188 status: FileStatus::Modified,
1189 },
1190 DiffEvent::FileEnd { renamed_to: None }
1191 ],
1192 );
1193 }
1194
1195 #[test]
1196 fn test_strip_diff_metadata() {
1197 let diff_with_metadata = indoc! {r#"
1198 diff --git a/file.txt b/file.txt
1199 index 1234567..abcdefg 100644
1200 --- a/file.txt
1201 +++ b/file.txt
1202 @@ -1,3 +1,4 @@
1203 context line
1204 -removed line
1205 +added line
1206 more context
1207 "#};
1208
1209 let stripped = strip_diff_metadata(diff_with_metadata);
1210
1211 assert_eq!(
1212 stripped,
1213 indoc! {r#"
1214 --- a/file.txt
1215 +++ b/file.txt
1216 @@ -1,3 +1,4 @@
1217 context line
1218 -removed line
1219 +added line
1220 more context
1221 "#}
1222 );
1223 }
1224
/// Applying a diff to text that lacks a trailing newline must succeed even
/// when the diff carries no `\ No newline at end of file` marker, and the
/// result must still lack the trailing newline.
#[test]
fn test_apply_diff_to_string_no_trailing_newline() {
    // The text ends without a newline; the diff below was generated without
    // the `\ No newline at end of file` marker.
    let text = "line1\nline2\nline3";
    // Context lines keep the leading space that marks them in a unified diff.
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         line1
        -line2
        +replaced
         line3
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "line1\nreplaced\nline3");
}
1243
/// When the text ends with a newline, the diff context matches it exactly
/// and the trailing newline is preserved in the result.
#[test]
fn test_apply_diff_to_string_trailing_newline_present() {
    // With a trailing newline present, exact matching works and the
    // no-trailing-newline fallback is never needed.
    let text = "line1\nline2\nline3\n";
    // Context lines keep the leading space that marks them in a unified diff.
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         line1
        -line2
        +replaced
         line3
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "line1\nreplaced\nline3\n");
}
1262
/// Deleting the last line of a text that has no trailing newline must not
/// index past the end of the text.
#[test]
fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() {
    // The deleted line is the last one and is not newline-terminated in the
    // text, so the edit range has to be clamped to the text length.
    let text = "line1\nline2\nline3";
    // Context lines keep the leading space that marks them in a unified diff.
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,2 @@
         line1
         line2
        -line3
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "line1\nline2\n");
}
1281
/// Replacing the last line of a text that has no trailing newline must yield
/// a result that also has no trailing newline.
#[test]
fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() {
    // The replaced line is the final, unterminated line of the text.
    let text = "aaa\nbbb\nccc";
    // Context lines keep the leading space that marks them in a unified diff.
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         aaa
         bbb
        -ccc
        +ddd
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "aaa\nbbb\nddd");
}
1299
/// Multi-byte UTF-8 characters at the end of a text without a trailing
/// newline must be handled without splitting a character.
#[test]
fn test_apply_diff_to_string_multibyte_no_trailing_newline() {
    // The last line consists of multi-byte characters; this exercises char
    // boundary safety when the fallback clamps edit ranges.
    let text = "hello\n세계";
    // Context lines keep the leading space that marks them in a unified diff.
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,2 +1,2 @@
         hello
        -세계
        +world
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "hello\nworld");
}
1317
/// A newline-terminated context that occurs in the middle of a
/// newline-terminated text yields exactly one candidate.
#[test]
fn test_find_context_candidates_no_false_positive_mid_text() {
    // The stripped fallback is only meant to match at the end of the text,
    // never in the middle where a real newline follows the context.
    let haystack = "aaa\nbbb\nccc\n";
    let mut hunk = Hunk {
        start_line: None,
        edits: Vec::new(),
        context: "bbb\n".into(),
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    // "bbb\n" matches exactly at byte offset 4, so the fallback is not used.
    assert_eq!(offsets, vec![4]);
}
1333
/// When the text ends without a newline, a newline-terminated context is
/// still found at the end of the text and the hunk's context is stripped.
#[test]
fn test_find_context_candidates_fallback_at_end() {
    let haystack = "aaa\nbbb";
    let mut hunk = Hunk {
        start_line: None,
        edits: Vec::new(),
        context: "bbb\n".into(),
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    // "bbb" starts at byte offset 4, right after "aaa\n".
    assert_eq!(offsets, vec![4]);
    // The hunk's context no longer carries the trailing newline.
    assert_eq!(hunk.context, "bbb");
}
1348
/// A multi-line context whose final newline is missing from the text is
/// matched once, at the end of the text, with the context stripped.
#[test]
fn test_find_context_candidates_no_fallback_mid_text() {
    // "bbb" is followed by a real newline in the text, but the text stops
    // right after "ccc", so the full context "bbb\nccc\n" has no exact match.
    let haystack = "aaa\nbbb\nccc";
    let mut hunk = Hunk {
        start_line: None,
        edits: Vec::new(),
        context: "bbb\nccc\n".into(),
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    // The stripped context "bbb\nccc" matches at offset 4 and runs to the
    // end of the text; it is the only candidate.
    assert_eq!(offsets, vec![4]);
    assert_eq!(hunk.context, "bbb\nccc");
}
1367
/// When the context's trailing newline is stripped to match the end of the
/// text, edit ranges that reached into that newline are shortened to fit.
#[test]
fn test_find_context_candidates_clamps_edit_ranges() {
    let haystack = "aaa\nbbb";
    // Covers "bbb\n" inside the 8-byte context, trailing newline included.
    let replace_last_line = Edit {
        range: 4..8,
        text: "ccc\n".into(),
    };
    let mut hunk = Hunk {
        start_line: None,
        edits: vec![replace_last_line],
        context: "aaa\nbbb\n".into(),
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    assert_eq!(offsets, vec![0]);
    // The range end is clamped to 7, the length of the stripped context.
    assert_eq!(hunk.edits[0].range, 4..7);
}
1385
/// The last argument of `unified_diff_with_context` controls how many
/// unchanged lines surround the change in the generated hunk.
#[test]
fn test_unified_diff_with_context_matches_expected_context_window() {
    let before = "line1\nline2\nline3\nline4\nline5\nCHANGE_ME\nline7\nline8\n";
    let after = "line1\nline2\nline3\nline4\nline5\nCHANGED\nline7\nline8\n";

    // (context size, expected hunk) pairs, checked in order.
    let cases = [
        // Three lines of context: line3..line5 before, line7..line8 after.
        (
            3,
            "@@ -3,6 +3,6 @@\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n",
        ),
        // A window at least as large as the file shows every line.
        (
            8,
            "@@ -1,8 +1,8 @@\n line1\n line2\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n",
        ),
        // No context: only the changed line appears.
        (0, "@@ -6,1 +6,1 @@\n-CHANGE_ME\n+CHANGED\n"),
    ];

    for (context_lines, expected) in cases {
        let actual = unified_diff_with_context(before, after, 0, 0, context_lines);
        assert_eq!(actual, expected);
    }
}
1406}