1use std::{
2 borrow::Cow,
3 fmt::{Display, Write},
4 mem,
5 ops::Range,
6};
7
8use anyhow::{Context as _, Result, anyhow};
9use imara_diff::{
10 Algorithm, Sink, diff,
11 intern::{InternedInput, Interner, Token},
12};
13
14pub fn strip_diff_path_prefix<'a>(diff: &'a str, prefix: &str) -> Cow<'a, str> {
15 if prefix.is_empty() {
16 return Cow::Borrowed(diff);
17 }
18
19 let prefix_with_slash = format!("{}/", prefix);
20 let mut needs_rewrite = false;
21
22 for line in diff.lines() {
23 match DiffLine::parse(line) {
24 DiffLine::OldPath { path } | DiffLine::NewPath { path } => {
25 if path.starts_with(&prefix_with_slash) {
26 needs_rewrite = true;
27 break;
28 }
29 }
30 _ => {}
31 }
32 }
33
34 if !needs_rewrite {
35 return Cow::Borrowed(diff);
36 }
37
38 let mut result = String::with_capacity(diff.len());
39 for line in diff.lines() {
40 match DiffLine::parse(line) {
41 DiffLine::OldPath { path } => {
42 let stripped = path
43 .strip_prefix(&prefix_with_slash)
44 .unwrap_or(path.as_ref());
45 result.push_str(&format!("--- a/{}\n", stripped));
46 }
47 DiffLine::NewPath { path } => {
48 let stripped = path
49 .strip_prefix(&prefix_with_slash)
50 .unwrap_or(path.as_ref());
51 result.push_str(&format!("+++ b/{}\n", stripped));
52 }
53 _ => {
54 result.push_str(line);
55 result.push('\n');
56 }
57 }
58 }
59
60 Cow::Owned(result)
61}
62
63/// Strip unnecessary git metadata lines from a diff, keeping only the lines
64/// needed for patch application: path headers (--- and +++), hunk headers (@@),
65/// and content lines (+, -, space).
66pub fn strip_diff_metadata(diff: &str) -> String {
67 let mut result = String::new();
68
69 for line in diff.lines() {
70 let dominated = DiffLine::parse(line);
71 match dominated {
72 // Keep path headers, hunk headers, and content lines
73 DiffLine::OldPath { .. }
74 | DiffLine::NewPath { .. }
75 | DiffLine::HunkHeader(_)
76 | DiffLine::Context(_)
77 | DiffLine::Deletion(_)
78 | DiffLine::Addition(_)
79 | DiffLine::NoNewlineAtEOF => {
80 result.push_str(line);
81 result.push('\n');
82 }
83 // Skip garbage lines (diff --git, index, etc.)
84 DiffLine::Garbage(_) => {}
85 }
86 }
87
88 result
89}
90
/// Marker used to encode cursor position in patch comment lines.
///
/// A patch line that starts with `#` and contains this text is treated as a
/// cursor annotation by `extract_cursor_from_patch` and
/// `encode_cursor_in_patch`.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
93
94/// Extract cursor offset from a patch and return `(clean_patch, cursor_offset)`.
95///
96/// Cursor position is encoded as a comment line (starting with `#`) containing
97/// `[CURSOR_POSITION]`. A `^` in the line indicates the cursor column; a `<`
98/// indicates column 0. The offset is computed relative to addition (`+`) and
99/// context (` `) lines accumulated so far in the hunk, which represent the
100/// cursor position within the new text contributed by the hunk.
101pub fn extract_cursor_from_patch(patch: &str) -> (String, Option<usize>) {
102 let mut clean_patch = String::new();
103 let mut cursor_offset: Option<usize> = None;
104 let mut line_start_offset = 0usize;
105 let mut prev_line_start_offset = 0usize;
106
107 for line in patch.lines() {
108 let diff_line = DiffLine::parse(line);
109
110 match &diff_line {
111 DiffLine::Garbage(content)
112 if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER) =>
113 {
114 let caret_column = if let Some(caret_pos) = content.find('^') {
115 caret_pos
116 } else if content.find('<').is_some() {
117 0
118 } else {
119 continue;
120 };
121 let cursor_column = caret_column.saturating_sub('#'.len_utf8());
122 cursor_offset = Some(prev_line_start_offset + cursor_column);
123 }
124 _ => {
125 if !clean_patch.is_empty() {
126 clean_patch.push('\n');
127 }
128 clean_patch.push_str(line);
129
130 match diff_line {
131 DiffLine::Addition(content) | DiffLine::Context(content) => {
132 prev_line_start_offset = line_start_offset;
133 line_start_offset += content.len() + 1;
134 }
135 _ => {}
136 }
137 }
138 }
139 }
140
141 if patch.ends_with('\n') && !clean_patch.is_empty() {
142 clean_patch.push('\n');
143 }
144
145 (clean_patch, cursor_offset)
146}
147
148/// Find all byte offsets where `hunk.context` occurs as a substring of `text`.
149///
150/// If no exact matches are found and the context ends with `'\n'` but `text`
151/// does not, retries without the trailing newline, accepting only a match at
152/// the very end of `text`. When this fallback fires, the hunk's context is
153/// trimmed and its edit ranges are clamped so that downstream code doesn't
154/// index past the end of the matched region. This handles diffs that are
155/// missing a `\ No newline at end of file` marker: the parser always appends
156/// `'\n'` via `writeln!`, so the context can have a trailing newline that
157/// doesn't exist in the source text.
158pub fn find_context_candidates(text: &str, hunk: &mut Hunk) -> Vec<usize> {
159 let candidates: Vec<usize> = text
160 .match_indices(&hunk.context)
161 .map(|(offset, _)| offset)
162 .collect();
163
164 if !candidates.is_empty() {
165 return candidates;
166 }
167
168 if hunk.context.ends_with('\n') && !hunk.context.is_empty() {
169 let old_len = hunk.context.len();
170 hunk.context.pop();
171 let new_len = hunk.context.len();
172
173 if !hunk.context.is_empty() {
174 let candidates: Vec<usize> = text
175 .match_indices(&hunk.context)
176 .filter(|(offset, _)| offset + new_len == text.len())
177 .map(|(offset, _)| offset)
178 .collect();
179
180 if !candidates.is_empty() {
181 for edit in &mut hunk.edits {
182 let touched_phantom = edit.range.end > new_len;
183 edit.range.start = edit.range.start.min(new_len);
184 edit.range.end = edit.range.end.min(new_len);
185 if touched_phantom {
186 // The replacement text was also written with a
187 // trailing '\n' that corresponds to the phantom
188 // newline we just removed from the context.
189 if edit.text.ends_with('\n') {
190 edit.text.pop();
191 }
192 }
193 }
194 return candidates;
195 }
196
197 // Restore if fallback didn't help either.
198 hunk.context.push('\n');
199 debug_assert_eq!(hunk.context.len(), old_len);
200 } else {
201 hunk.context.push('\n');
202 }
203 }
204
205 Vec::new()
206}
207
/// Picks one offset out of the places where a hunk's context matched.
///
/// * No candidates: `None`.
/// * Exactly one candidate: that candidate, regardless of `expected_line`.
/// * Several candidates: the first one for which `offset_to_line` returns
///   `expected_line`; `None` if there is no expected line or nothing matches.
pub fn disambiguate_by_line_number(
    candidates: &[usize],
    expected_line: Option<u32>,
    offset_to_line: &dyn Fn(usize) -> u32,
) -> Option<usize> {
    match candidates {
        [] => None,
        [only] => Some(*only),
        several => {
            let wanted_line = expected_line?;
            several
                .iter()
                .copied()
                .find(|&candidate| offset_to_line(candidate) == wanted_line)
        }
    }
}
227
228pub fn unified_diff_with_context(
229 old_text: &str,
230 new_text: &str,
231 old_start_line: u32,
232 new_start_line: u32,
233 context_lines: u32,
234) -> String {
235 let input = InternedInput::new(old_text, new_text);
236 diff(
237 Algorithm::Histogram,
238 &input,
239 OffsetUnifiedDiffBuilder::new(&input, old_start_line, new_start_line, context_lines),
240 )
241}
242
243struct OffsetUnifiedDiffBuilder<'a> {
244 before: &'a [Token],
245 after: &'a [Token],
246 interner: &'a Interner<&'a str>,
247 pos: u32,
248 before_hunk_start: u32,
249 after_hunk_start: u32,
250 before_hunk_len: u32,
251 after_hunk_len: u32,
252 old_line_offset: u32,
253 new_line_offset: u32,
254 context_lines: u32,
255 buffer: String,
256 dst: String,
257}
258
259impl<'a> OffsetUnifiedDiffBuilder<'a> {
260 fn new(
261 input: &'a InternedInput<&'a str>,
262 old_line_offset: u32,
263 new_line_offset: u32,
264 context_lines: u32,
265 ) -> Self {
266 Self {
267 before_hunk_start: 0,
268 after_hunk_start: 0,
269 before_hunk_len: 0,
270 after_hunk_len: 0,
271 old_line_offset,
272 new_line_offset,
273 context_lines,
274 buffer: String::with_capacity(8),
275 dst: String::new(),
276 interner: &input.interner,
277 before: &input.before,
278 after: &input.after,
279 pos: 0,
280 }
281 }
282
283 fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
284 for &token in tokens {
285 writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap();
286 }
287 }
288
289 fn flush(&mut self) {
290 if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
291 return;
292 }
293
294 let end = (self.pos + self.context_lines).min(self.before.len() as u32);
295 self.update_pos(end, end);
296
297 writeln!(
298 &mut self.dst,
299 "@@ -{},{} +{},{} @@",
300 self.before_hunk_start + 1 + self.old_line_offset,
301 self.before_hunk_len,
302 self.after_hunk_start + 1 + self.new_line_offset,
303 self.after_hunk_len,
304 )
305 .unwrap();
306 write!(&mut self.dst, "{}", &self.buffer).unwrap();
307 self.buffer.clear();
308 self.before_hunk_len = 0;
309 self.after_hunk_len = 0;
310 }
311
312 fn update_pos(&mut self, print_to: u32, move_to: u32) {
313 self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
314 let len = print_to - self.pos;
315 self.before_hunk_len += len;
316 self.after_hunk_len += len;
317 self.pos = move_to;
318 }
319}
320
321impl Sink for OffsetUnifiedDiffBuilder<'_> {
322 type Out = String;
323
324 fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
325 if before.start - self.pos > self.context_lines * 2 {
326 self.flush();
327 }
328 if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
329 self.pos = before.start.saturating_sub(self.context_lines);
330 self.before_hunk_start = self.pos;
331 self.after_hunk_start = after.start.saturating_sub(self.context_lines);
332 }
333
334 self.update_pos(before.start, before.end);
335 self.before_hunk_len += before.end - before.start;
336 self.after_hunk_len += after.end - after.start;
337 self.print_tokens(
338 &self.before[before.start as usize..before.end as usize],
339 '-',
340 );
341 self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
342 }
343
344 fn finish(mut self) -> Self::Out {
345 self.flush();
346 self.dst
347 }
348}
349
350pub fn encode_cursor_in_patch(patch: &str, cursor_offset: Option<usize>) -> String {
351 let Some(cursor_offset) = cursor_offset else {
352 return patch.to_string();
353 };
354
355 let mut result = String::new();
356 let mut line_start_offset = 0usize;
357
358 for line in patch.lines() {
359 if matches!(
360 DiffLine::parse(line),
361 DiffLine::Garbage(content)
362 if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER)
363 ) {
364 continue;
365 }
366
367 if !result.is_empty() {
368 result.push('\n');
369 }
370 result.push_str(line);
371
372 match DiffLine::parse(line) {
373 DiffLine::Addition(content) => {
374 let line_end_offset = line_start_offset + content.len();
375
376 if cursor_offset >= line_start_offset && cursor_offset <= line_end_offset {
377 let cursor_column = cursor_offset - line_start_offset;
378
379 result.push('\n');
380 result.push('#');
381 for _ in 0..cursor_column {
382 result.push(' ');
383 }
384 write!(result, "^{}", CURSOR_POSITION_MARKER).unwrap();
385 }
386
387 line_start_offset = line_end_offset + 1;
388 }
389 DiffLine::Context(content) => {
390 line_start_offset += content.len() + 1;
391 }
392 _ => {}
393 }
394 }
395
396 if patch.ends_with('\n') {
397 result.push('\n');
398 }
399
400 result
401}
402
403pub fn apply_diff_to_string(diff_str: &str, text: &str) -> Result<String> {
404 apply_diff_to_string_with_hunk_offset(diff_str, text).map(|(text, _)| text)
405}
406
407/// Applies a diff to a string and returns the result along with the offset where
408/// the first hunk's context matched in the original text. This offset can be used
409/// to adjust cursor positions that are relative to the hunk's content.
410pub fn apply_diff_to_string_with_hunk_offset(
411 diff_str: &str,
412 text: &str,
413) -> Result<(String, Option<usize>)> {
414 let mut diff = DiffParser::new(diff_str);
415
416 let mut text = text.to_string();
417 let mut first_hunk_offset = None;
418 let mut line_delta = 0i64;
419
420 while let Some(event) = diff.next().context("Failed to parse diff")? {
421 match event {
422 DiffEvent::Hunk {
423 mut hunk,
424 path: _,
425 status: _,
426 } => {
427 let candidates = find_context_candidates(&text, &mut hunk);
428 let adjusted_start_line = hunk
429 .start_line
430 .and_then(|start_line| u32::try_from(start_line as i64 + line_delta).ok());
431
432 let hunk_offset =
433 disambiguate_by_line_number(&candidates, adjusted_start_line, &|offset| {
434 text[..offset].matches('\n').count() as u32
435 })
436 .ok_or_else(|| anyhow!("couldn't resolve hunk"))?;
437
438 if first_hunk_offset.is_none() {
439 first_hunk_offset = Some(hunk_offset);
440 }
441
442 let mut hunk_line_delta = 0i64;
443 for edit in hunk.edits.iter().rev() {
444 let range = (hunk_offset + edit.range.start)..(hunk_offset + edit.range.end);
445 let deleted_lines = text[range.clone()].matches('\n').count() as i64;
446 let inserted_lines = edit.text.matches('\n').count() as i64;
447 text.replace_range(range, &edit.text);
448 hunk_line_delta += inserted_lines - deleted_lines;
449 }
450 line_delta += hunk_line_delta;
451 }
452 DiffEvent::FileEnd { .. } => {
453 line_delta = 0;
454 }
455 }
456 }
457
458 Ok((text, first_hunk_offset))
459}
460
/// Paths of the file section the parser is currently reading.
struct PatchFile<'a> {
    /// Path taken from the `---` header.
    old_path: Cow<'a, str>,
    /// Path taken from the `+++` header; empty until that header has been seen.
    new_path: Cow<'a, str>,
}
465
/// Pull parser that turns diff text into a sequence of [`DiffEvent`]s.
pub struct DiffParser<'a> {
    /// File section currently being read; `None` before the first `---` header
    /// and after a `FileEnd` event has been emitted.
    current_file: Option<PatchFile<'a>>,
    /// One-line lookahead: the next line to consume, with its parsed form.
    current_line: Option<(&'a str, DiffLine<'a>)>,
    /// Hunk being accumulated.
    hunk: Hunk,
    /// Remaining lines of the input.
    diff: std::str::Lines<'a>,
    /// Old-side start line from the most recent located hunk header, waiting
    /// to be attached to the next emitted hunk.
    pending_start_line: Option<u32>,
    /// Whether a "no newline at end of file" marker was already applied to the
    /// hunk being accumulated.
    processed_no_newline: bool,
    /// Kind of the most recent content line of the hunk being accumulated.
    last_diff_op: LastDiffOp,
}
475
/// Kind of the last content line the parser added to the current hunk; used to
/// decide what a "no newline at end of file" marker applies to.
#[derive(Clone, Copy, Default)]
enum LastDiffOp {
    /// No content line seen yet in this hunk.
    #[default]
    None,
    /// The last line was a context line.
    Context,
    /// The last line was a deletion.
    Deletion,
    /// The last line was an addition.
    Addition,
}
484
/// Event produced by [`DiffParser::next`].
#[derive(Debug, PartialEq)]
pub enum DiffEvent<'a> {
    /// A complete, non-empty hunk.
    Hunk {
        /// Path the hunk applies to: the new path for created files, otherwise
        /// the old path.
        path: Cow<'a, str>,
        /// The parsed hunk.
        hunk: Hunk,
        /// Whether the file is created, modified or deleted by the diff.
        status: FileStatus,
    },
    /// End of a file section.
    FileEnd {
        /// The new path, when it differs from the old path and the old path is
        /// not `/dev/null`.
        renamed_to: Option<Cow<'a, str>>,
    },
}
496
/// What a file section of a diff does to the file, derived from its path headers.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileStatus {
    /// The old path is `/dev/null`.
    Created,
    /// Neither path is `/dev/null`.
    Modified,
    /// The new path is `/dev/null` (and the old path is not).
    Deleted,
}
503
/// One hunk of a diff, expressed as the old text it expects plus the edits to
/// apply to that text.
#[derive(Debug, Default, PartialEq)]
pub struct Hunk {
    /// Old-side text of the hunk: its context and deleted lines, in order.
    pub context: String,
    /// Replacements to apply; ranges are byte offsets into `context`.
    pub edits: Vec<Edit>,
    /// Zero-based old-side start line from the hunk header, if it had one.
    pub start_line: Option<u32>,
}

impl Hunk {
    /// Returns `true` when the hunk has neither context nor edits.
    pub fn is_empty(&self) -> bool {
        self.context.is_empty() && self.edits.is_empty()
    }
}
516
/// A single replacement within a [`Hunk`].
#[derive(Debug, PartialEq)]
pub struct Edit {
    /// Byte range of the hunk's `context` to replace; empty for a pure insertion.
    pub range: Range<usize>,
    /// Text that replaces the range; empty for a pure deletion.
    pub text: String,
}
522
impl<'a> DiffParser<'a> {
    /// Creates a parser over `diff`, reading the first line as lookahead.
    pub fn new(diff: &'a str) -> Self {
        let mut diff = diff.lines();
        let current_line = diff.next().map(|line| (line, DiffLine::parse(line)));
        DiffParser {
            current_file: None,
            hunk: Hunk::default(),
            current_line,
            diff,
            pending_start_line: None,
            processed_no_newline: false,
            last_diff_op: LastDiffOp::None,
        }
    }

    /// Returns the next event, or `Ok(None)` once the input is exhausted.
    ///
    /// Content lines are accumulated into the current hunk until the lookahead
    /// line shows that the hunk (and possibly the file section) is over. Events
    /// are returned *before* that lookahead line is consumed, so the following
    /// call looks at the same line again.
    ///
    /// # Errors
    ///
    /// Fails if writing a line into the hunk fails; the error is annotated
    /// with the offending line.
    pub fn next(&mut self) -> Result<Option<DiffEvent<'a>>> {
        loop {
            // Decide from the line about to be consumed whether the hunk and/or
            // the file section being accumulated is complete: a new `---`
            // header, a garbage line or the end of input ends both; a hunk
            // header ends only the hunk.
            let (hunk_done, file_done) = match self.current_line.as_ref().map(|e| &e.1) {
                Some(DiffLine::OldPath { .. }) | Some(DiffLine::Garbage(_)) | None => (true, true),
                Some(DiffLine::HunkHeader(_)) => (true, false),
                _ => (false, false),
            };

            if hunk_done {
                if let Some(file) = &self.current_file
                    && !self.hunk.is_empty()
                {
                    let status = if file.old_path == "/dev/null" {
                        FileStatus::Created
                    } else if file.new_path == "/dev/null" {
                        FileStatus::Deleted
                    } else {
                        FileStatus::Modified
                    };
                    // Created files have no usable old path.
                    let path = if status == FileStatus::Created {
                        file.new_path.clone()
                    } else {
                        file.old_path.clone()
                    };
                    // Hand out the accumulated hunk and reset per-hunk state.
                    let mut hunk = mem::take(&mut self.hunk);
                    hunk.start_line = self.pending_start_line.take();
                    self.processed_no_newline = false;
                    self.last_diff_op = LastDiffOp::None;
                    return Ok(Some(DiffEvent::Hunk { path, hunk, status }));
                }
            }

            if file_done {
                if let Some(PatchFile { old_path, new_path }) = self.current_file.take() {
                    // NOTE(review): for a deleted file (new path `/dev/null`)
                    // this condition holds too, so `renamed_to` is
                    // `Some("/dev/null")` — confirm callers expect that.
                    return Ok(Some(DiffEvent::FileEnd {
                        renamed_to: if old_path != new_path && old_path != "/dev/null" {
                            Some(new_path)
                        } else {
                            None
                        },
                    }));
                }
            }

            let Some((line, parsed_line)) = self.current_line.take() else {
                break;
            };

            // Closure so that `?` inside can be annotated with the current line.
            (|| {
                match parsed_line {
                    DiffLine::OldPath { path } => {
                        // Opens a new file section; the new path arrives with
                        // the `+++` header.
                        self.current_file = Some(PatchFile {
                            old_path: path,
                            new_path: "".into(),
                        });
                    }
                    DiffLine::NewPath { path } => {
                        // Ignored unless a `---` header opened a file section.
                        if let Some(current_file) = &mut self.current_file {
                            current_file.new_path = path
                        }
                    }
                    DiffLine::HunkHeader(location) => {
                        if let Some(loc) = location {
                            self.pending_start_line = Some(loc.start_line_old);
                        }
                    }
                    DiffLine::Context(ctx) => {
                        // Content lines outside a file section are ignored.
                        if self.current_file.is_some() {
                            writeln!(&mut self.hunk.context, "{ctx}")?;
                            self.last_diff_op = LastDiffOp::Context;
                        }
                    }
                    DiffLine::Deletion(del) => {
                        if self.current_file.is_some() {
                            // The deleted line (with its newline) is part of
                            // the old text, and the edit covers exactly it.
                            let range = self.hunk.context.len()
                                ..self.hunk.context.len() + del.len() + '\n'.len_utf8();
                            // Extend the previous edit when it ends right here.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                last_edit.range.end = range.end;
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: String::new(),
                                });
                            }
                            writeln!(&mut self.hunk.context, "{del}")?;
                            self.last_diff_op = LastDiffOp::Deletion;
                        }
                    }
                    DiffLine::Addition(add) => {
                        if self.current_file.is_some() {
                            // Additions do not consume old text: zero-width
                            // range at the current end of the context.
                            let range = self.hunk.context.len()..self.hunk.context.len();
                            // Append to the previous edit when it ends right here.
                            if let Some(last_edit) = self.hunk.edits.last_mut()
                                && last_edit.range.end == range.start
                            {
                                writeln!(&mut last_edit.text, "{add}").unwrap();
                            } else {
                                self.hunk.edits.push(Edit {
                                    range,
                                    text: format!("{add}\n"),
                                });
                            }
                            self.last_diff_op = LastDiffOp::Addition;
                        }
                    }
                    DiffLine::NoNewlineAtEOF => {
                        // Only the first marker of a hunk is honored.
                        if !self.processed_no_newline {
                            self.processed_no_newline = true;
                            match self.last_diff_op {
                                LastDiffOp::Addition => {
                                    // Remove trailing newline from the last addition
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.text.pop();
                                    }
                                }
                                LastDiffOp::Deletion => {
                                    // Remove trailing newline from context (which includes the deletion)
                                    self.hunk.context.pop();
                                    if let Some(last_edit) = self.hunk.edits.last_mut() {
                                        last_edit.range.end -= 1;
                                    }
                                }
                                LastDiffOp::Context | LastDiffOp::None => {
                                    // Remove trailing newline from context
                                    self.hunk.context.pop();
                                }
                            }
                        }
                    }
                    DiffLine::Garbage(_) => {}
                }

                anyhow::Ok(())
            })()
            .with_context(|| format!("on line:\n\n```\n{}```", line))?;

            // Advance the lookahead.
            self.current_line = self.diff.next().map(|line| (line, DiffLine::parse(line)));
        }

        anyhow::Ok(None)
    }
}
681
/// Classification of a single line of diff text, as produced by `DiffLine::parse`.
#[derive(Debug, PartialEq)]
pub enum DiffLine<'a> {
    /// `---` header; `path` has a leading `a/` removed.
    OldPath { path: Cow<'a, str> },
    /// `+++` header; `path` has a leading `b/` removed.
    NewPath { path: Cow<'a, str> },
    /// `@@` hunk header; `None` when the header starts with `...` instead of
    /// line ranges.
    HunkHeader(Option<HunkLocation>),
    /// Unchanged line (content after the leading space), or an empty line.
    Context(&'a str),
    /// Removed line (content after the leading `-`).
    Deletion(&'a str),
    /// Added line (content after the leading `+`).
    Addition(&'a str),
    /// A line starting with `\ No newline`.
    NoNewlineAtEOF,
    /// Anything else; holds the whole original line.
    Garbage(&'a str),
}
693
/// Line ranges from a `@@ -old_start,old_count +new_start,new_count @@` header.
///
/// Start lines are stored zero-based: the parser subtracts one (saturating)
/// from the header value and `Display` adds it back.
#[derive(Debug, PartialEq)]
pub struct HunkLocation {
    /// Zero-based first line of the hunk in the old text.
    pub start_line_old: u32,
    /// Number of old-text lines covered; 1 when the header omits the count.
    pub count_old: u32,
    /// Zero-based first line of the hunk in the new text.
    pub start_line_new: u32,
    /// Number of new-text lines covered; 1 when the header omits the count.
    pub count_new: u32,
}
701
702impl<'a> DiffLine<'a> {
703 pub fn parse(line: &'a str) -> Self {
704 Self::try_parse(line).unwrap_or(Self::Garbage(line))
705 }
706
707 fn try_parse(line: &'a str) -> Option<Self> {
708 if line.starts_with("\\ No newline") {
709 return Some(Self::NoNewlineAtEOF);
710 }
711 if let Some(header) = line.strip_prefix("---").and_then(eat_required_whitespace) {
712 let path = parse_header_path("a/", header);
713 Some(Self::OldPath { path })
714 } else if let Some(header) = line.strip_prefix("+++").and_then(eat_required_whitespace) {
715 Some(Self::NewPath {
716 path: parse_header_path("b/", header),
717 })
718 } else if let Some(header) = line.strip_prefix("@@").and_then(eat_required_whitespace) {
719 if header.starts_with("...") {
720 return Some(Self::HunkHeader(None));
721 }
722
723 let mut tokens = header.split_whitespace();
724 let old_range = tokens.next()?.strip_prefix('-')?;
725 let new_range = tokens.next()?.strip_prefix('+')?;
726
727 let (start_line_old, count_old) = old_range.split_once(',').unwrap_or((old_range, "1"));
728 let (start_line_new, count_new) = new_range.split_once(',').unwrap_or((new_range, "1"));
729
730 Some(Self::HunkHeader(Some(HunkLocation {
731 start_line_old: start_line_old.parse::<u32>().ok()?.saturating_sub(1),
732 count_old: count_old.parse().ok()?,
733 start_line_new: start_line_new.parse::<u32>().ok()?.saturating_sub(1),
734 count_new: count_new.parse().ok()?,
735 })))
736 } else if let Some(deleted_header) = line.strip_prefix("-") {
737 Some(Self::Deletion(deleted_header))
738 } else if line.is_empty() {
739 Some(Self::Context(""))
740 } else if let Some(context) = line.strip_prefix(" ") {
741 Some(Self::Context(context))
742 } else {
743 Some(Self::Addition(line.strip_prefix("+")?))
744 }
745 }
746}
747
748impl<'a> Display for DiffLine<'a> {
749 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
750 match self {
751 DiffLine::OldPath { path } => write!(f, "--- {path}"),
752 DiffLine::NewPath { path } => write!(f, "+++ {path}"),
753 DiffLine::HunkHeader(Some(hunk_location)) => {
754 write!(
755 f,
756 "@@ -{},{} +{},{} @@",
757 hunk_location.start_line_old + 1,
758 hunk_location.count_old,
759 hunk_location.start_line_new + 1,
760 hunk_location.count_new
761 )
762 }
763 DiffLine::HunkHeader(None) => write!(f, "@@ ... @@"),
764 DiffLine::Context(content) => write!(f, " {content}"),
765 DiffLine::Deletion(content) => write!(f, "-{content}"),
766 DiffLine::Addition(content) => write!(f, "+{content}"),
767 DiffLine::NoNewlineAtEOF => write!(f, "\\ No newline at end of file"),
768 DiffLine::Garbage(line) => write!(f, "{line}"),
769 }
770 }
771}
772
/// Extracts the path from the text that follows a `---`/`+++` marker.
///
/// Without quotes or backslashes in `header`, the path is the first
/// whitespace-delimited field, borrowed from the input, minus a leading
/// `strip_prefix` if present.
///
/// Otherwise the path is rebuilt character by character: double quotes are
/// dropped and toggle quoting, a backslash makes the next character literal,
/// and unquoted whitespace ends the path. The first time the rebuilt text
/// equals `strip_prefix`, it is discarded so the path continues after it.
fn parse_header_path<'a>(strip_prefix: &'static str, header: &'a str) -> Cow<'a, str> {
    let needs_unquoting = header.contains(['"', '\\']);
    if !needs_unquoting {
        let first_field = header.split_ascii_whitespace().next().unwrap_or(header);
        let path = first_field.strip_prefix(strip_prefix).unwrap_or(first_field);
        return Cow::Borrowed(path);
    }

    let mut unquoted = String::with_capacity(header.len());
    let mut inside_quotes = false;
    let mut prefix_pending = true;
    let mut rest = header.chars();

    while let Some(current) = rest.next() {
        match current {
            '"' => inside_quotes = !inside_quotes,
            '\\' => match rest.next() {
                Some(escaped) => unquoted.push(escaped),
                // A trailing backslash escapes nothing.
                None => break,
            },
            c if c.is_ascii_whitespace() && !inside_quotes => break,
            c => unquoted.push(c),
        }

        if prefix_pending && unquoted == strip_prefix {
            prefix_pending = false;
            unquoted.clear();
        }
    }

    Cow::Owned(unquoted)
}
809
/// Skips leading ASCII whitespace, which must be present.
///
/// Returns the remainder of `header` after its leading ASCII whitespace, or
/// `None` if `header` does not start with any.
fn eat_required_whitespace(header: &str) -> Option<&str> {
    let rest = header.trim_ascii_start();
    (rest.len() < header.len()).then_some(rest)
}
819
820#[cfg(test)]
821mod tests {
822 use super::*;
823 use indoc::indoc;
824
825 #[test]
826 fn parse_lines_simple() {
827 let input = indoc! {"
828 diff --git a/text.txt b/text.txt
829 index 86c770d..a1fd855 100644
830 --- a/file.txt
831 +++ b/file.txt
832 @@ -1,2 +1,3 @@
833 context
834 -deleted
835 +inserted
836 garbage
837
838 --- b/file.txt
839 +++ a/file.txt
840 "};
841
842 let lines = input.lines().map(DiffLine::parse).collect::<Vec<_>>();
843
844 assert_eq!(
845 lines,
846 &[
847 DiffLine::Garbage("diff --git a/text.txt b/text.txt"),
848 DiffLine::Garbage("index 86c770d..a1fd855 100644"),
849 DiffLine::OldPath {
850 path: "file.txt".into()
851 },
852 DiffLine::NewPath {
853 path: "file.txt".into()
854 },
855 DiffLine::HunkHeader(Some(HunkLocation {
856 start_line_old: 0,
857 count_old: 2,
858 start_line_new: 0,
859 count_new: 3
860 })),
861 DiffLine::Context("context"),
862 DiffLine::Deletion("deleted"),
863 DiffLine::Addition("inserted"),
864 DiffLine::Garbage("garbage"),
865 DiffLine::Context(""),
866 DiffLine::OldPath {
867 path: "b/file.txt".into()
868 },
869 DiffLine::NewPath {
870 path: "a/file.txt".into()
871 },
872 ]
873 );
874 }
875
876 #[test]
877 fn file_header_extra_space() {
878 let options = ["--- file", "--- file", "---\tfile"];
879
880 for option in options {
881 assert_eq!(
882 DiffLine::parse(option),
883 DiffLine::OldPath {
884 path: "file".into()
885 },
886 "{option}",
887 );
888 }
889 }
890
891 #[test]
892 fn hunk_header_extra_space() {
893 let options = [
894 "@@ -1,2 +1,3 @@",
895 "@@ -1,2 +1,3 @@",
896 "@@\t-1,2\t+1,3\t@@",
897 "@@ -1,2 +1,3 @@",
898 "@@ -1,2 +1,3 @@",
899 "@@ -1,2 +1,3 @@",
900 "@@ -1,2 +1,3 @@ garbage",
901 ];
902
903 for option in options {
904 assert_eq!(
905 DiffLine::parse(option),
906 DiffLine::HunkHeader(Some(HunkLocation {
907 start_line_old: 0,
908 count_old: 2,
909 start_line_new: 0,
910 count_new: 3
911 })),
912 "{option}",
913 );
914 }
915 }
916
917 #[test]
918 fn hunk_header_without_location() {
919 assert_eq!(DiffLine::parse("@@ ... @@"), DiffLine::HunkHeader(None));
920 }
921
922 #[test]
923 fn test_parse_path() {
924 assert_eq!(parse_header_path("a/", "foo.txt"), "foo.txt");
925 assert_eq!(
926 parse_header_path("a/", "foo/bar/baz.txt"),
927 "foo/bar/baz.txt"
928 );
929 assert_eq!(parse_header_path("a/", "a/foo.txt"), "foo.txt");
930 assert_eq!(
931 parse_header_path("a/", "a/foo/bar/baz.txt"),
932 "foo/bar/baz.txt"
933 );
934
935 // Extra
936 assert_eq!(
937 parse_header_path("a/", "a/foo/bar/baz.txt 2025"),
938 "foo/bar/baz.txt"
939 );
940 assert_eq!(
941 parse_header_path("a/", "a/foo/bar/baz.txt\t2025"),
942 "foo/bar/baz.txt"
943 );
944 assert_eq!(
945 parse_header_path("a/", "a/foo/bar/baz.txt \""),
946 "foo/bar/baz.txt"
947 );
948
949 // Quoted
950 assert_eq!(
951 parse_header_path("a/", "a/foo/bar/\"baz quox.txt\""),
952 "foo/bar/baz quox.txt"
953 );
954 assert_eq!(
955 parse_header_path("a/", "\"a/foo/bar/baz quox.txt\""),
956 "foo/bar/baz quox.txt"
957 );
958 assert_eq!(
959 parse_header_path("a/", "\"foo/bar/baz quox.txt\""),
960 "foo/bar/baz quox.txt"
961 );
962 assert_eq!(parse_header_path("a/", "\"whatever π€·\""), "whatever π€·");
963 assert_eq!(
964 parse_header_path("a/", "\"foo/bar/baz quox.txt\" 2025"),
965 "foo/bar/baz quox.txt"
966 );
967 // unescaped quotes are dropped
968 assert_eq!(parse_header_path("a/", "foo/\"bar\""), "foo/bar");
969
970 // Escaped
971 assert_eq!(
972 parse_header_path("a/", "\"foo/\\\"bar\\\"/baz.txt\""),
973 "foo/\"bar\"/baz.txt"
974 );
975 assert_eq!(
976 parse_header_path("a/", "\"C:\\\\Projects\\\\My App\\\\old file.txt\""),
977 "C:\\Projects\\My App\\old file.txt"
978 );
979 }
980
981 #[test]
982 fn test_parse_diff_with_leading_and_trailing_garbage() {
983 let diff = indoc! {"
984 I need to make some changes.
985
986 I'll change the following things:
987 - one
988 - two
989 - three
990
991 ```
992 --- a/file.txt
993 +++ b/file.txt
994 one
995 +AND
996 two
997 ```
998
999 Summary of what I did:
1000 - one
1001 - two
1002 - three
1003
1004 That's about it.
1005 "};
1006
1007 let mut events = Vec::new();
1008 let mut parser = DiffParser::new(diff);
1009 while let Some(event) = parser.next().unwrap() {
1010 events.push(event);
1011 }
1012
1013 assert_eq!(
1014 events,
1015 &[
1016 DiffEvent::Hunk {
1017 path: "file.txt".into(),
1018 hunk: Hunk {
1019 context: "one\ntwo\n".into(),
1020 edits: vec![Edit {
1021 range: 4..4,
1022 text: "AND\n".into()
1023 }],
1024 start_line: None,
1025 },
1026 status: FileStatus::Modified,
1027 },
1028 DiffEvent::FileEnd { renamed_to: None }
1029 ],
1030 )
1031 }
1032
1033 #[test]
1034 fn test_no_newline_at_eof() {
1035 let diff = indoc! {"
1036 --- a/file.py
1037 +++ b/file.py
1038 @@ -55,7 +55,3 @@ class CustomDataset(Dataset):
1039 torch.set_rng_state(state)
1040 mask = self.transform(mask)
1041
1042 - if self.mode == 'Training':
1043 - return (img, mask, name)
1044 - else:
1045 - return (img, mask, name)
1046 \\ No newline at end of file
1047 "};
1048
1049 let mut events = Vec::new();
1050 let mut parser = DiffParser::new(diff);
1051 while let Some(event) = parser.next().unwrap() {
1052 events.push(event);
1053 }
1054
1055 assert_eq!(
1056 events,
1057 &[
1058 DiffEvent::Hunk {
1059 path: "file.py".into(),
1060 hunk: Hunk {
1061 context: concat!(
1062 " torch.set_rng_state(state)\n",
1063 " mask = self.transform(mask)\n",
1064 "\n",
1065 " if self.mode == 'Training':\n",
1066 " return (img, mask, name)\n",
1067 " else:\n",
1068 " return (img, mask, name)",
1069 )
1070 .into(),
1071 edits: vec![Edit {
1072 range: 80..203,
1073 text: "".into()
1074 }],
1075 start_line: Some(54), // @@ -55,7 -> line 54 (0-indexed)
1076 },
1077 status: FileStatus::Modified,
1078 },
1079 DiffEvent::FileEnd { renamed_to: None }
1080 ],
1081 );
1082 }
1083
1084 #[test]
1085 fn test_no_newline_at_eof_addition() {
1086 let diff = indoc! {"
1087 --- a/file.txt
1088 +++ b/file.txt
1089 @@ -1,2 +1,3 @@
1090 context
1091 -deleted
1092 +added line
1093 \\ No newline at end of file
1094 "};
1095
1096 let mut events = Vec::new();
1097 let mut parser = DiffParser::new(diff);
1098 while let Some(event) = parser.next().unwrap() {
1099 events.push(event);
1100 }
1101
1102 assert_eq!(
1103 events,
1104 &[
1105 DiffEvent::Hunk {
1106 path: "file.txt".into(),
1107 hunk: Hunk {
1108 context: "context\ndeleted\n".into(),
1109 edits: vec![Edit {
1110 range: 8..16,
1111 text: "added line".into()
1112 }],
1113 start_line: Some(0), // @@ -1,2 -> line 0 (0-indexed)
1114 },
1115 status: FileStatus::Modified,
1116 },
1117 DiffEvent::FileEnd { renamed_to: None }
1118 ],
1119 );
1120 }
1121
1122 #[test]
1123 fn test_double_no_newline_at_eof() {
1124 // Two consecutive "no newline" markers - the second should be ignored
1125 let diff = indoc! {"
1126 --- a/file.txt
1127 +++ b/file.txt
1128 @@ -1,3 +1,3 @@
1129 line1
1130 -old
1131 +new
1132 line3
1133 \\ No newline at end of file
1134 \\ No newline at end of file
1135 "};
1136
1137 let mut events = Vec::new();
1138 let mut parser = DiffParser::new(diff);
1139 while let Some(event) = parser.next().unwrap() {
1140 events.push(event);
1141 }
1142
1143 assert_eq!(
1144 events,
1145 &[
1146 DiffEvent::Hunk {
1147 path: "file.txt".into(),
1148 hunk: Hunk {
1149 context: "line1\nold\nline3".into(), // Only one newline removed
1150 edits: vec![Edit {
1151 range: 6..10, // "old\n" is 4 bytes
1152 text: "new\n".into()
1153 }],
1154 start_line: Some(0),
1155 },
1156 status: FileStatus::Modified,
1157 },
1158 DiffEvent::FileEnd { renamed_to: None }
1159 ],
1160 );
1161 }
1162
1163 #[test]
1164 fn test_no_newline_after_context_not_addition() {
1165 // "No newline" after context lines should remove newline from context,
1166 // not from an earlier addition
1167 let diff = indoc! {"
1168 --- a/file.txt
1169 +++ b/file.txt
1170 @@ -1,4 +1,4 @@
1171 line1
1172 -old
1173 +new
1174 line3
1175 line4
1176 \\ No newline at end of file
1177 "};
1178
1179 let mut events = Vec::new();
1180 let mut parser = DiffParser::new(diff);
1181 while let Some(event) = parser.next().unwrap() {
1182 events.push(event);
1183 }
1184
1185 assert_eq!(
1186 events,
1187 &[
1188 DiffEvent::Hunk {
1189 path: "file.txt".into(),
1190 hunk: Hunk {
1191 // newline removed from line4 (context), not from "new" (addition)
1192 context: "line1\nold\nline3\nline4".into(),
1193 edits: vec![Edit {
1194 range: 6..10, // "old\n" is 4 bytes
1195 text: "new\n".into() // Still has newline
1196 }],
1197 start_line: Some(0),
1198 },
1199 status: FileStatus::Modified,
1200 },
1201 DiffEvent::FileEnd { renamed_to: None }
1202 ],
1203 );
1204 }
1205
#[test]
fn test_strip_diff_metadata() {
    // The `diff --git` and `index` lines are git metadata and must be dropped;
    // path headers, the hunk header, and content lines must pass through
    // unchanged.
    let raw = indoc! {r#"
        diff --git a/file.txt b/file.txt
        index 1234567..abcdefg 100644
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,4 @@
         context line
        -removed line
        +added line
         more context
    "#};

    let expected = indoc! {r#"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,4 @@
         context line
        -removed line
        +added line
         more context
    "#};

    assert_eq!(strip_diff_metadata(raw), expected);
}
1235
#[test]
fn test_apply_diff_to_string_no_trailing_newline() {
    // The source text does not end in a newline, and the diff carries no
    // `\ No newline at end of file` marker. The patch must still apply and
    // the result must keep the missing trailing newline.
    let original = "line1\nline2\nline3";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         line1
        -line2
        +replaced
         line3
    "};

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, "line1\nreplaced\nline3");
}
1254
#[test]
fn test_apply_diff_to_string_trailing_newline_present() {
    // Baseline case: the source text ends with a newline, so the hunk context
    // matches the text exactly and the trailing newline is preserved.
    let original = "line1\nline2\nline3\n";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         line1
        -line2
        +replaced
         line3
    "};

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, "line1\nreplaced\nline3\n");
}
1273
#[test]
fn test_apply_diff_to_string_deletion_at_end_no_trailing_newline() {
    // The final line is deleted from a text that has no trailing newline.
    // The deletion's range has to be clamped to the text length; otherwise it
    // would reach one byte past the end of the text.
    let original = "line1\nline2\nline3";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,2 @@
         line1
         line2
        -line3
    "};

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, "line1\nline2\n");
}
1292
#[test]
fn test_apply_diff_to_string_replace_last_line_no_trailing_newline() {
    // The final line is replaced in a text that has no trailing newline; the
    // result must not gain one either.
    let original = "aaa\nbbb\nccc";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,3 @@
         aaa
         bbb
        -ccc
        +ddd
    "};

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, "aaa\nbbb\nddd");
}
1310
#[test]
fn test_apply_diff_to_string_multibyte_no_trailing_newline() {
    // The last line consists of multi-byte UTF-8 characters ("세계" is two
    // 3-byte code points) and the text has no trailing newline. When the edit
    // range is clamped to the text length it must still land on a char
    // boundary, otherwise slicing the text would panic.
    let text = "hello\n세계";
    let diff = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,2 +1,2 @@
         hello
        -세계
        +world
    "};

    let result = apply_diff_to_string(diff, text).unwrap();
    assert_eq!(result, "hello\nworld");
}
1328
#[test]
fn test_apply_diff_to_string_adjusts_line_numbers_after_prior_hunks() {
    // The context `same / remove / same` occurs twice in the text. The second
    // hunk's header points at old lines 4-6 (the first occurrence), and that is
    // the occurrence that must be edited even though the first hunk has
    // already removed a line above it.
    let original = "first\nremove first\nfirst\nsame\nremove\nsame\nsame\nremove\nsame\n";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,3 +1,2 @@
         first
        -remove first
         first
        @@ -4,3 +3,2 @@
         same
        -remove
         same
    "};

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, "first\nfirst\nsame\nsame\nsame\nremove\nsame\n");
}
1348
#[test]
fn test_apply_diff_to_string_adjusts_line_numbers_after_prior_insertion_hunks() {
    // The context `same / remove / same` occurs twice in the text. The second
    // hunk's header points at old lines 6-8 (the second occurrence), and that
    // is the occurrence that must be edited even though the first hunk has
    // already inserted a line above it.
    let original = "first\nfirst\nsame\nremove\nsame\nsame\nremove\nsame\n";
    let patch = indoc! {"
        --- a/file.txt
        +++ b/file.txt
        @@ -1,2 +1,3 @@
         first
        +inserted
         first
        @@ -6,3 +7,2 @@
         same
        -remove
         same
    "};
    let expected = "first\ninserted\nfirst\nsame\nremove\nsame\nsame\nsame\n";

    let patched = apply_diff_to_string(patch, original).unwrap();

    assert_eq!(patched, expected);
}
1371
#[test]
fn test_find_context_candidates_no_false_positive_mid_text() {
    // "bbb\n" sits in the middle of the text with a real newline after it, so
    // the only candidate must be the exact match at byte offset 4. The
    // newline-stripped fallback is meant for the end of the text only and must
    // not contribute anything here.
    let haystack = "aaa\nbbb\nccc\n";
    let mut hunk = Hunk {
        context: "bbb\n".into(),
        edits: Vec::new(),
        start_line: None,
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    assert_eq!(offsets, vec![4]);
}
1387
#[test]
fn test_find_context_candidates_fallback_at_end() {
    // The text ends in "bbb" without a newline, so the context "bbb\n" has no
    // exact match. The candidate at offset 4 is expected anyway, and the
    // hunk's context must come back with its trailing newline stripped.
    let haystack = "aaa\nbbb";
    let mut hunk = Hunk {
        context: "bbb\n".into(),
        edits: Vec::new(),
        start_line: None,
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    assert_eq!(offsets, vec![4]);
    assert_eq!(hunk.context, "bbb");
}
1402
#[test]
fn test_find_context_candidates_no_fallback_mid_text() {
    // The text ends in "ccc" without a newline, so the two-line context
    // "bbb\nccc\n" has no exact match. With its trailing newline stripped,
    // "bbb\nccc" matches at offset 4 and runs to the end of the text.
    // Exactly one candidate is expected (no spurious second match), and the
    // hunk's context must be left in its stripped form.
    let haystack = "aaa\nbbb\nccc";
    let mut hunk = Hunk {
        context: "bbb\nccc\n".into(),
        edits: Vec::new(),
        start_line: None,
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    assert_eq!(offsets, vec![4]);
    assert_eq!(hunk.context, "bbb\nccc");
}
1421
#[test]
fn test_find_context_candidates_clamps_edit_ranges() {
    // The edit covers "bbb\n" (bytes 4..8 of the context), but the text ends
    // in "bbb" with no newline. Once the context is shortened to the 7-byte
    // "aaa\nbbb", the edit's end must be clamped from 8 down to 7.
    let haystack = "aaa\nbbb";
    let replacement = Edit {
        range: 4..8,
        text: "ccc\n".into(),
    };
    let mut hunk = Hunk {
        context: "aaa\nbbb\n".into(),
        edits: vec![replacement],
        start_line: None,
    };

    let offsets = find_context_candidates(haystack, &mut hunk);

    assert_eq!(offsets, vec![0]);
    assert_eq!(hunk.edits[0].range, 4..7);
}
1439
#[test]
fn test_unified_diff_with_context_matches_expected_context_window() {
    // One line (line 6 of 8) differs between the two texts. The last argument
    // is varied to check how much unchanged text surrounds the change in the
    // generated hunk.
    let old_text = "line1\nline2\nline3\nline4\nline5\nCHANGE_ME\nline7\nline8\n";
    let new_text = "line1\nline2\nline3\nline4\nline5\nCHANGED\nline7\nline8\n";

    // (context size, expected diff)
    let cases = [
        // Three lines before the change; only two remain after it.
        (
            3,
            "@@ -3,6 +3,6 @@\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n",
        ),
        // A window at least as large as the file yields the whole file.
        (
            8,
            "@@ -1,8 +1,8 @@\n line1\n line2\n line3\n line4\n line5\n-CHANGE_ME\n+CHANGED\n line7\n line8\n",
        ),
        // No context: only the changed line itself.
        (0, "@@ -6,1 +6,1 @@\n-CHANGE_ME\n+CHANGED\n"),
    ];

    for (context_size, expected) in cases {
        let actual = unified_diff_with_context(old_text, new_text, 0, 0, context_size);
        assert_eq!(actual, expected, "context size {context_size}");
    }
}
1460}