1use ordered_float::OrderedFloat;
2use rope::{Point, Rope, TextSummary};
3use std::collections::{BTreeSet, HashMap};
4use std::{
5 cmp,
6 fmt::{self, Debug},
7 ops::Range,
8};
9
10#[derive(Default)]
11struct Matrix {
12 cells: Vec<f64>,
13 rows: usize,
14 cols: usize,
15}
16
17impl Matrix {
18 fn new() -> Self {
19 Self {
20 cells: Vec::new(),
21 rows: 0,
22 cols: 0,
23 }
24 }
25
26 fn resize(&mut self, rows: usize, cols: usize) {
27 self.cells.resize(rows * cols, 0.);
28 self.rows = rows;
29 self.cols = cols;
30 }
31
32 fn swap_columns(&mut self, col1: usize, col2: usize) {
33 if col1 == col2 {
34 return;
35 }
36
37 if col1 >= self.cols {
38 panic!("column out of bounds");
39 }
40
41 if col2 >= self.cols {
42 panic!("column out of bounds");
43 }
44
45 unsafe {
46 let ptr = self.cells.as_mut_ptr();
47 std::ptr::swap_nonoverlapping(
48 ptr.add(col1 * self.rows),
49 ptr.add(col2 * self.rows),
50 self.rows,
51 );
52 }
53 }
54
55 fn get(&self, row: usize, col: usize) -> f64 {
56 if row >= self.rows {
57 panic!("row out of bounds")
58 }
59
60 if col >= self.cols {
61 panic!("column out of bounds")
62 }
63 self.cells[col * self.rows + row]
64 }
65
66 fn set(&mut self, row: usize, col: usize, value: f64) {
67 if row >= self.rows {
68 panic!("row out of bounds")
69 }
70
71 if col >= self.cols {
72 panic!("column out of bounds")
73 }
74
75 self.cells[col * self.rows + row] = value;
76 }
77}
78
79impl Debug for Matrix {
80 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81 writeln!(f)?;
82 for i in 0..self.rows {
83 for j in 0..self.cols {
84 write!(f, "{:5}", self.get(i, j))?;
85 }
86 writeln!(f)?;
87 }
88 Ok(())
89 }
90}
91
92#[derive(Debug, Clone)]
93pub enum CharOperation {
94 Insert { text: String },
95 Delete { bytes: usize },
96 Keep { bytes: usize },
97}
98
99#[derive(Default)]
100pub struct StreamingDiff {
101 old: Vec<char>,
102 new: Vec<char>,
103 scores: Matrix,
104 old_text_ix: usize,
105 new_text_ix: usize,
106 equal_runs: HashMap<(usize, usize), u32>,
107}
108
109impl StreamingDiff {
110 const INSERTION_SCORE: f64 = -1.;
111 const DELETION_SCORE: f64 = -20.;
112 const EQUALITY_BASE: f64 = 1.8;
113 const MAX_EQUALITY_EXPONENT: i32 = 16;
114
115 pub fn new(old: String) -> Self {
116 let old = old.chars().collect::<Vec<_>>();
117 let mut scores = Matrix::new();
118 scores.resize(old.len() + 1, 1);
119 for i in 0..=old.len() {
120 scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
121 }
122 Self {
123 old,
124 new: Vec::new(),
125 scores,
126 old_text_ix: 0,
127 new_text_ix: 0,
128 equal_runs: Default::default(),
129 }
130 }
131
132 pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
133 self.new.extend(text.chars());
134 self.scores.swap_columns(0, self.scores.cols - 1);
135 self.scores
136 .resize(self.old.len() + 1, self.new.len() - self.new_text_ix + 1);
137 self.equal_runs.retain(|(_i, j), _| *j == self.new_text_ix);
138
139 for j in self.new_text_ix + 1..=self.new.len() {
140 let relative_j = j - self.new_text_ix;
141
142 self.scores
143 .set(0, relative_j, j as f64 * Self::INSERTION_SCORE);
144 for i in 1..=self.old.len() {
145 let insertion_score = self.scores.get(i, relative_j - 1) + Self::INSERTION_SCORE;
146 let deletion_score = self.scores.get(i - 1, relative_j) + Self::DELETION_SCORE;
147 let equality_score = if self.old[i - 1] == self.new[j - 1] {
148 let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
149 equal_run += 1;
150 self.equal_runs.insert((i, j), equal_run);
151
152 let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
153 self.scores.get(i - 1, relative_j - 1) + Self::EQUALITY_BASE.powi(exponent)
154 } else {
155 f64::NEG_INFINITY
156 };
157
158 let score = insertion_score.max(deletion_score).max(equality_score);
159 self.scores.set(i, relative_j, score);
160 }
161 }
162
163 let mut max_score = f64::NEG_INFINITY;
164 let mut next_old_text_ix = self.old_text_ix;
165 let next_new_text_ix = self.new.len();
166 for i in self.old_text_ix..=self.old.len() {
167 let score = self.scores.get(i, next_new_text_ix - self.new_text_ix);
168 if score > max_score {
169 max_score = score;
170 next_old_text_ix = i;
171 }
172 }
173
174 let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
175 self.old_text_ix = next_old_text_ix;
176 self.new_text_ix = next_new_text_ix;
177 hunks
178 }
179
180 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
181 let mut pending_insert: Option<Range<usize>> = None;
182 let mut hunks = Vec::new();
183 let mut i = old_text_ix;
184 let mut j = new_text_ix;
185 while (i, j) != (self.old_text_ix, self.new_text_ix) {
186 let insertion_score = if j > self.new_text_ix {
187 Some((i, j - 1))
188 } else {
189 None
190 };
191 let deletion_score = if i > self.old_text_ix {
192 Some((i - 1, j))
193 } else {
194 None
195 };
196 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
197 if self.old[i - 1] == self.new[j - 1] {
198 Some((i - 1, j - 1))
199 } else {
200 None
201 }
202 } else {
203 None
204 };
205
206 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
207 .iter()
208 .max_by_key(|cell| {
209 cell.map(|(i, j)| OrderedFloat(self.scores.get(i, j - self.new_text_ix)))
210 })
211 .unwrap()
212 .unwrap();
213
214 if prev_i == i && prev_j == j - 1 {
215 if let Some(pending_insert) = pending_insert.as_mut() {
216 pending_insert.start = prev_j;
217 } else {
218 pending_insert = Some(prev_j..j);
219 }
220 } else {
221 if let Some(range) = pending_insert.take() {
222 hunks.push(CharOperation::Insert {
223 text: self.new[range].iter().collect(),
224 });
225 }
226
227 let char_len = self.old[i - 1].len_utf8();
228 if prev_i == i - 1 && prev_j == j {
229 if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
230 *len += char_len;
231 } else {
232 hunks.push(CharOperation::Delete { bytes: char_len })
233 }
234 } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
235 *len += char_len;
236 } else {
237 hunks.push(CharOperation::Keep { bytes: char_len })
238 }
239 }
240
241 i = prev_i;
242 j = prev_j;
243 }
244
245 if let Some(range) = pending_insert.take() {
246 hunks.push(CharOperation::Insert {
247 text: self.new[range].iter().collect(),
248 });
249 }
250
251 hunks.reverse();
252 hunks
253 }
254
255 pub fn finish(self) -> Vec<CharOperation> {
256 self.backtrack(self.old.len(), self.new.len())
257 }
258}
259
260#[derive(Debug, Clone, PartialEq)]
261pub enum LineOperation {
262 Insert { lines: u32 },
263 Delete { lines: u32 },
264 Keep { lines: u32 },
265}
266
267#[derive(Debug, Default)]
268pub struct LineDiff {
269 inserted_newline_at_end: bool,
270 /// The extent of kept and deleted text.
271 old_end: Point,
272 /// The extent of kept and inserted text.
273 new_end: Point,
274 /// Deleted rows, expressed in terms of the old text.
275 deleted_rows: BTreeSet<u32>,
276 /// Inserted rows, expressed in terms of the new text.
277 inserted_rows: BTreeSet<u32>,
278 buffered_insert: String,
279 /// After deleting a newline, we buffer deletion until we keep or insert a character.
280 buffered_delete: usize,
281}
282
283impl LineDiff {
284 pub fn push_char_operations<'a>(
285 &mut self,
286 operations: impl IntoIterator<Item = &'a CharOperation>,
287 old_text: &Rope,
288 ) {
289 for operation in operations {
290 self.push_char_operation(operation, old_text);
291 }
292 }
293
294 pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
295 match operation {
296 CharOperation::Insert { text } => {
297 self.flush_delete(old_text);
298
299 if is_line_start(self.old_end) {
300 if let Some(newline_ix) = text.rfind('\n') {
301 let (prefix, suffix) = text.split_at(newline_ix + 1);
302 self.buffered_insert.push_str(prefix);
303 self.flush_insert(old_text);
304 self.buffered_insert.push_str(suffix);
305 } else {
306 self.buffered_insert.push_str(text);
307 }
308 } else {
309 self.buffered_insert.push_str(text);
310 if !text.ends_with('\n') {
311 self.flush_insert(old_text);
312 }
313 }
314 }
315 CharOperation::Delete { bytes } => {
316 self.buffered_delete += bytes;
317
318 let common_suffix_len = self.trim_buffered_end(old_text);
319 self.flush_insert(old_text);
320
321 if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
322 self.flush_delete(old_text);
323 self.keep(common_suffix_len, old_text);
324 }
325 }
326 CharOperation::Keep { bytes } => {
327 self.flush_delete(old_text);
328 self.flush_insert(old_text);
329 self.keep(*bytes, old_text);
330 }
331 }
332 }
333
334 fn flush_insert(&mut self, old_text: &Rope) {
335 if self.buffered_insert.is_empty() {
336 return;
337 }
338
339 let new_start = self.new_end;
340 let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
341 self.new_end += lines;
342
343 if is_line_start(self.old_end) {
344 if self.new_end.column == 0 {
345 self.inserted_rows.extend(new_start.row..self.new_end.row);
346 } else {
347 self.deleted_rows.insert(self.old_end.row);
348 self.inserted_rows.extend(new_start.row..=self.new_end.row);
349 }
350 } else if is_line_end(self.old_end, old_text) {
351 if self.buffered_insert.starts_with('\n') {
352 self.inserted_rows
353 .extend(new_start.row + 1..=self.new_end.row);
354 self.inserted_newline_at_end = true;
355 } else {
356 if !self.inserted_newline_at_end {
357 self.deleted_rows.insert(self.old_end.row);
358 }
359 self.inserted_rows.extend(new_start.row..=self.new_end.row);
360 }
361 } else {
362 self.deleted_rows.insert(self.old_end.row);
363 self.inserted_rows.extend(new_start.row..=self.new_end.row);
364 }
365
366 self.buffered_insert.clear();
367 }
368
369 fn flush_delete(&mut self, old_text: &Rope) {
370 if self.buffered_delete == 0 {
371 return;
372 }
373
374 let old_start = self.old_end;
375 self.old_end =
376 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
377
378 if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
379 self.deleted_rows
380 .extend(old_start.row + 1..=self.old_end.row);
381 } else if is_line_start(old_start)
382 && (is_line_start(self.old_end) && self.old_end < old_text.max_point())
383 && self.new_end.column == 0
384 {
385 self.deleted_rows.extend(old_start.row..self.old_end.row);
386 } else {
387 self.inserted_rows.insert(self.new_end.row);
388 self.deleted_rows.extend(old_start.row..=self.old_end.row);
389 }
390
391 self.inserted_newline_at_end = false;
392 self.buffered_delete = 0;
393 }
394
395 fn keep(&mut self, bytes: usize, old_text: &Rope) {
396 if bytes == 0 {
397 return;
398 }
399
400 let lines =
401 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end;
402 self.old_end += lines;
403 self.new_end += lines;
404 self.inserted_newline_at_end = false;
405 }
406
407 fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
408 let old_start_offset = old_text.point_to_offset(self.old_end);
409 let old_end_offset = old_start_offset + self.buffered_delete;
410
411 let new_chars = self.buffered_insert.chars().rev();
412 let old_chars = old_text
413 .chunks_in_range(old_start_offset..old_end_offset)
414 .flat_map(|chunk| chunk.chars().rev());
415
416 let mut common_suffix_len = 0;
417 for (new_ch, old_ch) in new_chars.zip(old_chars) {
418 if new_ch == old_ch {
419 common_suffix_len += new_ch.len_utf8();
420 } else {
421 break;
422 }
423 }
424
425 self.buffered_delete -= common_suffix_len;
426 self.buffered_insert
427 .truncate(self.buffered_insert.len() - common_suffix_len);
428
429 common_suffix_len
430 }
431
432 pub fn finish(&mut self, old_text: &Rope) {
433 self.flush_insert(old_text);
434 self.flush_delete(old_text);
435
436 let old_start = self.old_end;
437 self.old_end = old_text.max_point();
438 self.new_end += self.old_end - old_start;
439 }
440
441 pub fn line_operations(&self) -> Vec<LineOperation> {
442 let mut ops = Vec::new();
443 let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
444 let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
445 let mut old_row = 0;
446 let mut new_row = 0;
447
448 while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
449 // Check for a run of deleted lines at current old row.
450 if Some(old_row) == deleted_rows.peek().copied() {
451 if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
452 *lines += 1;
453 } else {
454 ops.push(LineOperation::Delete { lines: 1 });
455 }
456 old_row += 1;
457 deleted_rows.next();
458 } else if Some(new_row) == inserted_rows.peek().copied() {
459 if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
460 *lines += 1;
461 } else {
462 ops.push(LineOperation::Insert { lines: 1 });
463 }
464 new_row += 1;
465 inserted_rows.next();
466 } else {
467 // Keep lines until the next deletion, insertion, or the end of the old text.
468 let lines_to_next_deletion = inserted_rows
469 .peek()
470 .copied()
471 .unwrap_or(self.new_end.row + 1)
472 - new_row;
473 let lines_to_next_insertion =
474 deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
475 let kept_lines =
476 cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
477 if kept_lines > 0 {
478 ops.push(LineOperation::Keep { lines: kept_lines });
479 old_row += kept_lines;
480 new_row += kept_lines;
481 }
482 }
483 }
484
485 if old_row < self.old_end.row + 1 {
486 ops.push(LineOperation::Keep {
487 lines: self.old_end.row + 1 - old_row,
488 });
489 }
490
491 ops
492 }
493}
494
495fn is_line_start(point: Point) -> bool {
496 point.column == 0
497}
498
499fn is_line_end(point: Point, text: &Rope) -> bool {
500 text.line_len(point.row) == point.column
501}
502
503#[cfg(test)]
504mod tests {
505 use super::*;
506 use rand::prelude::*;
507 use std::env;
508
509 #[test]
510 fn test_delete_first_of_two_lines() {
511 let old_text = "aaaa\nbbbb";
512 let char_ops = vec![
513 CharOperation::Delete { bytes: 5 },
514 CharOperation::Keep { bytes: 4 },
515 ];
516 let expected_line_ops = vec![
517 LineOperation::Delete { lines: 1 },
518 LineOperation::Keep { lines: 1 },
519 ];
520 let new_text = apply_char_operations(old_text, &char_ops);
521 assert_eq!(
522 new_text,
523 apply_line_operations(old_text, &new_text, &expected_line_ops)
524 );
525
526 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
527 assert_eq!(line_ops, expected_line_ops);
528 }
529
530 #[test]
531 fn test_delete_second_of_two_lines() {
532 let old_text = "aaaa\nbbbb";
533 let char_ops = vec![
534 CharOperation::Keep { bytes: 5 },
535 CharOperation::Delete { bytes: 4 },
536 ];
537 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
538 assert_eq!(
539 line_ops,
540 vec![
541 LineOperation::Keep { lines: 1 },
542 LineOperation::Delete { lines: 1 },
543 LineOperation::Insert { lines: 1 }
544 ]
545 );
546 let new_text = apply_char_operations(old_text, &char_ops);
547 assert_eq!(
548 new_text,
549 apply_line_operations(old_text, &new_text, &line_ops)
550 );
551 }
552
553 #[test]
554 fn test_add_new_line() {
555 let old_text = "aaaa\nbbbb";
556 let char_ops = vec![
557 CharOperation::Keep { bytes: 9 },
558 CharOperation::Insert {
559 text: "\ncccc".into(),
560 },
561 ];
562 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
563 assert_eq!(
564 line_ops,
565 vec![
566 LineOperation::Keep { lines: 2 },
567 LineOperation::Insert { lines: 1 }
568 ]
569 );
570 let new_text = apply_char_operations(old_text, &char_ops);
571 assert_eq!(
572 new_text,
573 apply_line_operations(old_text, &new_text, &line_ops)
574 );
575 }
576
577 #[test]
578 fn test_delete_line_in_middle() {
579 let old_text = "aaaa\nbbbb\ncccc";
580 let char_ops = vec![
581 CharOperation::Keep { bytes: 5 },
582 CharOperation::Delete { bytes: 5 },
583 CharOperation::Keep { bytes: 4 },
584 ];
585 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
586 assert_eq!(
587 line_ops,
588 vec![
589 LineOperation::Keep { lines: 1 },
590 LineOperation::Delete { lines: 1 },
591 LineOperation::Keep { lines: 1 }
592 ]
593 );
594 let new_text = apply_char_operations(old_text, &char_ops);
595 assert_eq!(
596 new_text,
597 apply_line_operations(old_text, &new_text, &line_ops)
598 );
599 }
600
601 #[test]
602 fn test_replace_line() {
603 let old_text = "aaaa\nbbbb\ncccc";
604 let char_ops = vec![
605 CharOperation::Keep { bytes: 5 },
606 CharOperation::Delete { bytes: 4 },
607 CharOperation::Insert {
608 text: "BBBB".into(),
609 },
610 CharOperation::Keep { bytes: 5 },
611 ];
612 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
613 assert_eq!(
614 line_ops,
615 vec![
616 LineOperation::Keep { lines: 1 },
617 LineOperation::Delete { lines: 1 },
618 LineOperation::Insert { lines: 1 },
619 LineOperation::Keep { lines: 1 }
620 ]
621 );
622 let new_text = apply_char_operations(old_text, &char_ops);
623 assert_eq!(
624 new_text,
625 apply_line_operations(old_text, &new_text, &line_ops)
626 );
627 }
628
629 #[test]
630 fn test_multiple_edits_on_different_lines() {
631 let old_text = "aaaa\nbbbb\ncccc\ndddd";
632 let char_ops = vec![
633 CharOperation::Insert { text: "A".into() },
634 CharOperation::Keep { bytes: 9 },
635 CharOperation::Delete { bytes: 5 },
636 CharOperation::Keep { bytes: 4 },
637 CharOperation::Insert {
638 text: "\nEEEE".into(),
639 },
640 ];
641 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
642 assert_eq!(
643 line_ops,
644 vec![
645 LineOperation::Delete { lines: 1 },
646 LineOperation::Insert { lines: 1 },
647 LineOperation::Keep { lines: 1 },
648 LineOperation::Delete { lines: 2 },
649 LineOperation::Insert { lines: 2 },
650 ]
651 );
652 let new_text = apply_char_operations(old_text, &char_ops);
653 assert_eq!(
654 new_text,
655 apply_line_operations(old_text, &new_text, &line_ops)
656 );
657 }
658
659 #[test]
660 fn test_edit_at_end_of_line() {
661 let old_text = "aaaa\nbbbb\ncccc";
662 let char_ops = vec![
663 CharOperation::Keep { bytes: 4 },
664 CharOperation::Insert { text: "A".into() },
665 CharOperation::Keep { bytes: 10 },
666 ];
667 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
668 assert_eq!(
669 line_ops,
670 vec![
671 LineOperation::Delete { lines: 1 },
672 LineOperation::Insert { lines: 1 },
673 LineOperation::Keep { lines: 2 }
674 ]
675 );
676 let new_text = apply_char_operations(old_text, &char_ops);
677 assert_eq!(
678 new_text,
679 apply_line_operations(old_text, &new_text, &line_ops)
680 );
681 }
682
683 #[test]
684 fn test_insert_newline_character() {
685 let old_text = "aaaabbbb";
686 let char_ops = vec![
687 CharOperation::Keep { bytes: 4 },
688 CharOperation::Insert { text: "\n".into() },
689 CharOperation::Keep { bytes: 4 },
690 ];
691 let new_text = apply_char_operations(old_text, &char_ops);
692 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
693 assert_eq!(
694 line_ops,
695 vec![
696 LineOperation::Delete { lines: 1 },
697 LineOperation::Insert { lines: 2 }
698 ]
699 );
700 assert_eq!(
701 new_text,
702 apply_line_operations(old_text, &new_text, &line_ops)
703 );
704 }
705
706 #[test]
707 fn test_insert_newline_at_beginning() {
708 let old_text = "aaaa\nbbbb";
709 let char_ops = vec![
710 CharOperation::Insert { text: "\n".into() },
711 CharOperation::Keep { bytes: 9 },
712 ];
713 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
714 assert_eq!(
715 line_ops,
716 vec![
717 LineOperation::Insert { lines: 1 },
718 LineOperation::Keep { lines: 2 }
719 ]
720 );
721 let new_text = apply_char_operations(old_text, &char_ops);
722 assert_eq!(
723 new_text,
724 apply_line_operations(old_text, &new_text, &line_ops)
725 );
726 }
727
728 #[test]
729 fn test_delete_newline() {
730 let old_text = "aaaa\nbbbb";
731 let char_ops = vec![
732 CharOperation::Keep { bytes: 4 },
733 CharOperation::Delete { bytes: 1 },
734 CharOperation::Keep { bytes: 4 },
735 ];
736 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
737 assert_eq!(
738 line_ops,
739 vec![
740 LineOperation::Delete { lines: 2 },
741 LineOperation::Insert { lines: 1 }
742 ]
743 );
744
745 let new_text = apply_char_operations(old_text, &char_ops);
746 assert_eq!(
747 new_text,
748 apply_line_operations(old_text, &new_text, &line_ops)
749 );
750 }
751
752 #[test]
753 fn test_insert_multiple_newlines() {
754 let old_text = "aaaa\nbbbb";
755 let char_ops = vec![
756 CharOperation::Keep { bytes: 5 },
757 CharOperation::Insert {
758 text: "\n\n".into(),
759 },
760 CharOperation::Keep { bytes: 4 },
761 ];
762 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
763 assert_eq!(
764 line_ops,
765 vec![
766 LineOperation::Keep { lines: 1 },
767 LineOperation::Insert { lines: 2 },
768 LineOperation::Keep { lines: 1 }
769 ]
770 );
771 let new_text = apply_char_operations(old_text, &char_ops);
772 assert_eq!(
773 new_text,
774 apply_line_operations(old_text, &new_text, &line_ops)
775 );
776 }
777
778 #[test]
779 fn test_delete_multiple_newlines() {
780 let old_text = "aaaa\n\n\nbbbb";
781 let char_ops = vec![
782 CharOperation::Keep { bytes: 5 },
783 CharOperation::Delete { bytes: 2 },
784 CharOperation::Keep { bytes: 4 },
785 ];
786 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
787 assert_eq!(
788 line_ops,
789 vec![
790 LineOperation::Keep { lines: 1 },
791 LineOperation::Delete { lines: 2 },
792 LineOperation::Keep { lines: 1 }
793 ]
794 );
795 let new_text = apply_char_operations(old_text, &char_ops);
796 assert_eq!(
797 new_text,
798 apply_line_operations(old_text, &new_text, &line_ops)
799 );
800 }
801
802 #[test]
803 fn test_complex_scenario() {
804 let old_text = "line1\nline2\nline3\nline4";
805 let char_ops = vec![
806 CharOperation::Keep { bytes: 6 },
807 CharOperation::Insert {
808 text: "inserted\n".into(),
809 },
810 CharOperation::Delete { bytes: 6 },
811 CharOperation::Keep { bytes: 5 },
812 CharOperation::Insert {
813 text: "\nnewline".into(),
814 },
815 CharOperation::Keep { bytes: 6 },
816 ];
817 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
818 assert_eq!(
819 line_ops,
820 vec![
821 LineOperation::Keep { lines: 1 },
822 LineOperation::Delete { lines: 1 },
823 LineOperation::Insert { lines: 1 },
824 LineOperation::Keep { lines: 1 },
825 LineOperation::Insert { lines: 1 },
826 LineOperation::Keep { lines: 1 }
827 ]
828 );
829 let new_text = apply_char_operations(old_text, &char_ops);
830 assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4");
831 assert_eq!(
832 apply_line_operations(old_text, &new_text, &line_ops),
833 new_text,
834 );
835 }
836
837 #[test]
838 fn test_cleaning_up_common_suffix() {
839 let old_text = concat!(
840 " for y in 0..size.y() {\n",
841 " let a = 10;\n",
842 " let b = 20;\n",
843 " }",
844 );
845 let char_ops = [
846 CharOperation::Keep { bytes: 8 },
847 CharOperation::Insert { text: "let".into() },
848 CharOperation::Insert {
849 text: " mut".into(),
850 },
851 CharOperation::Insert { text: " y".into() },
852 CharOperation::Insert { text: " =".into() },
853 CharOperation::Insert { text: " 0".into() },
854 CharOperation::Insert { text: ";".into() },
855 CharOperation::Insert { text: "\n".into() },
856 CharOperation::Insert {
857 text: " while".into(),
858 },
859 CharOperation::Insert { text: " y".into() },
860 CharOperation::Insert {
861 text: " < size".into(),
862 },
863 CharOperation::Insert { text: ".".into() },
864 CharOperation::Insert { text: "y".into() },
865 CharOperation::Insert { text: "()".into() },
866 CharOperation::Insert { text: " {".into() },
867 CharOperation::Insert { text: "\n".into() },
868 CharOperation::Delete { bytes: 23 },
869 CharOperation::Keep { bytes: 23 },
870 CharOperation::Keep { bytes: 1 },
871 CharOperation::Keep { bytes: 23 },
872 CharOperation::Keep { bytes: 1 },
873 CharOperation::Keep { bytes: 8 },
874 CharOperation::Insert {
875 text: " y".into(),
876 },
877 CharOperation::Insert { text: " +=".into() },
878 CharOperation::Insert { text: " 1".into() },
879 CharOperation::Insert { text: ";".into() },
880 CharOperation::Insert { text: "\n".into() },
881 CharOperation::Insert {
882 text: " ".into(),
883 },
884 CharOperation::Keep { bytes: 1 },
885 ];
886 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
887 assert_eq!(
888 line_ops,
889 vec![
890 LineOperation::Delete { lines: 1 },
891 LineOperation::Insert { lines: 2 },
892 LineOperation::Keep { lines: 2 },
893 LineOperation::Delete { lines: 1 },
894 LineOperation::Insert { lines: 2 },
895 ]
896 );
897 let new_text = apply_char_operations(old_text, &char_ops);
898 assert_eq!(
899 new_text,
900 apply_line_operations(old_text, &new_text, &line_ops)
901 );
902 }
903
904 #[test]
905 fn test_random_diffs() {
906 random_test(|mut rng| {
907 let old_text_len = env::var("OLD_TEXT_LEN")
908 .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
909 .unwrap_or(10);
910
911 let old = random_text(&mut rng, old_text_len);
912 println!("old text: {:?}", old);
913
914 let new = randomly_edit(&old, &mut rng);
915 println!("new text: {:?}", new);
916
917 let char_operations = random_streaming_diff(&mut rng, &old, &new);
918 println!("char operations: {:?}", char_operations);
919
920 // Use apply_char_operations to verify the result
921 let patched = apply_char_operations(&old, &char_operations);
922 assert_eq!(patched, new);
923
924 // Test char_ops_to_line_ops
925 let line_ops = char_ops_to_line_ops(&old, &char_operations);
926 println!("line operations: {:?}", line_ops);
927 let patched = apply_line_operations(&old, &new, &line_ops);
928 assert_eq!(patched, new);
929 });
930 }
931
932 fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
933 let old_rope = Rope::from(old_text);
934 let mut diff = LineDiff::default();
935 for op in char_ops {
936 diff.push_char_operation(op, &old_rope);
937 }
938 diff.finish(&old_rope);
939 diff.line_operations()
940 }
941
942 fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
943 let mut diff = StreamingDiff::new(old.to_string());
944 let mut char_operations = Vec::new();
945 let mut new_len = 0;
946
947 while new_len < new.len() {
948 let mut chunk_len = rng.random_range(1..=new.len() - new_len);
949 while !new.is_char_boundary(new_len + chunk_len) {
950 chunk_len += 1;
951 }
952 let chunk = &new[new_len..new_len + chunk_len];
953 let new_hunks = diff.push_new(chunk);
954 char_operations.extend(new_hunks);
955 new_len += chunk_len;
956 }
957
958 char_operations.extend(diff.finish());
959 char_operations
960 }
961
962 fn random_test<F>(mut test_fn: F)
963 where
964 F: FnMut(StdRng),
965 {
966 let iterations = env::var("ITERATIONS")
967 .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
968 .unwrap_or(100);
969
970 let seed: u64 = env::var("SEED")
971 .map(|s| s.parse().expect("invalid `SEED` variable"))
972 .unwrap_or(0);
973
974 println!(
975 "Running test with {} iterations and seed {}",
976 iterations, seed
977 );
978
979 for i in 0..iterations {
980 println!("Iteration {}", i + 1);
981 let rng = StdRng::seed_from_u64(seed + i);
982 test_fn(rng);
983 }
984 }
985
986 fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
987 let mut result: Vec<&str> = Vec::new();
988
989 let old_lines: Vec<&str> = old_text.split('\n').collect();
990 let new_lines: Vec<&str> = new_text.split('\n').collect();
991 let mut old_start = 0_usize;
992 let mut new_start = 0_usize;
993
994 for op in line_ops {
995 match op {
996 LineOperation::Keep { lines } => {
997 let old_end = old_start + *lines as usize;
998 result.extend(&old_lines[old_start..old_end]);
999 old_start = old_end;
1000 new_start += *lines as usize;
1001 }
1002 LineOperation::Delete { lines } => {
1003 old_start += *lines as usize;
1004 }
1005 LineOperation::Insert { lines } => {
1006 let new_end = new_start + *lines as usize;
1007 result.extend(&new_lines[new_start..new_end]);
1008 new_start = new_end;
1009 }
1010 }
1011 }
1012
1013 result.join("\n")
1014 }
1015
1016 #[test]
1017 fn test_apply_char_operations() {
1018 let old_text = "Hello, world!";
1019 let char_ops = vec![
1020 CharOperation::Keep { bytes: 7 },
1021 CharOperation::Delete { bytes: 5 },
1022 CharOperation::Insert {
1023 text: "Rust".to_string(),
1024 },
1025 CharOperation::Keep { bytes: 1 },
1026 ];
1027 let result = apply_char_operations(old_text, &char_ops);
1028 assert_eq!(result, "Hello, Rust!");
1029 }
1030
1031 fn random_text(rng: &mut impl Rng, length: usize) -> String {
1032 util::RandomCharIter::new(rng).take(length).collect()
1033 }
1034
1035 fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
1036 let mut result = String::from(text);
1037 let edit_count = rng.random_range(1..=5);
1038
1039 fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
1040 let mut start = rng.random_range(0..=text.len());
1041 while !text.is_char_boundary(start) {
1042 start -= 1;
1043 }
1044 let mut end = rng.random_range(start..=text.len());
1045 while !text.is_char_boundary(end) {
1046 end += 1;
1047 }
1048 (start, end)
1049 }
1050
1051 for _ in 0..edit_count {
1052 match rng.random_range(0..3) {
1053 0 => {
1054 // Insert
1055 let (pos, _) = random_char_range(&result, rng);
1056 let insert_len = rng.random_range(1..=5);
1057 let insert_text: String = random_text(rng, insert_len);
1058 result.insert_str(pos, &insert_text);
1059 }
1060 1 => {
1061 // Delete
1062 if !result.is_empty() {
1063 let (start, end) = random_char_range(&result, rng);
1064 result.replace_range(start..end, "");
1065 }
1066 }
1067 2 => {
1068 // Replace
1069 if !result.is_empty() {
1070 let (start, end) = random_char_range(&result, rng);
1071 let replace_len = end - start;
1072 let replace_text: String = random_text(rng, replace_len);
1073 result.replace_range(start..end, &replace_text);
1074 }
1075 }
1076 _ => unreachable!(),
1077 }
1078 }
1079
1080 result
1081 }
1082
1083 fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
1084 let mut result = String::new();
1085 let mut old_ix = 0;
1086
1087 for operation in char_ops {
1088 match operation {
1089 CharOperation::Keep { bytes } => {
1090 result.push_str(&old_text[old_ix..old_ix + bytes]);
1091 old_ix += bytes;
1092 }
1093 CharOperation::Delete { bytes } => {
1094 old_ix += bytes;
1095 }
1096 CharOperation::Insert { text } => {
1097 result.push_str(text);
1098 }
1099 }
1100 }
1101
1102 result
1103 }
1104}