1use ordered_float::OrderedFloat;
2use rope::{Point, Rope, TextSummary};
3use std::collections::{BTreeSet, HashMap};
4use std::{
5 cmp,
6 fmt::{self, Debug},
7 ops::Range,
8};
9
10struct Matrix {
11 cells: Vec<f64>,
12 rows: usize,
13 cols: usize,
14}
15
16impl Matrix {
17 fn new() -> Self {
18 Self {
19 cells: Vec::new(),
20 rows: 0,
21 cols: 0,
22 }
23 }
24
25 fn resize(&mut self, rows: usize, cols: usize) {
26 self.cells.resize(rows * cols, 0.);
27 self.rows = rows;
28 self.cols = cols;
29 }
30
31 fn get(&self, row: usize, col: usize) -> f64 {
32 if row >= self.rows {
33 panic!("row out of bounds")
34 }
35
36 if col >= self.cols {
37 panic!("col out of bounds")
38 }
39 self.cells[col * self.rows + row]
40 }
41
42 fn set(&mut self, row: usize, col: usize, value: f64) {
43 if row >= self.rows {
44 panic!("row out of bounds")
45 }
46
47 if col >= self.cols {
48 panic!("col out of bounds")
49 }
50
51 self.cells[col * self.rows + row] = value;
52 }
53}
54
55impl Debug for Matrix {
56 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57 writeln!(f)?;
58 for i in 0..self.rows {
59 for j in 0..self.cols {
60 write!(f, "{:5}", self.get(i, j))?;
61 }
62 writeln!(f)?;
63 }
64 Ok(())
65 }
66}
67
68#[derive(Debug, Clone)]
69pub enum CharOperation {
70 Insert { text: String },
71 Delete { bytes: usize },
72 Keep { bytes: usize },
73}
74
75pub struct StreamingDiff {
76 old: Vec<char>,
77 new: Vec<char>,
78 scores: Matrix,
79 old_text_ix: usize,
80 new_text_ix: usize,
81 equal_runs: HashMap<(usize, usize), u32>,
82}
83
84impl StreamingDiff {
85 const INSERTION_SCORE: f64 = -1.;
86 const DELETION_SCORE: f64 = -20.;
87 const EQUALITY_BASE: f64 = 1.8;
88 const MAX_EQUALITY_EXPONENT: i32 = 16;
89
90 pub fn new(old: String) -> Self {
91 let old = old.chars().collect::<Vec<_>>();
92 let mut scores = Matrix::new();
93 scores.resize(old.len() + 1, 1);
94 for i in 0..=old.len() {
95 scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
96 }
97 Self {
98 old,
99 new: Vec::new(),
100 scores,
101 old_text_ix: 0,
102 new_text_ix: 0,
103 equal_runs: Default::default(),
104 }
105 }
106
107 pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
108 self.new.extend(text.chars());
109 self.scores.resize(self.old.len() + 1, self.new.len() + 1);
110
111 for j in self.new_text_ix + 1..=self.new.len() {
112 self.scores.set(0, j, j as f64 * Self::INSERTION_SCORE);
113 for i in 1..=self.old.len() {
114 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
115 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
116 let equality_score = if self.old[i - 1] == self.new[j - 1] {
117 let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
118 equal_run += 1;
119 self.equal_runs.insert((i, j), equal_run);
120
121 let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
122 self.scores.get(i - 1, j - 1) + Self::EQUALITY_BASE.powi(exponent)
123 } else {
124 f64::NEG_INFINITY
125 };
126
127 let score = insertion_score.max(deletion_score).max(equality_score);
128 self.scores.set(i, j, score);
129 }
130 }
131
132 let mut max_score = f64::NEG_INFINITY;
133 let mut next_old_text_ix = self.old_text_ix;
134 let next_new_text_ix = self.new.len();
135 for i in self.old_text_ix..=self.old.len() {
136 let score = self.scores.get(i, next_new_text_ix);
137 if score > max_score {
138 max_score = score;
139 next_old_text_ix = i;
140 }
141 }
142
143 let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
144 self.old_text_ix = next_old_text_ix;
145 self.new_text_ix = next_new_text_ix;
146 hunks
147 }
148
149 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
150 let mut pending_insert: Option<Range<usize>> = None;
151 let mut hunks = Vec::new();
152 let mut i = old_text_ix;
153 let mut j = new_text_ix;
154 while (i, j) != (self.old_text_ix, self.new_text_ix) {
155 let insertion_score = if j > self.new_text_ix {
156 Some((i, j - 1))
157 } else {
158 None
159 };
160 let deletion_score = if i > self.old_text_ix {
161 Some((i - 1, j))
162 } else {
163 None
164 };
165 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
166 if self.old[i - 1] == self.new[j - 1] {
167 Some((i - 1, j - 1))
168 } else {
169 None
170 }
171 } else {
172 None
173 };
174
175 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
176 .iter()
177 .max_by_key(|cell| cell.map(|(i, j)| OrderedFloat(self.scores.get(i, j))))
178 .unwrap()
179 .unwrap();
180
181 if prev_i == i && prev_j == j - 1 {
182 if let Some(pending_insert) = pending_insert.as_mut() {
183 pending_insert.start = prev_j;
184 } else {
185 pending_insert = Some(prev_j..j);
186 }
187 } else {
188 if let Some(range) = pending_insert.take() {
189 hunks.push(CharOperation::Insert {
190 text: self.new[range].iter().collect(),
191 });
192 }
193
194 let char_len = self.old[i - 1].len_utf8();
195 if prev_i == i - 1 && prev_j == j {
196 if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
197 *len += char_len;
198 } else {
199 hunks.push(CharOperation::Delete { bytes: char_len })
200 }
201 } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
202 *len += char_len;
203 } else {
204 hunks.push(CharOperation::Keep { bytes: char_len })
205 }
206 }
207
208 i = prev_i;
209 j = prev_j;
210 }
211
212 if let Some(range) = pending_insert.take() {
213 hunks.push(CharOperation::Insert {
214 text: self.new[range].iter().collect(),
215 });
216 }
217
218 hunks.reverse();
219 hunks
220 }
221
222 pub fn finish(self) -> Vec<CharOperation> {
223 self.backtrack(self.old.len(), self.new.len())
224 }
225}
226
227#[derive(Debug, Clone, PartialEq)]
228pub enum LineOperation {
229 Insert { lines: u32 },
230 Delete { lines: u32 },
231 Keep { lines: u32 },
232}
233
234#[derive(Debug, Default)]
235pub struct LineDiff {
236 inserted_newline_at_end: bool,
237 /// The extent of kept and deleted text.
238 old_end: Point,
239 /// The extent of kept and inserted text.
240 new_end: Point,
241 /// Deleted rows, expressed in terms of the old text.
242 deleted_rows: BTreeSet<u32>,
243 /// Inserted rows, expressed in terms of the new text.
244 inserted_rows: BTreeSet<u32>,
245 buffered_insert: String,
246 /// After deleting a newline, we buffer deletion until we keep or insert a character.
247 buffered_delete: usize,
248}
249
250impl LineDiff {
251 pub fn push_char_operations<'a>(
252 &mut self,
253 operations: impl IntoIterator<Item = &'a CharOperation>,
254 old_text: &Rope,
255 ) {
256 for operation in operations {
257 self.push_char_operation(operation, old_text);
258 }
259 }
260
261 pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
262 match operation {
263 CharOperation::Insert { text } => {
264 self.flush_delete(old_text);
265
266 if is_line_start(self.old_end) {
267 if let Some(newline_ix) = text.rfind('\n') {
268 let (prefix, suffix) = text.split_at(newline_ix + 1);
269 self.buffered_insert.push_str(prefix);
270 self.flush_insert(old_text);
271 self.buffered_insert.push_str(suffix);
272 } else {
273 self.buffered_insert.push_str(&text);
274 }
275 } else {
276 self.buffered_insert.push_str(&text);
277 if !text.ends_with('\n') {
278 self.flush_insert(old_text);
279 }
280 }
281 }
282 CharOperation::Delete { bytes } => {
283 self.buffered_delete += bytes;
284
285 let common_suffix_len = self.trim_buffered_end(old_text);
286 self.flush_insert(old_text);
287
288 if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
289 self.flush_delete(old_text);
290 self.keep(common_suffix_len, old_text);
291 }
292 }
293 CharOperation::Keep { bytes } => {
294 self.flush_delete(old_text);
295 self.flush_insert(old_text);
296 self.keep(*bytes, old_text);
297 }
298 }
299 }
300
301 fn flush_insert(&mut self, old_text: &Rope) {
302 if self.buffered_insert.is_empty() {
303 return;
304 }
305
306 let new_start = self.new_end;
307 let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
308 self.new_end += lines;
309
310 if is_line_start(self.old_end) {
311 if self.new_end.column == 0 {
312 self.inserted_rows.extend(new_start.row..self.new_end.row);
313 } else {
314 self.deleted_rows.insert(self.old_end.row);
315 self.inserted_rows.extend(new_start.row..=self.new_end.row);
316 }
317 } else if is_line_end(self.old_end, old_text) {
318 if self.buffered_insert.starts_with('\n') {
319 self.inserted_rows
320 .extend(new_start.row + 1..=self.new_end.row);
321 self.inserted_newline_at_end = true;
322 } else {
323 if !self.inserted_newline_at_end {
324 self.deleted_rows.insert(self.old_end.row);
325 }
326 self.inserted_rows.extend(new_start.row..=self.new_end.row);
327 }
328 } else {
329 self.deleted_rows.insert(self.old_end.row);
330 self.inserted_rows.extend(new_start.row..=self.new_end.row);
331 }
332
333 self.buffered_insert.clear();
334 }
335
336 fn flush_delete(&mut self, old_text: &Rope) {
337 if self.buffered_delete == 0 {
338 return;
339 }
340
341 let old_start = self.old_end;
342 self.old_end =
343 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
344
345 if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
346 self.deleted_rows
347 .extend(old_start.row + 1..=self.old_end.row);
348 } else if is_line_start(old_start)
349 && (is_line_start(self.old_end) && self.old_end < old_text.max_point())
350 && self.new_end.column == 0
351 {
352 self.deleted_rows.extend(old_start.row..self.old_end.row);
353 } else {
354 self.inserted_rows.insert(self.new_end.row);
355 self.deleted_rows.extend(old_start.row..=self.old_end.row);
356 }
357
358 self.inserted_newline_at_end = false;
359 self.buffered_delete = 0;
360 }
361
362 fn keep(&mut self, bytes: usize, old_text: &Rope) {
363 if bytes == 0 {
364 return;
365 }
366
367 let lines =
368 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end;
369 self.old_end += lines;
370 self.new_end += lines;
371 self.inserted_newline_at_end = false;
372 }
373
374 fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
375 let old_start_offset = old_text.point_to_offset(self.old_end);
376 let old_end_offset = old_start_offset + self.buffered_delete;
377
378 let new_chars = self.buffered_insert.chars().rev();
379 let old_chars = old_text
380 .chunks_in_range(old_start_offset..old_end_offset)
381 .flat_map(|chunk| chunk.chars().rev());
382
383 let mut common_suffix_len = 0;
384 for (new_ch, old_ch) in new_chars.zip(old_chars) {
385 if new_ch == old_ch {
386 common_suffix_len += new_ch.len_utf8();
387 } else {
388 break;
389 }
390 }
391
392 self.buffered_delete -= common_suffix_len;
393 self.buffered_insert
394 .truncate(self.buffered_insert.len() - common_suffix_len);
395
396 common_suffix_len
397 }
398
399 pub fn finish(&mut self, old_text: &Rope) {
400 self.flush_insert(old_text);
401 self.flush_delete(old_text);
402
403 let old_start = self.old_end;
404 self.old_end = old_text.max_point();
405 self.new_end += self.old_end - old_start;
406 }
407
408 pub fn line_operations(&self) -> Vec<LineOperation> {
409 let mut ops = Vec::new();
410 let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
411 let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
412 let mut old_row = 0;
413 let mut new_row = 0;
414
415 while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
416 // Check for a run of deleted lines at current old row.
417 if Some(old_row) == deleted_rows.peek().copied() {
418 if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
419 *lines += 1;
420 } else {
421 ops.push(LineOperation::Delete { lines: 1 });
422 }
423 old_row += 1;
424 deleted_rows.next();
425 } else if Some(new_row) == inserted_rows.peek().copied() {
426 if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
427 *lines += 1;
428 } else {
429 ops.push(LineOperation::Insert { lines: 1 });
430 }
431 new_row += 1;
432 inserted_rows.next();
433 } else {
434 // Keep lines until the next deletion, insertion, or the end of the old text.
435 let lines_to_next_deletion = inserted_rows
436 .peek()
437 .copied()
438 .unwrap_or(self.new_end.row + 1)
439 - new_row;
440 let lines_to_next_insertion =
441 deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
442 let kept_lines =
443 cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
444 if kept_lines > 0 {
445 ops.push(LineOperation::Keep { lines: kept_lines });
446 old_row += kept_lines;
447 new_row += kept_lines;
448 }
449 }
450 }
451
452 if old_row < self.old_end.row + 1 {
453 ops.push(LineOperation::Keep {
454 lines: self.old_end.row + 1 - old_row,
455 });
456 }
457
458 ops
459 }
460}
461
462fn is_line_start(point: Point) -> bool {
463 point.column == 0
464}
465
466fn is_line_end(point: Point, text: &Rope) -> bool {
467 text.line_len(point.row) == point.column
468}
469
470#[cfg(test)]
471mod tests {
472 use super::*;
473 use rand::prelude::*;
474 use std::env;
475
476 #[test]
477 fn test_delete_first_of_two_lines() {
478 let old_text = "aaaa\nbbbb";
479 let char_ops = vec![
480 CharOperation::Delete { bytes: 5 },
481 CharOperation::Keep { bytes: 4 },
482 ];
483 let expected_line_ops = vec![
484 LineOperation::Delete { lines: 1 },
485 LineOperation::Keep { lines: 1 },
486 ];
487 let new_text = apply_char_operations(old_text, &char_ops);
488 assert_eq!(
489 new_text,
490 apply_line_operations(old_text, &new_text, &expected_line_ops)
491 );
492
493 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
494 assert_eq!(line_ops, expected_line_ops);
495 }
496
497 #[test]
498 fn test_delete_second_of_two_lines() {
499 let old_text = "aaaa\nbbbb";
500 let char_ops = vec![
501 CharOperation::Keep { bytes: 5 },
502 CharOperation::Delete { bytes: 4 },
503 ];
504 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
505 assert_eq!(
506 line_ops,
507 vec![
508 LineOperation::Keep { lines: 1 },
509 LineOperation::Delete { lines: 1 },
510 LineOperation::Insert { lines: 1 }
511 ]
512 );
513 let new_text = apply_char_operations(old_text, &char_ops);
514 assert_eq!(
515 new_text,
516 apply_line_operations(old_text, &new_text, &line_ops)
517 );
518 }
519
520 #[test]
521 fn test_add_new_line() {
522 let old_text = "aaaa\nbbbb";
523 let char_ops = vec![
524 CharOperation::Keep { bytes: 9 },
525 CharOperation::Insert {
526 text: "\ncccc".into(),
527 },
528 ];
529 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
530 assert_eq!(
531 line_ops,
532 vec![
533 LineOperation::Keep { lines: 2 },
534 LineOperation::Insert { lines: 1 }
535 ]
536 );
537 let new_text = apply_char_operations(old_text, &char_ops);
538 assert_eq!(
539 new_text,
540 apply_line_operations(old_text, &new_text, &line_ops)
541 );
542 }
543
544 #[test]
545 fn test_delete_line_in_middle() {
546 let old_text = "aaaa\nbbbb\ncccc";
547 let char_ops = vec![
548 CharOperation::Keep { bytes: 5 },
549 CharOperation::Delete { bytes: 5 },
550 CharOperation::Keep { bytes: 4 },
551 ];
552 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
553 assert_eq!(
554 line_ops,
555 vec![
556 LineOperation::Keep { lines: 1 },
557 LineOperation::Delete { lines: 1 },
558 LineOperation::Keep { lines: 1 }
559 ]
560 );
561 let new_text = apply_char_operations(old_text, &char_ops);
562 assert_eq!(
563 new_text,
564 apply_line_operations(old_text, &new_text, &line_ops)
565 );
566 }
567
568 #[test]
569 fn test_replace_line() {
570 let old_text = "aaaa\nbbbb\ncccc";
571 let char_ops = vec![
572 CharOperation::Keep { bytes: 5 },
573 CharOperation::Delete { bytes: 4 },
574 CharOperation::Insert {
575 text: "BBBB".into(),
576 },
577 CharOperation::Keep { bytes: 5 },
578 ];
579 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
580 assert_eq!(
581 line_ops,
582 vec![
583 LineOperation::Keep { lines: 1 },
584 LineOperation::Delete { lines: 1 },
585 LineOperation::Insert { lines: 1 },
586 LineOperation::Keep { lines: 1 }
587 ]
588 );
589 let new_text = apply_char_operations(old_text, &char_ops);
590 assert_eq!(
591 new_text,
592 apply_line_operations(old_text, &new_text, &line_ops)
593 );
594 }
595
596 #[test]
597 fn test_multiple_edits_on_different_lines() {
598 let old_text = "aaaa\nbbbb\ncccc\ndddd";
599 let char_ops = vec![
600 CharOperation::Insert { text: "A".into() },
601 CharOperation::Keep { bytes: 9 },
602 CharOperation::Delete { bytes: 5 },
603 CharOperation::Keep { bytes: 4 },
604 CharOperation::Insert {
605 text: "\nEEEE".into(),
606 },
607 ];
608 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
609 assert_eq!(
610 line_ops,
611 vec![
612 LineOperation::Delete { lines: 1 },
613 LineOperation::Insert { lines: 1 },
614 LineOperation::Keep { lines: 1 },
615 LineOperation::Delete { lines: 2 },
616 LineOperation::Insert { lines: 2 },
617 ]
618 );
619 let new_text = apply_char_operations(old_text, &char_ops);
620 assert_eq!(
621 new_text,
622 apply_line_operations(old_text, &new_text, &line_ops)
623 );
624 }
625
626 #[test]
627 fn test_edit_at_end_of_line() {
628 let old_text = "aaaa\nbbbb\ncccc";
629 let char_ops = vec![
630 CharOperation::Keep { bytes: 4 },
631 CharOperation::Insert { text: "A".into() },
632 CharOperation::Keep { bytes: 10 },
633 ];
634 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
635 assert_eq!(
636 line_ops,
637 vec![
638 LineOperation::Delete { lines: 1 },
639 LineOperation::Insert { lines: 1 },
640 LineOperation::Keep { lines: 2 }
641 ]
642 );
643 let new_text = apply_char_operations(old_text, &char_ops);
644 assert_eq!(
645 new_text,
646 apply_line_operations(old_text, &new_text, &line_ops)
647 );
648 }
649
650 #[test]
651 fn test_insert_newline_character() {
652 let old_text = "aaaabbbb";
653 let char_ops = vec![
654 CharOperation::Keep { bytes: 4 },
655 CharOperation::Insert { text: "\n".into() },
656 CharOperation::Keep { bytes: 4 },
657 ];
658 let new_text = apply_char_operations(old_text, &char_ops);
659 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
660 assert_eq!(
661 line_ops,
662 vec![
663 LineOperation::Delete { lines: 1 },
664 LineOperation::Insert { lines: 2 }
665 ]
666 );
667 assert_eq!(
668 new_text,
669 apply_line_operations(old_text, &new_text, &line_ops)
670 );
671 }
672
673 #[test]
674 fn test_insert_newline_at_beginning() {
675 let old_text = "aaaa\nbbbb";
676 let char_ops = vec![
677 CharOperation::Insert { text: "\n".into() },
678 CharOperation::Keep { bytes: 9 },
679 ];
680 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
681 assert_eq!(
682 line_ops,
683 vec![
684 LineOperation::Insert { lines: 1 },
685 LineOperation::Keep { lines: 2 }
686 ]
687 );
688 let new_text = apply_char_operations(old_text, &char_ops);
689 assert_eq!(
690 new_text,
691 apply_line_operations(old_text, &new_text, &line_ops)
692 );
693 }
694
695 #[test]
696 fn test_delete_newline() {
697 let old_text = "aaaa\nbbbb";
698 let char_ops = vec![
699 CharOperation::Keep { bytes: 4 },
700 CharOperation::Delete { bytes: 1 },
701 CharOperation::Keep { bytes: 4 },
702 ];
703 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
704 assert_eq!(
705 line_ops,
706 vec![
707 LineOperation::Delete { lines: 2 },
708 LineOperation::Insert { lines: 1 }
709 ]
710 );
711
712 let new_text = apply_char_operations(old_text, &char_ops);
713 assert_eq!(
714 new_text,
715 apply_line_operations(old_text, &new_text, &line_ops)
716 );
717 }
718
719 #[test]
720 fn test_insert_multiple_newlines() {
721 let old_text = "aaaa\nbbbb";
722 let char_ops = vec![
723 CharOperation::Keep { bytes: 5 },
724 CharOperation::Insert {
725 text: "\n\n".into(),
726 },
727 CharOperation::Keep { bytes: 4 },
728 ];
729 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
730 assert_eq!(
731 line_ops,
732 vec![
733 LineOperation::Keep { lines: 1 },
734 LineOperation::Insert { lines: 2 },
735 LineOperation::Keep { lines: 1 }
736 ]
737 );
738 let new_text = apply_char_operations(old_text, &char_ops);
739 assert_eq!(
740 new_text,
741 apply_line_operations(old_text, &new_text, &line_ops)
742 );
743 }
744
745 #[test]
746 fn test_delete_multiple_newlines() {
747 let old_text = "aaaa\n\n\nbbbb";
748 let char_ops = vec![
749 CharOperation::Keep { bytes: 5 },
750 CharOperation::Delete { bytes: 2 },
751 CharOperation::Keep { bytes: 4 },
752 ];
753 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
754 assert_eq!(
755 line_ops,
756 vec![
757 LineOperation::Keep { lines: 1 },
758 LineOperation::Delete { lines: 2 },
759 LineOperation::Keep { lines: 1 }
760 ]
761 );
762 let new_text = apply_char_operations(old_text, &char_ops);
763 assert_eq!(
764 new_text,
765 apply_line_operations(old_text, &new_text, &line_ops)
766 );
767 }
768
769 #[test]
770 fn test_complex_scenario() {
771 let old_text = "line1\nline2\nline3\nline4";
772 let char_ops = vec![
773 CharOperation::Keep { bytes: 6 },
774 CharOperation::Insert {
775 text: "inserted\n".into(),
776 },
777 CharOperation::Delete { bytes: 6 },
778 CharOperation::Keep { bytes: 5 },
779 CharOperation::Insert {
780 text: "\nnewline".into(),
781 },
782 CharOperation::Keep { bytes: 6 },
783 ];
784 let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
785 assert_eq!(
786 line_ops,
787 vec![
788 LineOperation::Keep { lines: 1 },
789 LineOperation::Delete { lines: 1 },
790 LineOperation::Insert { lines: 1 },
791 LineOperation::Keep { lines: 1 },
792 LineOperation::Insert { lines: 1 },
793 LineOperation::Keep { lines: 1 }
794 ]
795 );
796 let new_text = apply_char_operations(old_text, &char_ops);
797 assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4");
798 assert_eq!(
799 apply_line_operations(old_text, &new_text, &line_ops),
800 new_text,
801 );
802 }
803
804 #[test]
805 fn test_cleaning_up_common_suffix() {
806 let old_text = concat!(
807 " for y in 0..size.y() {\n",
808 " let a = 10;\n",
809 " let b = 20;\n",
810 " }",
811 );
812 let char_ops = [
813 CharOperation::Keep { bytes: 8 },
814 CharOperation::Insert { text: "let".into() },
815 CharOperation::Insert {
816 text: " mut".into(),
817 },
818 CharOperation::Insert { text: " y".into() },
819 CharOperation::Insert { text: " =".into() },
820 CharOperation::Insert { text: " 0".into() },
821 CharOperation::Insert { text: ";".into() },
822 CharOperation::Insert { text: "\n".into() },
823 CharOperation::Insert {
824 text: " while".into(),
825 },
826 CharOperation::Insert { text: " y".into() },
827 CharOperation::Insert {
828 text: " < size".into(),
829 },
830 CharOperation::Insert { text: ".".into() },
831 CharOperation::Insert { text: "y".into() },
832 CharOperation::Insert { text: "()".into() },
833 CharOperation::Insert { text: " {".into() },
834 CharOperation::Insert { text: "\n".into() },
835 CharOperation::Delete { bytes: 23 },
836 CharOperation::Keep { bytes: 23 },
837 CharOperation::Keep { bytes: 1 },
838 CharOperation::Keep { bytes: 23 },
839 CharOperation::Keep { bytes: 1 },
840 CharOperation::Keep { bytes: 8 },
841 CharOperation::Insert {
842 text: " y".into(),
843 },
844 CharOperation::Insert { text: " +=".into() },
845 CharOperation::Insert { text: " 1".into() },
846 CharOperation::Insert { text: ";".into() },
847 CharOperation::Insert { text: "\n".into() },
848 CharOperation::Insert {
849 text: " ".into(),
850 },
851 CharOperation::Keep { bytes: 1 },
852 ];
853 let line_ops = char_ops_to_line_ops(old_text, &char_ops);
854 assert_eq!(
855 line_ops,
856 vec![
857 LineOperation::Delete { lines: 1 },
858 LineOperation::Insert { lines: 2 },
859 LineOperation::Keep { lines: 2 },
860 LineOperation::Delete { lines: 1 },
861 LineOperation::Insert { lines: 2 },
862 ]
863 );
864 let new_text = apply_char_operations(old_text, &char_ops);
865 assert_eq!(
866 new_text,
867 apply_line_operations(old_text, &new_text, &line_ops)
868 );
869 }
870
871 #[test]
872 fn test_random_diffs() {
873 random_test(|mut rng| {
874 let old_text_len = env::var("OLD_TEXT_LEN")
875 .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
876 .unwrap_or(10);
877
878 let old = random_text(&mut rng, old_text_len);
879 println!("old text: {:?}", old);
880
881 let new = randomly_edit(&old, &mut rng);
882 println!("new text: {:?}", new);
883
884 let char_operations = random_streaming_diff(&mut rng, &old, &new);
885 println!("char operations: {:?}", char_operations);
886
887 // Use apply_char_operations to verify the result
888 let patched = apply_char_operations(&old, &char_operations);
889 assert_eq!(patched, new);
890
891 // Test char_ops_to_line_ops
892 let line_ops = char_ops_to_line_ops(&old, &char_operations);
893 println!("line operations: {:?}", line_ops);
894 let patched = apply_line_operations(&old, &new, &line_ops);
895 assert_eq!(patched, new);
896 });
897 }
898
899 fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
900 let old_rope = Rope::from(old_text);
901 let mut diff = LineDiff::default();
902 for op in char_ops {
903 diff.push_char_operation(op, &old_rope);
904 }
905 diff.finish(&old_rope);
906 diff.line_operations()
907 }
908
909 fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
910 let mut diff = StreamingDiff::new(old.to_string());
911 let mut char_operations = Vec::new();
912 let mut new_len = 0;
913
914 while new_len < new.len() {
915 let mut chunk_len = rng.gen_range(1..=new.len() - new_len);
916 while !new.is_char_boundary(new_len + chunk_len) {
917 chunk_len += 1;
918 }
919 let chunk = &new[new_len..new_len + chunk_len];
920 let new_hunks = diff.push_new(chunk);
921 char_operations.extend(new_hunks);
922 new_len += chunk_len;
923 }
924
925 char_operations.extend(diff.finish());
926 char_operations
927 }
928
929 fn random_test<F>(mut test_fn: F)
930 where
931 F: FnMut(StdRng),
932 {
933 let iterations = env::var("ITERATIONS")
934 .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
935 .unwrap_or(100);
936
937 let seed: u64 = env::var("SEED")
938 .map(|s| s.parse().expect("invalid `SEED` variable"))
939 .unwrap_or(0);
940
941 println!(
942 "Running test with {} iterations and seed {}",
943 iterations, seed
944 );
945
946 for i in 0..iterations {
947 println!("Iteration {}", i + 1);
948 let rng = StdRng::seed_from_u64(seed + i);
949 test_fn(rng);
950 }
951 }
952
953 fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
954 let mut result: Vec<&str> = Vec::new();
955
956 let old_lines: Vec<&str> = old_text.split('\n').collect();
957 let new_lines: Vec<&str> = new_text.split('\n').collect();
958 let mut old_start = 0_usize;
959 let mut new_start = 0_usize;
960
961 for op in line_ops {
962 match op {
963 LineOperation::Keep { lines } => {
964 let old_end = old_start + *lines as usize;
965 result.extend(&old_lines[old_start..old_end]);
966 old_start = old_end;
967 new_start += *lines as usize;
968 }
969 LineOperation::Delete { lines } => {
970 old_start += *lines as usize;
971 }
972 LineOperation::Insert { lines } => {
973 let new_end = new_start + *lines as usize;
974 result.extend(&new_lines[new_start..new_end]);
975 new_start = new_end;
976 }
977 }
978 }
979
980 result.join("\n")
981 }
982
983 #[test]
984 fn test_apply_char_operations() {
985 let old_text = "Hello, world!";
986 let char_ops = vec![
987 CharOperation::Keep { bytes: 7 },
988 CharOperation::Delete { bytes: 5 },
989 CharOperation::Insert {
990 text: "Rust".to_string(),
991 },
992 CharOperation::Keep { bytes: 1 },
993 ];
994 let result = apply_char_operations(old_text, &char_ops);
995 assert_eq!(result, "Hello, Rust!");
996 }
997
998 fn random_text(rng: &mut impl Rng, length: usize) -> String {
999 util::RandomCharIter::new(rng).take(length).collect()
1000 }
1001
1002 fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
1003 let mut result = String::from(text);
1004 let edit_count = rng.gen_range(1..=5);
1005
1006 fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
1007 let mut start = rng.gen_range(0..=text.len());
1008 while !text.is_char_boundary(start) {
1009 start -= 1;
1010 }
1011 let mut end = rng.gen_range(start..=text.len());
1012 while !text.is_char_boundary(end) {
1013 end += 1;
1014 }
1015 (start, end)
1016 }
1017
1018 for _ in 0..edit_count {
1019 match rng.gen_range(0..3) {
1020 0 => {
1021 // Insert
1022 let (pos, _) = random_char_range(&result, rng);
1023 let insert_len = rng.gen_range(1..=5);
1024 let insert_text: String = random_text(rng, insert_len);
1025 result.insert_str(pos, &insert_text);
1026 }
1027 1 => {
1028 // Delete
1029 if !result.is_empty() {
1030 let (start, end) = random_char_range(&result, rng);
1031 result.replace_range(start..end, "");
1032 }
1033 }
1034 2 => {
1035 // Replace
1036 if !result.is_empty() {
1037 let (start, end) = random_char_range(&result, rng);
1038 let replace_len = end - start;
1039 let replace_text: String = random_text(rng, replace_len);
1040 result.replace_range(start..end, &replace_text);
1041 }
1042 }
1043 _ => unreachable!(),
1044 }
1045 }
1046
1047 result
1048 }
1049
1050 fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
1051 let mut result = String::new();
1052 let mut old_ix = 0;
1053
1054 for operation in char_ops {
1055 match operation {
1056 CharOperation::Keep { bytes } => {
1057 result.push_str(&old_text[old_ix..old_ix + bytes]);
1058 old_ix += bytes;
1059 }
1060 CharOperation::Delete { bytes } => {
1061 old_ix += bytes;
1062 }
1063 CharOperation::Insert { text } => {
1064 result.push_str(text);
1065 }
1066 }
1067 }
1068
1069 result
1070 }
1071}