1use ordered_float::OrderedFloat;
2use rope::{Point, Rope, TextSummary};
3use std::collections::{BTreeSet, HashMap};
4use std::{
5 cmp,
6 fmt::{self, Debug},
7 ops::Range,
8};
9
/// Dense two-dimensional table of `f64` values backed by one flat vector.
struct Matrix {
 /// Flat cell storage; `get`/`set` address it as `col * rows + row`, i.e. each column is
 /// a contiguous run of `rows` values.
 cells: Vec<f64>,
 /// Number of rows, as last passed to `resize`.
 rows: usize,
 /// Number of columns, as last passed to `resize`.
 cols: usize,
}
15
16impl Matrix {
17 fn new() -> Self {
18 Self {
19 cells: Vec::new(),
20 rows: 0,
21 cols: 0,
22 }
23 }
24
25 fn resize(&mut self, rows: usize, cols: usize) {
26 self.cells.resize(rows * cols, 0.);
27 self.rows = rows;
28 self.cols = cols;
29 }
30
31 fn swap_columns(&mut self, col1: usize, col2: usize) {
32 if col1 == col2 {
33 return;
34 }
35
36 if col1 >= self.cols {
37 panic!("column out of bounds");
38 }
39
40 if col2 >= self.cols {
41 panic!("column out of bounds");
42 }
43
44 unsafe {
45 let ptr = self.cells.as_mut_ptr();
46 std::ptr::swap_nonoverlapping(
47 ptr.add(col1 * self.rows),
48 ptr.add(col2 * self.rows),
49 self.rows,
50 );
51 }
52 }
53
54 fn get(&self, row: usize, col: usize) -> f64 {
55 if row >= self.rows {
56 panic!("row out of bounds")
57 }
58
59 if col >= self.cols {
60 panic!("column out of bounds")
61 }
62 self.cells[col * self.rows + row]
63 }
64
65 fn set(&mut self, row: usize, col: usize, value: f64) {
66 if row >= self.rows {
67 panic!("row out of bounds")
68 }
69
70 if col >= self.cols {
71 panic!("column out of bounds")
72 }
73
74 self.cells[col * self.rows + row] = value;
75 }
76}
77
78impl Debug for Matrix {
79 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
80 writeln!(f)?;
81 for i in 0..self.rows {
82 for j in 0..self.cols {
83 write!(f, "{:5}", self.get(i, j))?;
84 }
85 writeln!(f)?;
86 }
87 Ok(())
88 }
89}
90
/// One step of a character-level diff, as produced by `StreamingDiff`.
///
/// `Delete` and `Keep` are measured in UTF-8 bytes of the old text; `Insert` carries the new
/// text itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CharOperation {
    /// Insert `text` at the current position.
    Insert { text: String },
    /// Skip over `bytes` bytes of the old text without emitting them.
    Delete { bytes: usize },
    /// Carry `bytes` bytes of the old text over unchanged.
    Keep { bytes: usize },
}
97
/// Incremental character-level diff between a fixed old text and a new text that arrives in
/// pieces through `push_new`.
pub struct StreamingDiff {
 /// Characters of the old text, fixed at construction.
 old: Vec<char>,
 /// Characters of the new text received so far.
 new: Vec<char>,
 /// Score table with one row per old-text prefix length (`old.len() + 1` rows); its
 /// columns cover the new characters from `new_text_ix` onwards.
 scores: Matrix,
 /// Index into `old` up to which operations have already been returned by `push_new`.
 old_text_ix: usize,
 /// Index into `new` up to which operations have already been returned by `push_new`.
 new_text_ix: usize,
 /// Length of the run of consecutive equal characters ending at an (old index, new index)
 /// pair; used to reward longer matches.
 equal_runs: HashMap<(usize, usize), u32>,
}
106
107impl StreamingDiff {
108 const INSERTION_SCORE: f64 = -1.;
109 const DELETION_SCORE: f64 = -20.;
110 const EQUALITY_BASE: f64 = 1.8;
111 const MAX_EQUALITY_EXPONENT: i32 = 16;
112
113 pub fn new(old: String) -> Self {
114 let old = old.chars().collect::<Vec<_>>();
115 let mut scores = Matrix::new();
116 scores.resize(old.len() + 1, 1);
117 for i in 0..=old.len() {
118 scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
119 }
120 Self {
121 old,
122 new: Vec::new(),
123 scores,
124 old_text_ix: 0,
125 new_text_ix: 0,
126 equal_runs: Default::default(),
127 }
128 }
129
130 pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
131 self.new.extend(text.chars());
132 self.scores.swap_columns(0, self.scores.cols - 1);
133 self.scores
134 .resize(self.old.len() + 1, self.new.len() - self.new_text_ix + 1);
135 self.equal_runs.retain(|(_i, j), _| *j == self.new_text_ix);
136
137 for j in self.new_text_ix + 1..=self.new.len() {
138 let relative_j = j - self.new_text_ix;
139
140 self.scores
141 .set(0, relative_j, j as f64 * Self::INSERTION_SCORE);
142 for i in 1..=self.old.len() {
143 let insertion_score = self.scores.get(i, relative_j - 1) + Self::INSERTION_SCORE;
144 let deletion_score = self.scores.get(i - 1, relative_j) + Self::DELETION_SCORE;
145 let equality_score = if self.old[i - 1] == self.new[j - 1] {
146 let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
147 equal_run += 1;
148 self.equal_runs.insert((i, j), equal_run);
149
150 let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
151 self.scores.get(i - 1, relative_j - 1) + Self::EQUALITY_BASE.powi(exponent)
152 } else {
153 f64::NEG_INFINITY
154 };
155
156 let score = insertion_score.max(deletion_score).max(equality_score);
157 self.scores.set(i, relative_j, score);
158 }
159 }
160
161 let mut max_score = f64::NEG_INFINITY;
162 let mut next_old_text_ix = self.old_text_ix;
163 let next_new_text_ix = self.new.len();
164 for i in self.old_text_ix..=self.old.len() {
165 let score = self.scores.get(i, next_new_text_ix - self.new_text_ix);
166 if score > max_score {
167 max_score = score;
168 next_old_text_ix = i;
169 }
170 }
171
172 let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
173 self.old_text_ix = next_old_text_ix;
174 self.new_text_ix = next_new_text_ix;
175 hunks
176 }
177
178 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
179 let mut pending_insert: Option<Range<usize>> = None;
180 let mut hunks = Vec::new();
181 let mut i = old_text_ix;
182 let mut j = new_text_ix;
183 while (i, j) != (self.old_text_ix, self.new_text_ix) {
184 let insertion_score = if j > self.new_text_ix {
185 Some((i, j - 1))
186 } else {
187 None
188 };
189 let deletion_score = if i > self.old_text_ix {
190 Some((i - 1, j))
191 } else {
192 None
193 };
194 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
195 if self.old[i - 1] == self.new[j - 1] {
196 Some((i - 1, j - 1))
197 } else {
198 None
199 }
200 } else {
201 None
202 };
203
204 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
205 .iter()
206 .max_by_key(|cell| {
207 cell.map(|(i, j)| OrderedFloat(self.scores.get(i, j - self.new_text_ix)))
208 })
209 .unwrap()
210 .unwrap();
211
212 if prev_i == i && prev_j == j - 1 {
213 if let Some(pending_insert) = pending_insert.as_mut() {
214 pending_insert.start = prev_j;
215 } else {
216 pending_insert = Some(prev_j..j);
217 }
218 } else {
219 if let Some(range) = pending_insert.take() {
220 hunks.push(CharOperation::Insert {
221 text: self.new[range].iter().collect(),
222 });
223 }
224
225 let char_len = self.old[i - 1].len_utf8();
226 if prev_i == i - 1 && prev_j == j {
227 if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
228 *len += char_len;
229 } else {
230 hunks.push(CharOperation::Delete { bytes: char_len })
231 }
232 } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
233 *len += char_len;
234 } else {
235 hunks.push(CharOperation::Keep { bytes: char_len })
236 }
237 }
238
239 i = prev_i;
240 j = prev_j;
241 }
242
243 if let Some(range) = pending_insert.take() {
244 hunks.push(CharOperation::Insert {
245 text: self.new[range].iter().collect(),
246 });
247 }
248
249 hunks.reverse();
250 hunks
251 }
252
253 pub fn finish(self) -> Vec<CharOperation> {
254 self.backtrack(self.old.len(), self.new.len())
255 }
256}
257
/// One step of a line-level diff, as produced by `LineDiff::line_operations`.
#[derive(Debug, Clone, PartialEq)]
pub enum LineOperation {
 /// `lines` consecutive rows of the new text are inserted.
 Insert { lines: u32 },
 /// `lines` consecutive rows of the old text are deleted.
 Delete { lines: u32 },
 /// `lines` consecutive rows are carried over from the old text.
 Keep { lines: u32 },
}
264
/// Accumulates `CharOperation`s and tracks which old rows were deleted and which new rows
/// were inserted, so the result can be reported as `LineOperation`s.
#[derive(Debug, Default)]
pub struct LineDiff {
 /// Set when an insertion starting with a newline was flushed at the end of an old line;
 /// cleared by the next keep or flushed deletion. While set, a further insertion at that
 /// line end does not mark the old row as deleted.
 inserted_newline_at_end: bool,
 /// The extent of kept and deleted text.
 old_end: Point,
 /// The extent of kept and inserted text.
 new_end: Point,
 /// Deleted rows, expressed in terms of the old text.
 deleted_rows: BTreeSet<u32>,
 /// Inserted rows, expressed in terms of the new text.
 inserted_rows: BTreeSet<u32>,
 /// Inserted text that has not yet been accounted for in `new_end` / `inserted_rows`.
 buffered_insert: String,
 /// After deleting a newline, we buffer deletion until we keep or insert a character.
 buffered_delete: usize,
}
280
281impl LineDiff {
282 pub fn push_char_operations<'a>(
283 &mut self,
284 operations: impl IntoIterator<Item = &'a CharOperation>,
285 old_text: &Rope,
286 ) {
287 for operation in operations {
288 self.push_char_operation(operation, old_text);
289 }
290 }
291
292 pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
293 match operation {
294 CharOperation::Insert { text } => {
295 self.flush_delete(old_text);
296
297 if is_line_start(self.old_end) {
298 if let Some(newline_ix) = text.rfind('\n') {
299 let (prefix, suffix) = text.split_at(newline_ix + 1);
300 self.buffered_insert.push_str(prefix);
301 self.flush_insert(old_text);
302 self.buffered_insert.push_str(suffix);
303 } else {
304 self.buffered_insert.push_str(&text);
305 }
306 } else {
307 self.buffered_insert.push_str(&text);
308 if !text.ends_with('\n') {
309 self.flush_insert(old_text);
310 }
311 }
312 }
313 CharOperation::Delete { bytes } => {
314 self.buffered_delete += bytes;
315
316 let common_suffix_len = self.trim_buffered_end(old_text);
317 self.flush_insert(old_text);
318
319 if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
320 self.flush_delete(old_text);
321 self.keep(common_suffix_len, old_text);
322 }
323 }
324 CharOperation::Keep { bytes } => {
325 self.flush_delete(old_text);
326 self.flush_insert(old_text);
327 self.keep(*bytes, old_text);
328 }
329 }
330 }
331
332 fn flush_insert(&mut self, old_text: &Rope) {
333 if self.buffered_insert.is_empty() {
334 return;
335 }
336
337 let new_start = self.new_end;
338 let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
339 self.new_end += lines;
340
341 if is_line_start(self.old_end) {
342 if self.new_end.column == 0 {
343 self.inserted_rows.extend(new_start.row..self.new_end.row);
344 } else {
345 self.deleted_rows.insert(self.old_end.row);
346 self.inserted_rows.extend(new_start.row..=self.new_end.row);
347 }
348 } else if is_line_end(self.old_end, old_text) {
349 if self.buffered_insert.starts_with('\n') {
350 self.inserted_rows
351 .extend(new_start.row + 1..=self.new_end.row);
352 self.inserted_newline_at_end = true;
353 } else {
354 if !self.inserted_newline_at_end {
355 self.deleted_rows.insert(self.old_end.row);
356 }
357 self.inserted_rows.extend(new_start.row..=self.new_end.row);
358 }
359 } else {
360 self.deleted_rows.insert(self.old_end.row);
361 self.inserted_rows.extend(new_start.row..=self.new_end.row);
362 }
363
364 self.buffered_insert.clear();
365 }
366
367 fn flush_delete(&mut self, old_text: &Rope) {
368 if self.buffered_delete == 0 {
369 return;
370 }
371
372 let old_start = self.old_end;
373 self.old_end =
374 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
375
376 if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
377 self.deleted_rows
378 .extend(old_start.row + 1..=self.old_end.row);
379 } else if is_line_start(old_start)
380 && (is_line_start(self.old_end) && self.old_end < old_text.max_point())
381 && self.new_end.column == 0
382 {
383 self.deleted_rows.extend(old_start.row..self.old_end.row);
384 } else {
385 self.inserted_rows.insert(self.new_end.row);
386 self.deleted_rows.extend(old_start.row..=self.old_end.row);
387 }
388
389 self.inserted_newline_at_end = false;
390 self.buffered_delete = 0;
391 }
392
393 fn keep(&mut self, bytes: usize, old_text: &Rope) {
394 if bytes == 0 {
395 return;
396 }
397
398 let lines =
399 old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end;
400 self.old_end += lines;
401 self.new_end += lines;
402 self.inserted_newline_at_end = false;
403 }
404
405 fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
406 let old_start_offset = old_text.point_to_offset(self.old_end);
407 let old_end_offset = old_start_offset + self.buffered_delete;
408
409 let new_chars = self.buffered_insert.chars().rev();
410 let old_chars = old_text
411 .chunks_in_range(old_start_offset..old_end_offset)
412 .flat_map(|chunk| chunk.chars().rev());
413
414 let mut common_suffix_len = 0;
415 for (new_ch, old_ch) in new_chars.zip(old_chars) {
416 if new_ch == old_ch {
417 common_suffix_len += new_ch.len_utf8();
418 } else {
419 break;
420 }
421 }
422
423 self.buffered_delete -= common_suffix_len;
424 self.buffered_insert
425 .truncate(self.buffered_insert.len() - common_suffix_len);
426
427 common_suffix_len
428 }
429
430 pub fn finish(&mut self, old_text: &Rope) {
431 self.flush_insert(old_text);
432 self.flush_delete(old_text);
433
434 let old_start = self.old_end;
435 self.old_end = old_text.max_point();
436 self.new_end += self.old_end - old_start;
437 }
438
439 pub fn line_operations(&self) -> Vec<LineOperation> {
440 let mut ops = Vec::new();
441 let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
442 let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
443 let mut old_row = 0;
444 let mut new_row = 0;
445
446 while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
447 // Check for a run of deleted lines at current old row.
448 if Some(old_row) == deleted_rows.peek().copied() {
449 if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
450 *lines += 1;
451 } else {
452 ops.push(LineOperation::Delete { lines: 1 });
453 }
454 old_row += 1;
455 deleted_rows.next();
456 } else if Some(new_row) == inserted_rows.peek().copied() {
457 if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
458 *lines += 1;
459 } else {
460 ops.push(LineOperation::Insert { lines: 1 });
461 }
462 new_row += 1;
463 inserted_rows.next();
464 } else {
465 // Keep lines until the next deletion, insertion, or the end of the old text.
466 let lines_to_next_deletion = inserted_rows
467 .peek()
468 .copied()
469 .unwrap_or(self.new_end.row + 1)
470 - new_row;
471 let lines_to_next_insertion =
472 deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
473 let kept_lines =
474 cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
475 if kept_lines > 0 {
476 ops.push(LineOperation::Keep { lines: kept_lines });
477 old_row += kept_lines;
478 new_row += kept_lines;
479 }
480 }
481 }
482
483 if old_row < self.old_end.row + 1 {
484 ops.push(LineOperation::Keep {
485 lines: self.old_end.row + 1 - old_row,
486 });
487 }
488
489 ops
490 }
491}
492
493fn is_line_start(point: Point) -> bool {
494 point.column == 0
495}
496
497fn is_line_end(point: Point, text: &Rope) -> bool {
498 text.line_len(point.row) == point.column
499}
500
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use std::env;

    /// Shorthand for `CharOperation::Keep`.
    fn keep(bytes: usize) -> CharOperation {
        CharOperation::Keep { bytes }
    }

    /// Shorthand for `CharOperation::Delete`.
    fn delete(bytes: usize) -> CharOperation {
        CharOperation::Delete { bytes }
    }

    /// Shorthand for `CharOperation::Insert`.
    fn insert(text: &str) -> CharOperation {
        CharOperation::Insert { text: text.into() }
    }

    /// Converts `char_ops` into line operations and checks that they equal `expected` and
    /// that replaying them yields the same text as applying `char_ops` directly.
    /// Returns that new text so callers can make further assertions on it.
    #[track_caller]
    fn assert_line_ops(
        old_text: &str,
        char_ops: &[CharOperation],
        expected: &[LineOperation],
    ) -> String {
        let line_ops = char_ops_to_line_ops(old_text, char_ops);
        assert_eq!(line_ops, expected);

        let new_text = apply_char_operations(old_text, char_ops);
        assert_eq!(
            new_text,
            apply_line_operations(old_text, &new_text, &line_ops)
        );
        new_text
    }

    #[test]
    fn test_delete_first_of_two_lines() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[delete(5), keep(4)],
            &[
                LineOperation::Delete { lines: 1 },
                LineOperation::Keep { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_delete_second_of_two_lines() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[keep(5), delete(4)],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_add_new_line() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[keep(9), insert("\ncccc")],
            &[
                LineOperation::Keep { lines: 2 },
                LineOperation::Insert { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_delete_line_in_middle() {
        assert_line_ops(
            "aaaa\nbbbb\ncccc",
            &[keep(5), delete(5), keep(4)],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 1 },
                LineOperation::Keep { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_replace_line() {
        assert_line_ops(
            "aaaa\nbbbb\ncccc",
            &[keep(5), delete(4), insert("BBBB"), keep(5)],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_multiple_edits_on_different_lines() {
        assert_line_ops(
            "aaaa\nbbbb\ncccc\ndddd",
            &[insert("A"), keep(9), delete(5), keep(4), insert("\nEEEE")],
            &[
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 2 },
                LineOperation::Insert { lines: 2 },
            ],
        );
    }

    #[test]
    fn test_edit_at_end_of_line() {
        assert_line_ops(
            "aaaa\nbbbb\ncccc",
            &[keep(4), insert("A"), keep(10)],
            &[
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 2 },
            ],
        );
    }

    #[test]
    fn test_insert_newline_character() {
        assert_line_ops(
            "aaaabbbb",
            &[keep(4), insert("\n"), keep(4)],
            &[
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 2 },
            ],
        );
    }

    #[test]
    fn test_insert_newline_at_beginning() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[insert("\n"), keep(9)],
            &[
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 2 },
            ],
        );
    }

    #[test]
    fn test_delete_newline() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[keep(4), delete(1), keep(4)],
            &[
                LineOperation::Delete { lines: 2 },
                LineOperation::Insert { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_insert_multiple_newlines() {
        assert_line_ops(
            "aaaa\nbbbb",
            &[keep(5), insert("\n\n"), keep(4)],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Insert { lines: 2 },
                LineOperation::Keep { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_delete_multiple_newlines() {
        assert_line_ops(
            "aaaa\n\n\nbbbb",
            &[keep(5), delete(2), keep(4)],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 2 },
                LineOperation::Keep { lines: 1 },
            ],
        );
    }

    #[test]
    fn test_complex_scenario() {
        let new_text = assert_line_ops(
            "line1\nline2\nline3\nline4",
            &[
                keep(6),
                insert("inserted\n"),
                delete(6),
                keep(5),
                insert("\nnewline"),
                keep(6),
            ],
            &[
                LineOperation::Keep { lines: 1 },
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 1 },
                LineOperation::Insert { lines: 1 },
                LineOperation::Keep { lines: 1 },
            ],
        );
        assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4");
    }

    #[test]
    fn test_cleaning_up_common_suffix() {
        // The indentation inside these literals is load-bearing: the byte counts used by the
        // operations below (8, 23, 23, 1, 23, 1, 8, 1) only line up with 8- and 12-space
        // indents, for a total of 88 bytes of old text.
        let old_text = concat!(
            "        for y in 0..size.y() {\n",
            "            let a = 10;\n",
            "            let b = 20;\n",
            "        }",
        );
        let char_ops = [
            keep(8),
            insert("let"),
            insert(" mut"),
            insert(" y"),
            insert(" ="),
            insert(" 0"),
            insert(";"),
            insert("\n"),
            insert("        while"),
            insert(" y"),
            insert(" < size"),
            insert("."),
            insert("y"),
            insert("()"),
            insert(" {"),
            insert("\n"),
            delete(23),
            keep(23),
            keep(1),
            keep(23),
            keep(1),
            keep(8),
            insert("    y"),
            insert(" +="),
            insert(" 1"),
            insert(";"),
            insert("\n"),
            insert("        "),
            keep(1),
        ];
        assert_line_ops(
            old_text,
            &char_ops,
            &[
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 2 },
                LineOperation::Keep { lines: 2 },
                LineOperation::Delete { lines: 1 },
                LineOperation::Insert { lines: 2 },
            ],
        );
    }

    #[test]
    fn test_random_diffs() {
        random_test(|mut rng| {
            let old_text_len = env::var("OLD_TEXT_LEN")
                .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
                .unwrap_or(10);

            let old = random_text(&mut rng, old_text_len);
            println!("old text: {:?}", old);

            let new = randomly_edit(&old, &mut rng);
            println!("new text: {:?}", new);

            let char_operations = random_streaming_diff(&mut rng, &old, &new);
            println!("char operations: {:?}", char_operations);

            // The character operations must turn the old text into the new text.
            let patched = apply_char_operations(&old, &char_operations);
            assert_eq!(patched, new);

            // So must the line operations derived from them.
            let line_ops = char_ops_to_line_ops(&old, &char_operations);
            println!("line operations: {:?}", line_ops);
            let patched = apply_line_operations(&old, &new, &line_ops);
            assert_eq!(patched, new);
        });
    }

    /// Runs `char_ops` through a fresh `LineDiff` over `old_text` and returns the resulting
    /// line operations.
    fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
        let old_rope = Rope::from(old_text);
        let mut diff = LineDiff::default();
        diff.push_char_operations(char_ops, &old_rope);
        diff.finish(&old_rope);
        diff.line_operations()
    }

    /// Streams `new` into a `StreamingDiff` over `old` in randomly sized chunks that end on
    /// character boundaries, collecting every operation it emits.
    fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
        let mut diff = StreamingDiff::new(old.to_string());
        let mut char_operations = Vec::new();
        let mut new_len = 0;

        while new_len < new.len() {
            let mut chunk_len = rng.gen_range(1..=new.len() - new_len);
            while !new.is_char_boundary(new_len + chunk_len) {
                chunk_len += 1;
            }
            let chunk = &new[new_len..new_len + chunk_len];
            char_operations.extend(diff.push_new(chunk));
            new_len += chunk_len;
        }

        char_operations.extend(diff.finish());
        char_operations
    }

    /// Calls `test_fn` once per iteration with a deterministically seeded RNG.
    ///
    /// The iteration count and base seed come from the `ITERATIONS` and `SEED` environment
    /// variables, defaulting to 100 and 0.
    fn random_test<F>(mut test_fn: F)
    where
        F: FnMut(StdRng),
    {
        let iterations = env::var("ITERATIONS")
            .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
            .unwrap_or(100);

        let seed: u64 = env::var("SEED")
            .map(|s| s.parse().expect("invalid `SEED` variable"))
            .unwrap_or(0);

        println!(
            "Running test with {} iterations and seed {}",
            iterations, seed
        );

        for i in 0..iterations {
            println!("Iteration {}", i + 1);
            let rng = StdRng::seed_from_u64(seed + i);
            test_fn(rng);
        }
    }

    /// Rebuilds a text from `line_ops`, taking kept lines from `old_text` and inserted lines
    /// from `new_text`.
    fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
        let mut result: Vec<&str> = Vec::new();

        let old_lines: Vec<&str> = old_text.split('\n').collect();
        let new_lines: Vec<&str> = new_text.split('\n').collect();
        let mut old_start = 0_usize;
        let mut new_start = 0_usize;

        for op in line_ops {
            match op {
                LineOperation::Keep { lines } => {
                    let old_end = old_start + *lines as usize;
                    result.extend(&old_lines[old_start..old_end]);
                    old_start = old_end;
                    new_start += *lines as usize;
                }
                LineOperation::Delete { lines } => {
                    old_start += *lines as usize;
                }
                LineOperation::Insert { lines } => {
                    let new_end = new_start + *lines as usize;
                    result.extend(&new_lines[new_start..new_end]);
                    new_start = new_end;
                }
            }
        }

        result.join("\n")
    }

    #[test]
    fn test_apply_char_operations() {
        let char_ops = [keep(7), delete(5), insert("Rust"), keep(1)];
        let result = apply_char_operations("Hello, world!", &char_ops);
        assert_eq!(result, "Hello, Rust!");
    }

    /// Produces `length` random characters.
    fn random_text(rng: &mut impl Rng, length: usize) -> String {
        util::RandomCharIter::new(rng).take(length).collect()
    }

    /// Applies between one and five random insertions, deletions, or replacements to `text`.
    fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
        let mut result = String::from(text);
        let edit_count = rng.gen_range(1..=5);

        /// Picks a random byte range of `text` whose ends both fall on character boundaries.
        fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
            let mut start = rng.gen_range(0..=text.len());
            while !text.is_char_boundary(start) {
                start -= 1;
            }
            let mut end = rng.gen_range(start..=text.len());
            while !text.is_char_boundary(end) {
                end += 1;
            }
            (start, end)
        }

        for _ in 0..edit_count {
            match rng.gen_range(0..3) {
                0 => {
                    // Insert
                    let (pos, _) = random_char_range(&result, rng);
                    let insert_len = rng.gen_range(1..=5);
                    let insert_text: String = random_text(rng, insert_len);
                    result.insert_str(pos, &insert_text);
                }
                1 => {
                    // Delete
                    if !result.is_empty() {
                        let (start, end) = random_char_range(&result, rng);
                        result.replace_range(start..end, "");
                    }
                }
                2 => {
                    // Replace
                    if !result.is_empty() {
                        let (start, end) = random_char_range(&result, rng);
                        let replace_len = end - start;
                        let replace_text: String = random_text(rng, replace_len);
                        result.replace_range(start..end, &replace_text);
                    }
                }
                _ => unreachable!(),
            }
        }

        result
    }

    /// Applies `char_ops` to `old_text`: kept bytes are copied, deleted bytes are skipped,
    /// and inserted text is appended.
    fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
        let mut result = String::new();
        let mut old_ix = 0;

        for operation in char_ops {
            match operation {
                CharOperation::Keep { bytes } => {
                    result.push_str(&old_text[old_ix..old_ix + bytes]);
                    old_ix += bytes;
                }
                CharOperation::Delete { bytes } => {
                    old_ix += bytes;
                }
                CharOperation::Insert { text } => {
                    result.push_str(text);
                }
            }
        }

        result
    }
}