streaming_diff.rs

   1use ordered_float::OrderedFloat;
   2use rope::{Point, Rope, TextSummary};
   3use std::collections::{BTreeSet, HashMap};
   4use std::{
   5    cmp,
   6    fmt::{self, Debug},
   7    ops::Range,
   8};
   9
  10struct Matrix {
  11    cells: Vec<f64>,
  12    rows: usize,
  13    cols: usize,
  14}
  15
  16impl Matrix {
  17    fn new() -> Self {
  18        Self {
  19            cells: Vec::new(),
  20            rows: 0,
  21            cols: 0,
  22        }
  23    }
  24
  25    fn resize(&mut self, rows: usize, cols: usize) {
  26        self.cells.resize(rows * cols, 0.);
  27        self.rows = rows;
  28        self.cols = cols;
  29    }
  30
  31    fn get(&self, row: usize, col: usize) -> f64 {
  32        if row >= self.rows {
  33            panic!("row out of bounds")
  34        }
  35
  36        if col >= self.cols {
  37            panic!("col out of bounds")
  38        }
  39        self.cells[col * self.rows + row]
  40    }
  41
  42    fn set(&mut self, row: usize, col: usize, value: f64) {
  43        if row >= self.rows {
  44            panic!("row out of bounds")
  45        }
  46
  47        if col >= self.cols {
  48            panic!("col out of bounds")
  49        }
  50
  51        self.cells[col * self.rows + row] = value;
  52    }
  53}
  54
  55impl Debug for Matrix {
  56    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  57        writeln!(f)?;
  58        for i in 0..self.rows {
  59            for j in 0..self.cols {
  60                write!(f, "{:5}", self.get(i, j))?;
  61            }
  62            writeln!(f)?;
  63        }
  64        Ok(())
  65    }
  66}
  67
  68#[derive(Debug, Clone)]
  69pub enum CharOperation {
  70    Insert { text: String },
  71    Delete { bytes: usize },
  72    Keep { bytes: usize },
  73}
  74
  75pub struct StreamingDiff {
  76    old: Vec<char>,
  77    new: Vec<char>,
  78    scores: Matrix,
  79    old_text_ix: usize,
  80    new_text_ix: usize,
  81    equal_runs: HashMap<(usize, usize), u32>,
  82}
  83
  84impl StreamingDiff {
  85    const INSERTION_SCORE: f64 = -1.;
  86    const DELETION_SCORE: f64 = -20.;
  87    const EQUALITY_BASE: f64 = 1.8;
  88    const MAX_EQUALITY_EXPONENT: i32 = 16;
  89
  90    pub fn new(old: String) -> Self {
  91        let old = old.chars().collect::<Vec<_>>();
  92        let mut scores = Matrix::new();
  93        scores.resize(old.len() + 1, 1);
  94        for i in 0..=old.len() {
  95            scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
  96        }
  97        Self {
  98            old,
  99            new: Vec::new(),
 100            scores,
 101            old_text_ix: 0,
 102            new_text_ix: 0,
 103            equal_runs: Default::default(),
 104        }
 105    }
 106
 107    pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
 108        self.new.extend(text.chars());
 109        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
 110
 111        for j in self.new_text_ix + 1..=self.new.len() {
 112            self.scores.set(0, j, j as f64 * Self::INSERTION_SCORE);
 113            for i in 1..=self.old.len() {
 114                let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
 115                let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
 116                let equality_score = if self.old[i - 1] == self.new[j - 1] {
 117                    let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
 118                    equal_run += 1;
 119                    self.equal_runs.insert((i, j), equal_run);
 120
 121                    let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
 122                    self.scores.get(i - 1, j - 1) + Self::EQUALITY_BASE.powi(exponent)
 123                } else {
 124                    f64::NEG_INFINITY
 125                };
 126
 127                let score = insertion_score.max(deletion_score).max(equality_score);
 128                self.scores.set(i, j, score);
 129            }
 130        }
 131
 132        let mut max_score = f64::NEG_INFINITY;
 133        let mut next_old_text_ix = self.old_text_ix;
 134        let next_new_text_ix = self.new.len();
 135        for i in self.old_text_ix..=self.old.len() {
 136            let score = self.scores.get(i, next_new_text_ix);
 137            if score > max_score {
 138                max_score = score;
 139                next_old_text_ix = i;
 140            }
 141        }
 142
 143        let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
 144        self.old_text_ix = next_old_text_ix;
 145        self.new_text_ix = next_new_text_ix;
 146        hunks
 147    }
 148
 149    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
 150        let mut pending_insert: Option<Range<usize>> = None;
 151        let mut hunks = Vec::new();
 152        let mut i = old_text_ix;
 153        let mut j = new_text_ix;
 154        while (i, j) != (self.old_text_ix, self.new_text_ix) {
 155            let insertion_score = if j > self.new_text_ix {
 156                Some((i, j - 1))
 157            } else {
 158                None
 159            };
 160            let deletion_score = if i > self.old_text_ix {
 161                Some((i - 1, j))
 162            } else {
 163                None
 164            };
 165            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
 166                if self.old[i - 1] == self.new[j - 1] {
 167                    Some((i - 1, j - 1))
 168                } else {
 169                    None
 170                }
 171            } else {
 172                None
 173            };
 174
 175            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
 176                .iter()
 177                .max_by_key(|cell| cell.map(|(i, j)| OrderedFloat(self.scores.get(i, j))))
 178                .unwrap()
 179                .unwrap();
 180
 181            if prev_i == i && prev_j == j - 1 {
 182                if let Some(pending_insert) = pending_insert.as_mut() {
 183                    pending_insert.start = prev_j;
 184                } else {
 185                    pending_insert = Some(prev_j..j);
 186                }
 187            } else {
 188                if let Some(range) = pending_insert.take() {
 189                    hunks.push(CharOperation::Insert {
 190                        text: self.new[range].iter().collect(),
 191                    });
 192                }
 193
 194                let char_len = self.old[i - 1].len_utf8();
 195                if prev_i == i - 1 && prev_j == j {
 196                    if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
 197                        *len += char_len;
 198                    } else {
 199                        hunks.push(CharOperation::Delete { bytes: char_len })
 200                    }
 201                } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
 202                    *len += char_len;
 203                } else {
 204                    hunks.push(CharOperation::Keep { bytes: char_len })
 205                }
 206            }
 207
 208            i = prev_i;
 209            j = prev_j;
 210        }
 211
 212        if let Some(range) = pending_insert.take() {
 213            hunks.push(CharOperation::Insert {
 214                text: self.new[range].iter().collect(),
 215            });
 216        }
 217
 218        hunks.reverse();
 219        hunks
 220    }
 221
 222    pub fn finish(self) -> Vec<CharOperation> {
 223        self.backtrack(self.old.len(), self.new.len())
 224    }
 225}
 226
 227#[derive(Debug, Clone, PartialEq)]
 228pub enum LineOperation {
 229    Insert { lines: u32 },
 230    Delete { lines: u32 },
 231    Keep { lines: u32 },
 232}
 233
 234#[derive(Debug, Default)]
 235pub struct LineDiff {
 236    inserted_newline_at_end: bool,
 237    /// The extent of kept and deleted text.
 238    old_end: Point,
 239    /// The extent of kept and inserted text.
 240    new_end: Point,
 241    /// Deleted rows, expressed in terms of the old text.
 242    deleted_rows: BTreeSet<u32>,
 243    /// Inserted rows, expressed in terms of the new text.
 244    inserted_rows: BTreeSet<u32>,
 245    buffered_insert: String,
 246    /// After deleting a newline, we buffer deletion until we keep or insert a character.
 247    buffered_delete: usize,
 248}
 249
 250impl LineDiff {
 251    pub fn push_char_operations<'a>(
 252        &mut self,
 253        operations: impl IntoIterator<Item = &'a CharOperation>,
 254        old_text: &Rope,
 255    ) {
 256        for operation in operations {
 257            self.push_char_operation(operation, old_text);
 258        }
 259    }
 260
 261    pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
 262        match operation {
 263            CharOperation::Insert { text } => {
 264                self.flush_delete(old_text);
 265
 266                if is_line_start(self.old_end) {
 267                    if let Some(newline_ix) = text.rfind('\n') {
 268                        let (prefix, suffix) = text.split_at(newline_ix + 1);
 269                        self.buffered_insert.push_str(prefix);
 270                        self.flush_insert(old_text);
 271                        self.buffered_insert.push_str(suffix);
 272                    } else {
 273                        self.buffered_insert.push_str(&text);
 274                    }
 275                } else {
 276                    self.buffered_insert.push_str(&text);
 277                    if !text.ends_with('\n') {
 278                        self.flush_insert(old_text);
 279                    }
 280                }
 281            }
 282            CharOperation::Delete { bytes } => {
 283                self.buffered_delete += bytes;
 284
 285                let common_suffix_len = self.trim_buffered_end(old_text);
 286                self.flush_insert(old_text);
 287
 288                if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
 289                    self.flush_delete(old_text);
 290                    self.keep(common_suffix_len, old_text);
 291                }
 292            }
 293            CharOperation::Keep { bytes } => {
 294                self.flush_delete(old_text);
 295                self.flush_insert(old_text);
 296                self.keep(*bytes, old_text);
 297            }
 298        }
 299    }
 300
 301    fn flush_insert(&mut self, old_text: &Rope) {
 302        if self.buffered_insert.is_empty() {
 303            return;
 304        }
 305
 306        let new_start = self.new_end;
 307        let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
 308        self.new_end += lines;
 309
 310        if is_line_start(self.old_end) {
 311            if self.new_end.column == 0 {
 312                self.inserted_rows.extend(new_start.row..self.new_end.row);
 313            } else {
 314                self.deleted_rows.insert(self.old_end.row);
 315                self.inserted_rows.extend(new_start.row..=self.new_end.row);
 316            }
 317        } else if is_line_end(self.old_end, old_text) {
 318            if self.buffered_insert.starts_with('\n') {
 319                self.inserted_rows
 320                    .extend(new_start.row + 1..=self.new_end.row);
 321                self.inserted_newline_at_end = true;
 322            } else {
 323                if !self.inserted_newline_at_end {
 324                    self.deleted_rows.insert(self.old_end.row);
 325                }
 326                self.inserted_rows.extend(new_start.row..=self.new_end.row);
 327            }
 328        } else {
 329            self.deleted_rows.insert(self.old_end.row);
 330            self.inserted_rows.extend(new_start.row..=self.new_end.row);
 331        }
 332
 333        self.buffered_insert.clear();
 334    }
 335
 336    fn flush_delete(&mut self, old_text: &Rope) {
 337        if self.buffered_delete == 0 {
 338            return;
 339        }
 340
 341        let old_start = self.old_end;
 342        self.old_end =
 343            old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
 344
 345        if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
 346            self.deleted_rows
 347                .extend(old_start.row + 1..=self.old_end.row);
 348        } else if is_line_start(old_start)
 349            && (is_line_start(self.old_end) && self.old_end < old_text.max_point())
 350            && self.new_end.column == 0
 351        {
 352            self.deleted_rows.extend(old_start.row..self.old_end.row);
 353        } else {
 354            self.inserted_rows.insert(self.new_end.row);
 355            self.deleted_rows.extend(old_start.row..=self.old_end.row);
 356        }
 357
 358        self.inserted_newline_at_end = false;
 359        self.buffered_delete = 0;
 360    }
 361
 362    fn keep(&mut self, bytes: usize, old_text: &Rope) {
 363        if bytes == 0 {
 364            return;
 365        }
 366
 367        let lines =
 368            old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end;
 369        self.old_end += lines;
 370        self.new_end += lines;
 371        self.inserted_newline_at_end = false;
 372    }
 373
 374    fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
 375        let old_start_offset = old_text.point_to_offset(self.old_end);
 376        let old_end_offset = old_start_offset + self.buffered_delete;
 377
 378        let new_chars = self.buffered_insert.chars().rev();
 379        let old_chars = old_text
 380            .chunks_in_range(old_start_offset..old_end_offset)
 381            .flat_map(|chunk| chunk.chars().rev());
 382
 383        let mut common_suffix_len = 0;
 384        for (new_ch, old_ch) in new_chars.zip(old_chars) {
 385            if new_ch == old_ch {
 386                common_suffix_len += new_ch.len_utf8();
 387            } else {
 388                break;
 389            }
 390        }
 391
 392        self.buffered_delete -= common_suffix_len;
 393        self.buffered_insert
 394            .truncate(self.buffered_insert.len() - common_suffix_len);
 395
 396        common_suffix_len
 397    }
 398
 399    pub fn finish(&mut self, old_text: &Rope) {
 400        self.flush_insert(old_text);
 401        self.flush_delete(old_text);
 402
 403        let old_start = self.old_end;
 404        self.old_end = old_text.max_point();
 405        self.new_end += self.old_end - old_start;
 406    }
 407
 408    pub fn line_operations(&self) -> Vec<LineOperation> {
 409        let mut ops = Vec::new();
 410        let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
 411        let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
 412        let mut old_row = 0;
 413        let mut new_row = 0;
 414
 415        while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
 416            // Check for a run of deleted lines at current old row.
 417            if Some(old_row) == deleted_rows.peek().copied() {
 418                if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
 419                    *lines += 1;
 420                } else {
 421                    ops.push(LineOperation::Delete { lines: 1 });
 422                }
 423                old_row += 1;
 424                deleted_rows.next();
 425            } else if Some(new_row) == inserted_rows.peek().copied() {
 426                if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
 427                    *lines += 1;
 428                } else {
 429                    ops.push(LineOperation::Insert { lines: 1 });
 430                }
 431                new_row += 1;
 432                inserted_rows.next();
 433            } else {
 434                // Keep lines until the next deletion, insertion, or the end of the old text.
 435                let lines_to_next_deletion = inserted_rows
 436                    .peek()
 437                    .copied()
 438                    .unwrap_or(self.new_end.row + 1)
 439                    - new_row;
 440                let lines_to_next_insertion =
 441                    deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
 442                let kept_lines =
 443                    cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
 444                if kept_lines > 0 {
 445                    ops.push(LineOperation::Keep { lines: kept_lines });
 446                    old_row += kept_lines;
 447                    new_row += kept_lines;
 448                }
 449            }
 450        }
 451
 452        if old_row < self.old_end.row + 1 {
 453            ops.push(LineOperation::Keep {
 454                lines: self.old_end.row + 1 - old_row,
 455            });
 456        }
 457
 458        ops
 459    }
 460}
 461
 462fn is_line_start(point: Point) -> bool {
 463    point.column == 0
 464}
 465
 466fn is_line_end(point: Point, text: &Rope) -> bool {
 467    text.line_len(point.row) == point.column
 468}
 469
 470#[cfg(test)]
 471mod tests {
 472    use super::*;
 473    use rand::prelude::*;
 474    use std::env;
 475
 476    #[test]
 477    fn test_delete_first_of_two_lines() {
 478        let old_text = "aaaa\nbbbb";
 479        let char_ops = vec![
 480            CharOperation::Delete { bytes: 5 },
 481            CharOperation::Keep { bytes: 4 },
 482        ];
 483        let expected_line_ops = vec![
 484            LineOperation::Delete { lines: 1 },
 485            LineOperation::Keep { lines: 1 },
 486        ];
 487        let new_text = apply_char_operations(old_text, &char_ops);
 488        assert_eq!(
 489            new_text,
 490            apply_line_operations(old_text, &new_text, &expected_line_ops)
 491        );
 492
 493        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 494        assert_eq!(line_ops, expected_line_ops);
 495    }
 496
 497    #[test]
 498    fn test_delete_second_of_two_lines() {
 499        let old_text = "aaaa\nbbbb";
 500        let char_ops = vec![
 501            CharOperation::Keep { bytes: 5 },
 502            CharOperation::Delete { bytes: 4 },
 503        ];
 504        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 505        assert_eq!(
 506            line_ops,
 507            vec![
 508                LineOperation::Keep { lines: 1 },
 509                LineOperation::Delete { lines: 1 },
 510                LineOperation::Insert { lines: 1 }
 511            ]
 512        );
 513        let new_text = apply_char_operations(old_text, &char_ops);
 514        assert_eq!(
 515            new_text,
 516            apply_line_operations(old_text, &new_text, &line_ops)
 517        );
 518    }
 519
 520    #[test]
 521    fn test_add_new_line() {
 522        let old_text = "aaaa\nbbbb";
 523        let char_ops = vec![
 524            CharOperation::Keep { bytes: 9 },
 525            CharOperation::Insert {
 526                text: "\ncccc".into(),
 527            },
 528        ];
 529        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 530        assert_eq!(
 531            line_ops,
 532            vec![
 533                LineOperation::Keep { lines: 2 },
 534                LineOperation::Insert { lines: 1 }
 535            ]
 536        );
 537        let new_text = apply_char_operations(old_text, &char_ops);
 538        assert_eq!(
 539            new_text,
 540            apply_line_operations(old_text, &new_text, &line_ops)
 541        );
 542    }
 543
 544    #[test]
 545    fn test_delete_line_in_middle() {
 546        let old_text = "aaaa\nbbbb\ncccc";
 547        let char_ops = vec![
 548            CharOperation::Keep { bytes: 5 },
 549            CharOperation::Delete { bytes: 5 },
 550            CharOperation::Keep { bytes: 4 },
 551        ];
 552        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 553        assert_eq!(
 554            line_ops,
 555            vec![
 556                LineOperation::Keep { lines: 1 },
 557                LineOperation::Delete { lines: 1 },
 558                LineOperation::Keep { lines: 1 }
 559            ]
 560        );
 561        let new_text = apply_char_operations(old_text, &char_ops);
 562        assert_eq!(
 563            new_text,
 564            apply_line_operations(old_text, &new_text, &line_ops)
 565        );
 566    }
 567
 568    #[test]
 569    fn test_replace_line() {
 570        let old_text = "aaaa\nbbbb\ncccc";
 571        let char_ops = vec![
 572            CharOperation::Keep { bytes: 5 },
 573            CharOperation::Delete { bytes: 4 },
 574            CharOperation::Insert {
 575                text: "BBBB".into(),
 576            },
 577            CharOperation::Keep { bytes: 5 },
 578        ];
 579        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 580        assert_eq!(
 581            line_ops,
 582            vec![
 583                LineOperation::Keep { lines: 1 },
 584                LineOperation::Delete { lines: 1 },
 585                LineOperation::Insert { lines: 1 },
 586                LineOperation::Keep { lines: 1 }
 587            ]
 588        );
 589        let new_text = apply_char_operations(old_text, &char_ops);
 590        assert_eq!(
 591            new_text,
 592            apply_line_operations(old_text, &new_text, &line_ops)
 593        );
 594    }
 595
 596    #[test]
 597    fn test_multiple_edits_on_different_lines() {
 598        let old_text = "aaaa\nbbbb\ncccc\ndddd";
 599        let char_ops = vec![
 600            CharOperation::Insert { text: "A".into() },
 601            CharOperation::Keep { bytes: 9 },
 602            CharOperation::Delete { bytes: 5 },
 603            CharOperation::Keep { bytes: 4 },
 604            CharOperation::Insert {
 605                text: "\nEEEE".into(),
 606            },
 607        ];
 608        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 609        assert_eq!(
 610            line_ops,
 611            vec![
 612                LineOperation::Delete { lines: 1 },
 613                LineOperation::Insert { lines: 1 },
 614                LineOperation::Keep { lines: 1 },
 615                LineOperation::Delete { lines: 2 },
 616                LineOperation::Insert { lines: 2 },
 617            ]
 618        );
 619        let new_text = apply_char_operations(old_text, &char_ops);
 620        assert_eq!(
 621            new_text,
 622            apply_line_operations(old_text, &new_text, &line_ops)
 623        );
 624    }
 625
 626    #[test]
 627    fn test_edit_at_end_of_line() {
 628        let old_text = "aaaa\nbbbb\ncccc";
 629        let char_ops = vec![
 630            CharOperation::Keep { bytes: 4 },
 631            CharOperation::Insert { text: "A".into() },
 632            CharOperation::Keep { bytes: 10 },
 633        ];
 634        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 635        assert_eq!(
 636            line_ops,
 637            vec![
 638                LineOperation::Delete { lines: 1 },
 639                LineOperation::Insert { lines: 1 },
 640                LineOperation::Keep { lines: 2 }
 641            ]
 642        );
 643        let new_text = apply_char_operations(old_text, &char_ops);
 644        assert_eq!(
 645            new_text,
 646            apply_line_operations(old_text, &new_text, &line_ops)
 647        );
 648    }
 649
 650    #[test]
 651    fn test_insert_newline_character() {
 652        let old_text = "aaaabbbb";
 653        let char_ops = vec![
 654            CharOperation::Keep { bytes: 4 },
 655            CharOperation::Insert { text: "\n".into() },
 656            CharOperation::Keep { bytes: 4 },
 657        ];
 658        let new_text = apply_char_operations(old_text, &char_ops);
 659        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 660        assert_eq!(
 661            line_ops,
 662            vec![
 663                LineOperation::Delete { lines: 1 },
 664                LineOperation::Insert { lines: 2 }
 665            ]
 666        );
 667        assert_eq!(
 668            new_text,
 669            apply_line_operations(old_text, &new_text, &line_ops)
 670        );
 671    }
 672
 673    #[test]
 674    fn test_insert_newline_at_beginning() {
 675        let old_text = "aaaa\nbbbb";
 676        let char_ops = vec![
 677            CharOperation::Insert { text: "\n".into() },
 678            CharOperation::Keep { bytes: 9 },
 679        ];
 680        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 681        assert_eq!(
 682            line_ops,
 683            vec![
 684                LineOperation::Insert { lines: 1 },
 685                LineOperation::Keep { lines: 2 }
 686            ]
 687        );
 688        let new_text = apply_char_operations(old_text, &char_ops);
 689        assert_eq!(
 690            new_text,
 691            apply_line_operations(old_text, &new_text, &line_ops)
 692        );
 693    }
 694
 695    #[test]
 696    fn test_delete_newline() {
 697        let old_text = "aaaa\nbbbb";
 698        let char_ops = vec![
 699            CharOperation::Keep { bytes: 4 },
 700            CharOperation::Delete { bytes: 1 },
 701            CharOperation::Keep { bytes: 4 },
 702        ];
 703        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 704        assert_eq!(
 705            line_ops,
 706            vec![
 707                LineOperation::Delete { lines: 2 },
 708                LineOperation::Insert { lines: 1 }
 709            ]
 710        );
 711
 712        let new_text = apply_char_operations(old_text, &char_ops);
 713        assert_eq!(
 714            new_text,
 715            apply_line_operations(old_text, &new_text, &line_ops)
 716        );
 717    }
 718
 719    #[test]
 720    fn test_insert_multiple_newlines() {
 721        let old_text = "aaaa\nbbbb";
 722        let char_ops = vec![
 723            CharOperation::Keep { bytes: 5 },
 724            CharOperation::Insert {
 725                text: "\n\n".into(),
 726            },
 727            CharOperation::Keep { bytes: 4 },
 728        ];
 729        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 730        assert_eq!(
 731            line_ops,
 732            vec![
 733                LineOperation::Keep { lines: 1 },
 734                LineOperation::Insert { lines: 2 },
 735                LineOperation::Keep { lines: 1 }
 736            ]
 737        );
 738        let new_text = apply_char_operations(old_text, &char_ops);
 739        assert_eq!(
 740            new_text,
 741            apply_line_operations(old_text, &new_text, &line_ops)
 742        );
 743    }
 744
 745    #[test]
 746    fn test_delete_multiple_newlines() {
 747        let old_text = "aaaa\n\n\nbbbb";
 748        let char_ops = vec![
 749            CharOperation::Keep { bytes: 5 },
 750            CharOperation::Delete { bytes: 2 },
 751            CharOperation::Keep { bytes: 4 },
 752        ];
 753        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 754        assert_eq!(
 755            line_ops,
 756            vec![
 757                LineOperation::Keep { lines: 1 },
 758                LineOperation::Delete { lines: 2 },
 759                LineOperation::Keep { lines: 1 }
 760            ]
 761        );
 762        let new_text = apply_char_operations(old_text, &char_ops);
 763        assert_eq!(
 764            new_text,
 765            apply_line_operations(old_text, &new_text, &line_ops)
 766        );
 767    }
 768
 769    #[test]
 770    fn test_complex_scenario() {
 771        let old_text = "line1\nline2\nline3\nline4";
 772        let char_ops = vec![
 773            CharOperation::Keep { bytes: 6 },
 774            CharOperation::Insert {
 775                text: "inserted\n".into(),
 776            },
 777            CharOperation::Delete { bytes: 6 },
 778            CharOperation::Keep { bytes: 5 },
 779            CharOperation::Insert {
 780                text: "\nnewline".into(),
 781            },
 782            CharOperation::Keep { bytes: 6 },
 783        ];
 784        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 785        assert_eq!(
 786            line_ops,
 787            vec![
 788                LineOperation::Keep { lines: 1 },
 789                LineOperation::Delete { lines: 1 },
 790                LineOperation::Insert { lines: 1 },
 791                LineOperation::Keep { lines: 1 },
 792                LineOperation::Insert { lines: 1 },
 793                LineOperation::Keep { lines: 1 }
 794            ]
 795        );
 796        let new_text = apply_char_operations(old_text, &char_ops);
 797        assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4");
 798        assert_eq!(
 799            apply_line_operations(old_text, &new_text, &line_ops),
 800            new_text,
 801        );
 802    }
 803
 804    #[test]
 805    fn test_cleaning_up_common_suffix() {
 806        let old_text = concat!(
 807            "        for y in 0..size.y() {\n",
 808            "            let a = 10;\n",
 809            "            let b = 20;\n",
 810            "        }",
 811        );
 812        let char_ops = [
 813            CharOperation::Keep { bytes: 8 },
 814            CharOperation::Insert { text: "let".into() },
 815            CharOperation::Insert {
 816                text: " mut".into(),
 817            },
 818            CharOperation::Insert { text: " y".into() },
 819            CharOperation::Insert { text: " =".into() },
 820            CharOperation::Insert { text: " 0".into() },
 821            CharOperation::Insert { text: ";".into() },
 822            CharOperation::Insert { text: "\n".into() },
 823            CharOperation::Insert {
 824                text: "        while".into(),
 825            },
 826            CharOperation::Insert { text: " y".into() },
 827            CharOperation::Insert {
 828                text: " < size".into(),
 829            },
 830            CharOperation::Insert { text: ".".into() },
 831            CharOperation::Insert { text: "y".into() },
 832            CharOperation::Insert { text: "()".into() },
 833            CharOperation::Insert { text: " {".into() },
 834            CharOperation::Insert { text: "\n".into() },
 835            CharOperation::Delete { bytes: 23 },
 836            CharOperation::Keep { bytes: 23 },
 837            CharOperation::Keep { bytes: 1 },
 838            CharOperation::Keep { bytes: 23 },
 839            CharOperation::Keep { bytes: 1 },
 840            CharOperation::Keep { bytes: 8 },
 841            CharOperation::Insert {
 842                text: "    y".into(),
 843            },
 844            CharOperation::Insert { text: " +=".into() },
 845            CharOperation::Insert { text: " 1".into() },
 846            CharOperation::Insert { text: ";".into() },
 847            CharOperation::Insert { text: "\n".into() },
 848            CharOperation::Insert {
 849                text: "        ".into(),
 850            },
 851            CharOperation::Keep { bytes: 1 },
 852        ];
 853        let line_ops = char_ops_to_line_ops(old_text, &char_ops);
 854        assert_eq!(
 855            line_ops,
 856            vec![
 857                LineOperation::Delete { lines: 1 },
 858                LineOperation::Insert { lines: 2 },
 859                LineOperation::Keep { lines: 2 },
 860                LineOperation::Delete { lines: 1 },
 861                LineOperation::Insert { lines: 2 },
 862            ]
 863        );
 864        let new_text = apply_char_operations(old_text, &char_ops);
 865        assert_eq!(
 866            new_text,
 867            apply_line_operations(old_text, &new_text, &line_ops)
 868        );
 869    }
 870
 871    #[test]
 872    fn test_random_diffs() {
 873        random_test(|mut rng| {
 874            let old_text_len = env::var("OLD_TEXT_LEN")
 875                .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
 876                .unwrap_or(10);
 877
 878            let old = random_text(&mut rng, old_text_len);
 879            println!("old text: {:?}", old);
 880
 881            let new = randomly_edit(&old, &mut rng);
 882            println!("new text: {:?}", new);
 883
 884            let char_operations = random_streaming_diff(&mut rng, &old, &new);
 885            println!("char operations: {:?}", char_operations);
 886
 887            // Use apply_char_operations to verify the result
 888            let patched = apply_char_operations(&old, &char_operations);
 889            assert_eq!(patched, new);
 890
 891            // Test char_ops_to_line_ops
 892            let line_ops = char_ops_to_line_ops(&old, &char_operations);
 893            println!("line operations: {:?}", line_ops);
 894            let patched = apply_line_operations(&old, &new, &line_ops);
 895            assert_eq!(patched, new);
 896        });
 897    }
 898
 899    fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
 900        let old_rope = Rope::from(old_text);
 901        let mut diff = LineDiff::default();
 902        for op in char_ops {
 903            diff.push_char_operation(op, &old_rope);
 904        }
 905        diff.finish(&old_rope);
 906        diff.line_operations()
 907    }
 908
 909    fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
 910        let mut diff = StreamingDiff::new(old.to_string());
 911        let mut char_operations = Vec::new();
 912        let mut new_len = 0;
 913
 914        while new_len < new.len() {
 915            let mut chunk_len = rng.gen_range(1..=new.len() - new_len);
 916            while !new.is_char_boundary(new_len + chunk_len) {
 917                chunk_len += 1;
 918            }
 919            let chunk = &new[new_len..new_len + chunk_len];
 920            let new_hunks = diff.push_new(chunk);
 921            char_operations.extend(new_hunks);
 922            new_len += chunk_len;
 923        }
 924
 925        char_operations.extend(diff.finish());
 926        char_operations
 927    }
 928
 929    fn random_test<F>(mut test_fn: F)
 930    where
 931        F: FnMut(StdRng),
 932    {
 933        let iterations = env::var("ITERATIONS")
 934            .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
 935            .unwrap_or(100);
 936
 937        let seed: u64 = env::var("SEED")
 938            .map(|s| s.parse().expect("invalid `SEED` variable"))
 939            .unwrap_or(0);
 940
 941        println!(
 942            "Running test with {} iterations and seed {}",
 943            iterations, seed
 944        );
 945
 946        for i in 0..iterations {
 947            println!("Iteration {}", i + 1);
 948            let rng = StdRng::seed_from_u64(seed + i);
 949            test_fn(rng);
 950        }
 951    }
 952
 953    fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
 954        let mut result: Vec<&str> = Vec::new();
 955
 956        let old_lines: Vec<&str> = old_text.split('\n').collect();
 957        let new_lines: Vec<&str> = new_text.split('\n').collect();
 958        let mut old_start = 0_usize;
 959        let mut new_start = 0_usize;
 960
 961        for op in line_ops {
 962            match op {
 963                LineOperation::Keep { lines } => {
 964                    let old_end = old_start + *lines as usize;
 965                    result.extend(&old_lines[old_start..old_end]);
 966                    old_start = old_end;
 967                    new_start += *lines as usize;
 968                }
 969                LineOperation::Delete { lines } => {
 970                    old_start += *lines as usize;
 971                }
 972                LineOperation::Insert { lines } => {
 973                    let new_end = new_start + *lines as usize;
 974                    result.extend(&new_lines[new_start..new_end]);
 975                    new_start = new_end;
 976                }
 977            }
 978        }
 979
 980        result.join("\n")
 981    }
 982
 983    #[test]
 984    fn test_apply_char_operations() {
 985        let old_text = "Hello, world!";
 986        let char_ops = vec![
 987            CharOperation::Keep { bytes: 7 },
 988            CharOperation::Delete { bytes: 5 },
 989            CharOperation::Insert {
 990                text: "Rust".to_string(),
 991            },
 992            CharOperation::Keep { bytes: 1 },
 993        ];
 994        let result = apply_char_operations(old_text, &char_ops);
 995        assert_eq!(result, "Hello, Rust!");
 996    }
 997
 998    fn random_text(rng: &mut impl Rng, length: usize) -> String {
 999        util::RandomCharIter::new(rng).take(length).collect()
1000    }
1001
1002    fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
1003        let mut result = String::from(text);
1004        let edit_count = rng.gen_range(1..=5);
1005
1006        fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
1007            let mut start = rng.gen_range(0..=text.len());
1008            while !text.is_char_boundary(start) {
1009                start -= 1;
1010            }
1011            let mut end = rng.gen_range(start..=text.len());
1012            while !text.is_char_boundary(end) {
1013                end += 1;
1014            }
1015            (start, end)
1016        }
1017
1018        for _ in 0..edit_count {
1019            match rng.gen_range(0..3) {
1020                0 => {
1021                    // Insert
1022                    let (pos, _) = random_char_range(&result, rng);
1023                    let insert_len = rng.gen_range(1..=5);
1024                    let insert_text: String = random_text(rng, insert_len);
1025                    result.insert_str(pos, &insert_text);
1026                }
1027                1 => {
1028                    // Delete
1029                    if !result.is_empty() {
1030                        let (start, end) = random_char_range(&result, rng);
1031                        result.replace_range(start..end, "");
1032                    }
1033                }
1034                2 => {
1035                    // Replace
1036                    if !result.is_empty() {
1037                        let (start, end) = random_char_range(&result, rng);
1038                        let replace_len = end - start;
1039                        let replace_text: String = random_text(rng, replace_len);
1040                        result.replace_range(start..end, &replace_text);
1041                    }
1042                }
1043                _ => unreachable!(),
1044            }
1045        }
1046
1047        result
1048    }
1049
1050    fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
1051        let mut result = String::new();
1052        let mut old_ix = 0;
1053
1054        for operation in char_ops {
1055            match operation {
1056                CharOperation::Keep { bytes } => {
1057                    result.push_str(&old_text[old_ix..old_ix + bytes]);
1058                    old_ix += bytes;
1059                }
1060                CharOperation::Delete { bytes } => {
1061                    old_ix += bytes;
1062                }
1063                CharOperation::Insert { text } => {
1064                    result.push_str(text);
1065                }
1066            }
1067        }
1068
1069        result
1070    }
1071}