streaming_diff.rs

   1use ordered_float::OrderedFloat;
   2use rope::{Point, Rope, TextSummary};
   3use std::collections::{BTreeSet, HashMap};
   4use std::{
   5    cmp,
   6    fmt::{self, Debug},
   7    ops::Range,
   8};
   9
  10struct Matrix {
  11    cells: Vec<f64>,
  12    rows: usize,
  13    cols: usize,
  14}
  15
  16impl Matrix {
  17    fn new() -> Self {
  18        Self {
  19            cells: Vec::new(),
  20            rows: 0,
  21            cols: 0,
  22        }
  23    }
  24
  25    fn resize(&mut self, rows: usize, cols: usize) {
  26        self.cells.resize(rows * cols, 0.);
  27        self.rows = rows;
  28        self.cols = cols;
  29    }
  30
  31    fn swap_columns(&mut self, col1: usize, col2: usize) {
  32        if col1 == col2 {
  33            return;
  34        }
  35
  36        if col1 >= self.cols {
  37            panic!("column out of bounds");
  38        }
  39
  40        if col2 >= self.cols {
  41            panic!("column out of bounds");
  42        }
  43
  44        unsafe {
  45            let ptr = self.cells.as_mut_ptr();
  46            std::ptr::swap_nonoverlapping(
  47                ptr.add(col1 * self.rows),
  48                ptr.add(col2 * self.rows),
  49                self.rows,
  50            );
  51        }
  52    }
  53
  54    fn get(&self, row: usize, col: usize) -> f64 {
  55        if row >= self.rows {
  56            panic!("row out of bounds")
  57        }
  58
  59        if col >= self.cols {
  60            panic!("column out of bounds")
  61        }
  62        self.cells[col * self.rows + row]
  63    }
  64
  65    fn set(&mut self, row: usize, col: usize, value: f64) {
  66        if row >= self.rows {
  67            panic!("row out of bounds")
  68        }
  69
  70        if col >= self.cols {
  71            panic!("column out of bounds")
  72        }
  73
  74        self.cells[col * self.rows + row] = value;
  75    }
  76}
  77
  78impl Debug for Matrix {
  79    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  80        writeln!(f)?;
  81        for i in 0..self.rows {
  82            for j in 0..self.cols {
  83                write!(f, "{:5}", self.get(i, j))?;
  84            }
  85            writeln!(f)?;
  86        }
  87        Ok(())
  88    }
  89}
  90
  91#[derive(Debug, Clone)]
  92pub enum CharOperation {
  93    Insert { text: String },
  94    Delete { bytes: usize },
  95    Keep { bytes: usize },
  96}
  97
  98pub struct StreamingDiff {
  99    old: Vec<char>,
 100    new: Vec<char>,
 101    scores: Matrix,
 102    old_text_ix: usize,
 103    new_text_ix: usize,
 104    equal_runs: HashMap<(usize, usize), u32>,
 105}
 106
 107impl StreamingDiff {
 108    const INSERTION_SCORE: f64 = -1.;
 109    const DELETION_SCORE: f64 = -20.;
 110    const EQUALITY_BASE: f64 = 1.8;
 111    const MAX_EQUALITY_EXPONENT: i32 = 16;
 112
 113    pub fn new(old: String) -> Self {
 114        let old = old.chars().collect::<Vec<_>>();
 115        let mut scores = Matrix::new();
 116        scores.resize(old.len() + 1, 1);
 117        for i in 0..=old.len() {
 118            scores.set(i, 0, i as f64 * Self::DELETION_SCORE);
 119        }
 120        Self {
 121            old,
 122            new: Vec::new(),
 123            scores,
 124            old_text_ix: 0,
 125            new_text_ix: 0,
 126            equal_runs: Default::default(),
 127        }
 128    }
 129
 130    pub fn push_new(&mut self, text: &str) -> Vec<CharOperation> {
 131        self.new.extend(text.chars());
 132        self.scores.swap_columns(0, self.scores.cols - 1);
 133        self.scores
 134            .resize(self.old.len() + 1, self.new.len() - self.new_text_ix + 1);
 135        self.equal_runs.retain(|(_i, j), _| *j == self.new_text_ix);
 136
 137        for j in self.new_text_ix + 1..=self.new.len() {
 138            let relative_j = j - self.new_text_ix;
 139
 140            self.scores
 141                .set(0, relative_j, j as f64 * Self::INSERTION_SCORE);
 142            for i in 1..=self.old.len() {
 143                let insertion_score = self.scores.get(i, relative_j - 1) + Self::INSERTION_SCORE;
 144                let deletion_score = self.scores.get(i - 1, relative_j) + Self::DELETION_SCORE;
 145                let equality_score = if self.old[i - 1] == self.new[j - 1] {
 146                    let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
 147                    equal_run += 1;
 148                    self.equal_runs.insert((i, j), equal_run);
 149
 150                    let exponent = cmp::min(equal_run as i32 / 4, Self::MAX_EQUALITY_EXPONENT);
 151                    self.scores.get(i - 1, relative_j - 1) + Self::EQUALITY_BASE.powi(exponent)
 152                } else {
 153                    f64::NEG_INFINITY
 154                };
 155
 156                let score = insertion_score.max(deletion_score).max(equality_score);
 157                self.scores.set(i, relative_j, score);
 158            }
 159        }
 160
 161        let mut max_score = f64::NEG_INFINITY;
 162        let mut next_old_text_ix = self.old_text_ix;
 163        let next_new_text_ix = self.new.len();
 164        for i in self.old_text_ix..=self.old.len() {
 165            let score = self.scores.get(i, next_new_text_ix - self.new_text_ix);
 166            if score > max_score {
 167                max_score = score;
 168                next_old_text_ix = i;
 169            }
 170        }
 171
 172        let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
 173        self.old_text_ix = next_old_text_ix;
 174        self.new_text_ix = next_new_text_ix;
 175        hunks
 176    }
 177
 178    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<CharOperation> {
 179        let mut pending_insert: Option<Range<usize>> = None;
 180        let mut hunks = Vec::new();
 181        let mut i = old_text_ix;
 182        let mut j = new_text_ix;
 183        while (i, j) != (self.old_text_ix, self.new_text_ix) {
 184            let insertion_score = if j > self.new_text_ix {
 185                Some((i, j - 1))
 186            } else {
 187                None
 188            };
 189            let deletion_score = if i > self.old_text_ix {
 190                Some((i - 1, j))
 191            } else {
 192                None
 193            };
 194            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
 195                if self.old[i - 1] == self.new[j - 1] {
 196                    Some((i - 1, j - 1))
 197                } else {
 198                    None
 199                }
 200            } else {
 201                None
 202            };
 203
 204            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
 205                .iter()
 206                .max_by_key(|cell| {
 207                    cell.map(|(i, j)| OrderedFloat(self.scores.get(i, j - self.new_text_ix)))
 208                })
 209                .unwrap()
 210                .unwrap();
 211
 212            if prev_i == i && prev_j == j - 1 {
 213                if let Some(pending_insert) = pending_insert.as_mut() {
 214                    pending_insert.start = prev_j;
 215                } else {
 216                    pending_insert = Some(prev_j..j);
 217                }
 218            } else {
 219                if let Some(range) = pending_insert.take() {
 220                    hunks.push(CharOperation::Insert {
 221                        text: self.new[range].iter().collect(),
 222                    });
 223                }
 224
 225                let char_len = self.old[i - 1].len_utf8();
 226                if prev_i == i - 1 && prev_j == j {
 227                    if let Some(CharOperation::Delete { bytes: len }) = hunks.last_mut() {
 228                        *len += char_len;
 229                    } else {
 230                        hunks.push(CharOperation::Delete { bytes: char_len })
 231                    }
 232                } else if let Some(CharOperation::Keep { bytes: len }) = hunks.last_mut() {
 233                    *len += char_len;
 234                } else {
 235                    hunks.push(CharOperation::Keep { bytes: char_len })
 236                }
 237            }
 238
 239            i = prev_i;
 240            j = prev_j;
 241        }
 242
 243        if let Some(range) = pending_insert.take() {
 244            hunks.push(CharOperation::Insert {
 245                text: self.new[range].iter().collect(),
 246            });
 247        }
 248
 249        hunks.reverse();
 250        hunks
 251    }
 252
 253    pub fn finish(self) -> Vec<CharOperation> {
 254        self.backtrack(self.old.len(), self.new.len())
 255    }
 256}
 257
 258#[derive(Debug, Clone, PartialEq)]
 259pub enum LineOperation {
 260    Insert { lines: u32 },
 261    Delete { lines: u32 },
 262    Keep { lines: u32 },
 263}
 264
 265#[derive(Debug, Default)]
 266pub struct LineDiff {
 267    inserted_newline_at_end: bool,
 268    /// The extent of kept and deleted text.
 269    old_end: Point,
 270    /// The extent of kept and inserted text.
 271    new_end: Point,
 272    /// Deleted rows, expressed in terms of the old text.
 273    deleted_rows: BTreeSet<u32>,
 274    /// Inserted rows, expressed in terms of the new text.
 275    inserted_rows: BTreeSet<u32>,
 276    buffered_insert: String,
 277    /// After deleting a newline, we buffer deletion until we keep or insert a character.
 278    buffered_delete: usize,
 279}
 280
 281impl LineDiff {
 282    pub fn push_char_operations<'a>(
 283        &mut self,
 284        operations: impl IntoIterator<Item = &'a CharOperation>,
 285        old_text: &Rope,
 286    ) {
 287        for operation in operations {
 288            self.push_char_operation(operation, old_text);
 289        }
 290    }
 291
 292    pub fn push_char_operation(&mut self, operation: &CharOperation, old_text: &Rope) {
 293        match operation {
 294            CharOperation::Insert { text } => {
 295                self.flush_delete(old_text);
 296
 297                if is_line_start(self.old_end) {
 298                    if let Some(newline_ix) = text.rfind('\n') {
 299                        let (prefix, suffix) = text.split_at(newline_ix + 1);
 300                        self.buffered_insert.push_str(prefix);
 301                        self.flush_insert(old_text);
 302                        self.buffered_insert.push_str(suffix);
 303                    } else {
 304                        self.buffered_insert.push_str(&text);
 305                    }
 306                } else {
 307                    self.buffered_insert.push_str(&text);
 308                    if !text.ends_with('\n') {
 309                        self.flush_insert(old_text);
 310                    }
 311                }
 312            }
 313            CharOperation::Delete { bytes } => {
 314                self.buffered_delete += bytes;
 315
 316                let common_suffix_len = self.trim_buffered_end(old_text);
 317                self.flush_insert(old_text);
 318
 319                if common_suffix_len > 0 || !is_line_end(self.old_end, old_text) {
 320                    self.flush_delete(old_text);
 321                    self.keep(common_suffix_len, old_text);
 322                }
 323            }
 324            CharOperation::Keep { bytes } => {
 325                self.flush_delete(old_text);
 326                self.flush_insert(old_text);
 327                self.keep(*bytes, old_text);
 328            }
 329        }
 330    }
 331
 332    fn flush_insert(&mut self, old_text: &Rope) {
 333        if self.buffered_insert.is_empty() {
 334            return;
 335        }
 336
 337        let new_start = self.new_end;
 338        let lines = TextSummary::from(self.buffered_insert.as_str()).lines;
 339        self.new_end += lines;
 340
 341        if is_line_start(self.old_end) {
 342            if self.new_end.column == 0 {
 343                self.inserted_rows.extend(new_start.row..self.new_end.row);
 344            } else {
 345                self.deleted_rows.insert(self.old_end.row);
 346                self.inserted_rows.extend(new_start.row..=self.new_end.row);
 347            }
 348        } else if is_line_end(self.old_end, old_text) {
 349            if self.buffered_insert.starts_with('\n') {
 350                self.inserted_rows
 351                    .extend(new_start.row + 1..=self.new_end.row);
 352                self.inserted_newline_at_end = true;
 353            } else {
 354                if !self.inserted_newline_at_end {
 355                    self.deleted_rows.insert(self.old_end.row);
 356                }
 357                self.inserted_rows.extend(new_start.row..=self.new_end.row);
 358            }
 359        } else {
 360            self.deleted_rows.insert(self.old_end.row);
 361            self.inserted_rows.extend(new_start.row..=self.new_end.row);
 362        }
 363
 364        self.buffered_insert.clear();
 365    }
 366
 367    fn flush_delete(&mut self, old_text: &Rope) {
 368        if self.buffered_delete == 0 {
 369            return;
 370        }
 371
 372        let old_start = self.old_end;
 373        self.old_end =
 374            old_text.offset_to_point(old_text.point_to_offset(self.old_end) + self.buffered_delete);
 375
 376        if is_line_end(old_start, old_text) && is_line_end(self.old_end, old_text) {
 377            self.deleted_rows
 378                .extend(old_start.row + 1..=self.old_end.row);
 379        } else if is_line_start(old_start)
 380            && (is_line_start(self.old_end) && self.old_end < old_text.max_point())
 381            && self.new_end.column == 0
 382        {
 383            self.deleted_rows.extend(old_start.row..self.old_end.row);
 384        } else {
 385            self.inserted_rows.insert(self.new_end.row);
 386            self.deleted_rows.extend(old_start.row..=self.old_end.row);
 387        }
 388
 389        self.inserted_newline_at_end = false;
 390        self.buffered_delete = 0;
 391    }
 392
 393    fn keep(&mut self, bytes: usize, old_text: &Rope) {
 394        if bytes == 0 {
 395            return;
 396        }
 397
 398        let lines =
 399            old_text.offset_to_point(old_text.point_to_offset(self.old_end) + bytes) - self.old_end;
 400        self.old_end += lines;
 401        self.new_end += lines;
 402        self.inserted_newline_at_end = false;
 403    }
 404
 405    fn trim_buffered_end(&mut self, old_text: &Rope) -> usize {
 406        let old_start_offset = old_text.point_to_offset(self.old_end);
 407        let old_end_offset = old_start_offset + self.buffered_delete;
 408
 409        let new_chars = self.buffered_insert.chars().rev();
 410        let old_chars = old_text
 411            .chunks_in_range(old_start_offset..old_end_offset)
 412            .flat_map(|chunk| chunk.chars().rev());
 413
 414        let mut common_suffix_len = 0;
 415        for (new_ch, old_ch) in new_chars.zip(old_chars) {
 416            if new_ch == old_ch {
 417                common_suffix_len += new_ch.len_utf8();
 418            } else {
 419                break;
 420            }
 421        }
 422
 423        self.buffered_delete -= common_suffix_len;
 424        self.buffered_insert
 425            .truncate(self.buffered_insert.len() - common_suffix_len);
 426
 427        common_suffix_len
 428    }
 429
 430    pub fn finish(&mut self, old_text: &Rope) {
 431        self.flush_insert(old_text);
 432        self.flush_delete(old_text);
 433
 434        let old_start = self.old_end;
 435        self.old_end = old_text.max_point();
 436        self.new_end += self.old_end - old_start;
 437    }
 438
 439    pub fn line_operations(&self) -> Vec<LineOperation> {
 440        let mut ops = Vec::new();
 441        let mut deleted_rows = self.deleted_rows.iter().copied().peekable();
 442        let mut inserted_rows = self.inserted_rows.iter().copied().peekable();
 443        let mut old_row = 0;
 444        let mut new_row = 0;
 445
 446        while deleted_rows.peek().is_some() || inserted_rows.peek().is_some() {
 447            // Check for a run of deleted lines at current old row.
 448            if Some(old_row) == deleted_rows.peek().copied() {
 449                if let Some(LineOperation::Delete { lines }) = ops.last_mut() {
 450                    *lines += 1;
 451                } else {
 452                    ops.push(LineOperation::Delete { lines: 1 });
 453                }
 454                old_row += 1;
 455                deleted_rows.next();
 456            } else if Some(new_row) == inserted_rows.peek().copied() {
 457                if let Some(LineOperation::Insert { lines }) = ops.last_mut() {
 458                    *lines += 1;
 459                } else {
 460                    ops.push(LineOperation::Insert { lines: 1 });
 461                }
 462                new_row += 1;
 463                inserted_rows.next();
 464            } else {
 465                // Keep lines until the next deletion, insertion, or the end of the old text.
 466                let lines_to_next_deletion = inserted_rows
 467                    .peek()
 468                    .copied()
 469                    .unwrap_or(self.new_end.row + 1)
 470                    - new_row;
 471                let lines_to_next_insertion =
 472                    deleted_rows.peek().copied().unwrap_or(self.old_end.row + 1) - old_row;
 473                let kept_lines =
 474                    cmp::max(1, cmp::min(lines_to_next_insertion, lines_to_next_deletion));
 475                if kept_lines > 0 {
 476                    ops.push(LineOperation::Keep { lines: kept_lines });
 477                    old_row += kept_lines;
 478                    new_row += kept_lines;
 479                }
 480            }
 481        }
 482
 483        if old_row < self.old_end.row + 1 {
 484            ops.push(LineOperation::Keep {
 485                lines: self.old_end.row + 1 - old_row,
 486            });
 487        }
 488
 489        ops
 490    }
 491}
 492
 493fn is_line_start(point: Point) -> bool {
 494    point.column == 0
 495}
 496
 497fn is_line_end(point: Point, text: &Rope) -> bool {
 498    text.line_len(point.row) == point.column
 499}
 500
 501#[cfg(test)]
 502mod tests {
 503    use super::*;
 504    use rand::prelude::*;
 505    use std::env;
 506
 507    #[test]
 508    fn test_delete_first_of_two_lines() {
 509        let old_text = "aaaa\nbbbb";
 510        let char_ops = vec![
 511            CharOperation::Delete { bytes: 5 },
 512            CharOperation::Keep { bytes: 4 },
 513        ];
 514        let expected_line_ops = vec![
 515            LineOperation::Delete { lines: 1 },
 516            LineOperation::Keep { lines: 1 },
 517        ];
 518        let new_text = apply_char_operations(old_text, &char_ops);
 519        assert_eq!(
 520            new_text,
 521            apply_line_operations(old_text, &new_text, &expected_line_ops)
 522        );
 523
 524        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 525        assert_eq!(line_ops, expected_line_ops);
 526    }
 527
 528    #[test]
 529    fn test_delete_second_of_two_lines() {
 530        let old_text = "aaaa\nbbbb";
 531        let char_ops = vec![
 532            CharOperation::Keep { bytes: 5 },
 533            CharOperation::Delete { bytes: 4 },
 534        ];
 535        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 536        assert_eq!(
 537            line_ops,
 538            vec![
 539                LineOperation::Keep { lines: 1 },
 540                LineOperation::Delete { lines: 1 },
 541                LineOperation::Insert { lines: 1 }
 542            ]
 543        );
 544        let new_text = apply_char_operations(old_text, &char_ops);
 545        assert_eq!(
 546            new_text,
 547            apply_line_operations(old_text, &new_text, &line_ops)
 548        );
 549    }
 550
 551    #[test]
 552    fn test_add_new_line() {
 553        let old_text = "aaaa\nbbbb";
 554        let char_ops = vec![
 555            CharOperation::Keep { bytes: 9 },
 556            CharOperation::Insert {
 557                text: "\ncccc".into(),
 558            },
 559        ];
 560        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 561        assert_eq!(
 562            line_ops,
 563            vec![
 564                LineOperation::Keep { lines: 2 },
 565                LineOperation::Insert { lines: 1 }
 566            ]
 567        );
 568        let new_text = apply_char_operations(old_text, &char_ops);
 569        assert_eq!(
 570            new_text,
 571            apply_line_operations(old_text, &new_text, &line_ops)
 572        );
 573    }
 574
 575    #[test]
 576    fn test_delete_line_in_middle() {
 577        let old_text = "aaaa\nbbbb\ncccc";
 578        let char_ops = vec![
 579            CharOperation::Keep { bytes: 5 },
 580            CharOperation::Delete { bytes: 5 },
 581            CharOperation::Keep { bytes: 4 },
 582        ];
 583        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 584        assert_eq!(
 585            line_ops,
 586            vec![
 587                LineOperation::Keep { lines: 1 },
 588                LineOperation::Delete { lines: 1 },
 589                LineOperation::Keep { lines: 1 }
 590            ]
 591        );
 592        let new_text = apply_char_operations(old_text, &char_ops);
 593        assert_eq!(
 594            new_text,
 595            apply_line_operations(old_text, &new_text, &line_ops)
 596        );
 597    }
 598
 599    #[test]
 600    fn test_replace_line() {
 601        let old_text = "aaaa\nbbbb\ncccc";
 602        let char_ops = vec![
 603            CharOperation::Keep { bytes: 5 },
 604            CharOperation::Delete { bytes: 4 },
 605            CharOperation::Insert {
 606                text: "BBBB".into(),
 607            },
 608            CharOperation::Keep { bytes: 5 },
 609        ];
 610        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 611        assert_eq!(
 612            line_ops,
 613            vec![
 614                LineOperation::Keep { lines: 1 },
 615                LineOperation::Delete { lines: 1 },
 616                LineOperation::Insert { lines: 1 },
 617                LineOperation::Keep { lines: 1 }
 618            ]
 619        );
 620        let new_text = apply_char_operations(old_text, &char_ops);
 621        assert_eq!(
 622            new_text,
 623            apply_line_operations(old_text, &new_text, &line_ops)
 624        );
 625    }
 626
 627    #[test]
 628    fn test_multiple_edits_on_different_lines() {
 629        let old_text = "aaaa\nbbbb\ncccc\ndddd";
 630        let char_ops = vec![
 631            CharOperation::Insert { text: "A".into() },
 632            CharOperation::Keep { bytes: 9 },
 633            CharOperation::Delete { bytes: 5 },
 634            CharOperation::Keep { bytes: 4 },
 635            CharOperation::Insert {
 636                text: "\nEEEE".into(),
 637            },
 638        ];
 639        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 640        assert_eq!(
 641            line_ops,
 642            vec![
 643                LineOperation::Delete { lines: 1 },
 644                LineOperation::Insert { lines: 1 },
 645                LineOperation::Keep { lines: 1 },
 646                LineOperation::Delete { lines: 2 },
 647                LineOperation::Insert { lines: 2 },
 648            ]
 649        );
 650        let new_text = apply_char_operations(old_text, &char_ops);
 651        assert_eq!(
 652            new_text,
 653            apply_line_operations(old_text, &new_text, &line_ops)
 654        );
 655    }
 656
 657    #[test]
 658    fn test_edit_at_end_of_line() {
 659        let old_text = "aaaa\nbbbb\ncccc";
 660        let char_ops = vec![
 661            CharOperation::Keep { bytes: 4 },
 662            CharOperation::Insert { text: "A".into() },
 663            CharOperation::Keep { bytes: 10 },
 664        ];
 665        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 666        assert_eq!(
 667            line_ops,
 668            vec![
 669                LineOperation::Delete { lines: 1 },
 670                LineOperation::Insert { lines: 1 },
 671                LineOperation::Keep { lines: 2 }
 672            ]
 673        );
 674        let new_text = apply_char_operations(old_text, &char_ops);
 675        assert_eq!(
 676            new_text,
 677            apply_line_operations(old_text, &new_text, &line_ops)
 678        );
 679    }
 680
 681    #[test]
 682    fn test_insert_newline_character() {
 683        let old_text = "aaaabbbb";
 684        let char_ops = vec![
 685            CharOperation::Keep { bytes: 4 },
 686            CharOperation::Insert { text: "\n".into() },
 687            CharOperation::Keep { bytes: 4 },
 688        ];
 689        let new_text = apply_char_operations(old_text, &char_ops);
 690        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 691        assert_eq!(
 692            line_ops,
 693            vec![
 694                LineOperation::Delete { lines: 1 },
 695                LineOperation::Insert { lines: 2 }
 696            ]
 697        );
 698        assert_eq!(
 699            new_text,
 700            apply_line_operations(old_text, &new_text, &line_ops)
 701        );
 702    }
 703
 704    #[test]
 705    fn test_insert_newline_at_beginning() {
 706        let old_text = "aaaa\nbbbb";
 707        let char_ops = vec![
 708            CharOperation::Insert { text: "\n".into() },
 709            CharOperation::Keep { bytes: 9 },
 710        ];
 711        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 712        assert_eq!(
 713            line_ops,
 714            vec![
 715                LineOperation::Insert { lines: 1 },
 716                LineOperation::Keep { lines: 2 }
 717            ]
 718        );
 719        let new_text = apply_char_operations(old_text, &char_ops);
 720        assert_eq!(
 721            new_text,
 722            apply_line_operations(old_text, &new_text, &line_ops)
 723        );
 724    }
 725
 726    #[test]
 727    fn test_delete_newline() {
 728        let old_text = "aaaa\nbbbb";
 729        let char_ops = vec![
 730            CharOperation::Keep { bytes: 4 },
 731            CharOperation::Delete { bytes: 1 },
 732            CharOperation::Keep { bytes: 4 },
 733        ];
 734        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 735        assert_eq!(
 736            line_ops,
 737            vec![
 738                LineOperation::Delete { lines: 2 },
 739                LineOperation::Insert { lines: 1 }
 740            ]
 741        );
 742
 743        let new_text = apply_char_operations(old_text, &char_ops);
 744        assert_eq!(
 745            new_text,
 746            apply_line_operations(old_text, &new_text, &line_ops)
 747        );
 748    }
 749
 750    #[test]
 751    fn test_insert_multiple_newlines() {
 752        let old_text = "aaaa\nbbbb";
 753        let char_ops = vec![
 754            CharOperation::Keep { bytes: 5 },
 755            CharOperation::Insert {
 756                text: "\n\n".into(),
 757            },
 758            CharOperation::Keep { bytes: 4 },
 759        ];
 760        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 761        assert_eq!(
 762            line_ops,
 763            vec![
 764                LineOperation::Keep { lines: 1 },
 765                LineOperation::Insert { lines: 2 },
 766                LineOperation::Keep { lines: 1 }
 767            ]
 768        );
 769        let new_text = apply_char_operations(old_text, &char_ops);
 770        assert_eq!(
 771            new_text,
 772            apply_line_operations(old_text, &new_text, &line_ops)
 773        );
 774    }
 775
 776    #[test]
 777    fn test_delete_multiple_newlines() {
 778        let old_text = "aaaa\n\n\nbbbb";
 779        let char_ops = vec![
 780            CharOperation::Keep { bytes: 5 },
 781            CharOperation::Delete { bytes: 2 },
 782            CharOperation::Keep { bytes: 4 },
 783        ];
 784        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 785        assert_eq!(
 786            line_ops,
 787            vec![
 788                LineOperation::Keep { lines: 1 },
 789                LineOperation::Delete { lines: 2 },
 790                LineOperation::Keep { lines: 1 }
 791            ]
 792        );
 793        let new_text = apply_char_operations(old_text, &char_ops);
 794        assert_eq!(
 795            new_text,
 796            apply_line_operations(old_text, &new_text, &line_ops)
 797        );
 798    }
 799
 800    #[test]
 801    fn test_complex_scenario() {
 802        let old_text = "line1\nline2\nline3\nline4";
 803        let char_ops = vec![
 804            CharOperation::Keep { bytes: 6 },
 805            CharOperation::Insert {
 806                text: "inserted\n".into(),
 807            },
 808            CharOperation::Delete { bytes: 6 },
 809            CharOperation::Keep { bytes: 5 },
 810            CharOperation::Insert {
 811                text: "\nnewline".into(),
 812            },
 813            CharOperation::Keep { bytes: 6 },
 814        ];
 815        let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
 816        assert_eq!(
 817            line_ops,
 818            vec![
 819                LineOperation::Keep { lines: 1 },
 820                LineOperation::Delete { lines: 1 },
 821                LineOperation::Insert { lines: 1 },
 822                LineOperation::Keep { lines: 1 },
 823                LineOperation::Insert { lines: 1 },
 824                LineOperation::Keep { lines: 1 }
 825            ]
 826        );
 827        let new_text = apply_char_operations(old_text, &char_ops);
 828        assert_eq!(new_text, "line1\ninserted\nline3\nnewline\nline4");
 829        assert_eq!(
 830            apply_line_operations(old_text, &new_text, &line_ops),
 831            new_text,
 832        );
 833    }
 834
 // Streams char operations that rewrite a `for` loop header into a `let` plus
 // `while` header and add a statement before the closing brace, then checks the
 // line operations derived from them and that applying those line operations
 // reproduces the text produced by the char operations.
 #[test]
 fn test_cleaning_up_common_suffix() {
     let old_text = [
         "        for y in 0..size.y() {",
         "            let a = 10;",
         "            let b = 20;",
         "        }",
     ]
     .join("\n");

     // Short constructors keep the operation list readable.
     let keep = |bytes: usize| CharOperation::Keep { bytes };
     let delete = |bytes: usize| CharOperation::Delete { bytes };
     let insert = |text: &str| CharOperation::Insert { text: text.into() };

     let char_ops = [
         // Leading indentation of the first line is kept.
         keep(8),
         insert("let"),
         insert(" mut"),
         insert(" y"),
         insert(" ="),
         insert(" 0"),
         insert(";"),
         insert("\n"),
         insert("        while"),
         insert(" y"),
         insert(" < size"),
         insert("."),
         insert("y"),
         insert("()"),
         insert(" {"),
         insert("\n"),
         // Drops the rest of the original first line, newline included.
         delete(23),
         // The two `let` lines are kept, each followed by its newline.
         keep(23),
         keep(1),
         keep(23),
         keep(1),
         // Indentation in front of the closing brace.
         keep(8),
         insert("    y"),
         insert(" +="),
         insert(" 1"),
         insert(";"),
         insert("\n"),
         insert("        "),
         // The closing brace itself.
         keep(1),
     ];

     let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
     let expected_line_ops = vec![
         LineOperation::Delete { lines: 1 },
         LineOperation::Insert { lines: 2 },
         LineOperation::Keep { lines: 2 },
         LineOperation::Delete { lines: 1 },
         LineOperation::Insert { lines: 2 },
     ];
     assert_eq!(line_ops, expected_line_ops);

     let new_text = apply_char_operations(&old_text, &char_ops);
     let rebuilt = apply_line_operations(&old_text, &new_text, &line_ops);
     assert_eq!(new_text, rebuilt);
 }
 901
 // Randomized round trip: generate a random text, edit it randomly, stream the
 // edited text through the diff, and check that both the char operations and
 // the line operations derived from them rebuild the edited text.
 #[test]
 fn test_random_diffs() {
     random_test(|mut rng| {
         // Read on every iteration; 10 is used when `env::var` returns an error.
         let old_text_len = match env::var("OLD_TEXT_LEN") {
             Ok(raw) => raw.parse().expect("invalid `OLD_TEXT_LEN` variable"),
             Err(_) => 10,
         };

         let old_text = random_text(&mut rng, old_text_len);
         println!("old text: {:?}", old_text);

         let new_text = randomly_edit(&old_text, &mut rng);
         println!("new text: {:?}", new_text);

         let char_ops = random_streaming_diff(&mut rng, &old_text, &new_text);
         println!("char operations: {:?}", char_ops);

         // The char operations alone must turn the old text into the new text.
         assert_eq!(apply_char_operations(&old_text, &char_ops), new_text);

         // The same must hold for the line operations derived from them.
         let line_ops = char_ops_to_line_ops(&old_text, &char_ops);
         println!("line operations: {:?}", line_ops);
         assert_eq!(
             apply_line_operations(&old_text, &new_text, &line_ops),
             new_text
         );
     });
 }
 929
 930    fn char_ops_to_line_ops(old_text: &str, char_ops: &[CharOperation]) -> Vec<LineOperation> {
 931        let old_rope = Rope::from(old_text);
 932        let mut diff = LineDiff::default();
 933        for op in char_ops {
 934            diff.push_char_operation(op, &old_rope);
 935        }
 936        diff.finish(&old_rope);
 937        diff.line_operations()
 938    }
 939
 940    fn random_streaming_diff(rng: &mut impl Rng, old: &str, new: &str) -> Vec<CharOperation> {
 941        let mut diff = StreamingDiff::new(old.to_string());
 942        let mut char_operations = Vec::new();
 943        let mut new_len = 0;
 944
 945        while new_len < new.len() {
 946            let mut chunk_len = rng.gen_range(1..=new.len() - new_len);
 947            while !new.is_char_boundary(new_len + chunk_len) {
 948                chunk_len += 1;
 949            }
 950            let chunk = &new[new_len..new_len + chunk_len];
 951            let new_hunks = diff.push_new(chunk);
 952            char_operations.extend(new_hunks);
 953            new_len += chunk_len;
 954        }
 955
 956        char_operations.extend(diff.finish());
 957        char_operations
 958    }
 959
 960    fn random_test<F>(mut test_fn: F)
 961    where
 962        F: FnMut(StdRng),
 963    {
 964        let iterations = env::var("ITERATIONS")
 965            .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
 966            .unwrap_or(100);
 967
 968        let seed: u64 = env::var("SEED")
 969            .map(|s| s.parse().expect("invalid `SEED` variable"))
 970            .unwrap_or(0);
 971
 972        println!(
 973            "Running test with {} iterations and seed {}",
 974            iterations, seed
 975        );
 976
 977        for i in 0..iterations {
 978            println!("Iteration {}", i + 1);
 979            let rng = StdRng::seed_from_u64(seed + i);
 980            test_fn(rng);
 981        }
 982    }
 983
 984    fn apply_line_operations(old_text: &str, new_text: &str, line_ops: &[LineOperation]) -> String {
 985        let mut result: Vec<&str> = Vec::new();
 986
 987        let old_lines: Vec<&str> = old_text.split('\n').collect();
 988        let new_lines: Vec<&str> = new_text.split('\n').collect();
 989        let mut old_start = 0_usize;
 990        let mut new_start = 0_usize;
 991
 992        for op in line_ops {
 993            match op {
 994                LineOperation::Keep { lines } => {
 995                    let old_end = old_start + *lines as usize;
 996                    result.extend(&old_lines[old_start..old_end]);
 997                    old_start = old_end;
 998                    new_start += *lines as usize;
 999                }
1000                LineOperation::Delete { lines } => {
1001                    old_start += *lines as usize;
1002                }
1003                LineOperation::Insert { lines } => {
1004                    let new_end = new_start + *lines as usize;
1005                    result.extend(&new_lines[new_start..new_end]);
1006                    new_start = new_end;
1007                }
1008            }
1009        }
1010
1011        result.join("\n")
1012    }
1013
// Sanity check for the `apply_char_operations` helper itself: keep the
// greeting, swap "world" for "Rust", and keep the trailing "!".
#[test]
fn test_apply_char_operations() {
    let original = "Hello, world!";
    let operations = [
        CharOperation::Keep { bytes: 7 },
        CharOperation::Delete { bytes: 5 },
        CharOperation::Insert {
            text: String::from("Rust"),
        },
        CharOperation::Keep { bytes: 1 },
    ];

    let patched = apply_char_operations(original, &operations);
    assert_eq!(patched, "Hello, Rust!");
}
1028
1029    fn random_text(rng: &mut impl Rng, length: usize) -> String {
1030        util::RandomCharIter::new(rng).take(length).collect()
1031    }
1032
1033    fn randomly_edit(text: &str, rng: &mut impl Rng) -> String {
1034        let mut result = String::from(text);
1035        let edit_count = rng.gen_range(1..=5);
1036
1037        fn random_char_range(text: &str, rng: &mut impl Rng) -> (usize, usize) {
1038            let mut start = rng.gen_range(0..=text.len());
1039            while !text.is_char_boundary(start) {
1040                start -= 1;
1041            }
1042            let mut end = rng.gen_range(start..=text.len());
1043            while !text.is_char_boundary(end) {
1044                end += 1;
1045            }
1046            (start, end)
1047        }
1048
1049        for _ in 0..edit_count {
1050            match rng.gen_range(0..3) {
1051                0 => {
1052                    // Insert
1053                    let (pos, _) = random_char_range(&result, rng);
1054                    let insert_len = rng.gen_range(1..=5);
1055                    let insert_text: String = random_text(rng, insert_len);
1056                    result.insert_str(pos, &insert_text);
1057                }
1058                1 => {
1059                    // Delete
1060                    if !result.is_empty() {
1061                        let (start, end) = random_char_range(&result, rng);
1062                        result.replace_range(start..end, "");
1063                    }
1064                }
1065                2 => {
1066                    // Replace
1067                    if !result.is_empty() {
1068                        let (start, end) = random_char_range(&result, rng);
1069                        let replace_len = end - start;
1070                        let replace_text: String = random_text(rng, replace_len);
1071                        result.replace_range(start..end, &replace_text);
1072                    }
1073                }
1074                _ => unreachable!(),
1075            }
1076        }
1077
1078        result
1079    }
1080
1081    fn apply_char_operations(old_text: &str, char_ops: &[CharOperation]) -> String {
1082        let mut result = String::new();
1083        let mut old_ix = 0;
1084
1085        for operation in char_ops {
1086            match operation {
1087                CharOperation::Keep { bytes } => {
1088                    result.push_str(&old_text[old_ix..old_ix + bytes]);
1089                    old_ix += bytes;
1090                }
1091                CharOperation::Delete { bytes } => {
1092                    old_ix += bytes;
1093                }
1094                CharOperation::Insert { text } => {
1095                    result.push_str(text);
1096                }
1097            }
1098        }
1099
1100        result
1101    }
1102}