diff.rs

  1use std::{
  2    cmp,
  3    fmt::{self, Debug},
  4};
  5
  6use collections::BinaryHeap;
  7
  8struct Matrix {
  9    cells: Vec<isize>,
 10    rows: usize,
 11    cols: usize,
 12}
 13
 14impl Matrix {
 15    fn new() -> Self {
 16        Self {
 17            cells: Vec::new(),
 18            rows: 0,
 19            cols: 0,
 20        }
 21    }
 22
 23    fn resize(&mut self, rows: usize, cols: usize) {
 24        self.cells.resize(rows * cols, 0);
 25        self.rows = rows;
 26        self.cols = cols;
 27    }
 28
 29    fn get(&self, row: usize, col: usize) -> isize {
 30        if row >= self.rows {
 31            panic!("row out of bounds")
 32        }
 33
 34        if col >= self.cols {
 35            panic!("col out of bounds")
 36        }
 37        self.cells[col * self.rows + row]
 38    }
 39
 40    fn set(&mut self, row: usize, col: usize, value: isize) {
 41        if row >= self.rows {
 42            panic!("row out of bounds")
 43        }
 44
 45        if col >= self.cols {
 46            panic!("col out of bounds")
 47        }
 48
 49        self.cells[col * self.rows + row] = value;
 50    }
 51}
 52
 53impl Debug for Matrix {
 54    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 55        writeln!(f)?;
 56        for i in 0..self.rows {
 57            for j in 0..self.cols {
 58                write!(f, "{:5}", self.get(i, j))?;
 59            }
 60            writeln!(f)?;
 61        }
 62        Ok(())
 63    }
 64}
 65
 66#[derive(Debug)]
 67pub enum Hunk {
 68    Insert { len: usize },
 69    Remove { len: usize },
 70    Keep { len: usize },
 71}
 72
 73pub struct Diff {
 74    old: String,
 75    new: String,
 76    scores: Matrix,
 77    old_text_ix: usize,
 78}
 79
 80impl Diff {
 81    pub fn new(old: String) -> Self {
 82        let mut scores = Matrix::new();
 83        scores.resize(old.len() + 1, 1);
 84        for i in 0..=old.len() {
 85            scores.set(i, 0, -(i as isize));
 86        }
 87        Self {
 88            old,
 89            new: String::new(),
 90            scores,
 91            old_text_ix: 0,
 92        }
 93    }
 94
 95    pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
 96        let new_text_ix = self.new.len();
 97        self.new.push_str(text);
 98        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
 99
100        for j in new_text_ix + 1..=self.new.len() {
101            self.scores.set(0, j, -(j as isize));
102            for i in 1..=self.old.len() {
103                let insertion_score = self.scores.get(i, j - 1) - 1;
104                let deletion_score = self.scores.get(i - 1, j) - 10;
105                let equality_score = if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
106                    self.scores.get(i - 1, j - 1) + 5
107                } else {
108                    self.scores.get(i - 1, j - 1) - 20
109                };
110                let score = insertion_score.max(deletion_score).max(equality_score);
111                self.scores.set(i, j, score);
112            }
113        }
114
115        let mut max_score = isize::MIN;
116        let mut best_row = self.old_text_ix;
117        for i in self.old_text_ix..=self.old.len() {
118            let score = self.scores.get(i, self.new.len());
119            if score > max_score {
120                max_score = score;
121                best_row = i;
122            }
123        }
124
125        let mut hunks = Vec::new();
126        let mut i = best_row;
127        let mut j = self.new.len();
128        while (i, j) != (self.old_text_ix, new_text_ix) {
129            let insertion_score = if j > new_text_ix {
130                Some((i, j - 1))
131            } else {
132                None
133            };
134            let deletion_score = if i > self.old_text_ix {
135                Some((i - 1, j))
136            } else {
137                None
138            };
139            let equality_score = if i > self.old_text_ix && j > new_text_ix {
140                Some((i - 1, j - 1))
141            } else {
142                None
143            };
144
145            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
146                .iter()
147                .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
148                .unwrap()
149                .unwrap();
150
151            if prev_i == i && prev_j == j - 1 {
152                if let Some(Hunk::Insert { len }) = hunks.last_mut() {
153                    *len += 1;
154                } else {
155                    hunks.push(Hunk::Insert { len: 1 })
156                }
157            } else if prev_i == i - 1 && prev_j == j {
158                if let Some(Hunk::Remove { len }) = hunks.last_mut() {
159                    *len += 1;
160                } else {
161                    hunks.push(Hunk::Remove { len: 1 })
162                }
163            } else {
164                if let Some(Hunk::Keep { len }) = hunks.last_mut() {
165                    *len += 1;
166                } else {
167                    hunks.push(Hunk::Keep { len: 1 })
168                }
169            }
170
171            i = prev_i;
172            j = prev_j;
173        }
174        self.old_text_ix = best_row;
175        hunks.reverse();
176        hunks
177    }
178
179    pub fn finish(self) -> Option<Hunk> {
180        if self.old_text_ix < self.old.len() {
181            Some(Hunk::Remove {
182                len: self.old.len() - self.old_text_ix,
183            })
184        } else {
185            None
186        }
187    }
188}
189
190#[cfg(test)]
191mod tests {
192    use super::*;
193
194    #[test]
195    fn test_diff() {
196        let mut diff = Diff::new("hello world".to_string());
197        diff.push_new("hello");
198        diff.push_new(" ciaone");
199        diff.push_new(" world");
200        diff.finish();
201    }
202}