diff.rs

  1use std::{
  2    cmp,
  3    fmt::{self, Debug},
  4};
  5
  6use collections::BinaryHeap;
  7
  8struct Matrix {
  9    cells: Vec<isize>,
 10    rows: usize,
 11    cols: usize,
 12}
 13
 14impl Matrix {
 15    fn new() -> Self {
 16        Self {
 17            cells: Vec::new(),
 18            rows: 0,
 19            cols: 0,
 20        }
 21    }
 22
 23    fn resize(&mut self, rows: usize, cols: usize) {
 24        self.cells.resize(rows * cols, 0);
 25        self.rows = rows;
 26        self.cols = cols;
 27    }
 28
 29    fn get(&self, row: usize, col: usize) -> isize {
 30        if row >= self.rows {
 31            panic!("row out of bounds")
 32        }
 33
 34        if col >= self.cols {
 35            panic!("col out of bounds")
 36        }
 37        self.cells[col * self.rows + row]
 38    }
 39
 40    fn set(&mut self, row: usize, col: usize, value: isize) {
 41        if row >= self.rows {
 42            panic!("row out of bounds")
 43        }
 44
 45        if col >= self.cols {
 46            panic!("col out of bounds")
 47        }
 48
 49        self.cells[col * self.rows + row] = value;
 50    }
 51}
 52
 53impl Debug for Matrix {
 54    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 55        writeln!(f)?;
 56        for i in 0..self.rows {
 57            for j in 0..self.cols {
 58                write!(f, "{:5}", self.get(i, j))?;
 59            }
 60            writeln!(f)?;
 61        }
 62        Ok(())
 63    }
 64}
 65
 66#[derive(Debug)]
 67pub enum Hunk {
 68    Insert { text: String },
 69    Remove { len: usize },
 70    Keep { len: usize },
 71}
 72
 73pub struct Diff {
 74    old: String,
 75    new: String,
 76    scores: Matrix,
 77    old_text_ix: usize,
 78    new_text_ix: usize,
 79}
 80
 81impl Diff {
 82    const INSERTION_SCORE: isize = -1;
 83    const DELETION_SCORE: isize = -4;
 84    const EQUALITY_SCORE: isize = 5;
 85
 86    pub fn new(old: String) -> Self {
 87        let mut scores = Matrix::new();
 88        scores.resize(old.len() + 1, 1);
 89        for i in 0..=old.len() {
 90            scores.set(i, 0, i as isize * Self::DELETION_SCORE);
 91        }
 92        Self {
 93            old,
 94            new: String::new(),
 95            scores,
 96            old_text_ix: 0,
 97            new_text_ix: 0,
 98        }
 99    }
100
101    pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
102        self.new.push_str(text);
103        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
104
105        for j in self.new_text_ix + 1..=self.new.len() {
106            self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
107            for i in 1..=self.old.len() {
108                let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
109                let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
110                let equality_score = if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
111                    self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
112                } else {
113                    isize::MIN
114                };
115                let score = insertion_score.max(deletion_score).max(equality_score);
116                self.scores.set(i, j, score);
117            }
118        }
119
120        let mut max_score = isize::MIN;
121        let mut best_row = self.old_text_ix;
122        let mut best_col = self.new_text_ix;
123        for i in self.old_text_ix..=self.old.len() {
124            for j in self.new_text_ix..=self.new.len() {
125                let score = self.scores.get(i, j);
126                if score > max_score {
127                    max_score = score;
128                    best_row = i;
129                    best_col = j;
130                }
131            }
132        }
133
134        let hunks = self.backtrack(best_row, best_col);
135        self.old_text_ix = best_row;
136        self.new_text_ix = best_col;
137        hunks
138    }
139
140    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
141        let mut hunks = Vec::new();
142        let mut i = old_text_ix;
143        let mut j = new_text_ix;
144        while (i, j) != (self.old_text_ix, self.new_text_ix) {
145            let insertion_score = if j > self.new_text_ix {
146                Some((i, j - 1))
147            } else {
148                None
149            };
150            let deletion_score = if i > self.old_text_ix {
151                Some((i - 1, j))
152            } else {
153                None
154            };
155            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
156                if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
157                    Some((i - 1, j - 1))
158                } else {
159                    None
160                }
161            } else {
162                None
163            };
164
165            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
166                .iter()
167                .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
168                .unwrap()
169                .unwrap();
170
171            if prev_i == i && prev_j == j - 1 {
172                if let Some(Hunk::Insert { text }) = hunks.last_mut() {
173                    text.insert_str(0, &self.new[prev_j..j]);
174                } else {
175                    hunks.push(Hunk::Insert {
176                        text: self.new[prev_j..j].to_string(),
177                    })
178                }
179            } else if prev_i == i - 1 && prev_j == j {
180                if let Some(Hunk::Remove { len }) = hunks.last_mut() {
181                    *len += 1;
182                } else {
183                    hunks.push(Hunk::Remove { len: 1 })
184                }
185            } else {
186                if let Some(Hunk::Keep { len }) = hunks.last_mut() {
187                    *len += 1;
188                } else {
189                    hunks.push(Hunk::Keep { len: 1 })
190                }
191            }
192
193            i = prev_i;
194            j = prev_j;
195        }
196        hunks.reverse();
197        hunks
198    }
199
200    pub fn finish(self) -> Vec<Hunk> {
201        self.backtrack(self.old.len(), self.new.len())
202    }
203}
204
205#[cfg(test)]
206mod tests {
207    use super::*;
208
209    #[test]
210    fn test_diff() {
211        let mut diff = Diff::new("hello world".to_string());
212        dbg!(diff.push_new("hello"));
213        dbg!(diff.push_new(" ciaone"));
214        // dbg!(diff.push_new(" world"));
215        dbg!(diff.finish());
216    }
217}