diff.rs

  1use std::{
  2    cmp,
  3    fmt::{self, Debug},
  4    ops::Range,
  5};
  6
  7use collections::BinaryHeap;
  8
  9struct Matrix {
 10    cells: Vec<isize>,
 11    rows: usize,
 12    cols: usize,
 13}
 14
 15impl Matrix {
 16    fn new() -> Self {
 17        Self {
 18            cells: Vec::new(),
 19            rows: 0,
 20            cols: 0,
 21        }
 22    }
 23
 24    fn resize(&mut self, rows: usize, cols: usize) {
 25        self.cells.resize(rows * cols, 0);
 26        self.rows = rows;
 27        self.cols = cols;
 28    }
 29
 30    fn get(&self, row: usize, col: usize) -> isize {
 31        if row >= self.rows {
 32            panic!("row out of bounds")
 33        }
 34
 35        if col >= self.cols {
 36            panic!("col out of bounds")
 37        }
 38        self.cells[col * self.rows + row]
 39    }
 40
 41    fn set(&mut self, row: usize, col: usize, value: isize) {
 42        if row >= self.rows {
 43            panic!("row out of bounds")
 44        }
 45
 46        if col >= self.cols {
 47            panic!("col out of bounds")
 48        }
 49
 50        self.cells[col * self.rows + row] = value;
 51    }
 52}
 53
 54impl Debug for Matrix {
 55    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 56        writeln!(f)?;
 57        for i in 0..self.rows {
 58            for j in 0..self.cols {
 59                write!(f, "{:5}", self.get(i, j))?;
 60            }
 61            writeln!(f)?;
 62        }
 63        Ok(())
 64    }
 65}
 66
 67#[derive(Debug)]
 68pub enum Hunk {
 69    Insert { text: String },
 70    Remove { len: usize },
 71    Keep { len: usize },
 72}
 73
 74pub struct Diff {
 75    old: Vec<char>,
 76    new: Vec<char>,
 77    scores: Matrix,
 78    old_text_ix: usize,
 79    new_text_ix: usize,
 80}
 81
 82impl Diff {
 83    const INSERTION_SCORE: isize = -1;
 84    const DELETION_SCORE: isize = -4;
 85    const EQUALITY_SCORE: isize = 5;
 86
 87    pub fn new(old: String) -> Self {
 88        let old = old.chars().collect::<Vec<_>>();
 89        let mut scores = Matrix::new();
 90        scores.resize(old.len() + 1, 1);
 91        for i in 0..=old.len() {
 92            scores.set(i, 0, i as isize * Self::DELETION_SCORE);
 93        }
 94        Self {
 95            old,
 96            new: Vec::new(),
 97            scores,
 98            old_text_ix: 0,
 99            new_text_ix: 0,
100        }
101    }
102
103    pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
104        self.new.extend(text.chars());
105        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
106
107        for j in self.new_text_ix + 1..=self.new.len() {
108            self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
109            for i in 1..=self.old.len() {
110                let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
111                let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
112                let equality_score = if self.old[i - 1] == self.new[j - 1] {
113                    self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
114                } else {
115                    isize::MIN
116                };
117                let score = insertion_score.max(deletion_score).max(equality_score);
118                self.scores.set(i, j, score);
119            }
120        }
121
122        let mut max_score = isize::MIN;
123        let mut best_row = self.old_text_ix;
124        let mut best_col = self.new_text_ix;
125        for i in self.old_text_ix..=self.old.len() {
126            for j in self.new_text_ix..=self.new.len() {
127                let score = self.scores.get(i, j);
128                if score > max_score {
129                    max_score = score;
130                    best_row = i;
131                    best_col = j;
132                }
133            }
134        }
135
136        let hunks = self.backtrack(best_row, best_col);
137        self.old_text_ix = best_row;
138        self.new_text_ix = best_col;
139        hunks
140    }
141
142    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
143        let mut pending_insert: Option<Range<usize>> = None;
144        let mut hunks = Vec::new();
145        let mut i = old_text_ix;
146        let mut j = new_text_ix;
147        while (i, j) != (self.old_text_ix, self.new_text_ix) {
148            let insertion_score = if j > self.new_text_ix {
149                Some((i, j - 1))
150            } else {
151                None
152            };
153            let deletion_score = if i > self.old_text_ix {
154                Some((i - 1, j))
155            } else {
156                None
157            };
158            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
159                if self.old[i - 1] == self.new[j - 1] {
160                    Some((i - 1, j - 1))
161                } else {
162                    None
163                }
164            } else {
165                None
166            };
167
168            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
169                .iter()
170                .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
171                .unwrap()
172                .unwrap();
173
174            if prev_i == i && prev_j == j - 1 {
175                if let Some(pending_insert) = pending_insert.as_mut() {
176                    pending_insert.start = prev_j;
177                } else {
178                    pending_insert = Some(prev_j..j);
179                }
180            } else {
181                if let Some(range) = pending_insert.take() {
182                    hunks.push(Hunk::Insert {
183                        text: self.new[range].iter().collect(),
184                    });
185                }
186
187                let char_len = self.old[i - 1].len_utf8();
188                if prev_i == i - 1 && prev_j == j {
189                    if let Some(Hunk::Remove { len }) = hunks.last_mut() {
190                        *len += char_len;
191                    } else {
192                        hunks.push(Hunk::Remove { len: char_len })
193                    }
194                } else {
195                    if let Some(Hunk::Keep { len }) = hunks.last_mut() {
196                        *len += char_len;
197                    } else {
198                        hunks.push(Hunk::Keep { len: char_len })
199                    }
200                }
201            }
202
203            i = prev_i;
204            j = prev_j;
205        }
206
207        if let Some(range) = pending_insert.take() {
208            hunks.push(Hunk::Insert {
209                text: self.new[range].iter().collect(),
210            });
211        }
212
213        hunks.reverse();
214        hunks
215    }
216
217    pub fn finish(self) -> Vec<Hunk> {
218        self.backtrack(self.old.len(), self.new.len())
219    }
220}
221
222#[cfg(test)]
223mod tests {
224    use std::env;
225
226    use super::*;
227    use rand::prelude::*;
228
229    #[gpui::test(iterations = 100)]
230    fn test_random_diffs(mut rng: StdRng) {
231        let old_text_len = env::var("OLD_TEXT_LEN")
232            .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
233            .unwrap_or(10);
234        let new_text_len = env::var("NEW_TEXT_LEN")
235            .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
236            .unwrap_or(10);
237
238        let old = util::RandomCharIter::new(&mut rng)
239            .take(old_text_len)
240            .collect::<String>();
241        log::info!("old text: {:?}", old);
242
243        let mut diff = Diff::new(old.clone());
244        let mut hunks = Vec::new();
245        let mut new_len = 0;
246        let mut new = String::new();
247        while new_len < new_text_len {
248            let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
249            let new_chunk = util::RandomCharIter::new(&mut rng)
250                .take(new_len)
251                .collect::<String>();
252            log::info!("new chunk: {:?}", new_chunk);
253            new_len += new_chunk_len;
254            new.push_str(&new_chunk);
255            let new_hunks = diff.push_new(&new_chunk);
256            log::info!("hunks: {:?}", new_hunks);
257            hunks.extend(new_hunks);
258        }
259        let final_hunks = diff.finish();
260        log::info!("final hunks: {:?}", final_hunks);
261        hunks.extend(final_hunks);
262
263        log::info!("new text: {:?}", new);
264        let mut old_ix = 0;
265        let mut new_ix = 0;
266        let mut patched = String::new();
267        for hunk in hunks {
268            match hunk {
269                Hunk::Keep { len } => {
270                    assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
271                    patched.push_str(&old[old_ix..old_ix + len]);
272                    old_ix += len;
273                    new_ix += len;
274                }
275                Hunk::Remove { len } => {
276                    old_ix += len;
277                }
278                Hunk::Insert { text } => {
279                    assert_eq!(text, &new[new_ix..new_ix + text.len()]);
280                    patched.push_str(&text);
281                    new_ix += text.len();
282                }
283            }
284        }
285        assert_eq!(patched, new);
286    }
287}