diff.rs

  1use collections::HashMap;
  2use std::{
  3    fmt::{self, Debug},
  4    ops::Range,
  5};
  6
  7struct Matrix {
  8    cells: Vec<isize>,
  9    rows: usize,
 10    cols: usize,
 11}
 12
 13impl Matrix {
 14    fn new() -> Self {
 15        Self {
 16            cells: Vec::new(),
 17            rows: 0,
 18            cols: 0,
 19        }
 20    }
 21
 22    fn resize(&mut self, rows: usize, cols: usize) {
 23        self.cells.resize(rows * cols, 0);
 24        self.rows = rows;
 25        self.cols = cols;
 26    }
 27
 28    fn get(&self, row: usize, col: usize) -> isize {
 29        if row >= self.rows {
 30            panic!("row out of bounds")
 31        }
 32
 33        if col >= self.cols {
 34            panic!("col out of bounds")
 35        }
 36        self.cells[col * self.rows + row]
 37    }
 38
 39    fn set(&mut self, row: usize, col: usize, value: isize) {
 40        if row >= self.rows {
 41            panic!("row out of bounds")
 42        }
 43
 44        if col >= self.cols {
 45            panic!("col out of bounds")
 46        }
 47
 48        self.cells[col * self.rows + row] = value;
 49    }
 50}
 51
 52impl Debug for Matrix {
 53    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 54        writeln!(f)?;
 55        for i in 0..self.rows {
 56            for j in 0..self.cols {
 57                write!(f, "{:5}", self.get(i, j))?;
 58            }
 59            writeln!(f)?;
 60        }
 61        Ok(())
 62    }
 63}
 64
 65#[derive(Debug)]
 66pub enum Hunk {
 67    Insert { text: String },
 68    Remove { len: usize },
 69    Keep { len: usize },
 70}
 71
 72pub struct Diff {
 73    old: Vec<char>,
 74    new: Vec<char>,
 75    scores: Matrix,
 76    old_text_ix: usize,
 77    new_text_ix: usize,
 78    equal_runs: HashMap<(usize, usize), u32>,
 79}
 80
 81impl Diff {
 82    const INSERTION_SCORE: isize = -1;
 83    const DELETION_SCORE: isize = -5;
 84    const EQUALITY_BASE: isize = 2;
 85
 86    pub fn new(old: String) -> Self {
 87        let old = old.chars().collect::<Vec<_>>();
 88        let mut scores = Matrix::new();
 89        scores.resize(old.len() + 1, 1);
 90        for i in 0..=old.len() {
 91            scores.set(i, 0, i as isize * Self::DELETION_SCORE);
 92        }
 93        Self {
 94            old,
 95            new: Vec::new(),
 96            scores,
 97            old_text_ix: 0,
 98            new_text_ix: 0,
 99            equal_runs: Default::default(),
100        }
101    }
102
103    pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
104        self.new.extend(text.chars());
105        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
106
107        for j in self.new_text_ix + 1..=self.new.len() {
108            self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
109            for i in 1..=self.old.len() {
110                let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
111                let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
112                let equality_score = if self.old[i - 1] == self.new[j - 1] {
113                    let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
114                    equal_run += 1;
115                    self.equal_runs.insert((i, j), equal_run);
116
117                    if self.old[i - 1] == ' ' {
118                        self.scores.get(i - 1, j - 1)
119                    } else {
120                        self.scores.get(i - 1, j - 1) + Self::EQUALITY_BASE.pow(equal_run / 3)
121                    }
122                } else {
123                    isize::MIN
124                };
125
126                let score = insertion_score.max(deletion_score).max(equality_score);
127                self.scores.set(i, j, score);
128            }
129        }
130
131        let mut max_score = isize::MIN;
132        let mut next_old_text_ix = self.old_text_ix;
133        let next_new_text_ix = self.new.len();
134        for i in self.old_text_ix..=self.old.len() {
135            let score = self.scores.get(i, next_new_text_ix);
136            if score > max_score {
137                max_score = score;
138                next_old_text_ix = i;
139            }
140        }
141
142        let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
143        self.old_text_ix = next_old_text_ix;
144        self.new_text_ix = next_new_text_ix;
145        hunks
146    }
147
148    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
149        let mut pending_insert: Option<Range<usize>> = None;
150        let mut hunks = Vec::new();
151        let mut i = old_text_ix;
152        let mut j = new_text_ix;
153        while (i, j) != (self.old_text_ix, self.new_text_ix) {
154            let insertion_score = if j > self.new_text_ix {
155                Some((i, j - 1))
156            } else {
157                None
158            };
159            let deletion_score = if i > self.old_text_ix {
160                Some((i - 1, j))
161            } else {
162                None
163            };
164            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
165                if self.old[i - 1] == self.new[j - 1] {
166                    Some((i - 1, j - 1))
167                } else {
168                    None
169                }
170            } else {
171                None
172            };
173
174            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
175                .iter()
176                .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
177                .unwrap()
178                .unwrap();
179
180            if prev_i == i && prev_j == j - 1 {
181                if let Some(pending_insert) = pending_insert.as_mut() {
182                    pending_insert.start = prev_j;
183                } else {
184                    pending_insert = Some(prev_j..j);
185                }
186            } else {
187                if let Some(range) = pending_insert.take() {
188                    hunks.push(Hunk::Insert {
189                        text: self.new[range].iter().collect(),
190                    });
191                }
192
193                let char_len = self.old[i - 1].len_utf8();
194                if prev_i == i - 1 && prev_j == j {
195                    if let Some(Hunk::Remove { len }) = hunks.last_mut() {
196                        *len += char_len;
197                    } else {
198                        hunks.push(Hunk::Remove { len: char_len })
199                    }
200                } else {
201                    if let Some(Hunk::Keep { len }) = hunks.last_mut() {
202                        *len += char_len;
203                    } else {
204                        hunks.push(Hunk::Keep { len: char_len })
205                    }
206                }
207            }
208
209            i = prev_i;
210            j = prev_j;
211        }
212
213        if let Some(range) = pending_insert.take() {
214            hunks.push(Hunk::Insert {
215                text: self.new[range].iter().collect(),
216            });
217        }
218
219        hunks.reverse();
220        hunks
221    }
222
223    pub fn finish(self) -> Vec<Hunk> {
224        self.backtrack(self.old.len(), self.new.len())
225    }
226}
227
228#[cfg(test)]
229mod tests {
230    use std::env;
231
232    use super::*;
233    use rand::prelude::*;
234
235    #[gpui::test(iterations = 100)]
236    fn test_random_diffs(mut rng: StdRng) {
237        let old_text_len = env::var("OLD_TEXT_LEN")
238            .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
239            .unwrap_or(10);
240        let new_text_len = env::var("NEW_TEXT_LEN")
241            .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
242            .unwrap_or(10);
243
244        let old = util::RandomCharIter::new(&mut rng)
245            .take(old_text_len)
246            .collect::<String>();
247        log::info!("old text: {:?}", old);
248
249        let mut diff = Diff::new(old.clone());
250        let mut hunks = Vec::new();
251        let mut new_len = 0;
252        let mut new = String::new();
253        while new_len < new_text_len {
254            let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
255            let new_chunk = util::RandomCharIter::new(&mut rng)
256                .take(new_len)
257                .collect::<String>();
258            log::info!("new chunk: {:?}", new_chunk);
259            new_len += new_chunk_len;
260            new.push_str(&new_chunk);
261            let new_hunks = diff.push_new(&new_chunk);
262            log::info!("hunks: {:?}", new_hunks);
263            hunks.extend(new_hunks);
264        }
265        let final_hunks = diff.finish();
266        log::info!("final hunks: {:?}", final_hunks);
267        hunks.extend(final_hunks);
268
269        log::info!("new text: {:?}", new);
270        let mut old_ix = 0;
271        let mut new_ix = 0;
272        let mut patched = String::new();
273        for hunk in hunks {
274            match hunk {
275                Hunk::Keep { len } => {
276                    assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
277                    patched.push_str(&old[old_ix..old_ix + len]);
278                    old_ix += len;
279                    new_ix += len;
280                }
281                Hunk::Remove { len } => {
282                    old_ix += len;
283                }
284                Hunk::Insert { text } => {
285                    assert_eq!(text, &new[new_ix..new_ix + text.len()]);
286                    patched.push_str(&text);
287                    new_ix += text.len();
288                }
289            }
290        }
291        assert_eq!(patched, new);
292    }
293}