diff.rs

  1use std::{
  2    fmt::{self, Debug},
  3    ops::Range,
  4};
  5
  6struct Matrix {
  7    cells: Vec<isize>,
  8    rows: usize,
  9    cols: usize,
 10}
 11
 12impl Matrix {
 13    fn new() -> Self {
 14        Self {
 15            cells: Vec::new(),
 16            rows: 0,
 17            cols: 0,
 18        }
 19    }
 20
 21    fn resize(&mut self, rows: usize, cols: usize) {
 22        self.cells.resize(rows * cols, 0);
 23        self.rows = rows;
 24        self.cols = cols;
 25    }
 26
 27    fn get(&self, row: usize, col: usize) -> isize {
 28        if row >= self.rows {
 29            panic!("row out of bounds")
 30        }
 31
 32        if col >= self.cols {
 33            panic!("col out of bounds")
 34        }
 35        self.cells[col * self.rows + row]
 36    }
 37
 38    fn set(&mut self, row: usize, col: usize, value: isize) {
 39        if row >= self.rows {
 40            panic!("row out of bounds")
 41        }
 42
 43        if col >= self.cols {
 44            panic!("col out of bounds")
 45        }
 46
 47        self.cells[col * self.rows + row] = value;
 48    }
 49}
 50
 51impl Debug for Matrix {
 52    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 53        writeln!(f)?;
 54        for i in 0..self.rows {
 55            for j in 0..self.cols {
 56                write!(f, "{:5}", self.get(i, j))?;
 57            }
 58            writeln!(f)?;
 59        }
 60        Ok(())
 61    }
 62}
 63
 64#[derive(Debug)]
 65pub enum Hunk {
 66    Insert { text: String },
 67    Remove { len: usize },
 68    Keep { len: usize },
 69}
 70
 71pub struct Diff {
 72    old: Vec<char>,
 73    new: Vec<char>,
 74    scores: Matrix,
 75    old_text_ix: usize,
 76    new_text_ix: usize,
 77}
 78
 79impl Diff {
 80    const INSERTION_SCORE: isize = -1;
 81    const DELETION_SCORE: isize = -4;
 82    const EQUALITY_SCORE: isize = 15;
 83
 84    pub fn new(old: String) -> Self {
 85        let old = old.chars().collect::<Vec<_>>();
 86        let mut scores = Matrix::new();
 87        scores.resize(old.len() + 1, 1);
 88        for i in 0..=old.len() {
 89            scores.set(i, 0, i as isize * Self::DELETION_SCORE);
 90        }
 91        Self {
 92            old,
 93            new: Vec::new(),
 94            scores,
 95            old_text_ix: 0,
 96            new_text_ix: 0,
 97        }
 98    }
 99
100    pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
101        self.new.extend(text.chars());
102        self.scores.resize(self.old.len() + 1, self.new.len() + 1);
103
104        for j in self.new_text_ix + 1..=self.new.len() {
105            self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
106            for i in 1..=self.old.len() {
107                let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
108                let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
109                let equality_score = if self.old[i - 1] == self.new[j - 1] {
110                    if self.old[i - 1] == ' ' {
111                        self.scores.get(i - 1, j - 1)
112                    } else {
113                        self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
114                    }
115                } else {
116                    isize::MIN
117                };
118                let score = insertion_score.max(deletion_score).max(equality_score);
119                self.scores.set(i, j, score);
120            }
121        }
122
123        let mut max_score = isize::MIN;
124        let mut best_row = self.old_text_ix;
125        let mut best_col = self.new_text_ix;
126        for i in self.old_text_ix..=self.old.len() {
127            for j in self.new_text_ix..=self.new.len() {
128                let score = self.scores.get(i, j);
129                if score > max_score {
130                    max_score = score;
131                    best_row = i;
132                    best_col = j;
133                }
134            }
135        }
136
137        let hunks = self.backtrack(best_row, best_col);
138        self.old_text_ix = best_row;
139        self.new_text_ix = best_col;
140        hunks
141    }
142
143    fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
144        let mut pending_insert: Option<Range<usize>> = None;
145        let mut hunks = Vec::new();
146        let mut i = old_text_ix;
147        let mut j = new_text_ix;
148        while (i, j) != (self.old_text_ix, self.new_text_ix) {
149            let insertion_score = if j > self.new_text_ix {
150                Some((i, j - 1))
151            } else {
152                None
153            };
154            let deletion_score = if i > self.old_text_ix {
155                Some((i - 1, j))
156            } else {
157                None
158            };
159            let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
160                if self.old[i - 1] == self.new[j - 1] {
161                    Some((i - 1, j - 1))
162                } else {
163                    None
164                }
165            } else {
166                None
167            };
168
169            let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
170                .iter()
171                .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
172                .unwrap()
173                .unwrap();
174
175            if prev_i == i && prev_j == j - 1 {
176                if let Some(pending_insert) = pending_insert.as_mut() {
177                    pending_insert.start = prev_j;
178                } else {
179                    pending_insert = Some(prev_j..j);
180                }
181            } else {
182                if let Some(range) = pending_insert.take() {
183                    hunks.push(Hunk::Insert {
184                        text: self.new[range].iter().collect(),
185                    });
186                }
187
188                let char_len = self.old[i - 1].len_utf8();
189                if prev_i == i - 1 && prev_j == j {
190                    if let Some(Hunk::Remove { len }) = hunks.last_mut() {
191                        *len += char_len;
192                    } else {
193                        hunks.push(Hunk::Remove { len: char_len })
194                    }
195                } else {
196                    if let Some(Hunk::Keep { len }) = hunks.last_mut() {
197                        *len += char_len;
198                    } else {
199                        hunks.push(Hunk::Keep { len: char_len })
200                    }
201                }
202            }
203
204            i = prev_i;
205            j = prev_j;
206        }
207
208        if let Some(range) = pending_insert.take() {
209            hunks.push(Hunk::Insert {
210                text: self.new[range].iter().collect(),
211            });
212        }
213
214        hunks.reverse();
215        hunks
216    }
217
218    pub fn finish(self) -> Vec<Hunk> {
219        self.backtrack(self.old.len(), self.new.len())
220    }
221}
222
223#[cfg(test)]
224mod tests {
225    use std::env;
226
227    use super::*;
228    use rand::prelude::*;
229
230    #[gpui::test(iterations = 100)]
231    fn test_random_diffs(mut rng: StdRng) {
232        let old_text_len = env::var("OLD_TEXT_LEN")
233            .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
234            .unwrap_or(10);
235        let new_text_len = env::var("NEW_TEXT_LEN")
236            .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
237            .unwrap_or(10);
238
239        let old = util::RandomCharIter::new(&mut rng)
240            .take(old_text_len)
241            .collect::<String>();
242        log::info!("old text: {:?}", old);
243
244        let mut diff = Diff::new(old.clone());
245        let mut hunks = Vec::new();
246        let mut new_len = 0;
247        let mut new = String::new();
248        while new_len < new_text_len {
249            let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
250            let new_chunk = util::RandomCharIter::new(&mut rng)
251                .take(new_len)
252                .collect::<String>();
253            log::info!("new chunk: {:?}", new_chunk);
254            new_len += new_chunk_len;
255            new.push_str(&new_chunk);
256            let new_hunks = diff.push_new(&new_chunk);
257            log::info!("hunks: {:?}", new_hunks);
258            hunks.extend(new_hunks);
259        }
260        let final_hunks = diff.finish();
261        log::info!("final hunks: {:?}", final_hunks);
262        hunks.extend(final_hunks);
263
264        log::info!("new text: {:?}", new);
265        let mut old_ix = 0;
266        let mut new_ix = 0;
267        let mut patched = String::new();
268        for hunk in hunks {
269            match hunk {
270                Hunk::Keep { len } => {
271                    assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
272                    patched.push_str(&old[old_ix..old_ix + len]);
273                    old_ix += len;
274                    new_ix += len;
275                }
276                Hunk::Remove { len } => {
277                    old_ix += len;
278                }
279                Hunk::Insert { text } => {
280                    assert_eq!(text, &new[new_ix..new_ix + text.len()]);
281                    patched.push_str(&text);
282                    new_ix += text.len();
283                }
284            }
285        }
286        assert_eq!(patched, new);
287    }
288}