1use std::{
2 fmt::{self, Debug},
3 ops::Range,
4};
5
6struct Matrix {
7 cells: Vec<isize>,
8 rows: usize,
9 cols: usize,
10}
11
12impl Matrix {
13 fn new() -> Self {
14 Self {
15 cells: Vec::new(),
16 rows: 0,
17 cols: 0,
18 }
19 }
20
21 fn resize(&mut self, rows: usize, cols: usize) {
22 self.cells.resize(rows * cols, 0);
23 self.rows = rows;
24 self.cols = cols;
25 }
26
27 fn get(&self, row: usize, col: usize) -> isize {
28 if row >= self.rows {
29 panic!("row out of bounds")
30 }
31
32 if col >= self.cols {
33 panic!("col out of bounds")
34 }
35 self.cells[col * self.rows + row]
36 }
37
38 fn set(&mut self, row: usize, col: usize, value: isize) {
39 if row >= self.rows {
40 panic!("row out of bounds")
41 }
42
43 if col >= self.cols {
44 panic!("col out of bounds")
45 }
46
47 self.cells[col * self.rows + row] = value;
48 }
49}
50
51impl Debug for Matrix {
52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 writeln!(f)?;
54 for i in 0..self.rows {
55 for j in 0..self.cols {
56 write!(f, "{:5}", self.get(i, j))?;
57 }
58 writeln!(f)?;
59 }
60 Ok(())
61 }
62}
63
64#[derive(Debug)]
65pub enum Hunk {
66 Insert { text: String },
67 Remove { len: usize },
68 Keep { len: usize },
69}
70
71pub struct Diff {
72 old: Vec<char>,
73 new: Vec<char>,
74 scores: Matrix,
75 old_text_ix: usize,
76 new_text_ix: usize,
77}
78
79impl Diff {
80 const INSERTION_SCORE: isize = -1;
81 const DELETION_SCORE: isize = -4;
82 const EQUALITY_SCORE: isize = 5;
83
84 pub fn new(old: String) -> Self {
85 let old = old.chars().collect::<Vec<_>>();
86 let mut scores = Matrix::new();
87 scores.resize(old.len() + 1, 1);
88 for i in 0..=old.len() {
89 scores.set(i, 0, i as isize * Self::DELETION_SCORE);
90 }
91 Self {
92 old,
93 new: Vec::new(),
94 scores,
95 old_text_ix: 0,
96 new_text_ix: 0,
97 }
98 }
99
100 pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
101 self.new.extend(text.chars());
102 self.scores.resize(self.old.len() + 1, self.new.len() + 1);
103
104 for j in self.new_text_ix + 1..=self.new.len() {
105 self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
106 for i in 1..=self.old.len() {
107 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
108 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
109 let equality_score = if self.old[i - 1] == self.new[j - 1] {
110 self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
111 } else {
112 isize::MIN
113 };
114 let score = insertion_score.max(deletion_score).max(equality_score);
115 self.scores.set(i, j, score);
116 }
117 }
118
119 let mut max_score = isize::MIN;
120 let mut best_row = self.old_text_ix;
121 let mut best_col = self.new_text_ix;
122 for i in self.old_text_ix..=self.old.len() {
123 for j in self.new_text_ix..=self.new.len() {
124 let score = self.scores.get(i, j);
125 if score > max_score {
126 max_score = score;
127 best_row = i;
128 best_col = j;
129 }
130 }
131 }
132
133 let hunks = self.backtrack(best_row, best_col);
134 self.old_text_ix = best_row;
135 self.new_text_ix = best_col;
136 hunks
137 }
138
139 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
140 let mut pending_insert: Option<Range<usize>> = None;
141 let mut hunks = Vec::new();
142 let mut i = old_text_ix;
143 let mut j = new_text_ix;
144 while (i, j) != (self.old_text_ix, self.new_text_ix) {
145 let insertion_score = if j > self.new_text_ix {
146 Some((i, j - 1))
147 } else {
148 None
149 };
150 let deletion_score = if i > self.old_text_ix {
151 Some((i - 1, j))
152 } else {
153 None
154 };
155 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
156 if self.old[i - 1] == self.new[j - 1] {
157 Some((i - 1, j - 1))
158 } else {
159 None
160 }
161 } else {
162 None
163 };
164
165 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
166 .iter()
167 .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
168 .unwrap()
169 .unwrap();
170
171 if prev_i == i && prev_j == j - 1 {
172 if let Some(pending_insert) = pending_insert.as_mut() {
173 pending_insert.start = prev_j;
174 } else {
175 pending_insert = Some(prev_j..j);
176 }
177 } else {
178 if let Some(range) = pending_insert.take() {
179 hunks.push(Hunk::Insert {
180 text: self.new[range].iter().collect(),
181 });
182 }
183
184 let char_len = self.old[i - 1].len_utf8();
185 if prev_i == i - 1 && prev_j == j {
186 if let Some(Hunk::Remove { len }) = hunks.last_mut() {
187 *len += char_len;
188 } else {
189 hunks.push(Hunk::Remove { len: char_len })
190 }
191 } else {
192 if let Some(Hunk::Keep { len }) = hunks.last_mut() {
193 *len += char_len;
194 } else {
195 hunks.push(Hunk::Keep { len: char_len })
196 }
197 }
198 }
199
200 i = prev_i;
201 j = prev_j;
202 }
203
204 if let Some(range) = pending_insert.take() {
205 hunks.push(Hunk::Insert {
206 text: self.new[range].iter().collect(),
207 });
208 }
209
210 hunks.reverse();
211 hunks
212 }
213
214 pub fn finish(self) -> Vec<Hunk> {
215 self.backtrack(self.old.len(), self.new.len())
216 }
217}
218
219#[cfg(test)]
220mod tests {
221 use std::env;
222
223 use super::*;
224 use rand::prelude::*;
225
226 #[gpui::test(iterations = 100)]
227 fn test_random_diffs(mut rng: StdRng) {
228 let old_text_len = env::var("OLD_TEXT_LEN")
229 .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
230 .unwrap_or(10);
231 let new_text_len = env::var("NEW_TEXT_LEN")
232 .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
233 .unwrap_or(10);
234
235 let old = util::RandomCharIter::new(&mut rng)
236 .take(old_text_len)
237 .collect::<String>();
238 log::info!("old text: {:?}", old);
239
240 let mut diff = Diff::new(old.clone());
241 let mut hunks = Vec::new();
242 let mut new_len = 0;
243 let mut new = String::new();
244 while new_len < new_text_len {
245 let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
246 let new_chunk = util::RandomCharIter::new(&mut rng)
247 .take(new_len)
248 .collect::<String>();
249 log::info!("new chunk: {:?}", new_chunk);
250 new_len += new_chunk_len;
251 new.push_str(&new_chunk);
252 let new_hunks = diff.push_new(&new_chunk);
253 log::info!("hunks: {:?}", new_hunks);
254 hunks.extend(new_hunks);
255 }
256 let final_hunks = diff.finish();
257 log::info!("final hunks: {:?}", final_hunks);
258 hunks.extend(final_hunks);
259
260 log::info!("new text: {:?}", new);
261 let mut old_ix = 0;
262 let mut new_ix = 0;
263 let mut patched = String::new();
264 for hunk in hunks {
265 match hunk {
266 Hunk::Keep { len } => {
267 assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
268 patched.push_str(&old[old_ix..old_ix + len]);
269 old_ix += len;
270 new_ix += len;
271 }
272 Hunk::Remove { len } => {
273 old_ix += len;
274 }
275 Hunk::Insert { text } => {
276 assert_eq!(text, &new[new_ix..new_ix + text.len()]);
277 patched.push_str(&text);
278 new_ix += text.len();
279 }
280 }
281 }
282 assert_eq!(patched, new);
283 }
284}