1use std::{
2 fmt::{self, Debug},
3 ops::Range,
4};
5
6struct Matrix {
7 cells: Vec<isize>,
8 rows: usize,
9 cols: usize,
10}
11
12impl Matrix {
13 fn new() -> Self {
14 Self {
15 cells: Vec::new(),
16 rows: 0,
17 cols: 0,
18 }
19 }
20
21 fn resize(&mut self, rows: usize, cols: usize) {
22 self.cells.resize(rows * cols, 0);
23 self.rows = rows;
24 self.cols = cols;
25 }
26
27 fn get(&self, row: usize, col: usize) -> isize {
28 if row >= self.rows {
29 panic!("row out of bounds")
30 }
31
32 if col >= self.cols {
33 panic!("col out of bounds")
34 }
35 self.cells[col * self.rows + row]
36 }
37
38 fn set(&mut self, row: usize, col: usize, value: isize) {
39 if row >= self.rows {
40 panic!("row out of bounds")
41 }
42
43 if col >= self.cols {
44 panic!("col out of bounds")
45 }
46
47 self.cells[col * self.rows + row] = value;
48 }
49}
50
51impl Debug for Matrix {
52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 writeln!(f)?;
54 for i in 0..self.rows {
55 for j in 0..self.cols {
56 write!(f, "{:5}", self.get(i, j))?;
57 }
58 writeln!(f)?;
59 }
60 Ok(())
61 }
62}
63
64#[derive(Debug)]
65pub enum Hunk {
66 Insert { text: String },
67 Remove { len: usize },
68 Keep { len: usize },
69}
70
71pub struct Diff {
72 old: Vec<char>,
73 new: Vec<char>,
74 scores: Matrix,
75 old_text_ix: usize,
76 new_text_ix: usize,
77}
78
79impl Diff {
80 const INSERTION_SCORE: isize = -1;
81 const DELETION_SCORE: isize = -4;
82 const EQUALITY_SCORE: isize = 15;
83
84 pub fn new(old: String) -> Self {
85 let old = old.chars().collect::<Vec<_>>();
86 let mut scores = Matrix::new();
87 scores.resize(old.len() + 1, 1);
88 for i in 0..=old.len() {
89 scores.set(i, 0, i as isize * Self::DELETION_SCORE);
90 }
91 Self {
92 old,
93 new: Vec::new(),
94 scores,
95 old_text_ix: 0,
96 new_text_ix: 0,
97 }
98 }
99
100 pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
101 self.new.extend(text.chars());
102 self.scores.resize(self.old.len() + 1, self.new.len() + 1);
103
104 for j in self.new_text_ix + 1..=self.new.len() {
105 self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
106 for i in 1..=self.old.len() {
107 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
108 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
109 let equality_score = if self.old[i - 1] == self.new[j - 1] {
110 if self.old[i - 1] == ' ' {
111 self.scores.get(i - 1, j - 1)
112 } else {
113 self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
114 }
115 } else {
116 isize::MIN
117 };
118 let score = insertion_score.max(deletion_score).max(equality_score);
119 self.scores.set(i, j, score);
120 }
121 }
122
123 let mut max_score = isize::MIN;
124 let mut best_row = self.old_text_ix;
125 let mut best_col = self.new_text_ix;
126 for i in self.old_text_ix..=self.old.len() {
127 for j in self.new_text_ix..=self.new.len() {
128 let score = self.scores.get(i, j);
129 if score > max_score {
130 max_score = score;
131 best_row = i;
132 best_col = j;
133 }
134 }
135 }
136
137 let hunks = self.backtrack(best_row, best_col);
138 self.old_text_ix = best_row;
139 self.new_text_ix = best_col;
140 hunks
141 }
142
143 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
144 let mut pending_insert: Option<Range<usize>> = None;
145 let mut hunks = Vec::new();
146 let mut i = old_text_ix;
147 let mut j = new_text_ix;
148 while (i, j) != (self.old_text_ix, self.new_text_ix) {
149 let insertion_score = if j > self.new_text_ix {
150 Some((i, j - 1))
151 } else {
152 None
153 };
154 let deletion_score = if i > self.old_text_ix {
155 Some((i - 1, j))
156 } else {
157 None
158 };
159 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
160 if self.old[i - 1] == self.new[j - 1] {
161 Some((i - 1, j - 1))
162 } else {
163 None
164 }
165 } else {
166 None
167 };
168
169 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
170 .iter()
171 .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
172 .unwrap()
173 .unwrap();
174
175 if prev_i == i && prev_j == j - 1 {
176 if let Some(pending_insert) = pending_insert.as_mut() {
177 pending_insert.start = prev_j;
178 } else {
179 pending_insert = Some(prev_j..j);
180 }
181 } else {
182 if let Some(range) = pending_insert.take() {
183 hunks.push(Hunk::Insert {
184 text: self.new[range].iter().collect(),
185 });
186 }
187
188 let char_len = self.old[i - 1].len_utf8();
189 if prev_i == i - 1 && prev_j == j {
190 if let Some(Hunk::Remove { len }) = hunks.last_mut() {
191 *len += char_len;
192 } else {
193 hunks.push(Hunk::Remove { len: char_len })
194 }
195 } else {
196 if let Some(Hunk::Keep { len }) = hunks.last_mut() {
197 *len += char_len;
198 } else {
199 hunks.push(Hunk::Keep { len: char_len })
200 }
201 }
202 }
203
204 i = prev_i;
205 j = prev_j;
206 }
207
208 if let Some(range) = pending_insert.take() {
209 hunks.push(Hunk::Insert {
210 text: self.new[range].iter().collect(),
211 });
212 }
213
214 hunks.reverse();
215 hunks
216 }
217
218 pub fn finish(self) -> Vec<Hunk> {
219 self.backtrack(self.old.len(), self.new.len())
220 }
221}
222
223#[cfg(test)]
224mod tests {
225 use std::env;
226
227 use super::*;
228 use rand::prelude::*;
229
230 #[gpui::test(iterations = 100)]
231 fn test_random_diffs(mut rng: StdRng) {
232 let old_text_len = env::var("OLD_TEXT_LEN")
233 .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
234 .unwrap_or(10);
235 let new_text_len = env::var("NEW_TEXT_LEN")
236 .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
237 .unwrap_or(10);
238
239 let old = util::RandomCharIter::new(&mut rng)
240 .take(old_text_len)
241 .collect::<String>();
242 log::info!("old text: {:?}", old);
243
244 let mut diff = Diff::new(old.clone());
245 let mut hunks = Vec::new();
246 let mut new_len = 0;
247 let mut new = String::new();
248 while new_len < new_text_len {
249 let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
250 let new_chunk = util::RandomCharIter::new(&mut rng)
251 .take(new_len)
252 .collect::<String>();
253 log::info!("new chunk: {:?}", new_chunk);
254 new_len += new_chunk_len;
255 new.push_str(&new_chunk);
256 let new_hunks = diff.push_new(&new_chunk);
257 log::info!("hunks: {:?}", new_hunks);
258 hunks.extend(new_hunks);
259 }
260 let final_hunks = diff.finish();
261 log::info!("final hunks: {:?}", final_hunks);
262 hunks.extend(final_hunks);
263
264 log::info!("new text: {:?}", new);
265 let mut old_ix = 0;
266 let mut new_ix = 0;
267 let mut patched = String::new();
268 for hunk in hunks {
269 match hunk {
270 Hunk::Keep { len } => {
271 assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
272 patched.push_str(&old[old_ix..old_ix + len]);
273 old_ix += len;
274 new_ix += len;
275 }
276 Hunk::Remove { len } => {
277 old_ix += len;
278 }
279 Hunk::Insert { text } => {
280 assert_eq!(text, &new[new_ix..new_ix + text.len()]);
281 patched.push_str(&text);
282 new_ix += text.len();
283 }
284 }
285 }
286 assert_eq!(patched, new);
287 }
288}