1use std::{
2 cmp,
3 fmt::{self, Debug},
4 ops::Range,
5};
6
7use collections::BinaryHeap;
8
9struct Matrix {
10 cells: Vec<isize>,
11 rows: usize,
12 cols: usize,
13}
14
15impl Matrix {
16 fn new() -> Self {
17 Self {
18 cells: Vec::new(),
19 rows: 0,
20 cols: 0,
21 }
22 }
23
24 fn resize(&mut self, rows: usize, cols: usize) {
25 self.cells.resize(rows * cols, 0);
26 self.rows = rows;
27 self.cols = cols;
28 }
29
30 fn get(&self, row: usize, col: usize) -> isize {
31 if row >= self.rows {
32 panic!("row out of bounds")
33 }
34
35 if col >= self.cols {
36 panic!("col out of bounds")
37 }
38 self.cells[col * self.rows + row]
39 }
40
41 fn set(&mut self, row: usize, col: usize, value: isize) {
42 if row >= self.rows {
43 panic!("row out of bounds")
44 }
45
46 if col >= self.cols {
47 panic!("col out of bounds")
48 }
49
50 self.cells[col * self.rows + row] = value;
51 }
52}
53
54impl Debug for Matrix {
55 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56 writeln!(f)?;
57 for i in 0..self.rows {
58 for j in 0..self.cols {
59 write!(f, "{:5}", self.get(i, j))?;
60 }
61 writeln!(f)?;
62 }
63 Ok(())
64 }
65}
66
67#[derive(Debug)]
68pub enum Hunk {
69 Insert { text: String },
70 Remove { len: usize },
71 Keep { len: usize },
72}
73
74pub struct Diff {
75 old: Vec<char>,
76 new: Vec<char>,
77 scores: Matrix,
78 old_text_ix: usize,
79 new_text_ix: usize,
80}
81
82impl Diff {
83 const INSERTION_SCORE: isize = -1;
84 const DELETION_SCORE: isize = -4;
85 const EQUALITY_SCORE: isize = 5;
86
87 pub fn new(old: String) -> Self {
88 let old = old.chars().collect::<Vec<_>>();
89 let mut scores = Matrix::new();
90 scores.resize(old.len() + 1, 1);
91 for i in 0..=old.len() {
92 scores.set(i, 0, i as isize * Self::DELETION_SCORE);
93 }
94 Self {
95 old,
96 new: Vec::new(),
97 scores,
98 old_text_ix: 0,
99 new_text_ix: 0,
100 }
101 }
102
103 pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
104 self.new.extend(text.chars());
105 self.scores.resize(self.old.len() + 1, self.new.len() + 1);
106
107 for j in self.new_text_ix + 1..=self.new.len() {
108 self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
109 for i in 1..=self.old.len() {
110 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
111 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
112 let equality_score = if self.old[i - 1] == self.new[j - 1] {
113 self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
114 } else {
115 isize::MIN
116 };
117 let score = insertion_score.max(deletion_score).max(equality_score);
118 self.scores.set(i, j, score);
119 }
120 }
121
122 let mut max_score = isize::MIN;
123 let mut best_row = self.old_text_ix;
124 let mut best_col = self.new_text_ix;
125 for i in self.old_text_ix..=self.old.len() {
126 for j in self.new_text_ix..=self.new.len() {
127 let score = self.scores.get(i, j);
128 if score > max_score {
129 max_score = score;
130 best_row = i;
131 best_col = j;
132 }
133 }
134 }
135
136 let hunks = self.backtrack(best_row, best_col);
137 self.old_text_ix = best_row;
138 self.new_text_ix = best_col;
139 hunks
140 }
141
142 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
143 let mut pending_insert: Option<Range<usize>> = None;
144 let mut hunks = Vec::new();
145 let mut i = old_text_ix;
146 let mut j = new_text_ix;
147 while (i, j) != (self.old_text_ix, self.new_text_ix) {
148 let insertion_score = if j > self.new_text_ix {
149 Some((i, j - 1))
150 } else {
151 None
152 };
153 let deletion_score = if i > self.old_text_ix {
154 Some((i - 1, j))
155 } else {
156 None
157 };
158 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
159 if self.old[i - 1] == self.new[j - 1] {
160 Some((i - 1, j - 1))
161 } else {
162 None
163 }
164 } else {
165 None
166 };
167
168 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
169 .iter()
170 .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
171 .unwrap()
172 .unwrap();
173
174 if prev_i == i && prev_j == j - 1 {
175 if let Some(pending_insert) = pending_insert.as_mut() {
176 pending_insert.start = prev_j;
177 } else {
178 pending_insert = Some(prev_j..j);
179 }
180 } else {
181 if let Some(range) = pending_insert.take() {
182 hunks.push(Hunk::Insert {
183 text: self.new[range].iter().collect(),
184 });
185 }
186
187 let char_len = self.old[i - 1].len_utf8();
188 if prev_i == i - 1 && prev_j == j {
189 if let Some(Hunk::Remove { len }) = hunks.last_mut() {
190 *len += char_len;
191 } else {
192 hunks.push(Hunk::Remove { len: char_len })
193 }
194 } else {
195 if let Some(Hunk::Keep { len }) = hunks.last_mut() {
196 *len += char_len;
197 } else {
198 hunks.push(Hunk::Keep { len: char_len })
199 }
200 }
201 }
202
203 i = prev_i;
204 j = prev_j;
205 }
206
207 if let Some(range) = pending_insert.take() {
208 hunks.push(Hunk::Insert {
209 text: self.new[range].iter().collect(),
210 });
211 }
212
213 hunks.reverse();
214 hunks
215 }
216
217 pub fn finish(self) -> Vec<Hunk> {
218 self.backtrack(self.old.len(), self.new.len())
219 }
220}
221
222#[cfg(test)]
223mod tests {
224 use std::env;
225
226 use super::*;
227 use rand::prelude::*;
228
229 #[gpui::test(iterations = 100)]
230 fn test_random_diffs(mut rng: StdRng) {
231 let old_text_len = env::var("OLD_TEXT_LEN")
232 .map(|i| i.parse().expect("invalid `OLD_TEXT_LEN` variable"))
233 .unwrap_or(10);
234 let new_text_len = env::var("NEW_TEXT_LEN")
235 .map(|i| i.parse().expect("invalid `NEW_TEXT_LEN` variable"))
236 .unwrap_or(10);
237
238 let old = util::RandomCharIter::new(&mut rng)
239 .take(old_text_len)
240 .collect::<String>();
241 log::info!("old text: {:?}", old);
242
243 let mut diff = Diff::new(old.clone());
244 let mut hunks = Vec::new();
245 let mut new_len = 0;
246 let mut new = String::new();
247 while new_len < new_text_len {
248 let new_chunk_len = rng.gen_range(1..=new_text_len - new_len);
249 let new_chunk = util::RandomCharIter::new(&mut rng)
250 .take(new_len)
251 .collect::<String>();
252 log::info!("new chunk: {:?}", new_chunk);
253 new_len += new_chunk_len;
254 new.push_str(&new_chunk);
255 let new_hunks = diff.push_new(&new_chunk);
256 log::info!("hunks: {:?}", new_hunks);
257 hunks.extend(new_hunks);
258 }
259 let final_hunks = diff.finish();
260 log::info!("final hunks: {:?}", final_hunks);
261 hunks.extend(final_hunks);
262
263 log::info!("new text: {:?}", new);
264 let mut old_ix = 0;
265 let mut new_ix = 0;
266 let mut patched = String::new();
267 for hunk in hunks {
268 match hunk {
269 Hunk::Keep { len } => {
270 assert_eq!(&old[old_ix..old_ix + len], &new[new_ix..new_ix + len]);
271 patched.push_str(&old[old_ix..old_ix + len]);
272 old_ix += len;
273 new_ix += len;
274 }
275 Hunk::Remove { len } => {
276 old_ix += len;
277 }
278 Hunk::Insert { text } => {
279 assert_eq!(text, &new[new_ix..new_ix + text.len()]);
280 patched.push_str(&text);
281 new_ix += text.len();
282 }
283 }
284 }
285 assert_eq!(patched, new);
286 }
287}