1use std::{
2 cmp,
3 fmt::{self, Debug},
4};
5
6use collections::BinaryHeap;
7
8struct Matrix {
9 cells: Vec<isize>,
10 rows: usize,
11 cols: usize,
12}
13
14impl Matrix {
15 fn new() -> Self {
16 Self {
17 cells: Vec::new(),
18 rows: 0,
19 cols: 0,
20 }
21 }
22
23 fn resize(&mut self, rows: usize, cols: usize) {
24 self.cells.resize(rows * cols, 0);
25 self.rows = rows;
26 self.cols = cols;
27 }
28
29 fn get(&self, row: usize, col: usize) -> isize {
30 if row >= self.rows {
31 panic!("row out of bounds")
32 }
33
34 if col >= self.cols {
35 panic!("col out of bounds")
36 }
37 self.cells[col * self.rows + row]
38 }
39
40 fn set(&mut self, row: usize, col: usize, value: isize) {
41 if row >= self.rows {
42 panic!("row out of bounds")
43 }
44
45 if col >= self.cols {
46 panic!("col out of bounds")
47 }
48
49 self.cells[col * self.rows + row] = value;
50 }
51}
52
53impl Debug for Matrix {
54 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
55 writeln!(f)?;
56 for i in 0..self.rows {
57 for j in 0..self.cols {
58 write!(f, "{:5}", self.get(i, j))?;
59 }
60 writeln!(f)?;
61 }
62 Ok(())
63 }
64}
65
66#[derive(Debug)]
67pub enum Hunk {
68 Insert { text: String },
69 Remove { len: usize },
70 Keep { len: usize },
71}
72
73pub struct Diff {
74 old: String,
75 new: String,
76 scores: Matrix,
77 old_text_ix: usize,
78 new_text_ix: usize,
79}
80
81impl Diff {
82 const INSERTION_SCORE: isize = -1;
83 const DELETION_SCORE: isize = -4;
84 const EQUALITY_SCORE: isize = 5;
85
86 pub fn new(old: String) -> Self {
87 let mut scores = Matrix::new();
88 scores.resize(old.len() + 1, 1);
89 for i in 0..=old.len() {
90 scores.set(i, 0, i as isize * Self::DELETION_SCORE);
91 }
92 Self {
93 old,
94 new: String::new(),
95 scores,
96 old_text_ix: 0,
97 new_text_ix: 0,
98 }
99 }
100
101 pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
102 self.new.push_str(text);
103 self.scores.resize(self.old.len() + 1, self.new.len() + 1);
104
105 for j in self.new_text_ix + 1..=self.new.len() {
106 self.scores.set(0, j, j as isize * Self::INSERTION_SCORE);
107 for i in 1..=self.old.len() {
108 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
109 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
110 let equality_score = if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
111 self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
112 } else {
113 isize::MIN
114 };
115 let score = insertion_score.max(deletion_score).max(equality_score);
116 self.scores.set(i, j, score);
117 }
118 }
119
120 let mut max_score = isize::MIN;
121 let mut best_row = self.old_text_ix;
122 let mut best_col = self.new_text_ix;
123 for i in self.old_text_ix..=self.old.len() {
124 for j in self.new_text_ix..=self.new.len() {
125 let score = self.scores.get(i, j);
126 if score > max_score {
127 max_score = score;
128 best_row = i;
129 best_col = j;
130 }
131 }
132 }
133
134 let hunks = self.backtrack(best_row, best_col);
135 self.old_text_ix = best_row;
136 self.new_text_ix = best_col;
137 hunks
138 }
139
140 fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
141 let mut hunks = Vec::new();
142 let mut i = old_text_ix;
143 let mut j = new_text_ix;
144 while (i, j) != (self.old_text_ix, self.new_text_ix) {
145 let insertion_score = if j > self.new_text_ix {
146 Some((i, j - 1))
147 } else {
148 None
149 };
150 let deletion_score = if i > self.old_text_ix {
151 Some((i - 1, j))
152 } else {
153 None
154 };
155 let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
156 if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
157 Some((i - 1, j - 1))
158 } else {
159 None
160 }
161 } else {
162 None
163 };
164
165 let (prev_i, prev_j) = [insertion_score, deletion_score, equality_score]
166 .iter()
167 .max_by_key(|cell| cell.map(|(i, j)| self.scores.get(i, j)))
168 .unwrap()
169 .unwrap();
170
171 if prev_i == i && prev_j == j - 1 {
172 if let Some(Hunk::Insert { text }) = hunks.last_mut() {
173 text.insert_str(0, &self.new[prev_j..j]);
174 } else {
175 hunks.push(Hunk::Insert {
176 text: self.new[prev_j..j].to_string(),
177 })
178 }
179 } else if prev_i == i - 1 && prev_j == j {
180 if let Some(Hunk::Remove { len }) = hunks.last_mut() {
181 *len += 1;
182 } else {
183 hunks.push(Hunk::Remove { len: 1 })
184 }
185 } else {
186 if let Some(Hunk::Keep { len }) = hunks.last_mut() {
187 *len += 1;
188 } else {
189 hunks.push(Hunk::Keep { len: 1 })
190 }
191 }
192
193 i = prev_i;
194 j = prev_j;
195 }
196 hunks.reverse();
197 hunks
198 }
199
200 pub fn finish(self) -> Vec<Hunk> {
201 self.backtrack(self.old.len(), self.new.len())
202 }
203}
204
205#[cfg(test)]
206mod tests {
207 use super::*;
208
209 #[test]
210 fn test_diff() {
211 let mut diff = Diff::new("hello world".to_string());
212 dbg!(diff.push_new("hello"));
213 dbg!(diff.push_new(" ciaone"));
214 // dbg!(diff.push_new(" world"));
215 dbg!(diff.finish());
216 }
217}