use crate::{CharClassifier, CharKind, CharScopeContext, LanguageScope};
use anyhow::{Context, anyhow};
use imara_diff::{
    Algorithm, UnifiedDiffBuilder, diff,
    intern::{InternedInput, Token},
    sources::lines_with_terminator,
};
use std::{iter, ops::Range, sync::Arc};

const MAX_WORD_DIFF_LEN: usize = 512;
const MAX_WORD_DIFF_LINE_COUNT: usize = 8;

/// Computes a diff between two strings, returning a unified diff string.
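///
/// For example (an illustrative sketch, not compiled as a doc test):
///
/// ```ignore
/// let patch = unified_diff("one\ntwo\nthree\n", "one\nTWO\nthree\n");
/// assert!(patch.contains("-two"));
/// assert!(patch.contains("+TWO"));
/// ```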
pub fn unified_diff(old_text: &str, new_text: &str) -> String {
    let input = InternedInput::new(old_text, new_text);
    diff(
        Algorithm::Histogram,
        &input,
        UnifiedDiffBuilder::new(&input),
    )
}

/// Computes a diff between two strings, returning a vector of old and new row
/// ranges.
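///
/// For example (an illustrative sketch, not compiled as a doc test):
///
/// ```ignore
/// // Inserting a line between "one" and "three" produces an empty old row
/// // range at row 1 and a one-row new range.
/// assert_eq!(line_diff("one\nthree\n", "one\ntwo\nthree\n"), [(1..1, 1..2)]);
/// ```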
pub fn line_diff(old_text: &str, new_text: &str) -> Vec<(Range<u32>, Range<u32>)> {
    let mut edits = Vec::new();
    let input = InternedInput::new(
        lines_with_terminator(old_text),
        lines_with_terminator(new_text),
    );
    diff_internal(&input, |_, _, old_rows, new_rows| {
        edits.push((old_rows, new_rows));
    });
    edits
}

/// Computes a diff between two strings, returning a vector of edits.
///
/// The edits are represented as tuples of byte ranges and replacement strings.
///
/// Internally, this function first performs a line-based diff, and then performs a second
/// word-based diff within hunks that replace a small number of lines.
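///
/// For example (an illustrative sketch, not compiled as a doc test):
///
/// ```ignore
/// // Replacing a single word within a line yields a single word-sized edit.
/// let edits = text_diff("one two three", "one TWO three");
/// assert_eq!(edits, [(4..7, "TWO".into())]);
/// ```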
pub fn text_diff(old_text: &str, new_text: &str) -> Vec<(Range<usize>, Arc<str>)> {
    text_diff_with_options(old_text, new_text, DiffOptions::default())
}

/// Options controlling how [`text_diff_with_options`] refines line-based hunks
/// with a word-based diff.
pub struct DiffOptions {
    /// The language scope whose word characters are used when tokenizing hunks
    /// for the word-based diff.
    pub language_scope: Option<LanguageScope>,
    /// The maximum hunk size, in bytes, for which a word-based diff is performed.
    pub max_word_diff_len: usize,
    /// The maximum hunk size, in lines, for which a word-based diff is performed.
    pub max_word_diff_line_count: usize,
}

impl Default for DiffOptions {
    fn default() -> Self {
        Self {
            language_scope: Default::default(),
            max_word_diff_len: MAX_WORD_DIFF_LEN,
            max_word_diff_line_count: MAX_WORD_DIFF_LINE_COUNT,
        }
    }
}

/// Computes a diff between two strings, like [`text_diff`], but using the given
/// [`DiffOptions`], including a specific language scope's word characters for
/// word-level diffing.
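///
/// For example (an illustrative sketch, not compiled as a doc test):
///
/// ```ignore
/// let edits = text_diff_with_options(
///     "one two three",
///     "one TWO three",
///     DiffOptions {
///         max_word_diff_line_count: 4,
///         ..DiffOptions::default()
///     },
/// );
/// assert_eq!(edits, [(4..7, "TWO".into())]);
/// ```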
pub fn text_diff_with_options(
    old_text: &str,
    new_text: &str,
    options: DiffOptions,
) -> Vec<(Range<usize>, Arc<str>)> {
    let empty: Arc<str> = Arc::default();
    let mut edits = Vec::new();
    let mut hunk_input = InternedInput::default();
    let input = InternedInput::new(
        lines_with_terminator(old_text),
        lines_with_terminator(new_text),
    );
    diff_internal(
        &input,
        |old_byte_range, new_byte_range, old_rows, new_rows| {
            if should_perform_word_diff_within_hunk(
                &old_rows,
                &old_byte_range,
                &new_rows,
                &new_byte_range,
                &options,
            ) {
                // Refine this hunk with a second, word-level diff, translating the
                // resulting byte ranges back into the coordinates of the full texts.
                let old_offset = old_byte_range.start;
                let new_offset = new_byte_range.start;
                hunk_input.clear();
                hunk_input.update_before(tokenize(
                    &old_text[old_byte_range],
                    options.language_scope.clone(),
                ));
                hunk_input.update_after(tokenize(
                    &new_text[new_byte_range],
                    options.language_scope.clone(),
                ));
                diff_internal(&hunk_input, |old_byte_range, new_byte_range, _, _| {
                    let old_byte_range =
                        old_offset + old_byte_range.start..old_offset + old_byte_range.end;
                    let new_byte_range =
                        new_offset + new_byte_range.start..new_offset + new_byte_range.end;
                    let replacement_text = if new_byte_range.is_empty() {
                        empty.clone()
                    } else {
                        new_text[new_byte_range].into()
                    };
                    edits.push((old_byte_range, replacement_text));
                });
            } else {
                // The hunk is too large for a word-level diff; emit it as a single
                // line-level replacement.
                let replacement_text = if new_byte_range.is_empty() {
                    empty.clone()
                } else {
                    new_text[new_byte_range].into()
                };
                edits.push((old_byte_range, replacement_text));
            }
        },
    );
    edits
}

/// Applies a unified diff patch, such as one produced by [`unified_diff`], to
/// `base_text`.
pub fn apply_diff_patch(base_text: &str, patch: &str) -> Result<String, anyhow::Error> {
    let patch = diffy::Patch::from_str(patch).context("Failed to parse patch")?;
    let result = diffy::apply(base_text, &patch);
    result.map_err(|err| anyhow!(err))
}

/// Returns whether a hunk is small enough, in both bytes and lines, for the
/// word-based diff pass. Pure insertions and deletions are never refined.
fn should_perform_word_diff_within_hunk(
    old_row_range: &Range<u32>,
    old_byte_range: &Range<usize>,
    new_row_range: &Range<u32>,
    new_byte_range: &Range<usize>,
    options: &DiffOptions,
) -> bool {
    !old_byte_range.is_empty()
        && !new_byte_range.is_empty()
        && old_byte_range.len() <= options.max_word_diff_len
        && new_byte_range.len() <= options.max_word_diff_len
        && old_row_range.len() <= options.max_word_diff_line_count
        && new_row_range.len() <= options.max_word_diff_line_count
}

/// Runs a histogram diff over the interned input and calls `on_change` for each
/// hunk, passing the hunk's old and new byte ranges as well as its old and new
/// token ranges.
fn diff_internal(
    input: &InternedInput<&str>,
    mut on_change: impl FnMut(Range<usize>, Range<usize>, Range<u32>, Range<u32>),
) {
    let mut old_offset = 0;
    let mut new_offset = 0;
    let mut old_token_ix = 0;
    let mut new_token_ix = 0;
    diff(
        Algorithm::Histogram,
        input,
        |old_tokens: Range<u32>, new_tokens: Range<u32>| {
            // Advance the byte offsets past the unchanged tokens preceding this hunk.
            old_offset += token_len(
                input,
                &input.before[old_token_ix as usize..old_tokens.start as usize],
            );
            new_offset += token_len(
                input,
                &input.after[new_token_ix as usize..new_tokens.start as usize],
            );
            let old_len = token_len(
                input,
                &input.before[old_tokens.start as usize..old_tokens.end as usize],
            );
            let new_len = token_len(
                input,
                &input.after[new_tokens.start as usize..new_tokens.end as usize],
            );
            let old_byte_range = old_offset..old_offset + old_len;
            let new_byte_range = new_offset..new_offset + new_len;
            old_token_ix = old_tokens.end;
            new_token_ix = new_tokens.end;
            old_offset = old_byte_range.end;
            new_offset = new_byte_range.end;
            on_change(old_byte_range, new_byte_range, old_tokens, new_tokens);
        },
    );
}

/// Splits `text` into tokens for word-level diffing: consecutive characters of
/// the same kind form one token, except that a punctuation character only
/// continues a token if it repeats the previous character.
fn tokenize(text: &str, language_scope: Option<LanguageScope>) -> impl Iterator<Item = &str> {
    let classifier =
        CharClassifier::new(language_scope).scope_context(Some(CharScopeContext::Completion));
    let mut chars = text.char_indices();
    let mut prev = None;
    let mut start_ix = 0;
    iter::from_fn(move || {
        for (ix, c) in chars.by_ref() {
            let mut token = None;
            let kind = classifier.kind(c);
            if let Some((prev_char, prev_kind)) = prev
                && (kind != prev_kind || (kind == CharKind::Punctuation && c != prev_char))
            {
                token = Some(&text[start_ix..ix]);
                start_ix = ix;
            }
            prev = Some((c, kind));
            if token.is_some() {
                return token;
            }
        }
        if start_ix < text.len() {
            let token = &text[start_ix..];
            start_ix = text.len();
            return Some(token);
        }
        None
    })
}

/// Returns the total length, in bytes, of the given interned tokens.
fn token_len(input: &InternedInput<&str>, tokens: &[Token]) -> usize {
    tokens
        .iter()
        .map(|token| input.interner[*token].len())
        .sum()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize() {
        let text = "";
        assert_eq!(tokenize(text, None).collect::<Vec<_>>(), Vec::<&str>::new());

        let text = " ";
        assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec![" "]);

        let text = "one";
        assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec!["one"]);

        let text = "one\n";
        assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec!["one", "\n"]);

        let text = "one.two(three)";
        assert_eq!(
            tokenize(text, None).collect::<Vec<_>>(),
            vec!["one", ".", "two", "(", "three", ")"]
        );

        let text = "one two three()";
        assert_eq!(
            tokenize(text, None).collect::<Vec<_>>(),
            vec!["one", " ", "two", " ", "three", "(", ")"]
        );

        let text = " one\n two three";
        assert_eq!(
            tokenize(text, None).collect::<Vec<_>>(),
            vec![" ", "one", "\n ", "two", " ", "three"]
        );
    }
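
    // Additional coverage (illustrative): a repeated punctuation character
    // extends the current token, while differing punctuation characters split.
    #[test]
    fn test_tokenize_punctuation_runs() {
        let text = "a::b == c";
        assert_eq!(
            tokenize(text, None).collect::<Vec<_>>(),
            vec!["a", "::", "b", " ", "==", " ", "c"]
        );
    }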

    #[test]
    fn test_text_diff() {
        let old_text = "one two three";
        let new_text = "one TWO three";
        assert_eq!(text_diff(old_text, new_text), [(4..7, "TWO".into()),]);

        let old_text = "one\ntwo\nthree\n";
        let new_text = "one\ntwo\nAND\nTHEN\nthree\n";
        assert_eq!(
            text_diff(old_text, new_text),
            [(8..8, "AND\nTHEN\n".into()),]
        );

        let old_text = "one two\nthree four five\nsix seven eight nine\nten\n";
        let new_text = "one two\nthree FOUR five\nsix SEVEN eight nine\nten\nELEVEN\n";
        assert_eq!(
            text_diff(old_text, new_text),
            [
                (14..18, "FOUR".into()),
                (28..33, "SEVEN".into()),
                (49..49, "ELEVEN\n".into())
            ]
        );
    }
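
    // Sanity check (illustrative): applying the edits returned by `text_diff`
    // back-to-front reproduces the new text exactly.
    #[test]
    fn test_text_diff_edits_reproduce_new_text() {
        let old_text = "fn main() {\n    println!(\"hello\");\n}\n";
        let new_text = "fn main() {\n    println!(\"hello, world\");\n    return;\n}\n";
        let mut patched = old_text.to_string();
        for (range, replacement) in text_diff(old_text, new_text).into_iter().rev() {
            patched.replace_range(range, &replacement);
        }
        assert_eq!(patched, new_text);
    }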

    #[test]
    fn test_apply_diff_patch() {
        let old_text = "one two\nthree four five\nsix seven eight nine\nten\n";
        let new_text = "one two\nthree FOUR five\nsix SEVEN eight nine\nten\nELEVEN\n";
        let patch = unified_diff(old_text, new_text);
        assert_eq!(apply_diff_patch(old_text, &patch).unwrap(), new_text);
    }
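
    // A hedged sketch: with `max_word_diff_len` set to zero, no hunk qualifies
    // for word-level refinement, so the whole changed line becomes one edit.
    #[test]
    fn test_text_diff_with_options_line_level_only() {
        let old_text = "one two three\n";
        let new_text = "one TWO three\n";
        let options = DiffOptions {
            max_word_diff_len: 0,
            ..DiffOptions::default()
        };
        assert_eq!(
            text_diff_with_options(old_text, new_text, options),
            [(0..14, "one TWO three\n".into())]
        );
    }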
}