1use anyhow::{Context as _, Result, anyhow};
2use language::{Anchor, BufferSnapshot, OffsetRangeExt as _, TextBufferSnapshot};
3use std::ops::Range;
4use std::path::Path;
5use std::sync::Arc;
6
7pub async fn parse_xml_edits<'a>(
8 input: &'a str,
9 get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
10) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
11 parse_xml_edits_inner(input, get_buffer)
12 .await
13 .with_context(|| format!("Failed to parse XML edits:\n{input}"))
14}
15
16async fn parse_xml_edits_inner<'a>(
17 mut input: &'a str,
18 get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
19) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
20 let edits_tag = parse_tag(&mut input, "edits")?.context("No edits tag")?;
21
22 input = edits_tag.body;
23
24 let file_path = edits_tag
25 .attributes
26 .trim_start()
27 .strip_prefix("path")
28 .context("no file attribute on edits tag")?
29 .trim_end()
30 .strip_prefix('=')
31 .context("no value for path attribute")?
32 .trim()
33 .trim_start_matches('"')
34 .trim_end_matches('"');
35
36 let (buffer, context_ranges) = get_buffer(file_path.as_ref())
37 .with_context(|| format!("no buffer for file {file_path}"))?;
38
39 let mut edits = vec![];
40 while let Some(old_text_tag) = parse_tag(&mut input, "old_text")? {
41 let new_text_tag =
42 parse_tag(&mut input, "new_text")?.context("no new_text tag following old_text")?;
43 edits.extend(resolve_new_text_old_text_in_buffer(
44 new_text_tag.body,
45 old_text_tag.body,
46 buffer,
47 context_ranges,
48 )?);
49 }
50
51 Ok((buffer, edits))
52}
53
54fn resolve_new_text_old_text_in_buffer(
55 new_text: &str,
56 old_text: &str,
57 buffer: &TextBufferSnapshot,
58 ranges: &[Range<Anchor>],
59) -> Result<impl Iterator<Item = (Range<Anchor>, Arc<str>)>, anyhow::Error> {
60 let context_offset = if old_text.is_empty() {
61 Ok(0)
62 } else {
63 let mut offset = None;
64 for range in ranges {
65 let range = range.to_offset(buffer);
66 let text = buffer.text_for_range(range.clone()).collect::<String>();
67 for (match_offset, _) in text.match_indices(old_text) {
68 if let Some(offset) = offset {
69 let offset_match_point = buffer.offset_to_point(offset);
70 let second_match_point = buffer.offset_to_point(range.start + match_offset);
71 anyhow::bail!(
72 "old_text is not unique enough:\n{}\nFound at {:?} and {:?}",
73 old_text,
74 offset_match_point,
75 second_match_point
76 );
77 }
78 offset = Some(range.start + match_offset);
79 }
80 }
81 offset.ok_or_else(|| {
82 #[cfg(any(debug_assertions, feature = "eval-support"))]
83 if let Some(closest_match) = closest_old_text_match(buffer, old_text) {
84 log::info!(
85 "Closest `old_text` match: {}",
86 pretty_assertions::StrComparison::new(old_text, &closest_match)
87 )
88 }
89 anyhow!("Failed to match old_text:\n{}", old_text)
90 })
91 }?;
92
93 let edits_within_hunk = language::text_diff(&old_text, &new_text);
94 Ok(edits_within_hunk
95 .into_iter()
96 .map(move |(inner_range, inner_text)| {
97 (
98 buffer.anchor_after(context_offset + inner_range.start)
99 ..buffer.anchor_before(context_offset + inner_range.end),
100 inner_text,
101 )
102 }))
103}
104
105#[cfg(any(debug_assertions, feature = "eval-support"))]
106fn closest_old_text_match(buffer: &TextBufferSnapshot, old_text: &str) -> Option<String> {
107 let buffer_text = buffer.text();
108 let len = old_text.len();
109
110 if len == 0 || buffer_text.len() < len {
111 return None;
112 }
113
114 let mut min_score = usize::MAX;
115 let mut min_start = 0;
116
117 let old_text_bytes = old_text.as_bytes();
118 let old_alpha_count = old_text_bytes
119 .iter()
120 .filter(|&&b| b.is_ascii_alphanumeric())
121 .count();
122
123 let old_line_count = old_text.lines().count();
124
125 let mut cursor = 0;
126
127 while cursor + len <= buffer_text.len() {
128 let candidate = &buffer_text[cursor..cursor + len];
129 let candidate_bytes = candidate.as_bytes();
130
131 if usize::abs_diff(candidate.lines().count(), old_line_count) > 4 {
132 cursor += 1;
133 continue;
134 }
135
136 let candidate_alpha_count = candidate_bytes
137 .iter()
138 .filter(|&&b| b.is_ascii_alphanumeric())
139 .count();
140
141 // If alphanumeric character count differs by more than 30%, skip
142 if usize::abs_diff(old_alpha_count, candidate_alpha_count) * 10 > old_alpha_count * 3 {
143 cursor += 1;
144 continue;
145 }
146
147 let score = strsim::levenshtein(candidate, old_text);
148 if score < min_score {
149 min_score = score;
150 min_start = cursor;
151
152 if min_score <= len / 10 {
153 break;
154 }
155 }
156
157 cursor += 1;
158 }
159
160 if min_score != usize::MAX {
161 Some(buffer_text[min_start..min_start + len].to_string())
162 } else {
163 None
164 }
165}
166
/// One `<tag ...>body</tag>` element extracted by `parse_tag`, borrowing from the input.
struct ParsedTag<'a> {
    /// Text between the tag name and the `>` that ends the opening tag, with surrounding
    /// whitespace trimmed. Left unparsed.
    attributes: &'a str,
    /// Text between the opening and closing tags, minus a single leading newline if present.
    body: &'a str,
}
171
172fn parse_tag<'a>(input: &mut &'a str, tag: &str) -> Result<Option<ParsedTag<'a>>> {
173 let open_tag = format!("<{}", tag);
174 let close_tag = format!("</{}>", tag);
175 let Some(start_ix) = input.find(&open_tag) else {
176 return Ok(None);
177 };
178 let start_ix = start_ix + open_tag.len();
179 let closing_bracket_ix = start_ix
180 + input[start_ix..]
181 .find('>')
182 .with_context(|| format!("missing > after {tag}"))?;
183 let attributes = &input[start_ix..closing_bracket_ix].trim();
184 let end_ix = closing_bracket_ix
185 + input[closing_bracket_ix..]
186 .find(&close_tag)
187 .with_context(|| format!("no `{close_tag}` tag"))?;
188 let body = &input[closing_bracket_ix + '>'.len_utf8()..end_ix];
189 let body = body.strip_prefix('\n').unwrap_or(body);
190 *input = &input[end_ix + close_tag.len()..];
191 Ok(Some(ParsedTag { attributes, body }))
192}
193
194#[cfg(test)]
195mod tests {
196 use super::*;
197 use gpui::TestAppContext;
198 use indoc::indoc;
199 use language::Point;
200 use project::{FakeFs, Project};
201 use serde_json::json;
202 use settings::SettingsStore;
203 use util::path;
204
205 #[test]
206 fn test_parse_tags() {
207 let mut input = indoc! {r#"
208 Prelude
209 <tag attr="foo">
210 tag value
211 </tag>
212 "# };
213 let parsed = parse_tag(&mut input, "tag").unwrap().unwrap();
214 assert_eq!(parsed.attributes, "attr=\"foo\"");
215 assert_eq!(parsed.body, "tag value\n");
216 assert_eq!(input, "\n");
217 }
218
219 #[gpui::test]
220 async fn test_parse_xml_edits(cx: &mut TestAppContext) {
221 let fs = init_test(cx);
222
223 let buffer_1_text = indoc! {r#"
224 one two three four
225 five six seven eight
226 nine ten eleven twelve
227 "# };
228
229 fs.insert_tree(
230 path!("/root"),
231 json!({
232 "file1": buffer_1_text,
233 }),
234 )
235 .await;
236
237 let project = Project::test(fs, [path!("/root").as_ref()], cx).await;
238 let buffer = project
239 .update(cx, |project, cx| {
240 project.open_local_buffer(path!("/root/file1"), cx)
241 })
242 .await
243 .unwrap();
244 let buffer_snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot());
245
246 let edits = indoc! {r#"
247 <edits path="root/file1">
248 <old_text>
249 five six seven eight
250 </old_text>
251 <new_text>
252 five SIX seven eight!
253 </new_text>
254 </edits>
255 "#};
256
257 let (buffer, edits) = parse_xml_edits(edits, |_path| {
258 Some((&buffer_snapshot, &[(Anchor::MIN..Anchor::MAX)] as &[_]))
259 })
260 .await
261 .unwrap();
262
263 let edits = edits
264 .into_iter()
265 .map(|(range, text)| (range.to_point(&buffer), text))
266 .collect::<Vec<_>>();
267 assert_eq!(
268 edits,
269 &[
270 (Point::new(1, 5)..Point::new(1, 8), "SIX".into()),
271 (Point::new(1, 20)..Point::new(1, 20), "!".into())
272 ]
273 );
274 }
275
276 fn init_test(cx: &mut TestAppContext) -> Arc<FakeFs> {
277 cx.update(|cx| {
278 let settings_store = SettingsStore::test(cx);
279 cx.set_global(settings_store);
280 });
281
282 FakeFs::new(cx.background_executor.clone())
283 }
284}