xml_edits.rs

  1use anyhow::{Context as _, Result, anyhow};
  2use language::{Anchor, BufferSnapshot, OffsetRangeExt as _, TextBufferSnapshot};
  3use std::ops::Range;
  4use std::path::Path;
  5use std::sync::Arc;
  6
  7pub async fn parse_xml_edits<'a>(
  8    input: &'a str,
  9    get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
 10) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
 11    parse_xml_edits_inner(input, get_buffer)
 12        .await
 13        .with_context(|| format!("Failed to parse XML edits:\n{input}"))
 14}
 15
 16async fn parse_xml_edits_inner<'a>(
 17    mut input: &'a str,
 18    get_buffer: impl Fn(&Path) -> Option<(&'a BufferSnapshot, &'a [Range<Anchor>])> + Send,
 19) -> Result<(&'a BufferSnapshot, Vec<(Range<Anchor>, Arc<str>)>)> {
 20    let edits_tag = parse_tag(&mut input, "edits")?.context("No edits tag")?;
 21
 22    input = edits_tag.body;
 23
 24    let file_path = edits_tag
 25        .attributes
 26        .trim_start()
 27        .strip_prefix("path")
 28        .context("no file attribute on edits tag")?
 29        .trim_end()
 30        .strip_prefix('=')
 31        .context("no value for path attribute")?
 32        .trim()
 33        .trim_start_matches('"')
 34        .trim_end_matches('"');
 35
 36    let (buffer, context_ranges) = get_buffer(file_path.as_ref())
 37        .with_context(|| format!("no buffer for file {file_path}"))?;
 38
 39    let mut edits = vec![];
 40    while let Some(old_text_tag) = parse_tag(&mut input, "old_text")? {
 41        let new_text_tag =
 42            parse_tag(&mut input, "new_text")?.context("no new_text tag following old_text")?;
 43        edits.extend(resolve_new_text_old_text_in_buffer(
 44            new_text_tag.body,
 45            old_text_tag.body,
 46            buffer,
 47            context_ranges,
 48        )?);
 49    }
 50
 51    Ok((buffer, edits))
 52}
 53
 54fn resolve_new_text_old_text_in_buffer(
 55    new_text: &str,
 56    old_text: &str,
 57    buffer: &TextBufferSnapshot,
 58    ranges: &[Range<Anchor>],
 59) -> Result<impl Iterator<Item = (Range<Anchor>, Arc<str>)>, anyhow::Error> {
 60    let context_offset = if old_text.is_empty() {
 61        Ok(0)
 62    } else {
 63        let mut offset = None;
 64        for range in ranges {
 65            let range = range.to_offset(buffer);
 66            let text = buffer.text_for_range(range.clone()).collect::<String>();
 67            for (match_offset, _) in text.match_indices(old_text) {
 68                if let Some(offset) = offset {
 69                    let offset_match_point = buffer.offset_to_point(offset);
 70                    let second_match_point = buffer.offset_to_point(range.start + match_offset);
 71                    anyhow::bail!(
 72                        "old_text is not unique enough:\n{}\nFound at {:?} and {:?}",
 73                        old_text,
 74                        offset_match_point,
 75                        second_match_point
 76                    );
 77                }
 78                offset = Some(range.start + match_offset);
 79            }
 80        }
 81        offset.ok_or_else(|| {
 82            #[cfg(debug_assertions)]
 83            if let Some(closest_match) = closest_old_text_match(buffer, old_text) {
 84                log::info!(
 85                    "Closest `old_text` match: {}",
 86                    pretty_assertions::StrComparison::new(old_text, &closest_match)
 87                )
 88            }
 89            anyhow!("Failed to match old_text:\n{}", old_text)
 90        })
 91    }?;
 92
 93    let edits_within_hunk = language::text_diff(&old_text, &new_text);
 94    Ok(edits_within_hunk
 95        .into_iter()
 96        .map(move |(inner_range, inner_text)| {
 97            (
 98                buffer.anchor_after(context_offset + inner_range.start)
 99                    ..buffer.anchor_before(context_offset + inner_range.end),
100                inner_text,
101            )
102        }))
103}
104
105#[cfg(debug_assertions)]
106fn closest_old_text_match(buffer: &TextBufferSnapshot, old_text: &str) -> Option<String> {
107    let buffer_text = buffer.text();
108    let len = old_text.len();
109
110    if len == 0 || buffer_text.len() < len {
111        return None;
112    }
113
114    let mut min_score = usize::MAX;
115    let mut min_start = 0;
116
117    let old_text_bytes = old_text.as_bytes();
118    let old_alpha_count = old_text_bytes
119        .iter()
120        .filter(|&&b| b.is_ascii_alphanumeric())
121        .count();
122
123    let old_line_count = old_text.lines().count();
124
125    let mut cursor = 0;
126
127    while cursor + len <= buffer_text.len() {
128        let candidate = &buffer_text[cursor..cursor + len];
129        let candidate_bytes = candidate.as_bytes();
130
131        if usize::abs_diff(candidate.lines().count(), old_line_count) > 4 {
132            cursor += 1;
133            continue;
134        }
135
136        let candidate_alpha_count = candidate_bytes
137            .iter()
138            .filter(|&&b| b.is_ascii_alphanumeric())
139            .count();
140
141        // If alphanumeric character count differs by more than 30%, skip
142        if usize::abs_diff(old_alpha_count, candidate_alpha_count) * 10 > old_alpha_count * 3 {
143            cursor += 1;
144            continue;
145        }
146
147        let score = strsim::levenshtein(candidate, old_text);
148        if score < min_score {
149            min_score = score;
150            min_start = cursor;
151
152            if min_score <= len / 10 {
153                break;
154            }
155        }
156
157        cursor += 1;
158    }
159
160    if min_score != usize::MAX {
161        Some(buffer_text[min_start..min_start + len].to_string())
162    } else {
163        None
164    }
165}
166
/// One XML-like element extracted by `parse_tag`, borrowing from the input.
struct ParsedTag<'a> {
    /// Text between the tag name and the closing `>` of the opening tag,
    /// with surrounding whitespace trimmed.
    attributes: &'a str,
    /// Text between the opening tag and the matching closing tag, minus a
    /// single leading newline if one is present.
    body: &'a str,
}
171
172fn parse_tag<'a>(input: &mut &'a str, tag: &str) -> Result<Option<ParsedTag<'a>>> {
173    let open_tag = format!("<{}", tag);
174    let close_tag = format!("</{}>", tag);
175    let Some(start_ix) = input.find(&open_tag) else {
176        return Ok(None);
177    };
178    let start_ix = start_ix + open_tag.len();
179    let closing_bracket_ix = start_ix
180        + input[start_ix..]
181            .find('>')
182            .with_context(|| format!("missing > after {tag}"))?;
183    let attributes = &input[start_ix..closing_bracket_ix].trim();
184    let end_ix = closing_bracket_ix
185        + input[closing_bracket_ix..]
186            .find(&close_tag)
187            .with_context(|| format!("no `{close_tag}` tag"))?;
188    let body = &input[closing_bracket_ix + '>'.len_utf8()..end_ix];
189    let body = body.strip_prefix('\n').unwrap_or(body);
190    *input = &input[end_ix + close_tag.len()..];
191    Ok(Some(ParsedTag { attributes, body }))
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197    use gpui::TestAppContext;
198    use indoc::indoc;
199    use language::Point;
200    use project::{FakeFs, Project};
201    use serde_json::json;
202    use settings::SettingsStore;
203    use util::path;
204
205    #[test]
206    fn test_parse_tags() {
207        let mut input = indoc! {r#"
208            Prelude
209            <tag attr="foo">
210            tag value
211            </tag>
212            "# };
213        let parsed = parse_tag(&mut input, "tag").unwrap().unwrap();
214        assert_eq!(parsed.attributes, "attr=\"foo\"");
215        assert_eq!(parsed.body, "tag value\n");
216        assert_eq!(input, "\n");
217    }
218
219    #[gpui::test]
220    async fn test_parse_xml_edits(cx: &mut TestAppContext) {
221        let fs = init_test(cx);
222
223        let buffer_1_text = indoc! {r#"
224            one two three four
225            five six seven eight
226            nine ten eleven twelve
227        "# };
228
229        fs.insert_tree(
230            path!("/root"),
231            json!({
232                "file1": buffer_1_text,
233            }),
234        )
235        .await;
236
237        let project = Project::test(fs, [path!("/root").as_ref()], cx).await;
238        let buffer = project
239            .update(cx, |project, cx| {
240                project.open_local_buffer(path!("/root/file1"), cx)
241            })
242            .await
243            .unwrap();
244        let buffer_snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot());
245
246        let edits = indoc! {r#"
247            <edits path="root/file1">
248            <old_text>
249            five six seven eight
250            </old_text>
251            <new_text>
252            five SIX seven eight!
253            </new_text>
254            </edits>
255        "#};
256
257        let (buffer, edits) = parse_xml_edits(edits, |_path| {
258            Some((&buffer_snapshot, &[(Anchor::MIN..Anchor::MAX)] as &[_]))
259        })
260        .await
261        .unwrap();
262
263        let edits = edits
264            .into_iter()
265            .map(|(range, text)| (range.to_point(&buffer), text))
266            .collect::<Vec<_>>();
267        assert_eq!(
268            edits,
269            &[
270                (Point::new(1, 5)..Point::new(1, 8), "SIX".into()),
271                (Point::new(1, 20)..Point::new(1, 20), "!".into())
272            ]
273        );
274    }
275
276    fn init_test(cx: &mut TestAppContext) -> Arc<FakeFs> {
277        cx.update(|cx| {
278            let settings_store = SettingsStore::test(cx);
279            cx.set_global(settings_store);
280        });
281
282        FakeFs::new(cx.background_executor.clone())
283    }
284}