1use anyhow::bail;
2use derive_more::{Add, AddAssign};
3use language_model::LanguageModel;
4use regex::Regex;
5use schemars::JsonSchema;
6use serde::{Deserialize, Serialize};
7use smallvec::SmallVec;
8use std::{mem, ops::Range, str::FromStr, sync::Arc};
9
10const OLD_TEXT_END_TAG: &str = "</old_text>";
11const NEW_TEXT_END_TAG: &str = "</new_text>";
12const EDITS_END_TAG: &str = "</edits>";
13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
14const SEPARATOR_MARKER: &str = "=======";
15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
16const SONNET_PARAMETER_INVOKE_1: &str = "</parameter>\n</invoke>";
17const SONNET_PARAMETER_INVOKE_2: &str = "</parameter></invoke>";
18const END_TAGS: [&str; 5] = [
19 OLD_TEXT_END_TAG,
20 NEW_TEXT_END_TAG,
21 EDITS_END_TAG,
22 SONNET_PARAMETER_INVOKE_1, // Remove this after switching to streaming tool call
23 SONNET_PARAMETER_INVOKE_2,
24];
25
26#[derive(Debug)]
27pub enum EditParserEvent {
28 OldTextChunk {
29 chunk: String,
30 done: bool,
31 line_hint: Option<u32>,
32 },
33 NewTextChunk {
34 chunk: String,
35 done: bool,
36 },
37}
38
39#[derive(
40 Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
41)]
42pub struct EditParserMetrics {
43 pub tags: usize,
44 pub mismatched_tags: usize,
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum EditFormat {
50 /// XML-like tags:
51 /// <old_text>...</old_text>
52 /// <new_text>...</new_text>
53 XmlTags,
54 /// Diff-fenced format, in which:
55 /// - Text before the SEARCH marker is ignored
56 /// - Fences are optional
57 /// - Line hint is optional.
58 ///
59 /// Example:
60 ///
61 /// ```diff
62 /// <<<<<<< SEARCH line=42
63 /// ...
64 /// =======
65 /// ...
66 /// >>>>>>> REPLACE
67 /// ```
68 DiffFenced,
69}
70
71impl FromStr for EditFormat {
72 type Err = anyhow::Error;
73
74 fn from_str(s: &str) -> anyhow::Result<Self> {
75 match s.to_lowercase().as_str() {
76 "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
77 "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
78 _ => bail!("Unknown EditFormat: {}", s),
79 }
80 }
81}
82
83impl EditFormat {
84 /// Return an optimal edit format for the language model
85 pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
86 if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
87 Ok(EditFormat::DiffFenced)
88 } else {
89 Ok(EditFormat::XmlTags)
90 }
91 }
92
93 /// Return an optimal edit format for the language model,
94 /// with the ability to override it by setting the
95 /// `ZED_EDIT_FORMAT` environment variable
96 #[allow(dead_code)]
97 pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
98 let default = EditFormat::from_model(model)?;
99 std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
100 }
101}
102
103pub trait EditFormatParser: Send + std::fmt::Debug {
104 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
105 fn take_metrics(&mut self) -> EditParserMetrics;
106}
107
108#[derive(Debug)]
109pub struct XmlEditParser {
110 state: XmlParserState,
111 buffer: String,
112 metrics: EditParserMetrics,
113}
114
115#[derive(Debug, PartialEq)]
116enum XmlParserState {
117 Pending,
118 WithinOldText { start: bool, line_hint: Option<u32> },
119 AfterOldText,
120 WithinNewText { start: bool },
121}
122
123#[derive(Debug)]
124pub struct DiffFencedEditParser {
125 state: DiffParserState,
126 buffer: String,
127 metrics: EditParserMetrics,
128}
129
130#[derive(Debug, PartialEq)]
131enum DiffParserState {
132 Pending,
133 WithinSearch { start: bool, line_hint: Option<u32> },
134 WithinReplace { start: bool },
135}
136
137/// Main parser that delegates to format-specific parsers
138pub struct EditParser {
139 parser: Box<dyn EditFormatParser>,
140}
141
142impl XmlEditParser {
143 pub fn new() -> Self {
144 XmlEditParser {
145 state: XmlParserState::Pending,
146 buffer: String::new(),
147 metrics: EditParserMetrics::default(),
148 }
149 }
150
151 fn find_end_tag(&self) -> Option<Range<usize>> {
152 let (tag, start_ix) = END_TAGS
153 .iter()
154 .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
155 .min_by_key(|(_, ix)| *ix)?;
156 Some(start_ix..start_ix + tag.len())
157 }
158
159 fn ends_with_tag_prefix(&self) -> bool {
160 let mut end_prefixes = END_TAGS
161 .iter()
162 .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
163 .chain(["\n"]);
164 end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
165 }
166
167 fn parse_line_hint(&self, tag: &str) -> Option<u32> {
168 use std::sync::LazyLock;
169 static LINE_HINT_REGEX: LazyLock<Regex> =
170 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
171
172 LINE_HINT_REGEX
173 .captures(tag)
174 .and_then(|caps| caps.get(1))
175 .and_then(|m| m.as_str().parse::<u32>().ok())
176 }
177}
178
179impl EditFormatParser for XmlEditParser {
180 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
181 self.buffer.push_str(chunk);
182
183 let mut edit_events = SmallVec::new();
184 loop {
185 match &mut self.state {
186 XmlParserState::Pending => {
187 if let Some(start) = self.buffer.find("<old_text") {
188 if let Some(tag_end) = self.buffer[start..].find('>') {
189 let tag_end = start + tag_end + 1;
190 let tag = &self.buffer[start..tag_end];
191 let line_hint = self.parse_line_hint(tag);
192 self.buffer.drain(..tag_end);
193 self.state = XmlParserState::WithinOldText {
194 start: true,
195 line_hint,
196 };
197 } else {
198 break;
199 }
200 } else {
201 break;
202 }
203 }
204 XmlParserState::WithinOldText { start, line_hint } => {
205 if !self.buffer.is_empty() {
206 if *start && self.buffer.starts_with('\n') {
207 self.buffer.remove(0);
208 }
209 *start = false;
210 }
211
212 let line_hint = *line_hint;
213 if let Some(tag_range) = self.find_end_tag() {
214 let mut chunk = self.buffer[..tag_range.start].to_string();
215 if chunk.ends_with('\n') {
216 chunk.pop();
217 }
218
219 self.metrics.tags += 1;
220 if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
221 self.metrics.mismatched_tags += 1;
222 }
223
224 self.buffer.drain(..tag_range.end);
225 self.state = XmlParserState::AfterOldText;
226 edit_events.push(EditParserEvent::OldTextChunk {
227 chunk,
228 done: true,
229 line_hint,
230 });
231 } else {
232 if !self.ends_with_tag_prefix() {
233 edit_events.push(EditParserEvent::OldTextChunk {
234 chunk: mem::take(&mut self.buffer),
235 done: false,
236 line_hint,
237 });
238 }
239 break;
240 }
241 }
242 XmlParserState::AfterOldText => {
243 if let Some(start) = self.buffer.find("<new_text>") {
244 self.buffer.drain(..start + "<new_text>".len());
245 self.state = XmlParserState::WithinNewText { start: true };
246 } else {
247 break;
248 }
249 }
250 XmlParserState::WithinNewText { start } => {
251 if !self.buffer.is_empty() {
252 if *start && self.buffer.starts_with('\n') {
253 self.buffer.remove(0);
254 }
255 *start = false;
256 }
257
258 if let Some(tag_range) = self.find_end_tag() {
259 let mut chunk = self.buffer[..tag_range.start].to_string();
260 if chunk.ends_with('\n') {
261 chunk.pop();
262 }
263
264 self.metrics.tags += 1;
265 if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
266 self.metrics.mismatched_tags += 1;
267 }
268
269 self.buffer.drain(..tag_range.end);
270 self.state = XmlParserState::Pending;
271 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
272 } else {
273 if !self.ends_with_tag_prefix() {
274 edit_events.push(EditParserEvent::NewTextChunk {
275 chunk: mem::take(&mut self.buffer),
276 done: false,
277 });
278 }
279 break;
280 }
281 }
282 }
283 }
284 edit_events
285 }
286
287 fn take_metrics(&mut self) -> EditParserMetrics {
288 std::mem::take(&mut self.metrics)
289 }
290}
291
292impl DiffFencedEditParser {
293 pub fn new() -> Self {
294 DiffFencedEditParser {
295 state: DiffParserState::Pending,
296 buffer: String::new(),
297 metrics: EditParserMetrics::default(),
298 }
299 }
300
301 fn ends_with_diff_marker_prefix(&self) -> bool {
302 let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
303 let mut diff_prefixes = diff_markers
304 .iter()
305 .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
306 .chain(["\n"]);
307 diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
308 }
309
310 fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
311 use regex::Regex;
312 use std::sync::LazyLock;
313 static LINE_HINT_REGEX: LazyLock<Regex> =
314 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
315
316 LINE_HINT_REGEX
317 .captures(search_line)
318 .and_then(|caps| caps.get(1))
319 .and_then(|m| m.as_str().parse::<u32>().ok())
320 }
321}
322
323impl EditFormatParser for DiffFencedEditParser {
324 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
325 self.buffer.push_str(chunk);
326
327 let mut edit_events = SmallVec::new();
328 loop {
329 match &mut self.state {
330 DiffParserState::Pending => {
331 if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
332 let search_end = diff + SEARCH_MARKER.len();
333 if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
334 let search_line = &self.buffer[diff..search_end + newline_pos];
335 let line_hint = self.parse_line_hint(search_line);
336 self.buffer.drain(..search_end + newline_pos + 1);
337 self.state = DiffParserState::WithinSearch {
338 start: true,
339 line_hint,
340 };
341 } else {
342 break;
343 }
344 } else {
345 break;
346 }
347 }
348 DiffParserState::WithinSearch { start, line_hint } => {
349 if !self.buffer.is_empty() {
350 if *start && self.buffer.starts_with('\n') {
351 self.buffer.remove(0);
352 }
353 *start = false;
354 }
355
356 let line_hint = *line_hint;
357 if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
358 let mut chunk = self.buffer[..separator_pos].to_string();
359 if chunk.ends_with('\n') {
360 chunk.pop();
361 }
362
363 let separator_end = separator_pos + SEPARATOR_MARKER.len();
364 if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
365 self.buffer.drain(..separator_end + newline_pos + 1);
366 self.state = DiffParserState::WithinReplace { start: true };
367 edit_events.push(EditParserEvent::OldTextChunk {
368 chunk,
369 done: true,
370 line_hint,
371 });
372 } else {
373 break;
374 }
375 } else {
376 if !self.ends_with_diff_marker_prefix() {
377 edit_events.push(EditParserEvent::OldTextChunk {
378 chunk: mem::take(&mut self.buffer),
379 done: false,
380 line_hint,
381 });
382 }
383 break;
384 }
385 }
386 DiffParserState::WithinReplace { start } => {
387 if !self.buffer.is_empty() {
388 if *start && self.buffer.starts_with('\n') {
389 self.buffer.remove(0);
390 }
391 *start = false;
392 }
393
394 if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
395 let mut chunk = self.buffer[..replace_pos].to_string();
396 if chunk.ends_with('\n') {
397 chunk.pop();
398 }
399
400 self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
401 if let Some(newline_pos) = self.buffer.find('\n') {
402 self.buffer.drain(..newline_pos + 1);
403 } else {
404 self.buffer.clear();
405 }
406
407 self.state = DiffParserState::Pending;
408 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
409 } else {
410 if !self.ends_with_diff_marker_prefix() {
411 edit_events.push(EditParserEvent::NewTextChunk {
412 chunk: mem::take(&mut self.buffer),
413 done: false,
414 });
415 }
416 break;
417 }
418 }
419 }
420 }
421 edit_events
422 }
423
424 fn take_metrics(&mut self) -> EditParserMetrics {
425 std::mem::take(&mut self.metrics)
426 }
427}
428
429impl EditParser {
430 pub fn new(format: EditFormat) -> Self {
431 let parser: Box<dyn EditFormatParser> = match format {
432 EditFormat::XmlTags => Box::new(XmlEditParser::new()),
433 EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
434 };
435 EditParser { parser }
436 }
437
438 pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
439 self.parser.push(chunk)
440 }
441
442 pub fn finish(mut self) -> EditParserMetrics {
443 self.parser.take_metrics()
444 }
445}
446
447#[cfg(test)]
448mod tests {
449 use super::*;
450 use indoc::indoc;
451 use rand::prelude::*;
452 use std::cmp;
453
454 #[gpui::test(iterations = 1000)]
455 fn test_xml_single_edit(mut rng: StdRng) {
456 let mut parser = EditParser::new(EditFormat::XmlTags);
457 assert_eq!(
458 parse_random_chunks(
459 "<old_text>original</old_text><new_text>updated</new_text>",
460 &mut parser,
461 &mut rng
462 ),
463 vec![Edit {
464 old_text: "original".to_string(),
465 new_text: "updated".to_string(),
466 line_hint: None,
467 }]
468 );
469 assert_eq!(
470 parser.finish(),
471 EditParserMetrics {
472 tags: 2,
473 mismatched_tags: 0
474 }
475 );
476 }
477
478 #[gpui::test(iterations = 1000)]
479 fn test_xml_multiple_edits(mut rng: StdRng) {
480 let mut parser = EditParser::new(EditFormat::XmlTags);
481 assert_eq!(
482 parse_random_chunks(
483 indoc! {"
484 <old_text>
485 first old
486 </old_text><new_text>first new</new_text>
487 <old_text>second old</old_text><new_text>
488 second new
489 </new_text>
490 "},
491 &mut parser,
492 &mut rng
493 ),
494 vec![
495 Edit {
496 old_text: "first old".to_string(),
497 new_text: "first new".to_string(),
498 line_hint: None,
499 },
500 Edit {
501 old_text: "second old".to_string(),
502 new_text: "second new".to_string(),
503 line_hint: None,
504 },
505 ]
506 );
507 assert_eq!(
508 parser.finish(),
509 EditParserMetrics {
510 tags: 4,
511 mismatched_tags: 0
512 }
513 );
514 }
515
516 #[gpui::test(iterations = 1000)]
517 fn test_xml_edits_with_extra_text(mut rng: StdRng) {
518 let mut parser = EditParser::new(EditFormat::XmlTags);
519 assert_eq!(
520 parse_random_chunks(
521 indoc! {"
522 ignore this <old_text>
523 content</old_text>extra stuff<new_text>updated content</new_text>trailing data
524 more text <old_text>second item
525 </old_text>middle text<new_text>modified second item</new_text>end
526 <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
527 "},
528 &mut parser,
529 &mut rng
530 ),
531 vec![
532 Edit {
533 old_text: "content".to_string(),
534 new_text: "updated content".to_string(),
535 line_hint: None,
536 },
537 Edit {
538 old_text: "second item".to_string(),
539 new_text: "modified second item".to_string(),
540 line_hint: None,
541 },
542 Edit {
543 old_text: "third case".to_string(),
544 new_text: "improved third case".to_string(),
545 line_hint: None,
546 },
547 ]
548 );
549 assert_eq!(
550 parser.finish(),
551 EditParserMetrics {
552 tags: 6,
553 mismatched_tags: 0
554 }
555 );
556 }
557
558 #[gpui::test(iterations = 1000)]
559 fn test_xml_edits_with_closing_parameter_invoke(mut rng: StdRng) {
560 // This case is a regression with Claude Sonnet 4.5.
561 // Sometimes Sonnet thinks that it's doing a tool call
562 // and closes its response with '</parameter></invoke>'
563 // instead of properly closing </new_text>
564
565 let mut parser = EditParser::new(EditFormat::XmlTags);
566 assert_eq!(
567 parse_random_chunks(
568 indoc! {"
569 <old_text>some text</old_text><new_text>updated text</parameter></invoke>
570 "},
571 &mut parser,
572 &mut rng
573 ),
574 vec![Edit {
575 old_text: "some text".to_string(),
576 new_text: "updated text".to_string(),
577 line_hint: None,
578 },]
579 );
580 assert_eq!(
581 parser.finish(),
582 EditParserMetrics {
583 tags: 2,
584 mismatched_tags: 1
585 }
586 );
587 }
588
589 #[gpui::test(iterations = 1000)]
590 fn test_xml_nested_tags(mut rng: StdRng) {
591 let mut parser = EditParser::new(EditFormat::XmlTags);
592 assert_eq!(
593 parse_random_chunks(
594 "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
595 &mut parser,
596 &mut rng
597 ),
598 vec![Edit {
599 old_text: "code with <tag>nested</tag> elements".to_string(),
600 new_text: "new <code>content</code>".to_string(),
601 line_hint: None,
602 }]
603 );
604 assert_eq!(
605 parser.finish(),
606 EditParserMetrics {
607 tags: 2,
608 mismatched_tags: 0
609 }
610 );
611 }
612
613 #[gpui::test(iterations = 1000)]
614 fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
615 let mut parser = EditParser::new(EditFormat::XmlTags);
616 assert_eq!(
617 parse_random_chunks(
618 "<old_text></old_text><new_text></new_text>",
619 &mut parser,
620 &mut rng
621 ),
622 vec![Edit {
623 old_text: "".to_string(),
624 new_text: "".to_string(),
625 line_hint: None,
626 }]
627 );
628 assert_eq!(
629 parser.finish(),
630 EditParserMetrics {
631 tags: 2,
632 mismatched_tags: 0
633 }
634 );
635 }
636
637 #[gpui::test(iterations = 100)]
638 fn test_xml_multiline_content(mut rng: StdRng) {
639 let mut parser = EditParser::new(EditFormat::XmlTags);
640 assert_eq!(
641 parse_random_chunks(
642 "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
643 &mut parser,
644 &mut rng
645 ),
646 vec![Edit {
647 old_text: "line1\nline2\nline3".to_string(),
648 new_text: "line1\nmodified line2\nline3".to_string(),
649 line_hint: None,
650 }]
651 );
652 assert_eq!(
653 parser.finish(),
654 EditParserMetrics {
655 tags: 2,
656 mismatched_tags: 0
657 }
658 );
659 }
660
661 #[gpui::test(iterations = 1000)]
662 fn test_xml_mismatched_tags(mut rng: StdRng) {
663 let mut parser = EditParser::new(EditFormat::XmlTags);
664 assert_eq!(
665 parse_random_chunks(
666 // Reduced from an actual Sonnet 3.7 output
667 indoc! {"
668 <old_text>
669 a
670 b
671 c
672 </new_text>
673 <new_text>
674 a
675 B
676 c
677 </old_text>
678 <old_text>
679 d
680 e
681 f
682 </new_text>
683 <new_text>
684 D
685 e
686 F
687 </old_text>
688 "},
689 &mut parser,
690 &mut rng
691 ),
692 vec![
693 Edit {
694 old_text: "a\nb\nc".to_string(),
695 new_text: "a\nB\nc".to_string(),
696 line_hint: None,
697 },
698 Edit {
699 old_text: "d\ne\nf".to_string(),
700 new_text: "D\ne\nF".to_string(),
701 line_hint: None,
702 }
703 ]
704 );
705 assert_eq!(
706 parser.finish(),
707 EditParserMetrics {
708 tags: 4,
709 mismatched_tags: 4
710 }
711 );
712
713 let mut parser = EditParser::new(EditFormat::XmlTags);
714 assert_eq!(
715 parse_random_chunks(
716 // Reduced from an actual Opus 4 output
717 indoc! {"
718 <edits>
719 <old_text>
720 Lorem
721 </old_text>
722 <new_text>
723 LOREM
724 </edits>
725 "},
726 &mut parser,
727 &mut rng
728 ),
729 vec![Edit {
730 old_text: "Lorem".to_string(),
731 new_text: "LOREM".to_string(),
732 line_hint: None,
733 },]
734 );
735 assert_eq!(
736 parser.finish(),
737 EditParserMetrics {
738 tags: 2,
739 mismatched_tags: 1
740 }
741 );
742 }
743
744 #[gpui::test(iterations = 1000)]
745 fn test_diff_fenced_single_edit(mut rng: StdRng) {
746 let mut parser = EditParser::new(EditFormat::DiffFenced);
747 assert_eq!(
748 parse_random_chunks(
749 indoc! {"
750 <<<<<<< SEARCH
751 original text
752 =======
753 updated text
754 >>>>>>> REPLACE
755 "},
756 &mut parser,
757 &mut rng
758 ),
759 vec![Edit {
760 old_text: "original text".to_string(),
761 new_text: "updated text".to_string(),
762 line_hint: None,
763 }]
764 );
765 assert_eq!(
766 parser.finish(),
767 EditParserMetrics {
768 tags: 0,
769 mismatched_tags: 0
770 }
771 );
772 }
773
774 #[gpui::test(iterations = 100)]
775 fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
776 let mut parser = EditParser::new(EditFormat::DiffFenced);
777 assert_eq!(
778 parse_random_chunks(
779 indoc! {"
780 ```diff
781 <<<<<<< SEARCH
782 from flask import Flask
783 =======
784 import math
785 from flask import Flask
786 >>>>>>> REPLACE
787 ```
788 "},
789 &mut parser,
790 &mut rng
791 ),
792 vec![Edit {
793 old_text: "from flask import Flask".to_string(),
794 new_text: "import math\nfrom flask import Flask".to_string(),
795 line_hint: None,
796 }]
797 );
798 assert_eq!(
799 parser.finish(),
800 EditParserMetrics {
801 tags: 0,
802 mismatched_tags: 0
803 }
804 );
805 }
806
807 #[gpui::test(iterations = 100)]
808 fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
809 let mut parser = EditParser::new(EditFormat::DiffFenced);
810 assert_eq!(
811 parse_random_chunks(
812 indoc! {"
813 <<<<<<< SEARCH
814 first old
815 =======
816 first new
817 >>>>>>> REPLACE
818
819 <<<<<<< SEARCH
820 second old
821 =======
822 second new
823 >>>>>>> REPLACE
824 "},
825 &mut parser,
826 &mut rng
827 ),
828 vec![
829 Edit {
830 old_text: "first old".to_string(),
831 new_text: "first new".to_string(),
832 line_hint: None,
833 },
834 Edit {
835 old_text: "second old".to_string(),
836 new_text: "second new".to_string(),
837 line_hint: None,
838 },
839 ]
840 );
841 assert_eq!(
842 parser.finish(),
843 EditParserMetrics {
844 tags: 0,
845 mismatched_tags: 0
846 }
847 );
848 }
849
850 #[gpui::test(iterations = 100)]
851 fn test_mixed_formats(mut rng: StdRng) {
852 // Test XML format parser only parses XML tags
853 let mut xml_parser = EditParser::new(EditFormat::XmlTags);
854 assert_eq!(
855 parse_random_chunks(
856 indoc! {"
857 <old_text>xml style old</old_text><new_text>xml style new</new_text>
858
859 <<<<<<< SEARCH
860 diff style old
861 =======
862 diff style new
863 >>>>>>> REPLACE
864 "},
865 &mut xml_parser,
866 &mut rng
867 ),
868 vec![Edit {
869 old_text: "xml style old".to_string(),
870 new_text: "xml style new".to_string(),
871 line_hint: None,
872 },]
873 );
874 assert_eq!(
875 xml_parser.finish(),
876 EditParserMetrics {
877 tags: 2,
878 mismatched_tags: 0
879 }
880 );
881
882 // Test diff-fenced format parser only parses diff markers
883 let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
884 assert_eq!(
885 parse_random_chunks(
886 indoc! {"
887 <old_text>xml style old</old_text><new_text>xml style new</new_text>
888
889 <<<<<<< SEARCH
890 diff style old
891 =======
892 diff style new
893 >>>>>>> REPLACE
894 "},
895 &mut diff_parser,
896 &mut rng
897 ),
898 vec![Edit {
899 old_text: "diff style old".to_string(),
900 new_text: "diff style new".to_string(),
901 line_hint: None,
902 },]
903 );
904 assert_eq!(
905 diff_parser.finish(),
906 EditParserMetrics {
907 tags: 0,
908 mismatched_tags: 0
909 }
910 );
911 }
912
913 #[gpui::test(iterations = 100)]
914 fn test_diff_fenced_empty_sections(mut rng: StdRng) {
915 let mut parser = EditParser::new(EditFormat::DiffFenced);
916 assert_eq!(
917 parse_random_chunks(
918 indoc! {"
919 <<<<<<< SEARCH
920 =======
921 >>>>>>> REPLACE
922 "},
923 &mut parser,
924 &mut rng
925 ),
926 vec![Edit {
927 old_text: "".to_string(),
928 new_text: "".to_string(),
929 line_hint: None,
930 }]
931 );
932 assert_eq!(
933 parser.finish(),
934 EditParserMetrics {
935 tags: 0,
936 mismatched_tags: 0
937 }
938 );
939 }
940
941 #[gpui::test(iterations = 100)]
942 fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
943 let mut parser = EditParser::new(EditFormat::DiffFenced);
944 let edits = parse_random_chunks(
945 indoc! {"
946 <<<<<<< SEARCH line=42
947 original text
948 =======
949 updated text
950 >>>>>>> REPLACE
951 "},
952 &mut parser,
953 &mut rng,
954 );
955 assert_eq!(
956 edits,
957 vec![Edit {
958 old_text: "original text".to_string(),
959 line_hint: Some(42),
960 new_text: "updated text".to_string(),
961 }]
962 );
963 }
964 #[gpui::test(iterations = 100)]
965 fn test_xml_line_hints(mut rng: StdRng) {
966 // Line hint is a single quoted line number
967 let mut parser = EditParser::new(EditFormat::XmlTags);
968
969 let edits = parse_random_chunks(
970 r#"
971 <old_text line="23">original code</old_text>
972 <new_text>updated code</new_text>"#,
973 &mut parser,
974 &mut rng,
975 );
976
977 assert_eq!(edits.len(), 1);
978 assert_eq!(edits[0].old_text, "original code");
979 assert_eq!(edits[0].line_hint, Some(23));
980 assert_eq!(edits[0].new_text, "updated code");
981
982 // Line hint is a single unquoted line number
983 let mut parser = EditParser::new(EditFormat::XmlTags);
984
985 let edits = parse_random_chunks(
986 r#"
987 <old_text line=45>original code</old_text>
988 <new_text>updated code</new_text>"#,
989 &mut parser,
990 &mut rng,
991 );
992
993 assert_eq!(edits.len(), 1);
994 assert_eq!(edits[0].old_text, "original code");
995 assert_eq!(edits[0].line_hint, Some(45));
996 assert_eq!(edits[0].new_text, "updated code");
997
998 // Line hint is a range
999 let mut parser = EditParser::new(EditFormat::XmlTags);
1000
1001 let edits = parse_random_chunks(
1002 r#"
1003 <old_text line="23:50">original code</old_text>
1004 <new_text>updated code</new_text>"#,
1005 &mut parser,
1006 &mut rng,
1007 );
1008
1009 assert_eq!(edits.len(), 1);
1010 assert_eq!(edits[0].old_text, "original code");
1011 assert_eq!(edits[0].line_hint, Some(23));
1012 assert_eq!(edits[0].new_text, "updated code");
1013
1014 // No line hint
1015 let mut parser = EditParser::new(EditFormat::XmlTags);
1016 let edits = parse_random_chunks(
1017 r#"
1018 <old_text>old</old_text>
1019 <new_text>new</new_text>"#,
1020 &mut parser,
1021 &mut rng,
1022 );
1023
1024 assert_eq!(edits.len(), 1);
1025 assert_eq!(edits[0].old_text, "old");
1026 assert_eq!(edits[0].line_hint, None);
1027 assert_eq!(edits[0].new_text, "new");
1028 }
1029
1030 #[derive(Default, Debug, PartialEq, Eq)]
1031 struct Edit {
1032 old_text: String,
1033 new_text: String,
1034 line_hint: Option<u32>,
1035 }
1036
1037 fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
1038 let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1039 let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1040 chunk_indices.sort();
1041 chunk_indices.push(input.len());
1042
1043 let mut old_text = Some(String::new());
1044 let mut new_text = None;
1045 let mut pending_edit = Edit::default();
1046 let mut edits = Vec::new();
1047 let mut last_ix = 0;
1048 for chunk_ix in chunk_indices {
1049 for event in parser.push(&input[last_ix..chunk_ix]) {
1050 match event {
1051 EditParserEvent::OldTextChunk {
1052 chunk,
1053 done,
1054 line_hint,
1055 } => {
1056 old_text.as_mut().unwrap().push_str(&chunk);
1057 if done {
1058 pending_edit.old_text = old_text.take().unwrap();
1059 pending_edit.line_hint = line_hint;
1060 new_text = Some(String::new());
1061 }
1062 }
1063 EditParserEvent::NewTextChunk { chunk, done } => {
1064 new_text.as_mut().unwrap().push_str(&chunk);
1065 if done {
1066 pending_edit.new_text = new_text.take().unwrap();
1067 edits.push(pending_edit);
1068 pending_edit = Edit::default();
1069 old_text = Some(String::new());
1070 }
1071 }
1072 }
1073 }
1074 last_ix = chunk_ix;
1075 }
1076
1077 if new_text.is_some() {
1078 pending_edit.new_text = new_text.take().unwrap();
1079 edits.push(pending_edit);
1080 }
1081
1082 edits
1083 }
1084}