1use anyhow::bail;
2use derive_more::{Add, AddAssign};
3use language_model::LanguageModel;
4use regex::Regex;
5use schemars::JsonSchema;
6use serde::{Deserialize, Serialize};
7use smallvec::SmallVec;
8use std::{mem, ops::Range, str::FromStr, sync::Arc};
9
10const OLD_TEXT_END_TAG: &str = "</old_text>";
11const NEW_TEXT_END_TAG: &str = "</new_text>";
12const EDITS_END_TAG: &str = "</edits>";
13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
14const SEPARATOR_MARKER: &str = "=======";
15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
16const SONNET_PARAMETER_INVOKE_1: &str = "</parameter>\n</invoke>";
17const SONNET_PARAMETER_INVOKE_2: &str = "</parameter></invoke>";
18const SONNET_PARAMETER_INVOKE_3: &str = "</parameter>";
19const END_TAGS: [&str; 6] = [
20 OLD_TEXT_END_TAG,
21 NEW_TEXT_END_TAG,
22 EDITS_END_TAG,
23 SONNET_PARAMETER_INVOKE_1, // Remove these after switching to streaming tool call
24 SONNET_PARAMETER_INVOKE_2,
25 SONNET_PARAMETER_INVOKE_3,
26];
27
28#[derive(Debug)]
29pub enum EditParserEvent {
30 OldTextChunk {
31 chunk: String,
32 done: bool,
33 line_hint: Option<u32>,
34 },
35 NewTextChunk {
36 chunk: String,
37 done: bool,
38 },
39}
40
41#[derive(
42 Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
43)]
44pub struct EditParserMetrics {
45 pub tags: usize,
46 pub mismatched_tags: usize,
47}
48
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
50#[serde(rename_all = "snake_case")]
51pub enum EditFormat {
52 /// XML-like tags:
53 /// <old_text>...</old_text>
54 /// <new_text>...</new_text>
55 XmlTags,
56 /// Diff-fenced format, in which:
57 /// - Text before the SEARCH marker is ignored
58 /// - Fences are optional
59 /// - Line hint is optional.
60 ///
61 /// Example:
62 ///
63 /// ```diff
64 /// <<<<<<< SEARCH line=42
65 /// ...
66 /// =======
67 /// ...
68 /// >>>>>>> REPLACE
69 /// ```
70 DiffFenced,
71}
72
73impl FromStr for EditFormat {
74 type Err = anyhow::Error;
75
76 fn from_str(s: &str) -> anyhow::Result<Self> {
77 match s.to_lowercase().as_str() {
78 "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
79 "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
80 _ => bail!("Unknown EditFormat: {}", s),
81 }
82 }
83}
84
85impl EditFormat {
86 /// Return an optimal edit format for the language model
87 pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
88 if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
89 Ok(EditFormat::DiffFenced)
90 } else {
91 Ok(EditFormat::XmlTags)
92 }
93 }
94
95 /// Return an optimal edit format for the language model,
96 /// with the ability to override it by setting the
97 /// `ZED_EDIT_FORMAT` environment variable
98 #[allow(dead_code)]
99 pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
100 let default = EditFormat::from_model(model)?;
101 std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
102 }
103}
104
105pub trait EditFormatParser: Send + std::fmt::Debug {
106 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
107 fn take_metrics(&mut self) -> EditParserMetrics;
108}
109
110#[derive(Debug)]
111pub struct XmlEditParser {
112 state: XmlParserState,
113 buffer: String,
114 metrics: EditParserMetrics,
115}
116
117#[derive(Debug, PartialEq)]
118enum XmlParserState {
119 Pending,
120 WithinOldText { start: bool, line_hint: Option<u32> },
121 AfterOldText,
122 WithinNewText { start: bool },
123}
124
125#[derive(Debug)]
126pub struct DiffFencedEditParser {
127 state: DiffParserState,
128 buffer: String,
129 metrics: EditParserMetrics,
130}
131
132#[derive(Debug, PartialEq)]
133enum DiffParserState {
134 Pending,
135 WithinSearch { start: bool, line_hint: Option<u32> },
136 WithinReplace { start: bool },
137}
138
139/// Main parser that delegates to format-specific parsers
140pub struct EditParser {
141 parser: Box<dyn EditFormatParser>,
142}
143
144impl XmlEditParser {
145 pub fn new() -> Self {
146 XmlEditParser {
147 state: XmlParserState::Pending,
148 buffer: String::new(),
149 metrics: EditParserMetrics::default(),
150 }
151 }
152
153 fn find_end_tag(&self) -> Option<Range<usize>> {
154 let (tag, start_ix) = END_TAGS
155 .iter()
156 .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
157 .min_by_key(|(_, ix)| *ix)?;
158 Some(start_ix..start_ix + tag.len())
159 }
160
161 fn ends_with_tag_prefix(&self) -> bool {
162 let mut end_prefixes = END_TAGS
163 .iter()
164 .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
165 .chain(["\n"]);
166 end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
167 }
168
169 fn parse_line_hint(&self, tag: &str) -> Option<u32> {
170 use std::sync::LazyLock;
171 static LINE_HINT_REGEX: LazyLock<Regex> =
172 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
173
174 LINE_HINT_REGEX
175 .captures(tag)
176 .and_then(|caps| caps.get(1))
177 .and_then(|m| m.as_str().parse::<u32>().ok())
178 }
179}
180
181impl EditFormatParser for XmlEditParser {
182 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
183 self.buffer.push_str(chunk);
184
185 let mut edit_events = SmallVec::new();
186 loop {
187 match &mut self.state {
188 XmlParserState::Pending => {
189 if let Some(start) = self.buffer.find("<old_text") {
190 if let Some(tag_end) = self.buffer[start..].find('>') {
191 let tag_end = start + tag_end + 1;
192 let tag = &self.buffer[start..tag_end];
193 let line_hint = self.parse_line_hint(tag);
194 self.buffer.drain(..tag_end);
195 self.state = XmlParserState::WithinOldText {
196 start: true,
197 line_hint,
198 };
199 } else {
200 break;
201 }
202 } else {
203 break;
204 }
205 }
206 XmlParserState::WithinOldText { start, line_hint } => {
207 if !self.buffer.is_empty() {
208 if *start && self.buffer.starts_with('\n') {
209 self.buffer.remove(0);
210 }
211 *start = false;
212 }
213
214 let line_hint = *line_hint;
215 if let Some(tag_range) = self.find_end_tag() {
216 let mut chunk = self.buffer[..tag_range.start].to_string();
217 if chunk.ends_with('\n') {
218 chunk.pop();
219 }
220
221 self.metrics.tags += 1;
222 if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
223 self.metrics.mismatched_tags += 1;
224 }
225
226 self.buffer.drain(..tag_range.end);
227 self.state = XmlParserState::AfterOldText;
228 edit_events.push(EditParserEvent::OldTextChunk {
229 chunk,
230 done: true,
231 line_hint,
232 });
233 } else {
234 if !self.ends_with_tag_prefix() {
235 edit_events.push(EditParserEvent::OldTextChunk {
236 chunk: mem::take(&mut self.buffer),
237 done: false,
238 line_hint,
239 });
240 }
241 break;
242 }
243 }
244 XmlParserState::AfterOldText => {
245 if let Some(start) = self.buffer.find("<new_text>") {
246 self.buffer.drain(..start + "<new_text>".len());
247 self.state = XmlParserState::WithinNewText { start: true };
248 } else {
249 break;
250 }
251 }
252 XmlParserState::WithinNewText { start } => {
253 if !self.buffer.is_empty() {
254 if *start && self.buffer.starts_with('\n') {
255 self.buffer.remove(0);
256 }
257 *start = false;
258 }
259
260 if let Some(tag_range) = self.find_end_tag() {
261 let mut chunk = self.buffer[..tag_range.start].to_string();
262 if chunk.ends_with('\n') {
263 chunk.pop();
264 }
265
266 self.metrics.tags += 1;
267 if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
268 self.metrics.mismatched_tags += 1;
269 }
270
271 self.buffer.drain(..tag_range.end);
272 self.state = XmlParserState::Pending;
273 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
274 } else {
275 if !self.ends_with_tag_prefix() {
276 edit_events.push(EditParserEvent::NewTextChunk {
277 chunk: mem::take(&mut self.buffer),
278 done: false,
279 });
280 }
281 break;
282 }
283 }
284 }
285 }
286 edit_events
287 }
288
289 fn take_metrics(&mut self) -> EditParserMetrics {
290 std::mem::take(&mut self.metrics)
291 }
292}
293
294impl DiffFencedEditParser {
295 pub fn new() -> Self {
296 DiffFencedEditParser {
297 state: DiffParserState::Pending,
298 buffer: String::new(),
299 metrics: EditParserMetrics::default(),
300 }
301 }
302
303 fn ends_with_diff_marker_prefix(&self) -> bool {
304 let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
305 let mut diff_prefixes = diff_markers
306 .iter()
307 .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
308 .chain(["\n"]);
309 diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
310 }
311
312 fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
313 use regex::Regex;
314 use std::sync::LazyLock;
315 static LINE_HINT_REGEX: LazyLock<Regex> =
316 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
317
318 LINE_HINT_REGEX
319 .captures(search_line)
320 .and_then(|caps| caps.get(1))
321 .and_then(|m| m.as_str().parse::<u32>().ok())
322 }
323}
324
325impl EditFormatParser for DiffFencedEditParser {
326 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
327 self.buffer.push_str(chunk);
328
329 let mut edit_events = SmallVec::new();
330 loop {
331 match &mut self.state {
332 DiffParserState::Pending => {
333 if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
334 let search_end = diff + SEARCH_MARKER.len();
335 if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
336 let search_line = &self.buffer[diff..search_end + newline_pos];
337 let line_hint = self.parse_line_hint(search_line);
338 self.buffer.drain(..search_end + newline_pos + 1);
339 self.state = DiffParserState::WithinSearch {
340 start: true,
341 line_hint,
342 };
343 } else {
344 break;
345 }
346 } else {
347 break;
348 }
349 }
350 DiffParserState::WithinSearch { start, line_hint } => {
351 if !self.buffer.is_empty() {
352 if *start && self.buffer.starts_with('\n') {
353 self.buffer.remove(0);
354 }
355 *start = false;
356 }
357
358 let line_hint = *line_hint;
359 if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
360 let mut chunk = self.buffer[..separator_pos].to_string();
361 if chunk.ends_with('\n') {
362 chunk.pop();
363 }
364
365 let separator_end = separator_pos + SEPARATOR_MARKER.len();
366 if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
367 self.buffer.drain(..separator_end + newline_pos + 1);
368 self.state = DiffParserState::WithinReplace { start: true };
369 edit_events.push(EditParserEvent::OldTextChunk {
370 chunk,
371 done: true,
372 line_hint,
373 });
374 } else {
375 break;
376 }
377 } else {
378 if !self.ends_with_diff_marker_prefix() {
379 edit_events.push(EditParserEvent::OldTextChunk {
380 chunk: mem::take(&mut self.buffer),
381 done: false,
382 line_hint,
383 });
384 }
385 break;
386 }
387 }
388 DiffParserState::WithinReplace { start } => {
389 if !self.buffer.is_empty() {
390 if *start && self.buffer.starts_with('\n') {
391 self.buffer.remove(0);
392 }
393 *start = false;
394 }
395
396 if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
397 let mut chunk = self.buffer[..replace_pos].to_string();
398 if chunk.ends_with('\n') {
399 chunk.pop();
400 }
401
402 self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
403 if let Some(newline_pos) = self.buffer.find('\n') {
404 self.buffer.drain(..newline_pos + 1);
405 } else {
406 self.buffer.clear();
407 }
408
409 self.state = DiffParserState::Pending;
410 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
411 } else {
412 if !self.ends_with_diff_marker_prefix() {
413 edit_events.push(EditParserEvent::NewTextChunk {
414 chunk: mem::take(&mut self.buffer),
415 done: false,
416 });
417 }
418 break;
419 }
420 }
421 }
422 }
423 edit_events
424 }
425
426 fn take_metrics(&mut self) -> EditParserMetrics {
427 std::mem::take(&mut self.metrics)
428 }
429}
430
431impl EditParser {
432 pub fn new(format: EditFormat) -> Self {
433 let parser: Box<dyn EditFormatParser> = match format {
434 EditFormat::XmlTags => Box::new(XmlEditParser::new()),
435 EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
436 };
437 EditParser { parser }
438 }
439
440 pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
441 self.parser.push(chunk)
442 }
443
444 pub fn finish(mut self) -> EditParserMetrics {
445 self.parser.take_metrics()
446 }
447}
448
449#[cfg(test)]
450mod tests {
451 use super::*;
452 use indoc::indoc;
453 use rand::prelude::*;
454 use std::cmp;
455
456 #[gpui::test(iterations = 1000)]
457 fn test_xml_single_edit(mut rng: StdRng) {
458 let mut parser = EditParser::new(EditFormat::XmlTags);
459 assert_eq!(
460 parse_random_chunks(
461 "<old_text>original</old_text><new_text>updated</new_text>",
462 &mut parser,
463 &mut rng
464 ),
465 vec![Edit {
466 old_text: "original".to_string(),
467 new_text: "updated".to_string(),
468 line_hint: None,
469 }]
470 );
471 assert_eq!(
472 parser.finish(),
473 EditParserMetrics {
474 tags: 2,
475 mismatched_tags: 0
476 }
477 );
478 }
479
480 #[gpui::test(iterations = 1000)]
481 fn test_xml_multiple_edits(mut rng: StdRng) {
482 let mut parser = EditParser::new(EditFormat::XmlTags);
483 assert_eq!(
484 parse_random_chunks(
485 indoc! {"
486 <old_text>
487 first old
488 </old_text><new_text>first new</new_text>
489 <old_text>second old</old_text><new_text>
490 second new
491 </new_text>
492 "},
493 &mut parser,
494 &mut rng
495 ),
496 vec![
497 Edit {
498 old_text: "first old".to_string(),
499 new_text: "first new".to_string(),
500 line_hint: None,
501 },
502 Edit {
503 old_text: "second old".to_string(),
504 new_text: "second new".to_string(),
505 line_hint: None,
506 },
507 ]
508 );
509 assert_eq!(
510 parser.finish(),
511 EditParserMetrics {
512 tags: 4,
513 mismatched_tags: 0
514 }
515 );
516 }
517
518 #[gpui::test(iterations = 1000)]
519 fn test_xml_edits_with_extra_text(mut rng: StdRng) {
520 let mut parser = EditParser::new(EditFormat::XmlTags);
521 assert_eq!(
522 parse_random_chunks(
523 indoc! {"
524 ignore this <old_text>
525 content</old_text>extra stuff<new_text>updated content</new_text>trailing data
526 more text <old_text>second item
527 </old_text>middle text<new_text>modified second item</new_text>end
528 <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
529 "},
530 &mut parser,
531 &mut rng
532 ),
533 vec![
534 Edit {
535 old_text: "content".to_string(),
536 new_text: "updated content".to_string(),
537 line_hint: None,
538 },
539 Edit {
540 old_text: "second item".to_string(),
541 new_text: "modified second item".to_string(),
542 line_hint: None,
543 },
544 Edit {
545 old_text: "third case".to_string(),
546 new_text: "improved third case".to_string(),
547 line_hint: None,
548 },
549 ]
550 );
551 assert_eq!(
552 parser.finish(),
553 EditParserMetrics {
554 tags: 6,
555 mismatched_tags: 0
556 }
557 );
558 }
559
560 #[gpui::test(iterations = 1000)]
561 fn test_xml_edits_with_closing_parameter_invoke(mut rng: StdRng) {
562 // This case is a regression with Claude Sonnet 4.5.
563 // Sometimes Sonnet thinks that it's doing a tool call
564 // and closes its response with '</parameter></invoke>'
565 // instead of properly closing </new_text>
566
567 let mut parser = EditParser::new(EditFormat::XmlTags);
568 assert_eq!(
569 parse_random_chunks(
570 indoc! {"
571 <old_text>some text</old_text><new_text>updated text</parameter></invoke>
572 <old_text>more text</old_text><new_text>upd</parameter></new_text>
573 "},
574 &mut parser,
575 &mut rng
576 ),
577 vec![
578 Edit {
579 old_text: "some text".to_string(),
580 new_text: "updated text".to_string(),
581 line_hint: None,
582 },
583 Edit {
584 old_text: "more text".to_string(),
585 new_text: "upd".to_string(),
586 line_hint: None,
587 },
588 ]
589 );
590 assert_eq!(
591 parser.finish(),
592 EditParserMetrics {
593 tags: 4,
594 mismatched_tags: 2
595 }
596 );
597 }
598
599 #[gpui::test(iterations = 1000)]
600 fn test_xml_nested_tags(mut rng: StdRng) {
601 let mut parser = EditParser::new(EditFormat::XmlTags);
602 assert_eq!(
603 parse_random_chunks(
604 "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
605 &mut parser,
606 &mut rng
607 ),
608 vec![Edit {
609 old_text: "code with <tag>nested</tag> elements".to_string(),
610 new_text: "new <code>content</code>".to_string(),
611 line_hint: None,
612 }]
613 );
614 assert_eq!(
615 parser.finish(),
616 EditParserMetrics {
617 tags: 2,
618 mismatched_tags: 0
619 }
620 );
621 }
622
623 #[gpui::test(iterations = 1000)]
624 fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
625 let mut parser = EditParser::new(EditFormat::XmlTags);
626 assert_eq!(
627 parse_random_chunks(
628 "<old_text></old_text><new_text></new_text>",
629 &mut parser,
630 &mut rng
631 ),
632 vec![Edit {
633 old_text: "".to_string(),
634 new_text: "".to_string(),
635 line_hint: None,
636 }]
637 );
638 assert_eq!(
639 parser.finish(),
640 EditParserMetrics {
641 tags: 2,
642 mismatched_tags: 0
643 }
644 );
645 }
646
647 #[gpui::test(iterations = 100)]
648 fn test_xml_multiline_content(mut rng: StdRng) {
649 let mut parser = EditParser::new(EditFormat::XmlTags);
650 assert_eq!(
651 parse_random_chunks(
652 "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
653 &mut parser,
654 &mut rng
655 ),
656 vec![Edit {
657 old_text: "line1\nline2\nline3".to_string(),
658 new_text: "line1\nmodified line2\nline3".to_string(),
659 line_hint: None,
660 }]
661 );
662 assert_eq!(
663 parser.finish(),
664 EditParserMetrics {
665 tags: 2,
666 mismatched_tags: 0
667 }
668 );
669 }
670
671 #[gpui::test(iterations = 1000)]
672 fn test_xml_mismatched_tags(mut rng: StdRng) {
673 let mut parser = EditParser::new(EditFormat::XmlTags);
674 assert_eq!(
675 parse_random_chunks(
676 // Reduced from an actual Sonnet 3.7 output
677 indoc! {"
678 <old_text>
679 a
680 b
681 c
682 </new_text>
683 <new_text>
684 a
685 B
686 c
687 </old_text>
688 <old_text>
689 d
690 e
691 f
692 </new_text>
693 <new_text>
694 D
695 e
696 F
697 </old_text>
698 "},
699 &mut parser,
700 &mut rng
701 ),
702 vec![
703 Edit {
704 old_text: "a\nb\nc".to_string(),
705 new_text: "a\nB\nc".to_string(),
706 line_hint: None,
707 },
708 Edit {
709 old_text: "d\ne\nf".to_string(),
710 new_text: "D\ne\nF".to_string(),
711 line_hint: None,
712 }
713 ]
714 );
715 assert_eq!(
716 parser.finish(),
717 EditParserMetrics {
718 tags: 4,
719 mismatched_tags: 4
720 }
721 );
722
723 let mut parser = EditParser::new(EditFormat::XmlTags);
724 assert_eq!(
725 parse_random_chunks(
726 // Reduced from an actual Opus 4 output
727 indoc! {"
728 <edits>
729 <old_text>
730 Lorem
731 </old_text>
732 <new_text>
733 LOREM
734 </edits>
735 "},
736 &mut parser,
737 &mut rng
738 ),
739 vec![Edit {
740 old_text: "Lorem".to_string(),
741 new_text: "LOREM".to_string(),
742 line_hint: None,
743 },]
744 );
745 assert_eq!(
746 parser.finish(),
747 EditParserMetrics {
748 tags: 2,
749 mismatched_tags: 1
750 }
751 );
752 }
753
754 #[gpui::test(iterations = 1000)]
755 fn test_diff_fenced_single_edit(mut rng: StdRng) {
756 let mut parser = EditParser::new(EditFormat::DiffFenced);
757 assert_eq!(
758 parse_random_chunks(
759 indoc! {"
760 <<<<<<< SEARCH
761 original text
762 =======
763 updated text
764 >>>>>>> REPLACE
765 "},
766 &mut parser,
767 &mut rng
768 ),
769 vec![Edit {
770 old_text: "original text".to_string(),
771 new_text: "updated text".to_string(),
772 line_hint: None,
773 }]
774 );
775 assert_eq!(
776 parser.finish(),
777 EditParserMetrics {
778 tags: 0,
779 mismatched_tags: 0
780 }
781 );
782 }
783
784 #[gpui::test(iterations = 100)]
785 fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
786 let mut parser = EditParser::new(EditFormat::DiffFenced);
787 assert_eq!(
788 parse_random_chunks(
789 indoc! {"
790 ```diff
791 <<<<<<< SEARCH
792 from flask import Flask
793 =======
794 import math
795 from flask import Flask
796 >>>>>>> REPLACE
797 ```
798 "},
799 &mut parser,
800 &mut rng
801 ),
802 vec![Edit {
803 old_text: "from flask import Flask".to_string(),
804 new_text: "import math\nfrom flask import Flask".to_string(),
805 line_hint: None,
806 }]
807 );
808 assert_eq!(
809 parser.finish(),
810 EditParserMetrics {
811 tags: 0,
812 mismatched_tags: 0
813 }
814 );
815 }
816
817 #[gpui::test(iterations = 100)]
818 fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
819 let mut parser = EditParser::new(EditFormat::DiffFenced);
820 assert_eq!(
821 parse_random_chunks(
822 indoc! {"
823 <<<<<<< SEARCH
824 first old
825 =======
826 first new
827 >>>>>>> REPLACE
828
829 <<<<<<< SEARCH
830 second old
831 =======
832 second new
833 >>>>>>> REPLACE
834 "},
835 &mut parser,
836 &mut rng
837 ),
838 vec![
839 Edit {
840 old_text: "first old".to_string(),
841 new_text: "first new".to_string(),
842 line_hint: None,
843 },
844 Edit {
845 old_text: "second old".to_string(),
846 new_text: "second new".to_string(),
847 line_hint: None,
848 },
849 ]
850 );
851 assert_eq!(
852 parser.finish(),
853 EditParserMetrics {
854 tags: 0,
855 mismatched_tags: 0
856 }
857 );
858 }
859
860 #[gpui::test(iterations = 100)]
861 fn test_mixed_formats(mut rng: StdRng) {
862 // Test XML format parser only parses XML tags
863 let mut xml_parser = EditParser::new(EditFormat::XmlTags);
864 assert_eq!(
865 parse_random_chunks(
866 indoc! {"
867 <old_text>xml style old</old_text><new_text>xml style new</new_text>
868
869 <<<<<<< SEARCH
870 diff style old
871 =======
872 diff style new
873 >>>>>>> REPLACE
874 "},
875 &mut xml_parser,
876 &mut rng
877 ),
878 vec![Edit {
879 old_text: "xml style old".to_string(),
880 new_text: "xml style new".to_string(),
881 line_hint: None,
882 },]
883 );
884 assert_eq!(
885 xml_parser.finish(),
886 EditParserMetrics {
887 tags: 2,
888 mismatched_tags: 0
889 }
890 );
891
892 // Test diff-fenced format parser only parses diff markers
893 let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
894 assert_eq!(
895 parse_random_chunks(
896 indoc! {"
897 <old_text>xml style old</old_text><new_text>xml style new</new_text>
898
899 <<<<<<< SEARCH
900 diff style old
901 =======
902 diff style new
903 >>>>>>> REPLACE
904 "},
905 &mut diff_parser,
906 &mut rng
907 ),
908 vec![Edit {
909 old_text: "diff style old".to_string(),
910 new_text: "diff style new".to_string(),
911 line_hint: None,
912 },]
913 );
914 assert_eq!(
915 diff_parser.finish(),
916 EditParserMetrics {
917 tags: 0,
918 mismatched_tags: 0
919 }
920 );
921 }
922
923 #[gpui::test(iterations = 100)]
924 fn test_diff_fenced_empty_sections(mut rng: StdRng) {
925 let mut parser = EditParser::new(EditFormat::DiffFenced);
926 assert_eq!(
927 parse_random_chunks(
928 indoc! {"
929 <<<<<<< SEARCH
930 =======
931 >>>>>>> REPLACE
932 "},
933 &mut parser,
934 &mut rng
935 ),
936 vec![Edit {
937 old_text: "".to_string(),
938 new_text: "".to_string(),
939 line_hint: None,
940 }]
941 );
942 assert_eq!(
943 parser.finish(),
944 EditParserMetrics {
945 tags: 0,
946 mismatched_tags: 0
947 }
948 );
949 }
950
951 #[gpui::test(iterations = 100)]
952 fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
953 let mut parser = EditParser::new(EditFormat::DiffFenced);
954 let edits = parse_random_chunks(
955 indoc! {"
956 <<<<<<< SEARCH line=42
957 original text
958 =======
959 updated text
960 >>>>>>> REPLACE
961 "},
962 &mut parser,
963 &mut rng,
964 );
965 assert_eq!(
966 edits,
967 vec![Edit {
968 old_text: "original text".to_string(),
969 line_hint: Some(42),
970 new_text: "updated text".to_string(),
971 }]
972 );
973 }
974 #[gpui::test(iterations = 100)]
975 fn test_xml_line_hints(mut rng: StdRng) {
976 // Line hint is a single quoted line number
977 let mut parser = EditParser::new(EditFormat::XmlTags);
978
979 let edits = parse_random_chunks(
980 r#"
981 <old_text line="23">original code</old_text>
982 <new_text>updated code</new_text>"#,
983 &mut parser,
984 &mut rng,
985 );
986
987 assert_eq!(edits.len(), 1);
988 assert_eq!(edits[0].old_text, "original code");
989 assert_eq!(edits[0].line_hint, Some(23));
990 assert_eq!(edits[0].new_text, "updated code");
991
992 // Line hint is a single unquoted line number
993 let mut parser = EditParser::new(EditFormat::XmlTags);
994
995 let edits = parse_random_chunks(
996 r#"
997 <old_text line=45>original code</old_text>
998 <new_text>updated code</new_text>"#,
999 &mut parser,
1000 &mut rng,
1001 );
1002
1003 assert_eq!(edits.len(), 1);
1004 assert_eq!(edits[0].old_text, "original code");
1005 assert_eq!(edits[0].line_hint, Some(45));
1006 assert_eq!(edits[0].new_text, "updated code");
1007
1008 // Line hint is a range
1009 let mut parser = EditParser::new(EditFormat::XmlTags);
1010
1011 let edits = parse_random_chunks(
1012 r#"
1013 <old_text line="23:50">original code</old_text>
1014 <new_text>updated code</new_text>"#,
1015 &mut parser,
1016 &mut rng,
1017 );
1018
1019 assert_eq!(edits.len(), 1);
1020 assert_eq!(edits[0].old_text, "original code");
1021 assert_eq!(edits[0].line_hint, Some(23));
1022 assert_eq!(edits[0].new_text, "updated code");
1023
1024 // No line hint
1025 let mut parser = EditParser::new(EditFormat::XmlTags);
1026 let edits = parse_random_chunks(
1027 r#"
1028 <old_text>old</old_text>
1029 <new_text>new</new_text>"#,
1030 &mut parser,
1031 &mut rng,
1032 );
1033
1034 assert_eq!(edits.len(), 1);
1035 assert_eq!(edits[0].old_text, "old");
1036 assert_eq!(edits[0].line_hint, None);
1037 assert_eq!(edits[0].new_text, "new");
1038 }
1039
1040 #[derive(Default, Debug, PartialEq, Eq)]
1041 struct Edit {
1042 old_text: String,
1043 new_text: String,
1044 line_hint: Option<u32>,
1045 }
1046
1047 fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
1048 let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1049 let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1050 chunk_indices.sort();
1051 chunk_indices.push(input.len());
1052
1053 let mut old_text = Some(String::new());
1054 let mut new_text = None;
1055 let mut pending_edit = Edit::default();
1056 let mut edits = Vec::new();
1057 let mut last_ix = 0;
1058 for chunk_ix in chunk_indices {
1059 for event in parser.push(&input[last_ix..chunk_ix]) {
1060 match event {
1061 EditParserEvent::OldTextChunk {
1062 chunk,
1063 done,
1064 line_hint,
1065 } => {
1066 old_text.as_mut().unwrap().push_str(&chunk);
1067 if done {
1068 pending_edit.old_text = old_text.take().unwrap();
1069 pending_edit.line_hint = line_hint;
1070 new_text = Some(String::new());
1071 }
1072 }
1073 EditParserEvent::NewTextChunk { chunk, done } => {
1074 new_text.as_mut().unwrap().push_str(&chunk);
1075 if done {
1076 pending_edit.new_text = new_text.take().unwrap();
1077 edits.push(pending_edit);
1078 pending_edit = Edit::default();
1079 old_text = Some(String::new());
1080 }
1081 }
1082 }
1083 }
1084 last_ix = chunk_ix;
1085 }
1086
1087 if new_text.is_some() {
1088 pending_edit.new_text = new_text.take().unwrap();
1089 edits.push(pending_edit);
1090 }
1091
1092 edits
1093 }
1094}