1use anyhow::bail;
2use derive_more::{Add, AddAssign};
3use language_model::LanguageModel;
4use regex::Regex;
5use schemars::JsonSchema;
6use serde::{Deserialize, Serialize};
7use smallvec::SmallVec;
8use std::{mem, ops::Range, str::FromStr, sync::Arc};
9
10const OLD_TEXT_END_TAG: &str = "</old_text>";
11const NEW_TEXT_END_TAG: &str = "</new_text>";
12const EDITS_END_TAG: &str = "</edits>";
13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
14const SEPARATOR_MARKER: &str = "=======";
15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
16const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG];
17
18#[derive(Debug)]
19pub enum EditParserEvent {
20 OldTextChunk {
21 chunk: String,
22 done: bool,
23 line_hint: Option<u32>,
24 },
25 NewTextChunk {
26 chunk: String,
27 done: bool,
28 },
29}
30
31#[derive(
32 Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
33)]
34pub struct EditParserMetrics {
35 pub tags: usize,
36 pub mismatched_tags: usize,
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "snake_case")]
41pub enum EditFormat {
42 /// XML-like tags:
43 /// <old_text>...</old_text>
44 /// <new_text>...</new_text>
45 XmlTags,
46 /// Diff-fenced format, in which:
47 /// - Text before the SEARCH marker is ignored
48 /// - Fences are optional
49 /// - Line hint is optional.
50 ///
51 /// Example:
52 ///
53 /// ```diff
54 /// <<<<<<< SEARCH line=42
55 /// ...
56 /// =======
57 /// ...
58 /// >>>>>>> REPLACE
59 /// ```
60 DiffFenced,
61}
62
63impl FromStr for EditFormat {
64 type Err = anyhow::Error;
65
66 fn from_str(s: &str) -> anyhow::Result<Self> {
67 match s.to_lowercase().as_str() {
68 "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
69 "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
70 _ => bail!("Unknown EditFormat: {}", s),
71 }
72 }
73}
74
75impl EditFormat {
76 /// Return an optimal edit format for the language model
77 pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
78 if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
79 Ok(EditFormat::DiffFenced)
80 } else {
81 Ok(EditFormat::XmlTags)
82 }
83 }
84
85 /// Return an optimal edit format for the language model,
86 /// with the ability to override it by setting the
87 /// `ZED_EDIT_FORMAT` environment variable
88 #[allow(dead_code)]
89 pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
90 let default = EditFormat::from_model(model)?;
91 std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
92 }
93}
94
95pub trait EditFormatParser: Send + std::fmt::Debug {
96 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
97 fn take_metrics(&mut self) -> EditParserMetrics;
98}
99
100#[derive(Debug)]
101pub struct XmlEditParser {
102 state: XmlParserState,
103 buffer: String,
104 metrics: EditParserMetrics,
105}
106
107#[derive(Debug, PartialEq)]
108enum XmlParserState {
109 Pending,
110 WithinOldText { start: bool, line_hint: Option<u32> },
111 AfterOldText,
112 WithinNewText { start: bool },
113}
114
115#[derive(Debug)]
116pub struct DiffFencedEditParser {
117 state: DiffParserState,
118 buffer: String,
119 metrics: EditParserMetrics,
120}
121
122#[derive(Debug, PartialEq)]
123enum DiffParserState {
124 Pending,
125 WithinSearch { start: bool, line_hint: Option<u32> },
126 WithinReplace { start: bool },
127}
128
129/// Main parser that delegates to format-specific parsers
130pub struct EditParser {
131 parser: Box<dyn EditFormatParser>,
132}
133
134impl XmlEditParser {
135 pub fn new() -> Self {
136 XmlEditParser {
137 state: XmlParserState::Pending,
138 buffer: String::new(),
139 metrics: EditParserMetrics::default(),
140 }
141 }
142
143 fn find_end_tag(&self) -> Option<Range<usize>> {
144 let (tag, start_ix) = END_TAGS
145 .iter()
146 .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
147 .min_by_key(|(_, ix)| *ix)?;
148 Some(start_ix..start_ix + tag.len())
149 }
150
151 fn ends_with_tag_prefix(&self) -> bool {
152 let mut end_prefixes = END_TAGS
153 .iter()
154 .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
155 .chain(["\n"]);
156 end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
157 }
158
159 fn parse_line_hint(&self, tag: &str) -> Option<u32> {
160 use std::sync::LazyLock;
161 static LINE_HINT_REGEX: LazyLock<Regex> =
162 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
163
164 LINE_HINT_REGEX
165 .captures(tag)
166 .and_then(|caps| caps.get(1))
167 .and_then(|m| m.as_str().parse::<u32>().ok())
168 }
169}
170
171impl EditFormatParser for XmlEditParser {
172 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
173 self.buffer.push_str(chunk);
174
175 let mut edit_events = SmallVec::new();
176 loop {
177 match &mut self.state {
178 XmlParserState::Pending => {
179 if let Some(start) = self.buffer.find("<old_text") {
180 if let Some(tag_end) = self.buffer[start..].find('>') {
181 let tag_end = start + tag_end + 1;
182 let tag = &self.buffer[start..tag_end];
183 let line_hint = self.parse_line_hint(tag);
184 self.buffer.drain(..tag_end);
185 self.state = XmlParserState::WithinOldText {
186 start: true,
187 line_hint,
188 };
189 } else {
190 break;
191 }
192 } else {
193 break;
194 }
195 }
196 XmlParserState::WithinOldText { start, line_hint } => {
197 if !self.buffer.is_empty() {
198 if *start && self.buffer.starts_with('\n') {
199 self.buffer.remove(0);
200 }
201 *start = false;
202 }
203
204 let line_hint = *line_hint;
205 if let Some(tag_range) = self.find_end_tag() {
206 let mut chunk = self.buffer[..tag_range.start].to_string();
207 if chunk.ends_with('\n') {
208 chunk.pop();
209 }
210
211 self.metrics.tags += 1;
212 if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
213 self.metrics.mismatched_tags += 1;
214 }
215
216 self.buffer.drain(..tag_range.end);
217 self.state = XmlParserState::AfterOldText;
218 edit_events.push(EditParserEvent::OldTextChunk {
219 chunk,
220 done: true,
221 line_hint,
222 });
223 } else {
224 if !self.ends_with_tag_prefix() {
225 edit_events.push(EditParserEvent::OldTextChunk {
226 chunk: mem::take(&mut self.buffer),
227 done: false,
228 line_hint,
229 });
230 }
231 break;
232 }
233 }
234 XmlParserState::AfterOldText => {
235 if let Some(start) = self.buffer.find("<new_text>") {
236 self.buffer.drain(..start + "<new_text>".len());
237 self.state = XmlParserState::WithinNewText { start: true };
238 } else {
239 break;
240 }
241 }
242 XmlParserState::WithinNewText { start } => {
243 if !self.buffer.is_empty() {
244 if *start && self.buffer.starts_with('\n') {
245 self.buffer.remove(0);
246 }
247 *start = false;
248 }
249
250 if let Some(tag_range) = self.find_end_tag() {
251 let mut chunk = self.buffer[..tag_range.start].to_string();
252 if chunk.ends_with('\n') {
253 chunk.pop();
254 }
255
256 self.metrics.tags += 1;
257 if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
258 self.metrics.mismatched_tags += 1;
259 }
260
261 self.buffer.drain(..tag_range.end);
262 self.state = XmlParserState::Pending;
263 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
264 } else {
265 if !self.ends_with_tag_prefix() {
266 edit_events.push(EditParserEvent::NewTextChunk {
267 chunk: mem::take(&mut self.buffer),
268 done: false,
269 });
270 }
271 break;
272 }
273 }
274 }
275 }
276 edit_events
277 }
278
279 fn take_metrics(&mut self) -> EditParserMetrics {
280 std::mem::take(&mut self.metrics)
281 }
282}
283
284impl DiffFencedEditParser {
285 pub fn new() -> Self {
286 DiffFencedEditParser {
287 state: DiffParserState::Pending,
288 buffer: String::new(),
289 metrics: EditParserMetrics::default(),
290 }
291 }
292
293 fn ends_with_diff_marker_prefix(&self) -> bool {
294 let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
295 let mut diff_prefixes = diff_markers
296 .iter()
297 .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
298 .chain(["\n"]);
299 diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
300 }
301
302 fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
303 use regex::Regex;
304 use std::sync::LazyLock;
305 static LINE_HINT_REGEX: LazyLock<Regex> =
306 LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
307
308 LINE_HINT_REGEX
309 .captures(search_line)
310 .and_then(|caps| caps.get(1))
311 .and_then(|m| m.as_str().parse::<u32>().ok())
312 }
313}
314
315impl EditFormatParser for DiffFencedEditParser {
316 fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
317 self.buffer.push_str(chunk);
318
319 let mut edit_events = SmallVec::new();
320 loop {
321 match &mut self.state {
322 DiffParserState::Pending => {
323 if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
324 let search_end = diff + SEARCH_MARKER.len();
325 if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
326 let search_line = &self.buffer[diff..search_end + newline_pos];
327 let line_hint = self.parse_line_hint(search_line);
328 self.buffer.drain(..search_end + newline_pos + 1);
329 self.state = DiffParserState::WithinSearch {
330 start: true,
331 line_hint,
332 };
333 } else {
334 break;
335 }
336 } else {
337 break;
338 }
339 }
340 DiffParserState::WithinSearch { start, line_hint } => {
341 if !self.buffer.is_empty() {
342 if *start && self.buffer.starts_with('\n') {
343 self.buffer.remove(0);
344 }
345 *start = false;
346 }
347
348 let line_hint = *line_hint;
349 if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
350 let mut chunk = self.buffer[..separator_pos].to_string();
351 if chunk.ends_with('\n') {
352 chunk.pop();
353 }
354
355 let separator_end = separator_pos + SEPARATOR_MARKER.len();
356 if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
357 self.buffer.drain(..separator_end + newline_pos + 1);
358 self.state = DiffParserState::WithinReplace { start: true };
359 edit_events.push(EditParserEvent::OldTextChunk {
360 chunk,
361 done: true,
362 line_hint,
363 });
364 } else {
365 break;
366 }
367 } else {
368 if !self.ends_with_diff_marker_prefix() {
369 edit_events.push(EditParserEvent::OldTextChunk {
370 chunk: mem::take(&mut self.buffer),
371 done: false,
372 line_hint,
373 });
374 }
375 break;
376 }
377 }
378 DiffParserState::WithinReplace { start } => {
379 if !self.buffer.is_empty() {
380 if *start && self.buffer.starts_with('\n') {
381 self.buffer.remove(0);
382 }
383 *start = false;
384 }
385
386 if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
387 let mut chunk = self.buffer[..replace_pos].to_string();
388 if chunk.ends_with('\n') {
389 chunk.pop();
390 }
391
392 self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
393 if let Some(newline_pos) = self.buffer.find('\n') {
394 self.buffer.drain(..newline_pos + 1);
395 } else {
396 self.buffer.clear();
397 }
398
399 self.state = DiffParserState::Pending;
400 edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
401 } else {
402 if !self.ends_with_diff_marker_prefix() {
403 edit_events.push(EditParserEvent::NewTextChunk {
404 chunk: mem::take(&mut self.buffer),
405 done: false,
406 });
407 }
408 break;
409 }
410 }
411 }
412 }
413 edit_events
414 }
415
416 fn take_metrics(&mut self) -> EditParserMetrics {
417 std::mem::take(&mut self.metrics)
418 }
419}
420
421impl EditParser {
422 pub fn new(format: EditFormat) -> Self {
423 let parser: Box<dyn EditFormatParser> = match format {
424 EditFormat::XmlTags => Box::new(XmlEditParser::new()),
425 EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
426 };
427 EditParser { parser }
428 }
429
430 pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
431 self.parser.push(chunk)
432 }
433
434 pub fn finish(mut self) -> EditParserMetrics {
435 self.parser.take_metrics()
436 }
437}
438
439#[cfg(test)]
440mod tests {
441 use super::*;
442 use indoc::indoc;
443 use rand::prelude::*;
444 use std::cmp;
445
446 #[gpui::test(iterations = 1000)]
447 fn test_xml_single_edit(mut rng: StdRng) {
448 let mut parser = EditParser::new(EditFormat::XmlTags);
449 assert_eq!(
450 parse_random_chunks(
451 "<old_text>original</old_text><new_text>updated</new_text>",
452 &mut parser,
453 &mut rng
454 ),
455 vec![Edit {
456 old_text: "original".to_string(),
457 new_text: "updated".to_string(),
458 line_hint: None,
459 }]
460 );
461 assert_eq!(
462 parser.finish(),
463 EditParserMetrics {
464 tags: 2,
465 mismatched_tags: 0
466 }
467 );
468 }
469
470 #[gpui::test(iterations = 1000)]
471 fn test_xml_multiple_edits(mut rng: StdRng) {
472 let mut parser = EditParser::new(EditFormat::XmlTags);
473 assert_eq!(
474 parse_random_chunks(
475 indoc! {"
476 <old_text>
477 first old
478 </old_text><new_text>first new</new_text>
479 <old_text>second old</old_text><new_text>
480 second new
481 </new_text>
482 "},
483 &mut parser,
484 &mut rng
485 ),
486 vec![
487 Edit {
488 old_text: "first old".to_string(),
489 new_text: "first new".to_string(),
490 line_hint: None,
491 },
492 Edit {
493 old_text: "second old".to_string(),
494 new_text: "second new".to_string(),
495 line_hint: None,
496 },
497 ]
498 );
499 assert_eq!(
500 parser.finish(),
501 EditParserMetrics {
502 tags: 4,
503 mismatched_tags: 0
504 }
505 );
506 }
507
508 #[gpui::test(iterations = 1000)]
509 fn test_xml_edits_with_extra_text(mut rng: StdRng) {
510 let mut parser = EditParser::new(EditFormat::XmlTags);
511 assert_eq!(
512 parse_random_chunks(
513 indoc! {"
514 ignore this <old_text>
515 content</old_text>extra stuff<new_text>updated content</new_text>trailing data
516 more text <old_text>second item
517 </old_text>middle text<new_text>modified second item</new_text>end
518 <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
519 "},
520 &mut parser,
521 &mut rng
522 ),
523 vec![
524 Edit {
525 old_text: "content".to_string(),
526 new_text: "updated content".to_string(),
527 line_hint: None,
528 },
529 Edit {
530 old_text: "second item".to_string(),
531 new_text: "modified second item".to_string(),
532 line_hint: None,
533 },
534 Edit {
535 old_text: "third case".to_string(),
536 new_text: "improved third case".to_string(),
537 line_hint: None,
538 },
539 ]
540 );
541 assert_eq!(
542 parser.finish(),
543 EditParserMetrics {
544 tags: 6,
545 mismatched_tags: 0
546 }
547 );
548 }
549
550 #[gpui::test(iterations = 1000)]
551 fn test_xml_nested_tags(mut rng: StdRng) {
552 let mut parser = EditParser::new(EditFormat::XmlTags);
553 assert_eq!(
554 parse_random_chunks(
555 "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
556 &mut parser,
557 &mut rng
558 ),
559 vec![Edit {
560 old_text: "code with <tag>nested</tag> elements".to_string(),
561 new_text: "new <code>content</code>".to_string(),
562 line_hint: None,
563 }]
564 );
565 assert_eq!(
566 parser.finish(),
567 EditParserMetrics {
568 tags: 2,
569 mismatched_tags: 0
570 }
571 );
572 }
573
574 #[gpui::test(iterations = 1000)]
575 fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
576 let mut parser = EditParser::new(EditFormat::XmlTags);
577 assert_eq!(
578 parse_random_chunks(
579 "<old_text></old_text><new_text></new_text>",
580 &mut parser,
581 &mut rng
582 ),
583 vec![Edit {
584 old_text: "".to_string(),
585 new_text: "".to_string(),
586 line_hint: None,
587 }]
588 );
589 assert_eq!(
590 parser.finish(),
591 EditParserMetrics {
592 tags: 2,
593 mismatched_tags: 0
594 }
595 );
596 }
597
598 #[gpui::test(iterations = 100)]
599 fn test_xml_multiline_content(mut rng: StdRng) {
600 let mut parser = EditParser::new(EditFormat::XmlTags);
601 assert_eq!(
602 parse_random_chunks(
603 "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
604 &mut parser,
605 &mut rng
606 ),
607 vec![Edit {
608 old_text: "line1\nline2\nline3".to_string(),
609 new_text: "line1\nmodified line2\nline3".to_string(),
610 line_hint: None,
611 }]
612 );
613 assert_eq!(
614 parser.finish(),
615 EditParserMetrics {
616 tags: 2,
617 mismatched_tags: 0
618 }
619 );
620 }
621
622 #[gpui::test(iterations = 1000)]
623 fn test_xml_mismatched_tags(mut rng: StdRng) {
624 let mut parser = EditParser::new(EditFormat::XmlTags);
625 assert_eq!(
626 parse_random_chunks(
627 // Reduced from an actual Sonnet 3.7 output
628 indoc! {"
629 <old_text>
630 a
631 b
632 c
633 </new_text>
634 <new_text>
635 a
636 B
637 c
638 </old_text>
639 <old_text>
640 d
641 e
642 f
643 </new_text>
644 <new_text>
645 D
646 e
647 F
648 </old_text>
649 "},
650 &mut parser,
651 &mut rng
652 ),
653 vec![
654 Edit {
655 old_text: "a\nb\nc".to_string(),
656 new_text: "a\nB\nc".to_string(),
657 line_hint: None,
658 },
659 Edit {
660 old_text: "d\ne\nf".to_string(),
661 new_text: "D\ne\nF".to_string(),
662 line_hint: None,
663 }
664 ]
665 );
666 assert_eq!(
667 parser.finish(),
668 EditParserMetrics {
669 tags: 4,
670 mismatched_tags: 4
671 }
672 );
673
674 let mut parser = EditParser::new(EditFormat::XmlTags);
675 assert_eq!(
676 parse_random_chunks(
677 // Reduced from an actual Opus 4 output
678 indoc! {"
679 <edits>
680 <old_text>
681 Lorem
682 </old_text>
683 <new_text>
684 LOREM
685 </edits>
686 "},
687 &mut parser,
688 &mut rng
689 ),
690 vec![Edit {
691 old_text: "Lorem".to_string(),
692 new_text: "LOREM".to_string(),
693 line_hint: None,
694 },]
695 );
696 assert_eq!(
697 parser.finish(),
698 EditParserMetrics {
699 tags: 2,
700 mismatched_tags: 1
701 }
702 );
703 }
704
705 #[gpui::test(iterations = 1000)]
706 fn test_diff_fenced_single_edit(mut rng: StdRng) {
707 let mut parser = EditParser::new(EditFormat::DiffFenced);
708 assert_eq!(
709 parse_random_chunks(
710 indoc! {"
711 <<<<<<< SEARCH
712 original text
713 =======
714 updated text
715 >>>>>>> REPLACE
716 "},
717 &mut parser,
718 &mut rng
719 ),
720 vec![Edit {
721 old_text: "original text".to_string(),
722 new_text: "updated text".to_string(),
723 line_hint: None,
724 }]
725 );
726 assert_eq!(
727 parser.finish(),
728 EditParserMetrics {
729 tags: 0,
730 mismatched_tags: 0
731 }
732 );
733 }
734
735 #[gpui::test(iterations = 100)]
736 fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
737 let mut parser = EditParser::new(EditFormat::DiffFenced);
738 assert_eq!(
739 parse_random_chunks(
740 indoc! {"
741 ```diff
742 <<<<<<< SEARCH
743 from flask import Flask
744 =======
745 import math
746 from flask import Flask
747 >>>>>>> REPLACE
748 ```
749 "},
750 &mut parser,
751 &mut rng
752 ),
753 vec![Edit {
754 old_text: "from flask import Flask".to_string(),
755 new_text: "import math\nfrom flask import Flask".to_string(),
756 line_hint: None,
757 }]
758 );
759 assert_eq!(
760 parser.finish(),
761 EditParserMetrics {
762 tags: 0,
763 mismatched_tags: 0
764 }
765 );
766 }
767
768 #[gpui::test(iterations = 100)]
769 fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
770 let mut parser = EditParser::new(EditFormat::DiffFenced);
771 assert_eq!(
772 parse_random_chunks(
773 indoc! {"
774 <<<<<<< SEARCH
775 first old
776 =======
777 first new
778 >>>>>>> REPLACE
779
780 <<<<<<< SEARCH
781 second old
782 =======
783 second new
784 >>>>>>> REPLACE
785 "},
786 &mut parser,
787 &mut rng
788 ),
789 vec![
790 Edit {
791 old_text: "first old".to_string(),
792 new_text: "first new".to_string(),
793 line_hint: None,
794 },
795 Edit {
796 old_text: "second old".to_string(),
797 new_text: "second new".to_string(),
798 line_hint: None,
799 },
800 ]
801 );
802 assert_eq!(
803 parser.finish(),
804 EditParserMetrics {
805 tags: 0,
806 mismatched_tags: 0
807 }
808 );
809 }
810
811 #[gpui::test(iterations = 100)]
812 fn test_mixed_formats(mut rng: StdRng) {
813 // Test XML format parser only parses XML tags
814 let mut xml_parser = EditParser::new(EditFormat::XmlTags);
815 assert_eq!(
816 parse_random_chunks(
817 indoc! {"
818 <old_text>xml style old</old_text><new_text>xml style new</new_text>
819
820 <<<<<<< SEARCH
821 diff style old
822 =======
823 diff style new
824 >>>>>>> REPLACE
825 "},
826 &mut xml_parser,
827 &mut rng
828 ),
829 vec![Edit {
830 old_text: "xml style old".to_string(),
831 new_text: "xml style new".to_string(),
832 line_hint: None,
833 },]
834 );
835 assert_eq!(
836 xml_parser.finish(),
837 EditParserMetrics {
838 tags: 2,
839 mismatched_tags: 0
840 }
841 );
842
843 // Test diff-fenced format parser only parses diff markers
844 let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
845 assert_eq!(
846 parse_random_chunks(
847 indoc! {"
848 <old_text>xml style old</old_text><new_text>xml style new</new_text>
849
850 <<<<<<< SEARCH
851 diff style old
852 =======
853 diff style new
854 >>>>>>> REPLACE
855 "},
856 &mut diff_parser,
857 &mut rng
858 ),
859 vec![Edit {
860 old_text: "diff style old".to_string(),
861 new_text: "diff style new".to_string(),
862 line_hint: None,
863 },]
864 );
865 assert_eq!(
866 diff_parser.finish(),
867 EditParserMetrics {
868 tags: 0,
869 mismatched_tags: 0
870 }
871 );
872 }
873
874 #[gpui::test(iterations = 100)]
875 fn test_diff_fenced_empty_sections(mut rng: StdRng) {
876 let mut parser = EditParser::new(EditFormat::DiffFenced);
877 assert_eq!(
878 parse_random_chunks(
879 indoc! {"
880 <<<<<<< SEARCH
881 =======
882 >>>>>>> REPLACE
883 "},
884 &mut parser,
885 &mut rng
886 ),
887 vec![Edit {
888 old_text: "".to_string(),
889 new_text: "".to_string(),
890 line_hint: None,
891 }]
892 );
893 assert_eq!(
894 parser.finish(),
895 EditParserMetrics {
896 tags: 0,
897 mismatched_tags: 0
898 }
899 );
900 }
901
902 #[gpui::test(iterations = 100)]
903 fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
904 let mut parser = EditParser::new(EditFormat::DiffFenced);
905 let edits = parse_random_chunks(
906 indoc! {"
907 <<<<<<< SEARCH line=42
908 original text
909 =======
910 updated text
911 >>>>>>> REPLACE
912 "},
913 &mut parser,
914 &mut rng,
915 );
916 assert_eq!(
917 edits,
918 vec![Edit {
919 old_text: "original text".to_string(),
920 line_hint: Some(42),
921 new_text: "updated text".to_string(),
922 }]
923 );
924 }
925 #[gpui::test(iterations = 100)]
926 fn test_xml_line_hints(mut rng: StdRng) {
927 // Line hint is a single quoted line number
928 let mut parser = EditParser::new(EditFormat::XmlTags);
929
930 let edits = parse_random_chunks(
931 r#"
932 <old_text line="23">original code</old_text>
933 <new_text>updated code</new_text>"#,
934 &mut parser,
935 &mut rng,
936 );
937
938 assert_eq!(edits.len(), 1);
939 assert_eq!(edits[0].old_text, "original code");
940 assert_eq!(edits[0].line_hint, Some(23));
941 assert_eq!(edits[0].new_text, "updated code");
942
943 // Line hint is a single unquoted line number
944 let mut parser = EditParser::new(EditFormat::XmlTags);
945
946 let edits = parse_random_chunks(
947 r#"
948 <old_text line=45>original code</old_text>
949 <new_text>updated code</new_text>"#,
950 &mut parser,
951 &mut rng,
952 );
953
954 assert_eq!(edits.len(), 1);
955 assert_eq!(edits[0].old_text, "original code");
956 assert_eq!(edits[0].line_hint, Some(45));
957 assert_eq!(edits[0].new_text, "updated code");
958
959 // Line hint is a range
960 let mut parser = EditParser::new(EditFormat::XmlTags);
961
962 let edits = parse_random_chunks(
963 r#"
964 <old_text line="23:50">original code</old_text>
965 <new_text>updated code</new_text>"#,
966 &mut parser,
967 &mut rng,
968 );
969
970 assert_eq!(edits.len(), 1);
971 assert_eq!(edits[0].old_text, "original code");
972 assert_eq!(edits[0].line_hint, Some(23));
973 assert_eq!(edits[0].new_text, "updated code");
974
975 // No line hint
976 let mut parser = EditParser::new(EditFormat::XmlTags);
977 let edits = parse_random_chunks(
978 r#"
979 <old_text>old</old_text>
980 <new_text>new</new_text>"#,
981 &mut parser,
982 &mut rng,
983 );
984
985 assert_eq!(edits.len(), 1);
986 assert_eq!(edits[0].old_text, "old");
987 assert_eq!(edits[0].line_hint, None);
988 assert_eq!(edits[0].new_text, "new");
989 }
990
991 #[derive(Default, Debug, PartialEq, Eq)]
992 struct Edit {
993 old_text: String,
994 new_text: String,
995 line_hint: Option<u32>,
996 }
997
998 fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
999 let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1000 let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1001 chunk_indices.sort();
1002 chunk_indices.push(input.len());
1003
1004 let mut old_text = Some(String::new());
1005 let mut new_text = None;
1006 let mut pending_edit = Edit::default();
1007 let mut edits = Vec::new();
1008 let mut last_ix = 0;
1009 for chunk_ix in chunk_indices {
1010 for event in parser.push(&input[last_ix..chunk_ix]) {
1011 match event {
1012 EditParserEvent::OldTextChunk {
1013 chunk,
1014 done,
1015 line_hint,
1016 } => {
1017 old_text.as_mut().unwrap().push_str(&chunk);
1018 if done {
1019 pending_edit.old_text = old_text.take().unwrap();
1020 pending_edit.line_hint = line_hint;
1021 new_text = Some(String::new());
1022 }
1023 }
1024 EditParserEvent::NewTextChunk { chunk, done } => {
1025 new_text.as_mut().unwrap().push_str(&chunk);
1026 if done {
1027 pending_edit.new_text = new_text.take().unwrap();
1028 edits.push(pending_edit);
1029 pending_edit = Edit::default();
1030 old_text = Some(String::new());
1031 }
1032 }
1033 }
1034 }
1035 last_ix = chunk_ix;
1036 }
1037
1038 edits
1039 }
1040}