edit_parser.rs

   1use anyhow::bail;
   2use derive_more::{Add, AddAssign};
   3use language_model::LanguageModel;
   4use regex::Regex;
   5use schemars::JsonSchema;
   6use serde::{Deserialize, Serialize};
   7use smallvec::SmallVec;
   8use std::{mem, ops::Range, str::FromStr, sync::Arc};
   9
  10const OLD_TEXT_END_TAG: &str = "</old_text>";
  11const NEW_TEXT_END_TAG: &str = "</new_text>";
  12const EDITS_END_TAG: &str = "</edits>";
  13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
  14const SEPARATOR_MARKER: &str = "=======";
  15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
  16const SONNET_PARAMETER_INVOKE_1: &str = "</parameter>\n</invoke>";
  17const SONNET_PARAMETER_INVOKE_2: &str = "</parameter></invoke>";
  18const END_TAGS: [&str; 5] = [
  19    OLD_TEXT_END_TAG,
  20    NEW_TEXT_END_TAG,
  21    EDITS_END_TAG,
  22    SONNET_PARAMETER_INVOKE_1, // Remove this after switching to streaming tool call
  23    SONNET_PARAMETER_INVOKE_2,
  24];
  25
  26#[derive(Debug)]
  27pub enum EditParserEvent {
  28    OldTextChunk {
  29        chunk: String,
  30        done: bool,
  31        line_hint: Option<u32>,
  32    },
  33    NewTextChunk {
  34        chunk: String,
  35        done: bool,
  36    },
  37}
  38
  39#[derive(
  40    Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
  41)]
  42pub struct EditParserMetrics {
  43    pub tags: usize,
  44    pub mismatched_tags: usize,
  45}
  46
  47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
  48#[serde(rename_all = "snake_case")]
  49pub enum EditFormat {
  50    /// XML-like tags:
  51    /// <old_text>...</old_text>
  52    /// <new_text>...</new_text>
  53    XmlTags,
  54    /// Diff-fenced format, in which:
  55    /// - Text before the SEARCH marker is ignored
  56    /// - Fences are optional
  57    /// - Line hint is optional.
  58    ///
  59    /// Example:
  60    ///
  61    /// ```diff
  62    /// <<<<<<< SEARCH line=42
  63    /// ...
  64    /// =======
  65    /// ...
  66    /// >>>>>>> REPLACE
  67    /// ```
  68    DiffFenced,
  69}
  70
  71impl FromStr for EditFormat {
  72    type Err = anyhow::Error;
  73
  74    fn from_str(s: &str) -> anyhow::Result<Self> {
  75        match s.to_lowercase().as_str() {
  76            "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
  77            "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
  78            _ => bail!("Unknown EditFormat: {}", s),
  79        }
  80    }
  81}
  82
  83impl EditFormat {
  84    /// Return an optimal edit format for the language model
  85    pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
  86        if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
  87            Ok(EditFormat::DiffFenced)
  88        } else {
  89            Ok(EditFormat::XmlTags)
  90        }
  91    }
  92
  93    /// Return an optimal edit format for the language model,
  94    /// with the ability to override it by setting the
  95    /// `ZED_EDIT_FORMAT` environment variable
  96    #[allow(dead_code)]
  97    pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
  98        let default = EditFormat::from_model(model)?;
  99        std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
 100    }
 101}
 102
 103pub trait EditFormatParser: Send + std::fmt::Debug {
 104    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
 105    fn take_metrics(&mut self) -> EditParserMetrics;
 106}
 107
 108#[derive(Debug)]
 109pub struct XmlEditParser {
 110    state: XmlParserState,
 111    buffer: String,
 112    metrics: EditParserMetrics,
 113}
 114
 115#[derive(Debug, PartialEq)]
 116enum XmlParserState {
 117    Pending,
 118    WithinOldText { start: bool, line_hint: Option<u32> },
 119    AfterOldText,
 120    WithinNewText { start: bool },
 121}
 122
 123#[derive(Debug)]
 124pub struct DiffFencedEditParser {
 125    state: DiffParserState,
 126    buffer: String,
 127    metrics: EditParserMetrics,
 128}
 129
 130#[derive(Debug, PartialEq)]
 131enum DiffParserState {
 132    Pending,
 133    WithinSearch { start: bool, line_hint: Option<u32> },
 134    WithinReplace { start: bool },
 135}
 136
 137/// Main parser that delegates to format-specific parsers
 138pub struct EditParser {
 139    parser: Box<dyn EditFormatParser>,
 140}
 141
 142impl XmlEditParser {
 143    pub fn new() -> Self {
 144        XmlEditParser {
 145            state: XmlParserState::Pending,
 146            buffer: String::new(),
 147            metrics: EditParserMetrics::default(),
 148        }
 149    }
 150
 151    fn find_end_tag(&self) -> Option<Range<usize>> {
 152        let (tag, start_ix) = END_TAGS
 153            .iter()
 154            .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
 155            .min_by_key(|(_, ix)| *ix)?;
 156        Some(start_ix..start_ix + tag.len())
 157    }
 158
 159    fn ends_with_tag_prefix(&self) -> bool {
 160        let mut end_prefixes = END_TAGS
 161            .iter()
 162            .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
 163            .chain(["\n"]);
 164        end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 165    }
 166
 167    fn parse_line_hint(&self, tag: &str) -> Option<u32> {
 168        use std::sync::LazyLock;
 169        static LINE_HINT_REGEX: LazyLock<Regex> =
 170            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 171
 172        LINE_HINT_REGEX
 173            .captures(tag)
 174            .and_then(|caps| caps.get(1))
 175            .and_then(|m| m.as_str().parse::<u32>().ok())
 176    }
 177}
 178
 179impl EditFormatParser for XmlEditParser {
 180    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 181        self.buffer.push_str(chunk);
 182
 183        let mut edit_events = SmallVec::new();
 184        loop {
 185            match &mut self.state {
 186                XmlParserState::Pending => {
 187                    if let Some(start) = self.buffer.find("<old_text") {
 188                        if let Some(tag_end) = self.buffer[start..].find('>') {
 189                            let tag_end = start + tag_end + 1;
 190                            let tag = &self.buffer[start..tag_end];
 191                            let line_hint = self.parse_line_hint(tag);
 192                            self.buffer.drain(..tag_end);
 193                            self.state = XmlParserState::WithinOldText {
 194                                start: true,
 195                                line_hint,
 196                            };
 197                        } else {
 198                            break;
 199                        }
 200                    } else {
 201                        break;
 202                    }
 203                }
 204                XmlParserState::WithinOldText { start, line_hint } => {
 205                    if !self.buffer.is_empty() {
 206                        if *start && self.buffer.starts_with('\n') {
 207                            self.buffer.remove(0);
 208                        }
 209                        *start = false;
 210                    }
 211
 212                    let line_hint = *line_hint;
 213                    if let Some(tag_range) = self.find_end_tag() {
 214                        let mut chunk = self.buffer[..tag_range.start].to_string();
 215                        if chunk.ends_with('\n') {
 216                            chunk.pop();
 217                        }
 218
 219                        self.metrics.tags += 1;
 220                        if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
 221                            self.metrics.mismatched_tags += 1;
 222                        }
 223
 224                        self.buffer.drain(..tag_range.end);
 225                        self.state = XmlParserState::AfterOldText;
 226                        edit_events.push(EditParserEvent::OldTextChunk {
 227                            chunk,
 228                            done: true,
 229                            line_hint,
 230                        });
 231                    } else {
 232                        if !self.ends_with_tag_prefix() {
 233                            edit_events.push(EditParserEvent::OldTextChunk {
 234                                chunk: mem::take(&mut self.buffer),
 235                                done: false,
 236                                line_hint,
 237                            });
 238                        }
 239                        break;
 240                    }
 241                }
 242                XmlParserState::AfterOldText => {
 243                    if let Some(start) = self.buffer.find("<new_text>") {
 244                        self.buffer.drain(..start + "<new_text>".len());
 245                        self.state = XmlParserState::WithinNewText { start: true };
 246                    } else {
 247                        break;
 248                    }
 249                }
 250                XmlParserState::WithinNewText { start } => {
 251                    if !self.buffer.is_empty() {
 252                        if *start && self.buffer.starts_with('\n') {
 253                            self.buffer.remove(0);
 254                        }
 255                        *start = false;
 256                    }
 257
 258                    if let Some(tag_range) = self.find_end_tag() {
 259                        let mut chunk = self.buffer[..tag_range.start].to_string();
 260                        if chunk.ends_with('\n') {
 261                            chunk.pop();
 262                        }
 263
 264                        self.metrics.tags += 1;
 265                        if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
 266                            self.metrics.mismatched_tags += 1;
 267                        }
 268
 269                        self.buffer.drain(..tag_range.end);
 270                        self.state = XmlParserState::Pending;
 271                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 272                    } else {
 273                        if !self.ends_with_tag_prefix() {
 274                            edit_events.push(EditParserEvent::NewTextChunk {
 275                                chunk: mem::take(&mut self.buffer),
 276                                done: false,
 277                            });
 278                        }
 279                        break;
 280                    }
 281                }
 282            }
 283        }
 284        edit_events
 285    }
 286
 287    fn take_metrics(&mut self) -> EditParserMetrics {
 288        std::mem::take(&mut self.metrics)
 289    }
 290}
 291
 292impl DiffFencedEditParser {
 293    pub fn new() -> Self {
 294        DiffFencedEditParser {
 295            state: DiffParserState::Pending,
 296            buffer: String::new(),
 297            metrics: EditParserMetrics::default(),
 298        }
 299    }
 300
 301    fn ends_with_diff_marker_prefix(&self) -> bool {
 302        let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
 303        let mut diff_prefixes = diff_markers
 304            .iter()
 305            .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
 306            .chain(["\n"]);
 307        diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 308    }
 309
 310    fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
 311        use regex::Regex;
 312        use std::sync::LazyLock;
 313        static LINE_HINT_REGEX: LazyLock<Regex> =
 314            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 315
 316        LINE_HINT_REGEX
 317            .captures(search_line)
 318            .and_then(|caps| caps.get(1))
 319            .and_then(|m| m.as_str().parse::<u32>().ok())
 320    }
 321}
 322
 323impl EditFormatParser for DiffFencedEditParser {
 324    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 325        self.buffer.push_str(chunk);
 326
 327        let mut edit_events = SmallVec::new();
 328        loop {
 329            match &mut self.state {
 330                DiffParserState::Pending => {
 331                    if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
 332                        let search_end = diff + SEARCH_MARKER.len();
 333                        if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
 334                            let search_line = &self.buffer[diff..search_end + newline_pos];
 335                            let line_hint = self.parse_line_hint(search_line);
 336                            self.buffer.drain(..search_end + newline_pos + 1);
 337                            self.state = DiffParserState::WithinSearch {
 338                                start: true,
 339                                line_hint,
 340                            };
 341                        } else {
 342                            break;
 343                        }
 344                    } else {
 345                        break;
 346                    }
 347                }
 348                DiffParserState::WithinSearch { start, line_hint } => {
 349                    if !self.buffer.is_empty() {
 350                        if *start && self.buffer.starts_with('\n') {
 351                            self.buffer.remove(0);
 352                        }
 353                        *start = false;
 354                    }
 355
 356                    let line_hint = *line_hint;
 357                    if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
 358                        let mut chunk = self.buffer[..separator_pos].to_string();
 359                        if chunk.ends_with('\n') {
 360                            chunk.pop();
 361                        }
 362
 363                        let separator_end = separator_pos + SEPARATOR_MARKER.len();
 364                        if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
 365                            self.buffer.drain(..separator_end + newline_pos + 1);
 366                            self.state = DiffParserState::WithinReplace { start: true };
 367                            edit_events.push(EditParserEvent::OldTextChunk {
 368                                chunk,
 369                                done: true,
 370                                line_hint,
 371                            });
 372                        } else {
 373                            break;
 374                        }
 375                    } else {
 376                        if !self.ends_with_diff_marker_prefix() {
 377                            edit_events.push(EditParserEvent::OldTextChunk {
 378                                chunk: mem::take(&mut self.buffer),
 379                                done: false,
 380                                line_hint,
 381                            });
 382                        }
 383                        break;
 384                    }
 385                }
 386                DiffParserState::WithinReplace { start } => {
 387                    if !self.buffer.is_empty() {
 388                        if *start && self.buffer.starts_with('\n') {
 389                            self.buffer.remove(0);
 390                        }
 391                        *start = false;
 392                    }
 393
 394                    if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
 395                        let mut chunk = self.buffer[..replace_pos].to_string();
 396                        if chunk.ends_with('\n') {
 397                            chunk.pop();
 398                        }
 399
 400                        self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
 401                        if let Some(newline_pos) = self.buffer.find('\n') {
 402                            self.buffer.drain(..newline_pos + 1);
 403                        } else {
 404                            self.buffer.clear();
 405                        }
 406
 407                        self.state = DiffParserState::Pending;
 408                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 409                    } else {
 410                        if !self.ends_with_diff_marker_prefix() {
 411                            edit_events.push(EditParserEvent::NewTextChunk {
 412                                chunk: mem::take(&mut self.buffer),
 413                                done: false,
 414                            });
 415                        }
 416                        break;
 417                    }
 418                }
 419            }
 420        }
 421        edit_events
 422    }
 423
 424    fn take_metrics(&mut self) -> EditParserMetrics {
 425        std::mem::take(&mut self.metrics)
 426    }
 427}
 428
 429impl EditParser {
 430    pub fn new(format: EditFormat) -> Self {
 431        let parser: Box<dyn EditFormatParser> = match format {
 432            EditFormat::XmlTags => Box::new(XmlEditParser::new()),
 433            EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
 434        };
 435        EditParser { parser }
 436    }
 437
 438    pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 439        self.parser.push(chunk)
 440    }
 441
 442    pub fn finish(mut self) -> EditParserMetrics {
 443        self.parser.take_metrics()
 444    }
 445}
 446
 447#[cfg(test)]
 448mod tests {
 449    use super::*;
 450    use indoc::indoc;
 451    use rand::prelude::*;
 452    use std::cmp;
 453
 454    #[gpui::test(iterations = 1000)]
 455    fn test_xml_single_edit(mut rng: StdRng) {
 456        let mut parser = EditParser::new(EditFormat::XmlTags);
 457        assert_eq!(
 458            parse_random_chunks(
 459                "<old_text>original</old_text><new_text>updated</new_text>",
 460                &mut parser,
 461                &mut rng
 462            ),
 463            vec![Edit {
 464                old_text: "original".to_string(),
 465                new_text: "updated".to_string(),
 466                line_hint: None,
 467            }]
 468        );
 469        assert_eq!(
 470            parser.finish(),
 471            EditParserMetrics {
 472                tags: 2,
 473                mismatched_tags: 0
 474            }
 475        );
 476    }
 477
 478    #[gpui::test(iterations = 1000)]
 479    fn test_xml_multiple_edits(mut rng: StdRng) {
 480        let mut parser = EditParser::new(EditFormat::XmlTags);
 481        assert_eq!(
 482            parse_random_chunks(
 483                indoc! {"
 484                    <old_text>
 485                    first old
 486                    </old_text><new_text>first new</new_text>
 487                    <old_text>second old</old_text><new_text>
 488                    second new
 489                    </new_text>
 490                "},
 491                &mut parser,
 492                &mut rng
 493            ),
 494            vec![
 495                Edit {
 496                    old_text: "first old".to_string(),
 497                    new_text: "first new".to_string(),
 498                    line_hint: None,
 499                },
 500                Edit {
 501                    old_text: "second old".to_string(),
 502                    new_text: "second new".to_string(),
 503                    line_hint: None,
 504                },
 505            ]
 506        );
 507        assert_eq!(
 508            parser.finish(),
 509            EditParserMetrics {
 510                tags: 4,
 511                mismatched_tags: 0
 512            }
 513        );
 514    }
 515
 516    #[gpui::test(iterations = 1000)]
 517    fn test_xml_edits_with_extra_text(mut rng: StdRng) {
 518        let mut parser = EditParser::new(EditFormat::XmlTags);
 519        assert_eq!(
 520            parse_random_chunks(
 521                indoc! {"
 522                    ignore this <old_text>
 523                    content</old_text>extra stuff<new_text>updated content</new_text>trailing data
 524                    more text <old_text>second item
 525                    </old_text>middle text<new_text>modified second item</new_text>end
 526                    <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
 527                "},
 528                &mut parser,
 529                &mut rng
 530            ),
 531            vec![
 532                Edit {
 533                    old_text: "content".to_string(),
 534                    new_text: "updated content".to_string(),
 535                    line_hint: None,
 536                },
 537                Edit {
 538                    old_text: "second item".to_string(),
 539                    new_text: "modified second item".to_string(),
 540                    line_hint: None,
 541                },
 542                Edit {
 543                    old_text: "third case".to_string(),
 544                    new_text: "improved third case".to_string(),
 545                    line_hint: None,
 546                },
 547            ]
 548        );
 549        assert_eq!(
 550            parser.finish(),
 551            EditParserMetrics {
 552                tags: 6,
 553                mismatched_tags: 0
 554            }
 555        );
 556    }
 557
 558    #[gpui::test(iterations = 1000)]
 559    fn test_xml_edits_with_closing_parameter_invoke(mut rng: StdRng) {
 560        // This case is a regression with Claude Sonnet 4.5.
 561        // Sometimes Sonnet thinks that it's doing a tool call
 562        // and closes its response with '</parameter></invoke>'
 563        // instead of properly closing </new_text>
 564
 565        let mut parser = EditParser::new(EditFormat::XmlTags);
 566        assert_eq!(
 567            parse_random_chunks(
 568                indoc! {"
 569                    <old_text>some text</old_text><new_text>updated text</parameter></invoke>
 570                "},
 571                &mut parser,
 572                &mut rng
 573            ),
 574            vec![Edit {
 575                old_text: "some text".to_string(),
 576                new_text: "updated text".to_string(),
 577                line_hint: None,
 578            },]
 579        );
 580        assert_eq!(
 581            parser.finish(),
 582            EditParserMetrics {
 583                tags: 2,
 584                mismatched_tags: 1
 585            }
 586        );
 587    }
 588
 589    #[gpui::test(iterations = 1000)]
 590    fn test_xml_nested_tags(mut rng: StdRng) {
 591        let mut parser = EditParser::new(EditFormat::XmlTags);
 592        assert_eq!(
 593            parse_random_chunks(
 594                "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
 595                &mut parser,
 596                &mut rng
 597            ),
 598            vec![Edit {
 599                old_text: "code with <tag>nested</tag> elements".to_string(),
 600                new_text: "new <code>content</code>".to_string(),
 601                line_hint: None,
 602            }]
 603        );
 604        assert_eq!(
 605            parser.finish(),
 606            EditParserMetrics {
 607                tags: 2,
 608                mismatched_tags: 0
 609            }
 610        );
 611    }
 612
 613    #[gpui::test(iterations = 1000)]
 614    fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
 615        let mut parser = EditParser::new(EditFormat::XmlTags);
 616        assert_eq!(
 617            parse_random_chunks(
 618                "<old_text></old_text><new_text></new_text>",
 619                &mut parser,
 620                &mut rng
 621            ),
 622            vec![Edit {
 623                old_text: "".to_string(),
 624                new_text: "".to_string(),
 625                line_hint: None,
 626            }]
 627        );
 628        assert_eq!(
 629            parser.finish(),
 630            EditParserMetrics {
 631                tags: 2,
 632                mismatched_tags: 0
 633            }
 634        );
 635    }
 636
 637    #[gpui::test(iterations = 100)]
 638    fn test_xml_multiline_content(mut rng: StdRng) {
 639        let mut parser = EditParser::new(EditFormat::XmlTags);
 640        assert_eq!(
 641            parse_random_chunks(
 642                "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
 643                &mut parser,
 644                &mut rng
 645            ),
 646            vec![Edit {
 647                old_text: "line1\nline2\nline3".to_string(),
 648                new_text: "line1\nmodified line2\nline3".to_string(),
 649                line_hint: None,
 650            }]
 651        );
 652        assert_eq!(
 653            parser.finish(),
 654            EditParserMetrics {
 655                tags: 2,
 656                mismatched_tags: 0
 657            }
 658        );
 659    }
 660
 661    #[gpui::test(iterations = 1000)]
 662    fn test_xml_mismatched_tags(mut rng: StdRng) {
 663        let mut parser = EditParser::new(EditFormat::XmlTags);
 664        assert_eq!(
 665            parse_random_chunks(
 666                // Reduced from an actual Sonnet 3.7 output
 667                indoc! {"
 668                    <old_text>
 669                    a
 670                    b
 671                    c
 672                    </new_text>
 673                    <new_text>
 674                    a
 675                    B
 676                    c
 677                    </old_text>
 678                    <old_text>
 679                    d
 680                    e
 681                    f
 682                    </new_text>
 683                    <new_text>
 684                    D
 685                    e
 686                    F
 687                    </old_text>
 688                "},
 689                &mut parser,
 690                &mut rng
 691            ),
 692            vec![
 693                Edit {
 694                    old_text: "a\nb\nc".to_string(),
 695                    new_text: "a\nB\nc".to_string(),
 696                    line_hint: None,
 697                },
 698                Edit {
 699                    old_text: "d\ne\nf".to_string(),
 700                    new_text: "D\ne\nF".to_string(),
 701                    line_hint: None,
 702                }
 703            ]
 704        );
 705        assert_eq!(
 706            parser.finish(),
 707            EditParserMetrics {
 708                tags: 4,
 709                mismatched_tags: 4
 710            }
 711        );
 712
 713        let mut parser = EditParser::new(EditFormat::XmlTags);
 714        assert_eq!(
 715            parse_random_chunks(
 716                // Reduced from an actual Opus 4 output
 717                indoc! {"
 718                    <edits>
 719                    <old_text>
 720                    Lorem
 721                    </old_text>
 722                    <new_text>
 723                    LOREM
 724                    </edits>
 725                "},
 726                &mut parser,
 727                &mut rng
 728            ),
 729            vec![Edit {
 730                old_text: "Lorem".to_string(),
 731                new_text: "LOREM".to_string(),
 732                line_hint: None,
 733            },]
 734        );
 735        assert_eq!(
 736            parser.finish(),
 737            EditParserMetrics {
 738                tags: 2,
 739                mismatched_tags: 1
 740            }
 741        );
 742    }
 743
 744    #[gpui::test(iterations = 1000)]
 745    fn test_diff_fenced_single_edit(mut rng: StdRng) {
 746        let mut parser = EditParser::new(EditFormat::DiffFenced);
 747        assert_eq!(
 748            parse_random_chunks(
 749                indoc! {"
 750                    <<<<<<< SEARCH
 751                    original text
 752                    =======
 753                    updated text
 754                    >>>>>>> REPLACE
 755                "},
 756                &mut parser,
 757                &mut rng
 758            ),
 759            vec![Edit {
 760                old_text: "original text".to_string(),
 761                new_text: "updated text".to_string(),
 762                line_hint: None,
 763            }]
 764        );
 765        assert_eq!(
 766            parser.finish(),
 767            EditParserMetrics {
 768                tags: 0,
 769                mismatched_tags: 0
 770            }
 771        );
 772    }
 773
 774    #[gpui::test(iterations = 100)]
 775    fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
 776        let mut parser = EditParser::new(EditFormat::DiffFenced);
 777        assert_eq!(
 778            parse_random_chunks(
 779                indoc! {"
 780                    ```diff
 781                    <<<<<<< SEARCH
 782                    from flask import Flask
 783                    =======
 784                    import math
 785                    from flask import Flask
 786                    >>>>>>> REPLACE
 787                    ```
 788                "},
 789                &mut parser,
 790                &mut rng
 791            ),
 792            vec![Edit {
 793                old_text: "from flask import Flask".to_string(),
 794                new_text: "import math\nfrom flask import Flask".to_string(),
 795                line_hint: None,
 796            }]
 797        );
 798        assert_eq!(
 799            parser.finish(),
 800            EditParserMetrics {
 801                tags: 0,
 802                mismatched_tags: 0
 803            }
 804        );
 805    }
 806
 807    #[gpui::test(iterations = 100)]
 808    fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
 809        let mut parser = EditParser::new(EditFormat::DiffFenced);
 810        assert_eq!(
 811            parse_random_chunks(
 812                indoc! {"
 813                    <<<<<<< SEARCH
 814                    first old
 815                    =======
 816                    first new
 817                    >>>>>>> REPLACE
 818
 819                    <<<<<<< SEARCH
 820                    second old
 821                    =======
 822                    second new
 823                    >>>>>>> REPLACE
 824                "},
 825                &mut parser,
 826                &mut rng
 827            ),
 828            vec![
 829                Edit {
 830                    old_text: "first old".to_string(),
 831                    new_text: "first new".to_string(),
 832                    line_hint: None,
 833                },
 834                Edit {
 835                    old_text: "second old".to_string(),
 836                    new_text: "second new".to_string(),
 837                    line_hint: None,
 838                },
 839            ]
 840        );
 841        assert_eq!(
 842            parser.finish(),
 843            EditParserMetrics {
 844                tags: 0,
 845                mismatched_tags: 0
 846            }
 847        );
 848    }
 849
 850    #[gpui::test(iterations = 100)]
 851    fn test_mixed_formats(mut rng: StdRng) {
 852        // Test XML format parser only parses XML tags
 853        let mut xml_parser = EditParser::new(EditFormat::XmlTags);
 854        assert_eq!(
 855            parse_random_chunks(
 856                indoc! {"
 857                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 858
 859                    <<<<<<< SEARCH
 860                    diff style old
 861                    =======
 862                    diff style new
 863                    >>>>>>> REPLACE
 864                "},
 865                &mut xml_parser,
 866                &mut rng
 867            ),
 868            vec![Edit {
 869                old_text: "xml style old".to_string(),
 870                new_text: "xml style new".to_string(),
 871                line_hint: None,
 872            },]
 873        );
 874        assert_eq!(
 875            xml_parser.finish(),
 876            EditParserMetrics {
 877                tags: 2,
 878                mismatched_tags: 0
 879            }
 880        );
 881
 882        // Test diff-fenced format parser only parses diff markers
 883        let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
 884        assert_eq!(
 885            parse_random_chunks(
 886                indoc! {"
 887                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 888
 889                    <<<<<<< SEARCH
 890                    diff style old
 891                    =======
 892                    diff style new
 893                    >>>>>>> REPLACE
 894                "},
 895                &mut diff_parser,
 896                &mut rng
 897            ),
 898            vec![Edit {
 899                old_text: "diff style old".to_string(),
 900                new_text: "diff style new".to_string(),
 901                line_hint: None,
 902            },]
 903        );
 904        assert_eq!(
 905            diff_parser.finish(),
 906            EditParserMetrics {
 907                tags: 0,
 908                mismatched_tags: 0
 909            }
 910        );
 911    }
 912
 913    #[gpui::test(iterations = 100)]
 914    fn test_diff_fenced_empty_sections(mut rng: StdRng) {
 915        let mut parser = EditParser::new(EditFormat::DiffFenced);
 916        assert_eq!(
 917            parse_random_chunks(
 918                indoc! {"
 919                <<<<<<< SEARCH
 920                =======
 921                >>>>>>> REPLACE
 922            "},
 923                &mut parser,
 924                &mut rng
 925            ),
 926            vec![Edit {
 927                old_text: "".to_string(),
 928                new_text: "".to_string(),
 929                line_hint: None,
 930            }]
 931        );
 932        assert_eq!(
 933            parser.finish(),
 934            EditParserMetrics {
 935                tags: 0,
 936                mismatched_tags: 0
 937            }
 938        );
 939    }
 940
 941    #[gpui::test(iterations = 100)]
 942    fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
 943        let mut parser = EditParser::new(EditFormat::DiffFenced);
 944        let edits = parse_random_chunks(
 945            indoc! {"
 946                <<<<<<< SEARCH line=42
 947                original text
 948                =======
 949                updated text
 950                >>>>>>> REPLACE
 951            "},
 952            &mut parser,
 953            &mut rng,
 954        );
 955        assert_eq!(
 956            edits,
 957            vec![Edit {
 958                old_text: "original text".to_string(),
 959                line_hint: Some(42),
 960                new_text: "updated text".to_string(),
 961            }]
 962        );
 963    }
 964    #[gpui::test(iterations = 100)]
 965    fn test_xml_line_hints(mut rng: StdRng) {
 966        // Line hint is a single quoted line number
 967        let mut parser = EditParser::new(EditFormat::XmlTags);
 968
 969        let edits = parse_random_chunks(
 970            r#"
 971                    <old_text line="23">original code</old_text>
 972                    <new_text>updated code</new_text>"#,
 973            &mut parser,
 974            &mut rng,
 975        );
 976
 977        assert_eq!(edits.len(), 1);
 978        assert_eq!(edits[0].old_text, "original code");
 979        assert_eq!(edits[0].line_hint, Some(23));
 980        assert_eq!(edits[0].new_text, "updated code");
 981
 982        // Line hint is a single unquoted line number
 983        let mut parser = EditParser::new(EditFormat::XmlTags);
 984
 985        let edits = parse_random_chunks(
 986            r#"
 987                    <old_text line=45>original code</old_text>
 988                    <new_text>updated code</new_text>"#,
 989            &mut parser,
 990            &mut rng,
 991        );
 992
 993        assert_eq!(edits.len(), 1);
 994        assert_eq!(edits[0].old_text, "original code");
 995        assert_eq!(edits[0].line_hint, Some(45));
 996        assert_eq!(edits[0].new_text, "updated code");
 997
 998        // Line hint is a range
 999        let mut parser = EditParser::new(EditFormat::XmlTags);
1000
1001        let edits = parse_random_chunks(
1002            r#"
1003            <old_text line="23:50">original code</old_text>
1004            <new_text>updated code</new_text>"#,
1005            &mut parser,
1006            &mut rng,
1007        );
1008
1009        assert_eq!(edits.len(), 1);
1010        assert_eq!(edits[0].old_text, "original code");
1011        assert_eq!(edits[0].line_hint, Some(23));
1012        assert_eq!(edits[0].new_text, "updated code");
1013
1014        // No line hint
1015        let mut parser = EditParser::new(EditFormat::XmlTags);
1016        let edits = parse_random_chunks(
1017            r#"
1018            <old_text>old</old_text>
1019            <new_text>new</new_text>"#,
1020            &mut parser,
1021            &mut rng,
1022        );
1023
1024        assert_eq!(edits.len(), 1);
1025        assert_eq!(edits[0].old_text, "old");
1026        assert_eq!(edits[0].line_hint, None);
1027        assert_eq!(edits[0].new_text, "new");
1028    }
1029
    /// Test-side representation of one complete edit: the value `parse_random_chunks`
    /// reassembles from parser events, and the expected value the tests compare against.
    #[derive(Default, Debug, PartialEq, Eq)]
    struct Edit {
        /// Concatenation of all `OldTextChunk` chunks received for this edit.
        old_text: String,
        /// Concatenation of all `NewTextChunk` chunks received for this edit.
        new_text: String,
        /// Line hint carried by the `OldTextChunk` event that completed the old text, if any.
        line_hint: Option<u32>,
    }
1036
1037    fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
1038        let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1039        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1040        chunk_indices.sort();
1041        chunk_indices.push(input.len());
1042
1043        let mut old_text = Some(String::new());
1044        let mut new_text = None;
1045        let mut pending_edit = Edit::default();
1046        let mut edits = Vec::new();
1047        let mut last_ix = 0;
1048        for chunk_ix in chunk_indices {
1049            for event in parser.push(&input[last_ix..chunk_ix]) {
1050                match event {
1051                    EditParserEvent::OldTextChunk {
1052                        chunk,
1053                        done,
1054                        line_hint,
1055                    } => {
1056                        old_text.as_mut().unwrap().push_str(&chunk);
1057                        if done {
1058                            pending_edit.old_text = old_text.take().unwrap();
1059                            pending_edit.line_hint = line_hint;
1060                            new_text = Some(String::new());
1061                        }
1062                    }
1063                    EditParserEvent::NewTextChunk { chunk, done } => {
1064                        new_text.as_mut().unwrap().push_str(&chunk);
1065                        if done {
1066                            pending_edit.new_text = new_text.take().unwrap();
1067                            edits.push(pending_edit);
1068                            pending_edit = Edit::default();
1069                            old_text = Some(String::new());
1070                        }
1071                    }
1072                }
1073            }
1074            last_ix = chunk_ix;
1075        }
1076
1077        if new_text.is_some() {
1078            pending_edit.new_text = new_text.take().unwrap();
1079            edits.push(pending_edit);
1080        }
1081
1082        edits
1083    }
1084}