edit_parser.rs

   1use anyhow::bail;
   2use derive_more::{Add, AddAssign};
   3use language_model::LanguageModel;
   4use regex::Regex;
   5use schemars::JsonSchema;
   6use serde::{Deserialize, Serialize};
   7use smallvec::SmallVec;
   8use std::{mem, ops::Range, str::FromStr, sync::Arc};
   9
  10const OLD_TEXT_END_TAG: &str = "</old_text>";
  11const NEW_TEXT_END_TAG: &str = "</new_text>";
  12const EDITS_END_TAG: &str = "</edits>";
  13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
  14const SEPARATOR_MARKER: &str = "=======";
  15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
  16const SONNET_PARAMETER_INVOKE_1: &str = "</parameter>\n</invoke>";
  17const SONNET_PARAMETER_INVOKE_2: &str = "</parameter></invoke>";
  18const SONNET_PARAMETER_INVOKE_3: &str = "</parameter>";
  19const END_TAGS: [&str; 6] = [
  20    OLD_TEXT_END_TAG,
  21    NEW_TEXT_END_TAG,
  22    EDITS_END_TAG,
  23    SONNET_PARAMETER_INVOKE_1, // Remove these after switching to streaming tool call
  24    SONNET_PARAMETER_INVOKE_2,
  25    SONNET_PARAMETER_INVOKE_3,
  26];
  27
  28#[derive(Debug)]
  29pub enum EditParserEvent {
  30    OldTextChunk {
  31        chunk: String,
  32        done: bool,
  33        line_hint: Option<u32>,
  34    },
  35    NewTextChunk {
  36        chunk: String,
  37        done: bool,
  38    },
  39}
  40
  41#[derive(
  42    Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
  43)]
  44pub struct EditParserMetrics {
  45    pub tags: usize,
  46    pub mismatched_tags: usize,
  47}
  48
  49#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
  50#[serde(rename_all = "snake_case")]
  51pub enum EditFormat {
  52    /// XML-like tags:
  53    /// <old_text>...</old_text>
  54    /// <new_text>...</new_text>
  55    XmlTags,
  56    /// Diff-fenced format, in which:
  57    /// - Text before the SEARCH marker is ignored
  58    /// - Fences are optional
  59    /// - Line hint is optional.
  60    ///
  61    /// Example:
  62    ///
  63    /// ```diff
  64    /// <<<<<<< SEARCH line=42
  65    /// ...
  66    /// =======
  67    /// ...
  68    /// >>>>>>> REPLACE
  69    /// ```
  70    DiffFenced,
  71}
  72
  73impl FromStr for EditFormat {
  74    type Err = anyhow::Error;
  75
  76    fn from_str(s: &str) -> anyhow::Result<Self> {
  77        match s.to_lowercase().as_str() {
  78            "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
  79            "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
  80            _ => bail!("Unknown EditFormat: {}", s),
  81        }
  82    }
  83}
  84
  85impl EditFormat {
  86    /// Return an optimal edit format for the language model
  87    pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
  88        if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
  89            Ok(EditFormat::DiffFenced)
  90        } else {
  91            Ok(EditFormat::XmlTags)
  92        }
  93    }
  94
  95    /// Return an optimal edit format for the language model,
  96    /// with the ability to override it by setting the
  97    /// `ZED_EDIT_FORMAT` environment variable
  98    #[allow(dead_code)]
  99    pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
 100        let default = EditFormat::from_model(model)?;
 101        std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
 102    }
 103}
 104
 105pub trait EditFormatParser: Send + std::fmt::Debug {
 106    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
 107    fn take_metrics(&mut self) -> EditParserMetrics;
 108}
 109
 110#[derive(Debug)]
 111pub struct XmlEditParser {
 112    state: XmlParserState,
 113    buffer: String,
 114    metrics: EditParserMetrics,
 115}
 116
 117#[derive(Debug, PartialEq)]
 118enum XmlParserState {
 119    Pending,
 120    WithinOldText { start: bool, line_hint: Option<u32> },
 121    AfterOldText,
 122    WithinNewText { start: bool },
 123}
 124
 125#[derive(Debug)]
 126pub struct DiffFencedEditParser {
 127    state: DiffParserState,
 128    buffer: String,
 129    metrics: EditParserMetrics,
 130}
 131
 132#[derive(Debug, PartialEq)]
 133enum DiffParserState {
 134    Pending,
 135    WithinSearch { start: bool, line_hint: Option<u32> },
 136    WithinReplace { start: bool },
 137}
 138
 139/// Main parser that delegates to format-specific parsers
 140pub struct EditParser {
 141    parser: Box<dyn EditFormatParser>,
 142}
 143
 144impl XmlEditParser {
 145    pub fn new() -> Self {
 146        XmlEditParser {
 147            state: XmlParserState::Pending,
 148            buffer: String::new(),
 149            metrics: EditParserMetrics::default(),
 150        }
 151    }
 152
 153    fn find_end_tag(&self) -> Option<Range<usize>> {
 154        let (tag, start_ix) = END_TAGS
 155            .iter()
 156            .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
 157            .min_by_key(|(_, ix)| *ix)?;
 158        Some(start_ix..start_ix + tag.len())
 159    }
 160
 161    fn ends_with_tag_prefix(&self) -> bool {
 162        let mut end_prefixes = END_TAGS
 163            .iter()
 164            .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
 165            .chain(["\n"]);
 166        end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 167    }
 168
 169    fn parse_line_hint(&self, tag: &str) -> Option<u32> {
 170        use std::sync::LazyLock;
 171        static LINE_HINT_REGEX: LazyLock<Regex> =
 172            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 173
 174        LINE_HINT_REGEX
 175            .captures(tag)
 176            .and_then(|caps| caps.get(1))
 177            .and_then(|m| m.as_str().parse::<u32>().ok())
 178    }
 179}
 180
 181impl EditFormatParser for XmlEditParser {
 182    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 183        self.buffer.push_str(chunk);
 184
 185        let mut edit_events = SmallVec::new();
 186        loop {
 187            match &mut self.state {
 188                XmlParserState::Pending => {
 189                    if let Some(start) = self.buffer.find("<old_text") {
 190                        if let Some(tag_end) = self.buffer[start..].find('>') {
 191                            let tag_end = start + tag_end + 1;
 192                            let tag = &self.buffer[start..tag_end];
 193                            let line_hint = self.parse_line_hint(tag);
 194                            self.buffer.drain(..tag_end);
 195                            self.state = XmlParserState::WithinOldText {
 196                                start: true,
 197                                line_hint,
 198                            };
 199                        } else {
 200                            break;
 201                        }
 202                    } else {
 203                        break;
 204                    }
 205                }
 206                XmlParserState::WithinOldText { start, line_hint } => {
 207                    if !self.buffer.is_empty() {
 208                        if *start && self.buffer.starts_with('\n') {
 209                            self.buffer.remove(0);
 210                        }
 211                        *start = false;
 212                    }
 213
 214                    let line_hint = *line_hint;
 215                    if let Some(tag_range) = self.find_end_tag() {
 216                        let mut chunk = self.buffer[..tag_range.start].to_string();
 217                        if chunk.ends_with('\n') {
 218                            chunk.pop();
 219                        }
 220
 221                        self.metrics.tags += 1;
 222                        if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
 223                            self.metrics.mismatched_tags += 1;
 224                        }
 225
 226                        self.buffer.drain(..tag_range.end);
 227                        self.state = XmlParserState::AfterOldText;
 228                        edit_events.push(EditParserEvent::OldTextChunk {
 229                            chunk,
 230                            done: true,
 231                            line_hint,
 232                        });
 233                    } else {
 234                        if !self.ends_with_tag_prefix() {
 235                            edit_events.push(EditParserEvent::OldTextChunk {
 236                                chunk: mem::take(&mut self.buffer),
 237                                done: false,
 238                                line_hint,
 239                            });
 240                        }
 241                        break;
 242                    }
 243                }
 244                XmlParserState::AfterOldText => {
 245                    if let Some(start) = self.buffer.find("<new_text>") {
 246                        self.buffer.drain(..start + "<new_text>".len());
 247                        self.state = XmlParserState::WithinNewText { start: true };
 248                    } else {
 249                        break;
 250                    }
 251                }
 252                XmlParserState::WithinNewText { start } => {
 253                    if !self.buffer.is_empty() {
 254                        if *start && self.buffer.starts_with('\n') {
 255                            self.buffer.remove(0);
 256                        }
 257                        *start = false;
 258                    }
 259
 260                    if let Some(tag_range) = self.find_end_tag() {
 261                        let mut chunk = self.buffer[..tag_range.start].to_string();
 262                        if chunk.ends_with('\n') {
 263                            chunk.pop();
 264                        }
 265
 266                        self.metrics.tags += 1;
 267                        if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
 268                            self.metrics.mismatched_tags += 1;
 269                        }
 270
 271                        self.buffer.drain(..tag_range.end);
 272                        self.state = XmlParserState::Pending;
 273                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 274                    } else {
 275                        if !self.ends_with_tag_prefix() {
 276                            edit_events.push(EditParserEvent::NewTextChunk {
 277                                chunk: mem::take(&mut self.buffer),
 278                                done: false,
 279                            });
 280                        }
 281                        break;
 282                    }
 283                }
 284            }
 285        }
 286        edit_events
 287    }
 288
 289    fn take_metrics(&mut self) -> EditParserMetrics {
 290        std::mem::take(&mut self.metrics)
 291    }
 292}
 293
 294impl DiffFencedEditParser {
 295    pub fn new() -> Self {
 296        DiffFencedEditParser {
 297            state: DiffParserState::Pending,
 298            buffer: String::new(),
 299            metrics: EditParserMetrics::default(),
 300        }
 301    }
 302
 303    fn ends_with_diff_marker_prefix(&self) -> bool {
 304        let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
 305        let mut diff_prefixes = diff_markers
 306            .iter()
 307            .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
 308            .chain(["\n"]);
 309        diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 310    }
 311
 312    fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
 313        use regex::Regex;
 314        use std::sync::LazyLock;
 315        static LINE_HINT_REGEX: LazyLock<Regex> =
 316            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 317
 318        LINE_HINT_REGEX
 319            .captures(search_line)
 320            .and_then(|caps| caps.get(1))
 321            .and_then(|m| m.as_str().parse::<u32>().ok())
 322    }
 323}
 324
 325impl EditFormatParser for DiffFencedEditParser {
 326    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 327        self.buffer.push_str(chunk);
 328
 329        let mut edit_events = SmallVec::new();
 330        loop {
 331            match &mut self.state {
 332                DiffParserState::Pending => {
 333                    if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
 334                        let search_end = diff + SEARCH_MARKER.len();
 335                        if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
 336                            let search_line = &self.buffer[diff..search_end + newline_pos];
 337                            let line_hint = self.parse_line_hint(search_line);
 338                            self.buffer.drain(..search_end + newline_pos + 1);
 339                            self.state = DiffParserState::WithinSearch {
 340                                start: true,
 341                                line_hint,
 342                            };
 343                        } else {
 344                            break;
 345                        }
 346                    } else {
 347                        break;
 348                    }
 349                }
 350                DiffParserState::WithinSearch { start, line_hint } => {
 351                    if !self.buffer.is_empty() {
 352                        if *start && self.buffer.starts_with('\n') {
 353                            self.buffer.remove(0);
 354                        }
 355                        *start = false;
 356                    }
 357
 358                    let line_hint = *line_hint;
 359                    if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
 360                        let mut chunk = self.buffer[..separator_pos].to_string();
 361                        if chunk.ends_with('\n') {
 362                            chunk.pop();
 363                        }
 364
 365                        let separator_end = separator_pos + SEPARATOR_MARKER.len();
 366                        if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
 367                            self.buffer.drain(..separator_end + newline_pos + 1);
 368                            self.state = DiffParserState::WithinReplace { start: true };
 369                            edit_events.push(EditParserEvent::OldTextChunk {
 370                                chunk,
 371                                done: true,
 372                                line_hint,
 373                            });
 374                        } else {
 375                            break;
 376                        }
 377                    } else {
 378                        if !self.ends_with_diff_marker_prefix() {
 379                            edit_events.push(EditParserEvent::OldTextChunk {
 380                                chunk: mem::take(&mut self.buffer),
 381                                done: false,
 382                                line_hint,
 383                            });
 384                        }
 385                        break;
 386                    }
 387                }
 388                DiffParserState::WithinReplace { start } => {
 389                    if !self.buffer.is_empty() {
 390                        if *start && self.buffer.starts_with('\n') {
 391                            self.buffer.remove(0);
 392                        }
 393                        *start = false;
 394                    }
 395
 396                    if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
 397                        let mut chunk = self.buffer[..replace_pos].to_string();
 398                        if chunk.ends_with('\n') {
 399                            chunk.pop();
 400                        }
 401
 402                        self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
 403                        if let Some(newline_pos) = self.buffer.find('\n') {
 404                            self.buffer.drain(..newline_pos + 1);
 405                        } else {
 406                            self.buffer.clear();
 407                        }
 408
 409                        self.state = DiffParserState::Pending;
 410                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 411                    } else {
 412                        if !self.ends_with_diff_marker_prefix() {
 413                            edit_events.push(EditParserEvent::NewTextChunk {
 414                                chunk: mem::take(&mut self.buffer),
 415                                done: false,
 416                            });
 417                        }
 418                        break;
 419                    }
 420                }
 421            }
 422        }
 423        edit_events
 424    }
 425
 426    fn take_metrics(&mut self) -> EditParserMetrics {
 427        std::mem::take(&mut self.metrics)
 428    }
 429}
 430
 431impl EditParser {
 432    pub fn new(format: EditFormat) -> Self {
 433        let parser: Box<dyn EditFormatParser> = match format {
 434            EditFormat::XmlTags => Box::new(XmlEditParser::new()),
 435            EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
 436        };
 437        EditParser { parser }
 438    }
 439
 440    pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 441        self.parser.push(chunk)
 442    }
 443
 444    pub fn finish(mut self) -> EditParserMetrics {
 445        self.parser.take_metrics()
 446    }
 447}
 448
 449#[cfg(test)]
 450mod tests {
 451    use super::*;
 452    use indoc::indoc;
 453    use rand::prelude::*;
 454    use std::cmp;
 455
 456    #[gpui::test(iterations = 1000)]
 457    fn test_xml_single_edit(mut rng: StdRng) {
 458        let mut parser = EditParser::new(EditFormat::XmlTags);
 459        assert_eq!(
 460            parse_random_chunks(
 461                "<old_text>original</old_text><new_text>updated</new_text>",
 462                &mut parser,
 463                &mut rng
 464            ),
 465            vec![Edit {
 466                old_text: "original".to_string(),
 467                new_text: "updated".to_string(),
 468                line_hint: None,
 469            }]
 470        );
 471        assert_eq!(
 472            parser.finish(),
 473            EditParserMetrics {
 474                tags: 2,
 475                mismatched_tags: 0
 476            }
 477        );
 478    }
 479
 480    #[gpui::test(iterations = 1000)]
 481    fn test_xml_multiple_edits(mut rng: StdRng) {
 482        let mut parser = EditParser::new(EditFormat::XmlTags);
 483        assert_eq!(
 484            parse_random_chunks(
 485                indoc! {"
 486                    <old_text>
 487                    first old
 488                    </old_text><new_text>first new</new_text>
 489                    <old_text>second old</old_text><new_text>
 490                    second new
 491                    </new_text>
 492                "},
 493                &mut parser,
 494                &mut rng
 495            ),
 496            vec![
 497                Edit {
 498                    old_text: "first old".to_string(),
 499                    new_text: "first new".to_string(),
 500                    line_hint: None,
 501                },
 502                Edit {
 503                    old_text: "second old".to_string(),
 504                    new_text: "second new".to_string(),
 505                    line_hint: None,
 506                },
 507            ]
 508        );
 509        assert_eq!(
 510            parser.finish(),
 511            EditParserMetrics {
 512                tags: 4,
 513                mismatched_tags: 0
 514            }
 515        );
 516    }
 517
 518    #[gpui::test(iterations = 1000)]
 519    fn test_xml_edits_with_extra_text(mut rng: StdRng) {
 520        let mut parser = EditParser::new(EditFormat::XmlTags);
 521        assert_eq!(
 522            parse_random_chunks(
 523                indoc! {"
 524                    ignore this <old_text>
 525                    content</old_text>extra stuff<new_text>updated content</new_text>trailing data
 526                    more text <old_text>second item
 527                    </old_text>middle text<new_text>modified second item</new_text>end
 528                    <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
 529                "},
 530                &mut parser,
 531                &mut rng
 532            ),
 533            vec![
 534                Edit {
 535                    old_text: "content".to_string(),
 536                    new_text: "updated content".to_string(),
 537                    line_hint: None,
 538                },
 539                Edit {
 540                    old_text: "second item".to_string(),
 541                    new_text: "modified second item".to_string(),
 542                    line_hint: None,
 543                },
 544                Edit {
 545                    old_text: "third case".to_string(),
 546                    new_text: "improved third case".to_string(),
 547                    line_hint: None,
 548                },
 549            ]
 550        );
 551        assert_eq!(
 552            parser.finish(),
 553            EditParserMetrics {
 554                tags: 6,
 555                mismatched_tags: 0
 556            }
 557        );
 558    }
 559
 560    #[gpui::test(iterations = 1000)]
 561    fn test_xml_edits_with_closing_parameter_invoke(mut rng: StdRng) {
 562        // This case is a regression with Claude Sonnet 4.5.
 563        // Sometimes Sonnet thinks that it's doing a tool call
 564        // and closes its response with '</parameter></invoke>'
 565        // instead of properly closing </new_text>
 566
 567        let mut parser = EditParser::new(EditFormat::XmlTags);
 568        assert_eq!(
 569            parse_random_chunks(
 570                indoc! {"
 571                    <old_text>some text</old_text><new_text>updated text</parameter></invoke>
 572                    <old_text>more text</old_text><new_text>upd</parameter></new_text>
 573                "},
 574                &mut parser,
 575                &mut rng
 576            ),
 577            vec![
 578                Edit {
 579                    old_text: "some text".to_string(),
 580                    new_text: "updated text".to_string(),
 581                    line_hint: None,
 582                },
 583                Edit {
 584                    old_text: "more text".to_string(),
 585                    new_text: "upd".to_string(),
 586                    line_hint: None,
 587                },
 588            ]
 589        );
 590        assert_eq!(
 591            parser.finish(),
 592            EditParserMetrics {
 593                tags: 4,
 594                mismatched_tags: 2
 595            }
 596        );
 597    }
 598
 599    #[gpui::test(iterations = 1000)]
 600    fn test_xml_nested_tags(mut rng: StdRng) {
 601        let mut parser = EditParser::new(EditFormat::XmlTags);
 602        assert_eq!(
 603            parse_random_chunks(
 604                "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
 605                &mut parser,
 606                &mut rng
 607            ),
 608            vec![Edit {
 609                old_text: "code with <tag>nested</tag> elements".to_string(),
 610                new_text: "new <code>content</code>".to_string(),
 611                line_hint: None,
 612            }]
 613        );
 614        assert_eq!(
 615            parser.finish(),
 616            EditParserMetrics {
 617                tags: 2,
 618                mismatched_tags: 0
 619            }
 620        );
 621    }
 622
 623    #[gpui::test(iterations = 1000)]
 624    fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
 625        let mut parser = EditParser::new(EditFormat::XmlTags);
 626        assert_eq!(
 627            parse_random_chunks(
 628                "<old_text></old_text><new_text></new_text>",
 629                &mut parser,
 630                &mut rng
 631            ),
 632            vec![Edit {
 633                old_text: "".to_string(),
 634                new_text: "".to_string(),
 635                line_hint: None,
 636            }]
 637        );
 638        assert_eq!(
 639            parser.finish(),
 640            EditParserMetrics {
 641                tags: 2,
 642                mismatched_tags: 0
 643            }
 644        );
 645    }
 646
 647    #[gpui::test(iterations = 100)]
 648    fn test_xml_multiline_content(mut rng: StdRng) {
 649        let mut parser = EditParser::new(EditFormat::XmlTags);
 650        assert_eq!(
 651            parse_random_chunks(
 652                "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
 653                &mut parser,
 654                &mut rng
 655            ),
 656            vec![Edit {
 657                old_text: "line1\nline2\nline3".to_string(),
 658                new_text: "line1\nmodified line2\nline3".to_string(),
 659                line_hint: None,
 660            }]
 661        );
 662        assert_eq!(
 663            parser.finish(),
 664            EditParserMetrics {
 665                tags: 2,
 666                mismatched_tags: 0
 667            }
 668        );
 669    }
 670
 671    #[gpui::test(iterations = 1000)]
 672    fn test_xml_mismatched_tags(mut rng: StdRng) {
 673        let mut parser = EditParser::new(EditFormat::XmlTags);
 674        assert_eq!(
 675            parse_random_chunks(
 676                // Reduced from an actual Sonnet 3.7 output
 677                indoc! {"
 678                    <old_text>
 679                    a
 680                    b
 681                    c
 682                    </new_text>
 683                    <new_text>
 684                    a
 685                    B
 686                    c
 687                    </old_text>
 688                    <old_text>
 689                    d
 690                    e
 691                    f
 692                    </new_text>
 693                    <new_text>
 694                    D
 695                    e
 696                    F
 697                    </old_text>
 698                "},
 699                &mut parser,
 700                &mut rng
 701            ),
 702            vec![
 703                Edit {
 704                    old_text: "a\nb\nc".to_string(),
 705                    new_text: "a\nB\nc".to_string(),
 706                    line_hint: None,
 707                },
 708                Edit {
 709                    old_text: "d\ne\nf".to_string(),
 710                    new_text: "D\ne\nF".to_string(),
 711                    line_hint: None,
 712                }
 713            ]
 714        );
 715        assert_eq!(
 716            parser.finish(),
 717            EditParserMetrics {
 718                tags: 4,
 719                mismatched_tags: 4
 720            }
 721        );
 722
 723        let mut parser = EditParser::new(EditFormat::XmlTags);
 724        assert_eq!(
 725            parse_random_chunks(
 726                // Reduced from an actual Opus 4 output
 727                indoc! {"
 728                    <edits>
 729                    <old_text>
 730                    Lorem
 731                    </old_text>
 732                    <new_text>
 733                    LOREM
 734                    </edits>
 735                "},
 736                &mut parser,
 737                &mut rng
 738            ),
 739            vec![Edit {
 740                old_text: "Lorem".to_string(),
 741                new_text: "LOREM".to_string(),
 742                line_hint: None,
 743            },]
 744        );
 745        assert_eq!(
 746            parser.finish(),
 747            EditParserMetrics {
 748                tags: 2,
 749                mismatched_tags: 1
 750            }
 751        );
 752    }
 753
 754    #[gpui::test(iterations = 1000)]
 755    fn test_diff_fenced_single_edit(mut rng: StdRng) {
 756        let mut parser = EditParser::new(EditFormat::DiffFenced);
 757        assert_eq!(
 758            parse_random_chunks(
 759                indoc! {"
 760                    <<<<<<< SEARCH
 761                    original text
 762                    =======
 763                    updated text
 764                    >>>>>>> REPLACE
 765                "},
 766                &mut parser,
 767                &mut rng
 768            ),
 769            vec![Edit {
 770                old_text: "original text".to_string(),
 771                new_text: "updated text".to_string(),
 772                line_hint: None,
 773            }]
 774        );
 775        assert_eq!(
 776            parser.finish(),
 777            EditParserMetrics {
 778                tags: 0,
 779                mismatched_tags: 0
 780            }
 781        );
 782    }
 783
 784    #[gpui::test(iterations = 100)]
 785    fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
 786        let mut parser = EditParser::new(EditFormat::DiffFenced);
 787        assert_eq!(
 788            parse_random_chunks(
 789                indoc! {"
 790                    ```diff
 791                    <<<<<<< SEARCH
 792                    from flask import Flask
 793                    =======
 794                    import math
 795                    from flask import Flask
 796                    >>>>>>> REPLACE
 797                    ```
 798                "},
 799                &mut parser,
 800                &mut rng
 801            ),
 802            vec![Edit {
 803                old_text: "from flask import Flask".to_string(),
 804                new_text: "import math\nfrom flask import Flask".to_string(),
 805                line_hint: None,
 806            }]
 807        );
 808        assert_eq!(
 809            parser.finish(),
 810            EditParserMetrics {
 811                tags: 0,
 812                mismatched_tags: 0
 813            }
 814        );
 815    }
 816
 817    #[gpui::test(iterations = 100)]
 818    fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
 819        let mut parser = EditParser::new(EditFormat::DiffFenced);
 820        assert_eq!(
 821            parse_random_chunks(
 822                indoc! {"
 823                    <<<<<<< SEARCH
 824                    first old
 825                    =======
 826                    first new
 827                    >>>>>>> REPLACE
 828
 829                    <<<<<<< SEARCH
 830                    second old
 831                    =======
 832                    second new
 833                    >>>>>>> REPLACE
 834                "},
 835                &mut parser,
 836                &mut rng
 837            ),
 838            vec![
 839                Edit {
 840                    old_text: "first old".to_string(),
 841                    new_text: "first new".to_string(),
 842                    line_hint: None,
 843                },
 844                Edit {
 845                    old_text: "second old".to_string(),
 846                    new_text: "second new".to_string(),
 847                    line_hint: None,
 848                },
 849            ]
 850        );
 851        assert_eq!(
 852            parser.finish(),
 853            EditParserMetrics {
 854                tags: 0,
 855                mismatched_tags: 0
 856            }
 857        );
 858    }
 859
 860    #[gpui::test(iterations = 100)]
 861    fn test_mixed_formats(mut rng: StdRng) {
 862        // Test XML format parser only parses XML tags
 863        let mut xml_parser = EditParser::new(EditFormat::XmlTags);
 864        assert_eq!(
 865            parse_random_chunks(
 866                indoc! {"
 867                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 868
 869                    <<<<<<< SEARCH
 870                    diff style old
 871                    =======
 872                    diff style new
 873                    >>>>>>> REPLACE
 874                "},
 875                &mut xml_parser,
 876                &mut rng
 877            ),
 878            vec![Edit {
 879                old_text: "xml style old".to_string(),
 880                new_text: "xml style new".to_string(),
 881                line_hint: None,
 882            },]
 883        );
 884        assert_eq!(
 885            xml_parser.finish(),
 886            EditParserMetrics {
 887                tags: 2,
 888                mismatched_tags: 0
 889            }
 890        );
 891
 892        // Test diff-fenced format parser only parses diff markers
 893        let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
 894        assert_eq!(
 895            parse_random_chunks(
 896                indoc! {"
 897                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 898
 899                    <<<<<<< SEARCH
 900                    diff style old
 901                    =======
 902                    diff style new
 903                    >>>>>>> REPLACE
 904                "},
 905                &mut diff_parser,
 906                &mut rng
 907            ),
 908            vec![Edit {
 909                old_text: "diff style old".to_string(),
 910                new_text: "diff style new".to_string(),
 911                line_hint: None,
 912            },]
 913        );
 914        assert_eq!(
 915            diff_parser.finish(),
 916            EditParserMetrics {
 917                tags: 0,
 918                mismatched_tags: 0
 919            }
 920        );
 921    }
 922
 923    #[gpui::test(iterations = 100)]
 924    fn test_diff_fenced_empty_sections(mut rng: StdRng) {
 925        let mut parser = EditParser::new(EditFormat::DiffFenced);
 926        assert_eq!(
 927            parse_random_chunks(
 928                indoc! {"
 929                <<<<<<< SEARCH
 930                =======
 931                >>>>>>> REPLACE
 932            "},
 933                &mut parser,
 934                &mut rng
 935            ),
 936            vec![Edit {
 937                old_text: "".to_string(),
 938                new_text: "".to_string(),
 939                line_hint: None,
 940            }]
 941        );
 942        assert_eq!(
 943            parser.finish(),
 944            EditParserMetrics {
 945                tags: 0,
 946                mismatched_tags: 0
 947            }
 948        );
 949    }
 950
 951    #[gpui::test(iterations = 100)]
 952    fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
 953        let mut parser = EditParser::new(EditFormat::DiffFenced);
 954        let edits = parse_random_chunks(
 955            indoc! {"
 956                <<<<<<< SEARCH line=42
 957                original text
 958                =======
 959                updated text
 960                >>>>>>> REPLACE
 961            "},
 962            &mut parser,
 963            &mut rng,
 964        );
 965        assert_eq!(
 966            edits,
 967            vec![Edit {
 968                old_text: "original text".to_string(),
 969                line_hint: Some(42),
 970                new_text: "updated text".to_string(),
 971            }]
 972        );
 973    }
 974    #[gpui::test(iterations = 100)]
 975    fn test_xml_line_hints(mut rng: StdRng) {
 976        // Line hint is a single quoted line number
 977        let mut parser = EditParser::new(EditFormat::XmlTags);
 978
 979        let edits = parse_random_chunks(
 980            r#"
 981                    <old_text line="23">original code</old_text>
 982                    <new_text>updated code</new_text>"#,
 983            &mut parser,
 984            &mut rng,
 985        );
 986
 987        assert_eq!(edits.len(), 1);
 988        assert_eq!(edits[0].old_text, "original code");
 989        assert_eq!(edits[0].line_hint, Some(23));
 990        assert_eq!(edits[0].new_text, "updated code");
 991
 992        // Line hint is a single unquoted line number
 993        let mut parser = EditParser::new(EditFormat::XmlTags);
 994
 995        let edits = parse_random_chunks(
 996            r#"
 997                    <old_text line=45>original code</old_text>
 998                    <new_text>updated code</new_text>"#,
 999            &mut parser,
1000            &mut rng,
1001        );
1002
1003        assert_eq!(edits.len(), 1);
1004        assert_eq!(edits[0].old_text, "original code");
1005        assert_eq!(edits[0].line_hint, Some(45));
1006        assert_eq!(edits[0].new_text, "updated code");
1007
1008        // Line hint is a range
1009        let mut parser = EditParser::new(EditFormat::XmlTags);
1010
1011        let edits = parse_random_chunks(
1012            r#"
1013            <old_text line="23:50">original code</old_text>
1014            <new_text>updated code</new_text>"#,
1015            &mut parser,
1016            &mut rng,
1017        );
1018
1019        assert_eq!(edits.len(), 1);
1020        assert_eq!(edits[0].old_text, "original code");
1021        assert_eq!(edits[0].line_hint, Some(23));
1022        assert_eq!(edits[0].new_text, "updated code");
1023
1024        // No line hint
1025        let mut parser = EditParser::new(EditFormat::XmlTags);
1026        let edits = parse_random_chunks(
1027            r#"
1028            <old_text>old</old_text>
1029            <new_text>new</new_text>"#,
1030            &mut parser,
1031            &mut rng,
1032        );
1033
1034        assert_eq!(edits.len(), 1);
1035        assert_eq!(edits[0].old_text, "old");
1036        assert_eq!(edits[0].line_hint, None);
1037        assert_eq!(edits[0].new_text, "new");
1038    }
1039
1040    #[derive(Default, Debug, PartialEq, Eq)]
1041    struct Edit {
1042        old_text: String,
1043        new_text: String,
1044        line_hint: Option<u32>,
1045    }
1046
1047    fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
1048        let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1049        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1050        chunk_indices.sort();
1051        chunk_indices.push(input.len());
1052
1053        let mut old_text = Some(String::new());
1054        let mut new_text = None;
1055        let mut pending_edit = Edit::default();
1056        let mut edits = Vec::new();
1057        let mut last_ix = 0;
1058        for chunk_ix in chunk_indices {
1059            for event in parser.push(&input[last_ix..chunk_ix]) {
1060                match event {
1061                    EditParserEvent::OldTextChunk {
1062                        chunk,
1063                        done,
1064                        line_hint,
1065                    } => {
1066                        old_text.as_mut().unwrap().push_str(&chunk);
1067                        if done {
1068                            pending_edit.old_text = old_text.take().unwrap();
1069                            pending_edit.line_hint = line_hint;
1070                            new_text = Some(String::new());
1071                        }
1072                    }
1073                    EditParserEvent::NewTextChunk { chunk, done } => {
1074                        new_text.as_mut().unwrap().push_str(&chunk);
1075                        if done {
1076                            pending_edit.new_text = new_text.take().unwrap();
1077                            edits.push(pending_edit);
1078                            pending_edit = Edit::default();
1079                            old_text = Some(String::new());
1080                        }
1081                    }
1082                }
1083            }
1084            last_ix = chunk_ix;
1085        }
1086
1087        if new_text.is_some() {
1088            pending_edit.new_text = new_text.take().unwrap();
1089            edits.push(pending_edit);
1090        }
1091
1092        edits
1093    }
1094}