edit_parser.rs

   1use anyhow::bail;
   2use derive_more::{Add, AddAssign};
   3use language_model::LanguageModel;
   4use regex::Regex;
   5use schemars::JsonSchema;
   6use serde::{Deserialize, Serialize};
   7use smallvec::SmallVec;
   8use std::{mem, ops::Range, str::FromStr, sync::Arc};
   9
  10const OLD_TEXT_END_TAG: &str = "</old_text>";
  11const NEW_TEXT_END_TAG: &str = "</new_text>";
  12const EDITS_END_TAG: &str = "</edits>";
  13const SEARCH_MARKER: &str = "<<<<<<< SEARCH";
  14const SEPARATOR_MARKER: &str = "=======";
  15const REPLACE_MARKER: &str = ">>>>>>> REPLACE";
  16const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG];
  17
  18#[derive(Debug)]
  19pub enum EditParserEvent {
  20    OldTextChunk {
  21        chunk: String,
  22        done: bool,
  23        line_hint: Option<u32>,
  24    },
  25    NewTextChunk {
  26        chunk: String,
  27        done: bool,
  28    },
  29}
  30
  31#[derive(
  32    Clone, Debug, Default, PartialEq, Eq, Add, AddAssign, Serialize, Deserialize, JsonSchema,
  33)]
  34pub struct EditParserMetrics {
  35    pub tags: usize,
  36    pub mismatched_tags: usize,
  37}
  38
  39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
  40#[serde(rename_all = "snake_case")]
  41pub enum EditFormat {
  42    /// XML-like tags:
  43    /// <old_text>...</old_text>
  44    /// <new_text>...</new_text>
  45    XmlTags,
  46    /// Diff-fenced format, in which:
  47    /// - Text before the SEARCH marker is ignored
  48    /// - Fences are optional
  49    /// - Line hint is optional.
  50    ///
  51    /// Example:
  52    ///
  53    /// ```diff
  54    /// <<<<<<< SEARCH line=42
  55    /// ...
  56    /// =======
  57    /// ...
  58    /// >>>>>>> REPLACE
  59    /// ```
  60    DiffFenced,
  61}
  62
  63impl FromStr for EditFormat {
  64    type Err = anyhow::Error;
  65
  66    fn from_str(s: &str) -> anyhow::Result<Self> {
  67        match s.to_lowercase().as_str() {
  68            "xml_tags" | "xml" => Ok(EditFormat::XmlTags),
  69            "diff_fenced" | "diff-fenced" | "diff" => Ok(EditFormat::DiffFenced),
  70            _ => bail!("Unknown EditFormat: {}", s),
  71        }
  72    }
  73}
  74
  75impl EditFormat {
  76    /// Return an optimal edit format for the language model
  77    pub fn from_model(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
  78        if model.provider_id().0 == "google" || model.id().0.to_lowercase().contains("gemini") {
  79            Ok(EditFormat::DiffFenced)
  80        } else {
  81            Ok(EditFormat::XmlTags)
  82        }
  83    }
  84
  85    /// Return an optimal edit format for the language model,
  86    /// with the ability to override it by setting the
  87    /// `ZED_EDIT_FORMAT` environment variable
  88    #[allow(dead_code)]
  89    pub fn from_env(model: Arc<dyn LanguageModel>) -> anyhow::Result<Self> {
  90        let default = EditFormat::from_model(model)?;
  91        std::env::var("ZED_EDIT_FORMAT").map_or(Ok(default), |s| EditFormat::from_str(&s))
  92    }
  93}
  94
  95pub trait EditFormatParser: Send + std::fmt::Debug {
  96    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]>;
  97    fn take_metrics(&mut self) -> EditParserMetrics;
  98}
  99
 100#[derive(Debug)]
 101pub struct XmlEditParser {
 102    state: XmlParserState,
 103    buffer: String,
 104    metrics: EditParserMetrics,
 105}
 106
 107#[derive(Debug, PartialEq)]
 108enum XmlParserState {
 109    Pending,
 110    WithinOldText { start: bool, line_hint: Option<u32> },
 111    AfterOldText,
 112    WithinNewText { start: bool },
 113}
 114
 115#[derive(Debug)]
 116pub struct DiffFencedEditParser {
 117    state: DiffParserState,
 118    buffer: String,
 119    metrics: EditParserMetrics,
 120}
 121
 122#[derive(Debug, PartialEq)]
 123enum DiffParserState {
 124    Pending,
 125    WithinSearch { start: bool, line_hint: Option<u32> },
 126    WithinReplace { start: bool },
 127}
 128
 129/// Main parser that delegates to format-specific parsers
 130pub struct EditParser {
 131    parser: Box<dyn EditFormatParser>,
 132}
 133
 134impl XmlEditParser {
 135    pub fn new() -> Self {
 136        XmlEditParser {
 137            state: XmlParserState::Pending,
 138            buffer: String::new(),
 139            metrics: EditParserMetrics::default(),
 140        }
 141    }
 142
 143    fn find_end_tag(&self) -> Option<Range<usize>> {
 144        let (tag, start_ix) = END_TAGS
 145            .iter()
 146            .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
 147            .min_by_key(|(_, ix)| *ix)?;
 148        Some(start_ix..start_ix + tag.len())
 149    }
 150
 151    fn ends_with_tag_prefix(&self) -> bool {
 152        let mut end_prefixes = END_TAGS
 153            .iter()
 154            .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
 155            .chain(["\n"]);
 156        end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 157    }
 158
 159    fn parse_line_hint(&self, tag: &str) -> Option<u32> {
 160        use std::sync::LazyLock;
 161        static LINE_HINT_REGEX: LazyLock<Regex> =
 162            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 163
 164        LINE_HINT_REGEX
 165            .captures(tag)
 166            .and_then(|caps| caps.get(1))
 167            .and_then(|m| m.as_str().parse::<u32>().ok())
 168    }
 169}
 170
 171impl EditFormatParser for XmlEditParser {
 172    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 173        self.buffer.push_str(chunk);
 174
 175        let mut edit_events = SmallVec::new();
 176        loop {
 177            match &mut self.state {
 178                XmlParserState::Pending => {
 179                    if let Some(start) = self.buffer.find("<old_text") {
 180                        if let Some(tag_end) = self.buffer[start..].find('>') {
 181                            let tag_end = start + tag_end + 1;
 182                            let tag = &self.buffer[start..tag_end];
 183                            let line_hint = self.parse_line_hint(tag);
 184                            self.buffer.drain(..tag_end);
 185                            self.state = XmlParserState::WithinOldText {
 186                                start: true,
 187                                line_hint,
 188                            };
 189                        } else {
 190                            break;
 191                        }
 192                    } else {
 193                        break;
 194                    }
 195                }
 196                XmlParserState::WithinOldText { start, line_hint } => {
 197                    if !self.buffer.is_empty() {
 198                        if *start && self.buffer.starts_with('\n') {
 199                            self.buffer.remove(0);
 200                        }
 201                        *start = false;
 202                    }
 203
 204                    let line_hint = *line_hint;
 205                    if let Some(tag_range) = self.find_end_tag() {
 206                        let mut chunk = self.buffer[..tag_range.start].to_string();
 207                        if chunk.ends_with('\n') {
 208                            chunk.pop();
 209                        }
 210
 211                        self.metrics.tags += 1;
 212                        if &self.buffer[tag_range.clone()] != OLD_TEXT_END_TAG {
 213                            self.metrics.mismatched_tags += 1;
 214                        }
 215
 216                        self.buffer.drain(..tag_range.end);
 217                        self.state = XmlParserState::AfterOldText;
 218                        edit_events.push(EditParserEvent::OldTextChunk {
 219                            chunk,
 220                            done: true,
 221                            line_hint,
 222                        });
 223                    } else {
 224                        if !self.ends_with_tag_prefix() {
 225                            edit_events.push(EditParserEvent::OldTextChunk {
 226                                chunk: mem::take(&mut self.buffer),
 227                                done: false,
 228                                line_hint,
 229                            });
 230                        }
 231                        break;
 232                    }
 233                }
 234                XmlParserState::AfterOldText => {
 235                    if let Some(start) = self.buffer.find("<new_text>") {
 236                        self.buffer.drain(..start + "<new_text>".len());
 237                        self.state = XmlParserState::WithinNewText { start: true };
 238                    } else {
 239                        break;
 240                    }
 241                }
 242                XmlParserState::WithinNewText { start } => {
 243                    if !self.buffer.is_empty() {
 244                        if *start && self.buffer.starts_with('\n') {
 245                            self.buffer.remove(0);
 246                        }
 247                        *start = false;
 248                    }
 249
 250                    if let Some(tag_range) = self.find_end_tag() {
 251                        let mut chunk = self.buffer[..tag_range.start].to_string();
 252                        if chunk.ends_with('\n') {
 253                            chunk.pop();
 254                        }
 255
 256                        self.metrics.tags += 1;
 257                        if &self.buffer[tag_range.clone()] != NEW_TEXT_END_TAG {
 258                            self.metrics.mismatched_tags += 1;
 259                        }
 260
 261                        self.buffer.drain(..tag_range.end);
 262                        self.state = XmlParserState::Pending;
 263                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 264                    } else {
 265                        if !self.ends_with_tag_prefix() {
 266                            edit_events.push(EditParserEvent::NewTextChunk {
 267                                chunk: mem::take(&mut self.buffer),
 268                                done: false,
 269                            });
 270                        }
 271                        break;
 272                    }
 273                }
 274            }
 275        }
 276        edit_events
 277    }
 278
 279    fn take_metrics(&mut self) -> EditParserMetrics {
 280        std::mem::take(&mut self.metrics)
 281    }
 282}
 283
 284impl DiffFencedEditParser {
 285    pub fn new() -> Self {
 286        DiffFencedEditParser {
 287            state: DiffParserState::Pending,
 288            buffer: String::new(),
 289            metrics: EditParserMetrics::default(),
 290        }
 291    }
 292
 293    fn ends_with_diff_marker_prefix(&self) -> bool {
 294        let diff_markers = [SEPARATOR_MARKER, REPLACE_MARKER];
 295        let mut diff_prefixes = diff_markers
 296            .iter()
 297            .flat_map(|marker| (1..marker.len()).map(move |i| &marker[..i]))
 298            .chain(["\n"]);
 299        diff_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
 300    }
 301
 302    fn parse_line_hint(&self, search_line: &str) -> Option<u32> {
 303        use regex::Regex;
 304        use std::sync::LazyLock;
 305        static LINE_HINT_REGEX: LazyLock<Regex> =
 306            LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());
 307
 308        LINE_HINT_REGEX
 309            .captures(search_line)
 310            .and_then(|caps| caps.get(1))
 311            .and_then(|m| m.as_str().parse::<u32>().ok())
 312    }
 313}
 314
 315impl EditFormatParser for DiffFencedEditParser {
 316    fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 317        self.buffer.push_str(chunk);
 318
 319        let mut edit_events = SmallVec::new();
 320        loop {
 321            match &mut self.state {
 322                DiffParserState::Pending => {
 323                    if let Some(diff) = self.buffer.find(SEARCH_MARKER) {
 324                        let search_end = diff + SEARCH_MARKER.len();
 325                        if let Some(newline_pos) = self.buffer[search_end..].find('\n') {
 326                            let search_line = &self.buffer[diff..search_end + newline_pos];
 327                            let line_hint = self.parse_line_hint(search_line);
 328                            self.buffer.drain(..search_end + newline_pos + 1);
 329                            self.state = DiffParserState::WithinSearch {
 330                                start: true,
 331                                line_hint,
 332                            };
 333                        } else {
 334                            break;
 335                        }
 336                    } else {
 337                        break;
 338                    }
 339                }
 340                DiffParserState::WithinSearch { start, line_hint } => {
 341                    if !self.buffer.is_empty() {
 342                        if *start && self.buffer.starts_with('\n') {
 343                            self.buffer.remove(0);
 344                        }
 345                        *start = false;
 346                    }
 347
 348                    let line_hint = *line_hint;
 349                    if let Some(separator_pos) = self.buffer.find(SEPARATOR_MARKER) {
 350                        let mut chunk = self.buffer[..separator_pos].to_string();
 351                        if chunk.ends_with('\n') {
 352                            chunk.pop();
 353                        }
 354
 355                        let separator_end = separator_pos + SEPARATOR_MARKER.len();
 356                        if let Some(newline_pos) = self.buffer[separator_end..].find('\n') {
 357                            self.buffer.drain(..separator_end + newline_pos + 1);
 358                            self.state = DiffParserState::WithinReplace { start: true };
 359                            edit_events.push(EditParserEvent::OldTextChunk {
 360                                chunk,
 361                                done: true,
 362                                line_hint,
 363                            });
 364                        } else {
 365                            break;
 366                        }
 367                    } else {
 368                        if !self.ends_with_diff_marker_prefix() {
 369                            edit_events.push(EditParserEvent::OldTextChunk {
 370                                chunk: mem::take(&mut self.buffer),
 371                                done: false,
 372                                line_hint,
 373                            });
 374                        }
 375                        break;
 376                    }
 377                }
 378                DiffParserState::WithinReplace { start } => {
 379                    if !self.buffer.is_empty() {
 380                        if *start && self.buffer.starts_with('\n') {
 381                            self.buffer.remove(0);
 382                        }
 383                        *start = false;
 384                    }
 385
 386                    if let Some(replace_pos) = self.buffer.find(REPLACE_MARKER) {
 387                        let mut chunk = self.buffer[..replace_pos].to_string();
 388                        if chunk.ends_with('\n') {
 389                            chunk.pop();
 390                        }
 391
 392                        self.buffer.drain(..replace_pos + REPLACE_MARKER.len());
 393                        if let Some(newline_pos) = self.buffer.find('\n') {
 394                            self.buffer.drain(..newline_pos + 1);
 395                        } else {
 396                            self.buffer.clear();
 397                        }
 398
 399                        self.state = DiffParserState::Pending;
 400                        edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
 401                    } else {
 402                        if !self.ends_with_diff_marker_prefix() {
 403                            edit_events.push(EditParserEvent::NewTextChunk {
 404                                chunk: mem::take(&mut self.buffer),
 405                                done: false,
 406                            });
 407                        }
 408                        break;
 409                    }
 410                }
 411            }
 412        }
 413        edit_events
 414    }
 415
 416    fn take_metrics(&mut self) -> EditParserMetrics {
 417        std::mem::take(&mut self.metrics)
 418    }
 419}
 420
 421impl EditParser {
 422    pub fn new(format: EditFormat) -> Self {
 423        let parser: Box<dyn EditFormatParser> = match format {
 424            EditFormat::XmlTags => Box::new(XmlEditParser::new()),
 425            EditFormat::DiffFenced => Box::new(DiffFencedEditParser::new()),
 426        };
 427        EditParser { parser }
 428    }
 429
 430    pub fn push(&mut self, chunk: &str) -> SmallVec<[EditParserEvent; 1]> {
 431        self.parser.push(chunk)
 432    }
 433
 434    pub fn finish(mut self) -> EditParserMetrics {
 435        self.parser.take_metrics()
 436    }
 437}
 438
 439#[cfg(test)]
 440mod tests {
 441    use super::*;
 442    use indoc::indoc;
 443    use rand::prelude::*;
 444    use std::cmp;
 445
 446    #[gpui::test(iterations = 1000)]
 447    fn test_xml_single_edit(mut rng: StdRng) {
 448        let mut parser = EditParser::new(EditFormat::XmlTags);
 449        assert_eq!(
 450            parse_random_chunks(
 451                "<old_text>original</old_text><new_text>updated</new_text>",
 452                &mut parser,
 453                &mut rng
 454            ),
 455            vec![Edit {
 456                old_text: "original".to_string(),
 457                new_text: "updated".to_string(),
 458                line_hint: None,
 459            }]
 460        );
 461        assert_eq!(
 462            parser.finish(),
 463            EditParserMetrics {
 464                tags: 2,
 465                mismatched_tags: 0
 466            }
 467        );
 468    }
 469
 470    #[gpui::test(iterations = 1000)]
 471    fn test_xml_multiple_edits(mut rng: StdRng) {
 472        let mut parser = EditParser::new(EditFormat::XmlTags);
 473        assert_eq!(
 474            parse_random_chunks(
 475                indoc! {"
 476                    <old_text>
 477                    first old
 478                    </old_text><new_text>first new</new_text>
 479                    <old_text>second old</old_text><new_text>
 480                    second new
 481                    </new_text>
 482                "},
 483                &mut parser,
 484                &mut rng
 485            ),
 486            vec![
 487                Edit {
 488                    old_text: "first old".to_string(),
 489                    new_text: "first new".to_string(),
 490                    line_hint: None,
 491                },
 492                Edit {
 493                    old_text: "second old".to_string(),
 494                    new_text: "second new".to_string(),
 495                    line_hint: None,
 496                },
 497            ]
 498        );
 499        assert_eq!(
 500            parser.finish(),
 501            EditParserMetrics {
 502                tags: 4,
 503                mismatched_tags: 0
 504            }
 505        );
 506    }
 507
 508    #[gpui::test(iterations = 1000)]
 509    fn test_xml_edits_with_extra_text(mut rng: StdRng) {
 510        let mut parser = EditParser::new(EditFormat::XmlTags);
 511        assert_eq!(
 512            parse_random_chunks(
 513                indoc! {"
 514                    ignore this <old_text>
 515                    content</old_text>extra stuff<new_text>updated content</new_text>trailing data
 516                    more text <old_text>second item
 517                    </old_text>middle text<new_text>modified second item</new_text>end
 518                    <old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
 519                "},
 520                &mut parser,
 521                &mut rng
 522            ),
 523            vec![
 524                Edit {
 525                    old_text: "content".to_string(),
 526                    new_text: "updated content".to_string(),
 527                    line_hint: None,
 528                },
 529                Edit {
 530                    old_text: "second item".to_string(),
 531                    new_text: "modified second item".to_string(),
 532                    line_hint: None,
 533                },
 534                Edit {
 535                    old_text: "third case".to_string(),
 536                    new_text: "improved third case".to_string(),
 537                    line_hint: None,
 538                },
 539            ]
 540        );
 541        assert_eq!(
 542            parser.finish(),
 543            EditParserMetrics {
 544                tags: 6,
 545                mismatched_tags: 0
 546            }
 547        );
 548    }
 549
 550    #[gpui::test(iterations = 1000)]
 551    fn test_xml_nested_tags(mut rng: StdRng) {
 552        let mut parser = EditParser::new(EditFormat::XmlTags);
 553        assert_eq!(
 554            parse_random_chunks(
 555                "<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
 556                &mut parser,
 557                &mut rng
 558            ),
 559            vec![Edit {
 560                old_text: "code with <tag>nested</tag> elements".to_string(),
 561                new_text: "new <code>content</code>".to_string(),
 562                line_hint: None,
 563            }]
 564        );
 565        assert_eq!(
 566            parser.finish(),
 567            EditParserMetrics {
 568                tags: 2,
 569                mismatched_tags: 0
 570            }
 571        );
 572    }
 573
 574    #[gpui::test(iterations = 1000)]
 575    fn test_xml_empty_old_and_new_text(mut rng: StdRng) {
 576        let mut parser = EditParser::new(EditFormat::XmlTags);
 577        assert_eq!(
 578            parse_random_chunks(
 579                "<old_text></old_text><new_text></new_text>",
 580                &mut parser,
 581                &mut rng
 582            ),
 583            vec![Edit {
 584                old_text: "".to_string(),
 585                new_text: "".to_string(),
 586                line_hint: None,
 587            }]
 588        );
 589        assert_eq!(
 590            parser.finish(),
 591            EditParserMetrics {
 592                tags: 2,
 593                mismatched_tags: 0
 594            }
 595        );
 596    }
 597
 598    #[gpui::test(iterations = 100)]
 599    fn test_xml_multiline_content(mut rng: StdRng) {
 600        let mut parser = EditParser::new(EditFormat::XmlTags);
 601        assert_eq!(
 602            parse_random_chunks(
 603                "<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
 604                &mut parser,
 605                &mut rng
 606            ),
 607            vec![Edit {
 608                old_text: "line1\nline2\nline3".to_string(),
 609                new_text: "line1\nmodified line2\nline3".to_string(),
 610                line_hint: None,
 611            }]
 612        );
 613        assert_eq!(
 614            parser.finish(),
 615            EditParserMetrics {
 616                tags: 2,
 617                mismatched_tags: 0
 618            }
 619        );
 620    }
 621
 622    #[gpui::test(iterations = 1000)]
 623    fn test_xml_mismatched_tags(mut rng: StdRng) {
 624        let mut parser = EditParser::new(EditFormat::XmlTags);
 625        assert_eq!(
 626            parse_random_chunks(
 627                // Reduced from an actual Sonnet 3.7 output
 628                indoc! {"
 629                    <old_text>
 630                    a
 631                    b
 632                    c
 633                    </new_text>
 634                    <new_text>
 635                    a
 636                    B
 637                    c
 638                    </old_text>
 639                    <old_text>
 640                    d
 641                    e
 642                    f
 643                    </new_text>
 644                    <new_text>
 645                    D
 646                    e
 647                    F
 648                    </old_text>
 649                "},
 650                &mut parser,
 651                &mut rng
 652            ),
 653            vec![
 654                Edit {
 655                    old_text: "a\nb\nc".to_string(),
 656                    new_text: "a\nB\nc".to_string(),
 657                    line_hint: None,
 658                },
 659                Edit {
 660                    old_text: "d\ne\nf".to_string(),
 661                    new_text: "D\ne\nF".to_string(),
 662                    line_hint: None,
 663                }
 664            ]
 665        );
 666        assert_eq!(
 667            parser.finish(),
 668            EditParserMetrics {
 669                tags: 4,
 670                mismatched_tags: 4
 671            }
 672        );
 673
 674        let mut parser = EditParser::new(EditFormat::XmlTags);
 675        assert_eq!(
 676            parse_random_chunks(
 677                // Reduced from an actual Opus 4 output
 678                indoc! {"
 679                    <edits>
 680                    <old_text>
 681                    Lorem
 682                    </old_text>
 683                    <new_text>
 684                    LOREM
 685                    </edits>
 686                "},
 687                &mut parser,
 688                &mut rng
 689            ),
 690            vec![Edit {
 691                old_text: "Lorem".to_string(),
 692                new_text: "LOREM".to_string(),
 693                line_hint: None,
 694            },]
 695        );
 696        assert_eq!(
 697            parser.finish(),
 698            EditParserMetrics {
 699                tags: 2,
 700                mismatched_tags: 1
 701            }
 702        );
 703    }
 704
 705    #[gpui::test(iterations = 1000)]
 706    fn test_diff_fenced_single_edit(mut rng: StdRng) {
 707        let mut parser = EditParser::new(EditFormat::DiffFenced);
 708        assert_eq!(
 709            parse_random_chunks(
 710                indoc! {"
 711                    <<<<<<< SEARCH
 712                    original text
 713                    =======
 714                    updated text
 715                    >>>>>>> REPLACE
 716                "},
 717                &mut parser,
 718                &mut rng
 719            ),
 720            vec![Edit {
 721                old_text: "original text".to_string(),
 722                new_text: "updated text".to_string(),
 723                line_hint: None,
 724            }]
 725        );
 726        assert_eq!(
 727            parser.finish(),
 728            EditParserMetrics {
 729                tags: 0,
 730                mismatched_tags: 0
 731            }
 732        );
 733    }
 734
 735    #[gpui::test(iterations = 100)]
 736    fn test_diff_fenced_with_markdown_fences(mut rng: StdRng) {
 737        let mut parser = EditParser::new(EditFormat::DiffFenced);
 738        assert_eq!(
 739            parse_random_chunks(
 740                indoc! {"
 741                    ```diff
 742                    <<<<<<< SEARCH
 743                    from flask import Flask
 744                    =======
 745                    import math
 746                    from flask import Flask
 747                    >>>>>>> REPLACE
 748                    ```
 749                "},
 750                &mut parser,
 751                &mut rng
 752            ),
 753            vec![Edit {
 754                old_text: "from flask import Flask".to_string(),
 755                new_text: "import math\nfrom flask import Flask".to_string(),
 756                line_hint: None,
 757            }]
 758        );
 759        assert_eq!(
 760            parser.finish(),
 761            EditParserMetrics {
 762                tags: 0,
 763                mismatched_tags: 0
 764            }
 765        );
 766    }
 767
 768    #[gpui::test(iterations = 100)]
 769    fn test_diff_fenced_multiple_edits(mut rng: StdRng) {
 770        let mut parser = EditParser::new(EditFormat::DiffFenced);
 771        assert_eq!(
 772            parse_random_chunks(
 773                indoc! {"
 774                    <<<<<<< SEARCH
 775                    first old
 776                    =======
 777                    first new
 778                    >>>>>>> REPLACE
 779
 780                    <<<<<<< SEARCH
 781                    second old
 782                    =======
 783                    second new
 784                    >>>>>>> REPLACE
 785                "},
 786                &mut parser,
 787                &mut rng
 788            ),
 789            vec![
 790                Edit {
 791                    old_text: "first old".to_string(),
 792                    new_text: "first new".to_string(),
 793                    line_hint: None,
 794                },
 795                Edit {
 796                    old_text: "second old".to_string(),
 797                    new_text: "second new".to_string(),
 798                    line_hint: None,
 799                },
 800            ]
 801        );
 802        assert_eq!(
 803            parser.finish(),
 804            EditParserMetrics {
 805                tags: 0,
 806                mismatched_tags: 0
 807            }
 808        );
 809    }
 810
 811    #[gpui::test(iterations = 100)]
 812    fn test_mixed_formats(mut rng: StdRng) {
 813        // Test XML format parser only parses XML tags
 814        let mut xml_parser = EditParser::new(EditFormat::XmlTags);
 815        assert_eq!(
 816            parse_random_chunks(
 817                indoc! {"
 818                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 819
 820                    <<<<<<< SEARCH
 821                    diff style old
 822                    =======
 823                    diff style new
 824                    >>>>>>> REPLACE
 825                "},
 826                &mut xml_parser,
 827                &mut rng
 828            ),
 829            vec![Edit {
 830                old_text: "xml style old".to_string(),
 831                new_text: "xml style new".to_string(),
 832                line_hint: None,
 833            },]
 834        );
 835        assert_eq!(
 836            xml_parser.finish(),
 837            EditParserMetrics {
 838                tags: 2,
 839                mismatched_tags: 0
 840            }
 841        );
 842
 843        // Test diff-fenced format parser only parses diff markers
 844        let mut diff_parser = EditParser::new(EditFormat::DiffFenced);
 845        assert_eq!(
 846            parse_random_chunks(
 847                indoc! {"
 848                    <old_text>xml style old</old_text><new_text>xml style new</new_text>
 849
 850                    <<<<<<< SEARCH
 851                    diff style old
 852                    =======
 853                    diff style new
 854                    >>>>>>> REPLACE
 855                "},
 856                &mut diff_parser,
 857                &mut rng
 858            ),
 859            vec![Edit {
 860                old_text: "diff style old".to_string(),
 861                new_text: "diff style new".to_string(),
 862                line_hint: None,
 863            },]
 864        );
 865        assert_eq!(
 866            diff_parser.finish(),
 867            EditParserMetrics {
 868                tags: 0,
 869                mismatched_tags: 0
 870            }
 871        );
 872    }
 873
 874    #[gpui::test(iterations = 100)]
 875    fn test_diff_fenced_empty_sections(mut rng: StdRng) {
 876        let mut parser = EditParser::new(EditFormat::DiffFenced);
 877        assert_eq!(
 878            parse_random_chunks(
 879                indoc! {"
 880                <<<<<<< SEARCH
 881                =======
 882                >>>>>>> REPLACE
 883            "},
 884                &mut parser,
 885                &mut rng
 886            ),
 887            vec![Edit {
 888                old_text: "".to_string(),
 889                new_text: "".to_string(),
 890                line_hint: None,
 891            }]
 892        );
 893        assert_eq!(
 894            parser.finish(),
 895            EditParserMetrics {
 896                tags: 0,
 897                mismatched_tags: 0
 898            }
 899        );
 900    }
 901
 902    #[gpui::test(iterations = 100)]
 903    fn test_diff_fenced_with_line_hint(mut rng: StdRng) {
 904        let mut parser = EditParser::new(EditFormat::DiffFenced);
 905        let edits = parse_random_chunks(
 906            indoc! {"
 907                <<<<<<< SEARCH line=42
 908                original text
 909                =======
 910                updated text
 911                >>>>>>> REPLACE
 912            "},
 913            &mut parser,
 914            &mut rng,
 915        );
 916        assert_eq!(
 917            edits,
 918            vec![Edit {
 919                old_text: "original text".to_string(),
 920                line_hint: Some(42),
 921                new_text: "updated text".to_string(),
 922            }]
 923        );
 924    }
 925    #[gpui::test(iterations = 100)]
 926    fn test_xml_line_hints(mut rng: StdRng) {
 927        // Line hint is a single quoted line number
 928        let mut parser = EditParser::new(EditFormat::XmlTags);
 929
 930        let edits = parse_random_chunks(
 931            r#"
 932                    <old_text line="23">original code</old_text>
 933                    <new_text>updated code</new_text>"#,
 934            &mut parser,
 935            &mut rng,
 936        );
 937
 938        assert_eq!(edits.len(), 1);
 939        assert_eq!(edits[0].old_text, "original code");
 940        assert_eq!(edits[0].line_hint, Some(23));
 941        assert_eq!(edits[0].new_text, "updated code");
 942
 943        // Line hint is a single unquoted line number
 944        let mut parser = EditParser::new(EditFormat::XmlTags);
 945
 946        let edits = parse_random_chunks(
 947            r#"
 948                    <old_text line=45>original code</old_text>
 949                    <new_text>updated code</new_text>"#,
 950            &mut parser,
 951            &mut rng,
 952        );
 953
 954        assert_eq!(edits.len(), 1);
 955        assert_eq!(edits[0].old_text, "original code");
 956        assert_eq!(edits[0].line_hint, Some(45));
 957        assert_eq!(edits[0].new_text, "updated code");
 958
 959        // Line hint is a range
 960        let mut parser = EditParser::new(EditFormat::XmlTags);
 961
 962        let edits = parse_random_chunks(
 963            r#"
 964            <old_text line="23:50">original code</old_text>
 965            <new_text>updated code</new_text>"#,
 966            &mut parser,
 967            &mut rng,
 968        );
 969
 970        assert_eq!(edits.len(), 1);
 971        assert_eq!(edits[0].old_text, "original code");
 972        assert_eq!(edits[0].line_hint, Some(23));
 973        assert_eq!(edits[0].new_text, "updated code");
 974
 975        // No line hint
 976        let mut parser = EditParser::new(EditFormat::XmlTags);
 977        let edits = parse_random_chunks(
 978            r#"
 979            <old_text>old</old_text>
 980            <new_text>new</new_text>"#,
 981            &mut parser,
 982            &mut rng,
 983        );
 984
 985        assert_eq!(edits.len(), 1);
 986        assert_eq!(edits[0].old_text, "old");
 987        assert_eq!(edits[0].line_hint, None);
 988        assert_eq!(edits[0].new_text, "new");
 989    }
 990
 991    #[derive(Default, Debug, PartialEq, Eq)]
 992    struct Edit {
 993        old_text: String,
 994        new_text: String,
 995        line_hint: Option<u32>,
 996    }
 997
 998    fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
 999        let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
1000        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
1001        chunk_indices.sort();
1002        chunk_indices.push(input.len());
1003
1004        let mut old_text = Some(String::new());
1005        let mut new_text = None;
1006        let mut pending_edit = Edit::default();
1007        let mut edits = Vec::new();
1008        let mut last_ix = 0;
1009        for chunk_ix in chunk_indices {
1010            for event in parser.push(&input[last_ix..chunk_ix]) {
1011                match event {
1012                    EditParserEvent::OldTextChunk {
1013                        chunk,
1014                        done,
1015                        line_hint,
1016                    } => {
1017                        old_text.as_mut().unwrap().push_str(&chunk);
1018                        if done {
1019                            pending_edit.old_text = old_text.take().unwrap();
1020                            pending_edit.line_hint = line_hint;
1021                            new_text = Some(String::new());
1022                        }
1023                    }
1024                    EditParserEvent::NewTextChunk { chunk, done } => {
1025                        new_text.as_mut().unwrap().push_str(&chunk);
1026                        if done {
1027                            pending_edit.new_text = new_text.take().unwrap();
1028                            edits.push(pending_edit);
1029                            pending_edit = Edit::default();
1030                            old_text = Some(String::new());
1031                        }
1032                    }
1033                }
1034            }
1035            last_ix = chunk_ix;
1036        }
1037
1038        edits
1039    }
1040}