From c70f300c13c90b3a357f8869461bdb001fcdb51c Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Tue, 17 Feb 2026 12:48:36 +0200 Subject: [PATCH] ep: Make `extract_last_codeblock` more robust (#49347) Release Notes: - N/A --- .../edit_prediction_cli/src/format_prompt.rs | 111 +++++++++++++----- crates/edit_prediction_cli/src/repair.rs | 19 +-- 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index 8783cf8605af8450c12e224be9654f8288fb6580..113baf04d08e69912ef8d0f0005d24a82fa47998 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -210,9 +210,10 @@ impl TeacherPrompt { pub fn parse(example: &Example, response: &str) -> Result<(String, Option)> { // Check if the model indicated no edits are needed - let last_codeblock = extract_last_codeblock(&response); - if last_codeblock.trim() == Self::NO_EDITS { - return Ok((String::new(), None)); + if let Some(last_codeblock) = extract_last_codeblock(&response) { + if last_codeblock.trim() == Self::NO_EDITS { + return Ok((String::new(), None)); + } } // Extract updated (new) editable region from the model response. @@ -431,36 +432,49 @@ pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option Some(result) } -pub(crate) fn extract_last_codeblock(text: &str) -> String { - let mut last_block = None; - let mut search_start = 0; - - while let Some(start) = text[search_start..].find("```") { - let start = start + search_start; - let bytes = text.as_bytes(); - let mut backtick_end = start; - - while backtick_end < bytes.len() && bytes[backtick_end] == b'`' { - backtick_end += 1; - } +pub(crate) fn extract_last_codeblock(text: &str) -> Option { + let lines: Vec<&str> = text.lines().collect(); - let backtick_count = backtick_end - start; - let closing_pattern = format!("\n{}", "`".repeat(backtick_count)); + // Search from the end for a closing fence (line containing only backticks, 3+) + let mut closing_line_idx = None; + let mut backtick_count = 0; - while backtick_end < bytes.len() && bytes[backtick_end] != b'\n' { - backtick_end += 1; + for i in (0..lines.len()).rev() { + let line = lines[i].trim(); + if line.len() >= 3 && line.chars().all(|c| c == '`') { + closing_line_idx = Some(i); + backtick_count = line.len(); + break; } + } - if let Some(end_pos) = text[backtick_end..].find(&closing_pattern) { - let code_block = &text[backtick_end + 1..backtick_end + end_pos + 1]; - last_block = Some(code_block.to_string()); - search_start = backtick_end + end_pos + closing_pattern.len(); - } else { - break; + let closing_idx = closing_line_idx?; + + // Search backwards for matching opening fence + // Opening fence starts with same backtick count, possibly followed by language/metadata + let opening_pattern = "`".repeat(backtick_count); + + for i in (0..closing_idx).rev() { + let line = lines[i]; + if line.starts_with(&opening_pattern) { + // Ensure it's exactly the right number of backticks (not more) + let rest = &line[backtick_count..]; + if rest.is_empty() || !rest.starts_with('`') { + // Found matching opening fence + // Extract content between opening and closing (exclusive) + if closing_idx > i + 1 { + let content = lines[i + 1..closing_idx].join("\n"); + // Preserve trailing newline to match previous behavior + return Some(format!("{}\n", content)); + } else { + // Empty block + return Some(String::new()); + } + } } } - last_block.unwrap_or_else(|| text.to_string()) + None } #[cfg(test)] @@ -480,7 +494,7 @@ mod tests { last block ````` "}; - let last_block = extract_last_codeblock(text); + let last_block = extract_last_codeblock(text).unwrap(); assert_eq!(last_block, "last block\n"); } @@ -493,7 +507,7 @@ mod tests { more content ````` "}; - let last_block = extract_last_codeblock(text); + let last_block = extract_last_codeblock(text).unwrap(); assert_eq!( last_block, "content with ``` inline\nand ```python nested\nmore content\n" @@ -508,7 +522,7 @@ mod tests { and here```more```stuff ````` "}; - let last_block = extract_last_codeblock(text); + let last_block = extract_last_codeblock(text).unwrap(); assert_eq!( last_block, "here is some `code` with inline backticks\nand here```more```stuff\n" @@ -557,7 +571,7 @@ mod tests { ``` ````` "#}; - let last_block = extract_last_codeblock(text); + let last_block = extract_last_codeblock(text).unwrap(); assert_eq!( last_block, indoc::indoc! {r#" @@ -599,7 +613,42 @@ mod tests { NO_EDITS ````` "}; - let codeblock = extract_last_codeblock(response); + let codeblock = extract_last_codeblock(response).unwrap(); assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS); } + + #[test] + fn test_extract_codeblock_no_valid_block() { + // Text with no code blocks should return None + let text = "Just some plain text without any code blocks"; + assert!(extract_last_codeblock(text).is_none()); + + // Unclosed code block should return None + let text = indoc::indoc! {" + ``` + unclosed block + "}; + assert!(extract_last_codeblock(text).is_none()); + + // Analysis text with nested markdown but no proper outer block + let text = indoc::indoc! {" + # Analysis + Looking at this: + ``` + some code + ``` + But then more analysis without wrapping block + "}; + // This should find the inner block + let result = extract_last_codeblock(text).unwrap(); + assert_eq!(result, "some code\n"); + } + + #[test] + fn test_extract_codeblock_no_trailing_newline() { + // Text ending without trailing newline after closing fence + let text = "`````\ncontent here\n`````"; + let result = extract_last_codeblock(text).unwrap(); + assert_eq!(result, "content here\n"); + } } diff --git a/crates/edit_prediction_cli/src/repair.rs b/crates/edit_prediction_cli/src/repair.rs index bf59604eb4b14d6860504151037fb09bffc37104..a8b0002b8396808f5fedb22496f383eb3896198d 100644 --- a/crates/edit_prediction_cli/src/repair.rs +++ b/crates/edit_prediction_cli/src/repair.rs @@ -227,15 +227,16 @@ pub fn needs_repair(example: &Example, confidence_threshold: u8) -> bool { /// Handles the `KEEP_PREVIOUS` sentinel by copying the teacher's prediction, /// and delegates normal output to `TeacherPrompt::parse`. pub fn parse(example: &Example, actual_output: &str) -> Result<(String, Option)> { - let last_codeblock = extract_last_codeblock(actual_output); - if last_codeblock.trim() == KEEP_PREVIOUS { - let original = example - .predictions - .first() - .context("no original prediction to keep")?; - let patch = original.actual_patch.clone().unwrap_or_default(); - let cursor = original.actual_cursor.clone(); - return Ok((patch, cursor)); + if let Some(last_codeblock) = extract_last_codeblock(actual_output) { + if last_codeblock.trim() == KEEP_PREVIOUS { + let original = example + .predictions + .first() + .context("no original prediction to keep")?; + let patch = original.actual_patch.clone().unwrap_or_default(); + let cursor = original.actual_cursor.clone(); + return Ok((patch, cursor)); + } } TeacherPrompt::parse(example, actual_output)