@@ -210,9 +210,10 @@ impl TeacherPrompt {
pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
// Check if the model indicated no edits are needed
- let last_codeblock = extract_last_codeblock(&response);
- if last_codeblock.trim() == Self::NO_EDITS {
- return Ok((String::new(), None));
+ if let Some(last_codeblock) = extract_last_codeblock(&response) {
+ if last_codeblock.trim() == Self::NO_EDITS {
+ return Ok((String::new(), None));
+ }
}
// Extract updated (new) editable region from the model response.
@@ -431,36 +432,49 @@ pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String>
Some(result)
}
-pub(crate) fn extract_last_codeblock(text: &str) -> String {
- let mut last_block = None;
- let mut search_start = 0;
-
- while let Some(start) = text[search_start..].find("```") {
- let start = start + search_start;
- let bytes = text.as_bytes();
- let mut backtick_end = start;
-
- while backtick_end < bytes.len() && bytes[backtick_end] == b'`' {
- backtick_end += 1;
- }
+pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
+ let lines: Vec<&str> = text.lines().collect();
- let backtick_count = backtick_end - start;
- let closing_pattern = format!("\n{}", "`".repeat(backtick_count));
+ // Search from the end for a closing fence (line containing only backticks, 3+)
+ let mut closing_line_idx = None;
+ let mut backtick_count = 0;
- while backtick_end < bytes.len() && bytes[backtick_end] != b'\n' {
- backtick_end += 1;
+ for i in (0..lines.len()).rev() {
+ let line = lines[i].trim();
+ if line.len() >= 3 && line.chars().all(|c| c == '`') {
+ closing_line_idx = Some(i);
+ backtick_count = line.len();
+ break;
}
+ }
- if let Some(end_pos) = text[backtick_end..].find(&closing_pattern) {
- let code_block = &text[backtick_end + 1..backtick_end + end_pos + 1];
- last_block = Some(code_block.to_string());
- search_start = backtick_end + end_pos + closing_pattern.len();
- } else {
- break;
+ let closing_idx = closing_line_idx?;
+
+ // Search backwards for matching opening fence
+ // Opening fence starts with same backtick count, possibly followed by language/metadata
+ let opening_pattern = "`".repeat(backtick_count);
+
+ for i in (0..closing_idx).rev() {
+ let line = lines[i];
+ if line.starts_with(&opening_pattern) {
+ // Ensure it's exactly the right number of backticks (not more)
+ let rest = &line[backtick_count..];
+ if rest.is_empty() || !rest.starts_with('`') {
+ // Found matching opening fence
+ // Extract content between opening and closing (exclusive)
+ if closing_idx > i + 1 {
+ let content = lines[i + 1..closing_idx].join("\n");
+ // Preserve trailing newline to match previous behavior
+ return Some(format!("{}\n", content));
+ } else {
+ // Empty block
+ return Some(String::new());
+ }
+ }
}
}
- last_block.unwrap_or_else(|| text.to_string())
+ None
}
#[cfg(test)]
@@ -480,7 +494,7 @@ mod tests {
last block
`````
"};
- let last_block = extract_last_codeblock(text);
+ let last_block = extract_last_codeblock(text).unwrap();
assert_eq!(last_block, "last block\n");
}
@@ -493,7 +507,7 @@ mod tests {
more content
`````
"};
- let last_block = extract_last_codeblock(text);
+ let last_block = extract_last_codeblock(text).unwrap();
assert_eq!(
last_block,
"content with ``` inline\nand ```python nested\nmore content\n"
@@ -508,7 +522,7 @@ mod tests {
and here```more```stuff
`````
"};
- let last_block = extract_last_codeblock(text);
+ let last_block = extract_last_codeblock(text).unwrap();
assert_eq!(
last_block,
"here is some `code` with inline backticks\nand here```more```stuff\n"
@@ -557,7 +571,7 @@ mod tests {
```
`````
"#};
- let last_block = extract_last_codeblock(text);
+ let last_block = extract_last_codeblock(text).unwrap();
assert_eq!(
last_block,
indoc::indoc! {r#"
@@ -599,7 +613,42 @@ mod tests {
NO_EDITS
`````
"};
- let codeblock = extract_last_codeblock(response);
+ let codeblock = extract_last_codeblock(response).unwrap();
assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
}
+
+ #[test]
+ fn test_extract_codeblock_no_valid_block() {
+ // Text with no code blocks should return None
+ let text = "Just some plain text without any code blocks";
+ assert!(extract_last_codeblock(text).is_none());
+
+ // Unclosed code block should return None
+ let text = indoc::indoc! {"
+ ```
+ unclosed block
+ "};
+ assert!(extract_last_codeblock(text).is_none());
+
+ // Analysis text with nested markdown but no proper outer block
+ let text = indoc::indoc! {"
+ # Analysis
+ Looking at this:
+ ```
+ some code
+ ```
+ But then more analysis without wrapping block
+ "};
+ // This should find the inner block
+ let result = extract_last_codeblock(text).unwrap();
+ assert_eq!(result, "some code\n");
+ }
+
+ #[test]
+ fn test_extract_codeblock_no_trailing_newline() {
+ // Text ending without trailing newline after closing fence
+ let text = "`````\ncontent here\n`````";
+ let result = extract_last_codeblock(text).unwrap();
+ assert_eq!(result, "content here\n");
+ }
}
@@ -227,15 +227,16 @@ pub fn needs_repair(example: &Example, confidence_threshold: u8) -> bool {
/// Handles the `KEEP_PREVIOUS` sentinel by copying the teacher's prediction,
/// and delegates normal output to `TeacherPrompt::parse`.
pub fn parse(example: &Example, actual_output: &str) -> Result<(String, Option<ActualCursor>)> {
- let last_codeblock = extract_last_codeblock(actual_output);
- if last_codeblock.trim() == KEEP_PREVIOUS {
- let original = example
- .predictions
- .first()
- .context("no original prediction to keep")?;
- let patch = original.actual_patch.clone().unwrap_or_default();
- let cursor = original.actual_cursor.clone();
- return Ok((patch, cursor));
+ if let Some(last_codeblock) = extract_last_codeblock(actual_output) {
+ if last_codeblock.trim() == KEEP_PREVIOUS {
+ let original = example
+ .predictions
+ .first()
+ .context("no original prediction to keep")?;
+ let patch = original.actual_patch.clone().unwrap_or_default();
+ let cursor = original.actual_cursor.clone();
+ return Ok((patch, cursor));
+ }
}
TeacherPrompt::parse(example, actual_output)