format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::udiff;
 10use gpui::AsyncApp;
 11use similar::DiffableStr;
 12use std::ops::Range;
 13use std::sync::Arc;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    multi_region, output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 37            step_progress.set_substatus("formatting teacher prompt");
 38
 39            let zeta_format = ZetaFormat::default();
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: String::new(),
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::TeacherMultiRegion(_)
 53        | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
 54            step_progress.set_substatus("formatting teacher multi-region prompt");
 55
 56            let zeta_format = ZetaFormat::default();
 57            let (editable_range, context_range) =
 58                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 59
 60            let prompt =
 61                TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
 62            example.prompt = Some(ExamplePrompt {
 63                input: prompt,
 64                expected_output: String::new(),
 65                rejected_output: None,
 66                prefill: None,
 67                provider: args.provider,
 68            });
 69        }
 70        PredictionProvider::Zeta2(zeta_format) => {
 71            step_progress.set_substatus("formatting zeta2 prompt");
 72
 73            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 74            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 75            let expected_output = example
 76                .spec
 77                .expected_patches_with_cursor_positions()
 78                .into_iter()
 79                .next()
 80                .and_then(|(expected_patch, expected_cursor_offset)| {
 81                    zeta2_output_for_patch(
 82                        prompt_inputs,
 83                        &expected_patch,
 84                        expected_cursor_offset,
 85                        zeta_format,
 86                    )
 87                    .ok()
 88                })
 89                .unwrap_or_default();
 90
 91            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 92                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 93            });
 94
 95            example.prompt = Some(ExamplePrompt {
 96                input: prompt,
 97                expected_output,
 98                rejected_output,
 99                provider: args.provider,
100                prefill: Some(prefill),
101            });
102        }
103        _ => {
104            panic!("Cannot format prompt for {:?}", args.provider);
105        }
106    };
107    Ok(())
108}
109
110pub fn zeta2_output_for_patch(
111    input: &zeta_prompt::ZetaPromptInput,
112    patch: &str,
113    cursor_offset: Option<usize>,
114    version: ZetaFormat,
115) -> Result<String> {
116    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
117    let mut old_editable_region = context[editable_range].to_string();
118
119    if !old_editable_region.ends_with_newline() {
120        old_editable_region.push('\n');
121    }
122
123    if let Some(encoded_output) =
124        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
125    {
126        return Ok(encoded_output);
127    }
128
129    let (result, first_hunk_offset) =
130        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
131            || {
132                format!(
133                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
134                    patch, old_editable_region
135                )
136            },
137        )?;
138
139    if version == ZetaFormat::V0306SeedMultiRegions {
140        let cursor_in_new = cursor_offset.map(|cursor_offset| {
141            let hunk_start = first_hunk_offset.unwrap_or(0);
142            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
143        });
144        return multi_region::encode_from_old_and_new(
145            &old_editable_region,
146            &result,
147            cursor_in_new,
148            zeta_prompt::CURSOR_MARKER,
149            zeta_prompt::seed_coder::END_MARKER,
150            zeta_prompt::seed_coder::NO_EDITS,
151        );
152    }
153
154    let mut result = result;
155    if let Some(cursor_offset) = cursor_offset {
156        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
157        // We need to add where the hunk context matched in the editable region to compute
158        // the actual cursor position in the result.
159        let hunk_start = first_hunk_offset.unwrap_or(0);
160        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
161        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
162    }
163
164    if let Some(end_marker) = output_end_marker_for_format(version) {
165        if !result.ends_with('\n') {
166            result.push('\n');
167        }
168        result.push_str(end_marker);
169    }
170
171    Ok(result)
172}
173
174pub struct TeacherPrompt;
175
176impl TeacherPrompt {
177    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
178    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
179    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
180    pub(crate) const NO_EDITS: &str = "NO_EDITS";
181
182    /// Truncate edit history to this number of last lines
183    const MAX_HISTORY_LINES: usize = 128;
184
185    pub fn format_prompt(
186        example: &Example,
187        editable_range: Range<usize>,
188        context_range: Range<usize>,
189    ) -> String {
190        let edit_history = Self::format_edit_history(&example.spec.edit_history);
191        let context = Self::format_context(example);
192        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
193
194        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
195        let prompt = prompt_template
196            .replace("{{context}}", &context)
197            .replace("{{edit_history}}", &edit_history)
198            .replace("{{cursor_excerpt}}", &cursor_excerpt);
199
200        prompt
201    }
202
203    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
204        // Check if the model indicated no edits are needed
205        let no_edits = (String::new(), None);
206        if let Some(last_codeblock) = extract_last_codeblock(&response) {
207            if last_codeblock.trim() == Self::NO_EDITS {
208                return Ok(no_edits);
209            }
210        }
211
212        if response.trim().ends_with(Self::NO_EDITS) {
213            return Ok(no_edits);
214        }
215
216        // Extract updated (new) editable region from the model response.
217        let new_editable_region = Self::extract_editable_region(&response)?;
218        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
219        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
220        let old_editable_region = Self::extract_editable_region(
221            &example
222                .prompt
223                .as_ref()
224                .context("example prompt missing")?
225                .input,
226        )?
227        .replace(Self::USER_CURSOR_MARKER, "");
228
229        let prompt_inputs = example
230            .prompt_inputs
231            .as_ref()
232            .context("example is missing prompt inputs")?;
233
234        // Normalize leading newlines: if old starts with newline but new doesn't,
235        // prepend newline to new to preserve whitespace structure.
236        // This handles the case where the model drops the leading blank line.
237        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
238            new_editable_region.insert(0, '\n');
239        }
240
241        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
242        let (editable_region_offset, _) = excerpt
243            .match_indices(&old_editable_region)
244            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
245            .context("editable region not found in prompt content")?;
246        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
247
248        let editable_region_lines = old_editable_region.lines().count() as u32;
249        let diff = language::unified_diff_with_context(
250            &old_editable_region,
251            &new_editable_region,
252            editable_region_start_line as u32,
253            editable_region_start_line as u32,
254            editable_region_lines,
255        );
256
257        let diff = indoc::formatdoc! {"
258            --- a/{path}
259            +++ b/{path}
260            {diff}",
261            path = example.spec.cursor_path.to_string_lossy(),
262            diff = diff,
263        };
264
265        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
266            ActualCursor::from_editable_region(
267                &example.spec.cursor_path,
268                editable_region_cursor_offset,
269                &new_editable_region,
270                excerpt,
271                editable_region_offset,
272                editable_region_start_line,
273            )
274        });
275
276        Ok((diff, actual_cursor))
277    }
278
279    fn format_edit_history(edit_history: &str) -> String {
280        let lines: Vec<&str> = edit_history.lines().collect();
281
282        if lines.is_empty() {
283            return "(No edit history)".to_string();
284        }
285
286        if lines.len() > Self::MAX_HISTORY_LINES {
287            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
288            format!("{truncated}\n[...truncated...]")
289        } else {
290            lines.join("\n")
291        }
292    }
293
294    pub fn format_context(example: &Example) -> String {
295        let related_files = example
296            .prompt_inputs
297            .as_ref()
298            .and_then(|pi| pi.related_files.as_deref());
299
300        let Some(related_files) = related_files else {
301            return "(No context)".to_string();
302        };
303
304        if related_files.is_empty() {
305            return "(No context)".to_string();
306        }
307
308        let prefix = "`````";
309        let suffix = "`````\n\n";
310        let max_tokens = 1024;
311        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
312    }
313
314    fn format_cursor_excerpt(
315        example: &Example,
316        editable_range: Range<usize>,
317        context_range: Range<usize>,
318    ) -> String {
319        let mut result = String::new();
320
321        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
322        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
323        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
324
325        let path_str = example.spec.cursor_path.to_string_lossy();
326        result.push_str(&format!("`````{path_str}\n"));
327        result.push_str(&excerpt[context_range.start..editable_range.start]);
328        result.push_str(Self::EDITABLE_REGION_START);
329        result.push_str(&excerpt[editable_range.start..cursor_offset]);
330        result.push_str(Self::USER_CURSOR_MARKER);
331        result.push_str(&excerpt[cursor_offset..editable_range.end]);
332        result.push_str(Self::EDITABLE_REGION_END);
333        result.push_str(&excerpt[editable_range.end..context_range.end]);
334        result.push_str("\n`````");
335
336        result
337    }
338
339    pub fn extract_editable_region(text: &str) -> Result<String> {
340        let start = text
341            .rfind(Self::EDITABLE_REGION_START)
342            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
343        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
344
345        if start >= end {
346            return Err(anyhow!("Invalid editable region markers"));
347        }
348
349        let region = &text[start..end];
350        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
351    }
352}
353
354pub struct TeacherMultiRegionPrompt;
355
356impl TeacherMultiRegionPrompt {
357    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
358    pub(crate) const NO_EDITS: &str = "NO_EDITS";
359
360    /// Truncate edit history to this number of last lines
361    const MAX_HISTORY_LINES: usize = 128;
362
363    pub fn format_prompt(
364        example: &Example,
365        editable_range: Range<usize>,
366        context_range: Range<usize>,
367    ) -> String {
368        let edit_history = Self::format_edit_history(&example.spec.edit_history);
369        let context = Self::format_context(example);
370        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
371
372        let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
373        let prompt = prompt_template
374            .replace("{{context}}", &context)
375            .replace("{{edit_history}}", &edit_history)
376            .replace("{{cursor_excerpt}}", &cursor_excerpt);
377
378        prompt
379    }
380
381    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
382        let no_edits = (String::new(), None);
383        if let Some(last_codeblock) = extract_last_codeblock(&response) {
384            if last_codeblock.trim() == Self::NO_EDITS {
385                return Ok(no_edits);
386            }
387        }
388
389        if response.trim().ends_with(Self::NO_EDITS) {
390            return Ok(no_edits);
391        }
392
393        let prompt_inputs = example
394            .prompt_inputs
395            .as_ref()
396            .context("example is missing prompt inputs")?;
397
398        let zeta_format = ZetaFormat::default();
399        let (editable_range, _) =
400            excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
401        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
402        let old_editable_region = &excerpt[editable_range.clone()];
403        let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
404
405        let codeblock =
406            extract_last_codeblock(&response).context("no codeblock found in model response")?;
407        let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
408
409        let start_idx = start_num
410            .checked_sub(1)
411            .context("marker numbers are 1-indexed")?;
412        let end_idx = end_num
413            .checked_sub(1)
414            .context("marker numbers are 1-indexed")?;
415        let start_byte = *marker_offsets
416            .get(start_idx)
417            .context("start marker number out of range")?;
418        let end_byte = *marker_offsets
419            .get(end_idx)
420            .context("end marker number out of range")?;
421
422        if start_byte > end_byte {
423            return Err(anyhow!("start marker must come before end marker"));
424        }
425
426        let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
427        let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
428
429        let old_span = &old_editable_region[start_byte..end_byte];
430        let mut new_span = new_span;
431        if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
432            new_span.push('\n');
433        }
434        if !old_span.ends_with('\n') && new_span.ends_with('\n') {
435            new_span.pop();
436        }
437
438        let mut new_editable_region = String::new();
439        new_editable_region.push_str(&old_editable_region[..start_byte]);
440        new_editable_region.push_str(&new_span);
441        new_editable_region.push_str(&old_editable_region[end_byte..]);
442
443        let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
444
445        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
446            new_editable_region.insert(0, '\n');
447        }
448
449        let editable_region_offset = editable_range.start;
450        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
451
452        let editable_region_lines = old_editable_region.lines().count() as u32;
453        let diff = language::unified_diff_with_context(
454            old_editable_region,
455            &new_editable_region,
456            editable_region_start_line as u32,
457            editable_region_start_line as u32,
458            editable_region_lines,
459        );
460
461        let diff = indoc::formatdoc! {"
462            --- a/{path}
463            +++ b/{path}
464            {diff}",
465            path = example.spec.cursor_path.to_string_lossy(),
466            diff = diff,
467        };
468
469        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
470            ActualCursor::from_editable_region(
471                &example.spec.cursor_path,
472                editable_region_cursor_offset,
473                &new_editable_region,
474                excerpt,
475                editable_region_offset,
476                editable_region_start_line,
477            )
478        });
479
480        Ok((diff, actual_cursor))
481    }
482
483    fn format_edit_history(edit_history: &str) -> String {
484        let lines: Vec<&str> = edit_history.lines().collect();
485
486        if lines.is_empty() {
487            return "(No edit history)".to_string();
488        }
489
490        if lines.len() > Self::MAX_HISTORY_LINES {
491            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
492            format!("{truncated}\n[...truncated...]")
493        } else {
494            lines.join("\n")
495        }
496    }
497
498    pub fn format_context(example: &Example) -> String {
499        let related_files = example
500            .prompt_inputs
501            .as_ref()
502            .and_then(|pi| pi.related_files.as_deref());
503        let Some(related_files) = related_files else {
504            return "(No context)".to_string();
505        };
506
507        if related_files.is_empty() {
508            return "(No context)".to_string();
509        }
510
511        let prefix = "`````";
512        let suffix = "`````\n\n";
513        let max_tokens = 1024;
514        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
515    }
516
517    fn format_cursor_excerpt(
518        example: &Example,
519        editable_range: Range<usize>,
520        context_range: Range<usize>,
521    ) -> String {
522        let mut result = String::new();
523
524        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
525        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
526        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
527
528        let editable_text = &excerpt[editable_range.clone()];
529        let cursor_in_editable = cursor_offset - editable_range.start;
530
531        let path_str = example.spec.cursor_path.to_string_lossy();
532        result.push_str(&format!("`````{path_str}\n"));
533
534        result.push_str(&excerpt[context_range.start..editable_range.start]);
535
536        multi_region::write_editable_with_markers(
537            &mut result,
538            editable_text,
539            cursor_in_editable,
540            Self::USER_CURSOR_MARKER,
541        );
542
543        result.push_str(&excerpt[editable_range.end..context_range.end]);
544        result.push_str("\n`````");
545
546        result
547    }
548}
549
550/// Extract the cursor excerpt from an example.
551/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
552pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
553    // If we have the original prompt, extract the cursor excerpt from it
554    if let Some(prompt) = &example.prompt {
555        // Find "# 3. Current File" section and extract the content
556        if let Some(start) = prompt.input.find("# 3. Current File") {
557            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
558            let backtick_count = prompt.input[content_start..]
559                .chars()
560                .take_while(|&c| c == '`')
561                .count();
562            let content_start = content_start + backtick_count;
563
564            // Find the path line and skip it
565            let newline_pos = prompt.input[content_start..].find('\n')?;
566            let text_start = content_start + newline_pos + 1;
567
568            // Find the closing backticks
569            let closing_pattern = "`".repeat(backtick_count);
570            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
571            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
572
573            let path_str = example.spec.cursor_path.to_string_lossy();
574            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
575        }
576    }
577
578    // Fallback: construct from prompt_inputs if available
579    let prompt_inputs = example.prompt_inputs.as_ref()?;
580    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
581    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
582
583    // Simple fallback: just show content around cursor with markers
584    let path_str = example.spec.cursor_path.to_string_lossy();
585    let mut result = format!("`````{path_str}\n");
586    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
587    result.push_str(&excerpt[..cursor_offset]);
588    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
589    result.push_str(&excerpt[cursor_offset..]);
590    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
591    result.push_str("\n`````");
592
593    Some(result)
594}
595
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, ignoring surrounding whitespace, consists solely
/// of three or more backticks. The opening fence is the nearest preceding line that,
/// ignoring leading whitespace, starts with exactly that many backticks, optionally followed
/// by a language tag or other metadata. Shorter fences nested inside the block are therefore
/// treated as content.
///
/// Leading whitespace is ignored for both fences, so an indented block is matched just like
/// an unindented one; its content lines are returned unchanged.
///
/// The returned content always ends with a newline, except for an empty block, which yields
/// an empty string. Returns `None` when no closing fence or no matching opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // A fence line is made only of backticks, at least three of them.
    let is_fence = |line: &str| line.len() >= 3 && line.bytes().all(|byte| byte == b'`');

    let closing_idx = lines.iter().rposition(|line| is_fence(line.trim()))?;
    let backtick_count = lines[closing_idx].trim().len();
    let fence = "`".repeat(backtick_count);

    // Walk backwards to the nearest line opening with exactly `backtick_count` backticks;
    // a line starting with more backticks belongs to a different (longer) fence.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.trim_start()
            .strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let content = &lines[opening_idx + 1..closing_idx];
    if content.is_empty() {
        // Empty block
        Some(String::new())
    } else {
        // Preserve trailing newline to match previous behavior
        Some(format!("{}\n", content.join("\n")))
    }
}
640
// Unit tests for code-block extraction and editable-region extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // With several fenced blocks present, the last one is returned, and metadata on the
    // opening fence line is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let text = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(last_block, "last block\n");
    }

    // Three-backtick sequences inside a five-backtick block are kept as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let text = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    // Backticks in the middle of a line never count as a fence.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let text = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    // Start/end markers delimit the region; the newline before the end marker and one
    // further trailing newline are dropped from the result.
    #[test]
    fn test_extract_editable_region_old_format() {
        let text = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // Numbered-marker format: the text between the first and last marker is returned.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let text = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    // Numbered-marker format with an intermediate marker, which is removed from the result.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let text = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    // A complete three-backtick block nested inside the outer five-backtick block is
    // returned as part of the outer block's content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let text = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#}
        );
    }

    // Without any markers the whole input is treated as the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = indoc::indoc! {"
            one
            two three"};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // Checks the building block used for NO_EDITS detection: the last code block of such a
    // response trims down to the sentinel. `parse` itself is not called here.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let codeblock = extract_last_codeblock(response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    // Inputs without a complete block yield `None`; the final case shows that a complete
    // inner block is still found when it is followed by plain text.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let text = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(text).is_none());

        // Unclosed code block should return None
        let text = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert!(extract_last_codeblock(text).is_none());

        // Analysis text with nested markdown but no proper outer block
        let text = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "some code\n");
    }

    // The returned content ends with a newline even when the input stops right after the
    // closing fence.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let text = "`````\ncontent here\n`````";
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "content here\n");
    }
}