format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::udiff;
 10use gpui::AsyncApp;
 11use similar::DiffableStr;
 12use std::ops::Range;
 13use std::sync::Arc;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    multi_region, output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 37            step_progress.set_substatus("formatting teacher prompt");
 38
 39            let zeta_format = ZetaFormat::default();
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: String::new(),
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::TeacherMultiRegion(_)
 53        | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
 54            step_progress.set_substatus("formatting teacher multi-region prompt");
 55
 56            let zeta_format = ZetaFormat::default();
 57            let (editable_range, context_range) =
 58                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 59
 60            let prompt =
 61                TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
 62            example.prompt = Some(ExamplePrompt {
 63                input: prompt,
 64                expected_output: String::new(),
 65                rejected_output: None,
 66                prefill: None,
 67                provider: args.provider,
 68            });
 69        }
 70        PredictionProvider::Zeta2(zeta_format) => {
 71            step_progress.set_substatus("formatting zeta2 prompt");
 72
 73            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 74            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 75            let expected_output = example
 76                .spec
 77                .expected_patches_with_cursor_positions()
 78                .into_iter()
 79                .next()
 80                .and_then(|(expected_patch, expected_cursor_offset)| {
 81                    zeta2_output_for_patch(
 82                        prompt_inputs,
 83                        &expected_patch,
 84                        expected_cursor_offset,
 85                        zeta_format,
 86                    )
 87                    .ok()
 88                })
 89                .unwrap_or_default();
 90
 91            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 92                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 93            });
 94
 95            example.prompt = prompt.map(|prompt| ExamplePrompt {
 96                input: prompt,
 97                expected_output,
 98                rejected_output,
 99                provider: args.provider,
100                prefill: Some(prefill),
101            });
102        }
103        _ => {
104            panic!("Cannot format prompt for {:?}", args.provider);
105        }
106    };
107    Ok(())
108}
109
110pub fn zeta2_output_for_patch(
111    input: &zeta_prompt::ZetaPromptInput,
112    patch: &str,
113    cursor_offset: Option<usize>,
114    version: ZetaFormat,
115) -> Result<String> {
116    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
117    let mut old_editable_region = context[editable_range].to_string();
118
119    if !old_editable_region.ends_with_newline() {
120        old_editable_region.push('\n');
121    }
122
123    if let Some(encoded_output) =
124        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
125    {
126        return Ok(encoded_output);
127    }
128
129    let (result, first_hunk_offset) =
130        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
131            || {
132                format!(
133                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
134                    patch, old_editable_region
135                )
136            },
137        )?;
138
139    if version == ZetaFormat::V0317SeedMultiRegions {
140        let cursor_in_new = cursor_offset.map(|cursor_offset| {
141            let hunk_start = first_hunk_offset.unwrap_or(0);
142            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
143        });
144        return multi_region::encode_from_old_and_new_v0317(
145            &old_editable_region,
146            &result,
147            cursor_in_new,
148            zeta_prompt::CURSOR_MARKER,
149            multi_region::V0317_END_MARKER,
150        );
151    }
152
153    if version == ZetaFormat::V0316SeedMultiRegions {
154        let cursor_in_new = cursor_offset.map(|cursor_offset| {
155            let hunk_start = first_hunk_offset.unwrap_or(0);
156            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
157        });
158        return multi_region::encode_from_old_and_new_v0316(
159            &old_editable_region,
160            &result,
161            cursor_in_new,
162            zeta_prompt::CURSOR_MARKER,
163            multi_region::V0316_END_MARKER,
164        );
165    }
166
167    if version == ZetaFormat::V0306SeedMultiRegions {
168        let cursor_in_new = cursor_offset.map(|cursor_offset| {
169            let hunk_start = first_hunk_offset.unwrap_or(0);
170            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
171        });
172        return multi_region::encode_from_old_and_new(
173            &old_editable_region,
174            &result,
175            cursor_in_new,
176            zeta_prompt::CURSOR_MARKER,
177            zeta_prompt::seed_coder::END_MARKER,
178            zeta_prompt::seed_coder::NO_EDITS,
179        );
180    }
181
182    let mut result = result;
183    if let Some(cursor_offset) = cursor_offset {
184        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
185        // We need to add where the hunk context matched in the editable region to compute
186        // the actual cursor position in the result.
187        let hunk_start = first_hunk_offset.unwrap_or(0);
188        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
189        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
190    }
191
192    if let Some(end_marker) = output_end_marker_for_format(version) {
193        if !result.ends_with('\n') {
194            result.push('\n');
195        }
196        result.push_str(end_marker);
197    }
198
199    Ok(result)
200}
201
202pub struct TeacherPrompt;
203
204impl TeacherPrompt {
205    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
206    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
207    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
208    pub(crate) const NO_EDITS: &str = "NO_EDITS";
209
210    /// Truncate edit history to this number of last lines
211    const MAX_HISTORY_LINES: usize = 128;
212
213    pub fn format_prompt(
214        example: &Example,
215        editable_range: Range<usize>,
216        context_range: Range<usize>,
217    ) -> String {
218        let edit_history = Self::format_edit_history(&example.spec.edit_history);
219        let context = Self::format_context(example);
220        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
221
222        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
223        let prompt = prompt_template
224            .replace("{{context}}", &context)
225            .replace("{{edit_history}}", &edit_history)
226            .replace("{{cursor_excerpt}}", &cursor_excerpt);
227
228        prompt
229    }
230
231    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
232        // Check if the model indicated no edits are needed
233        let no_edits = (String::new(), None);
234        if let Some(last_codeblock) = extract_last_codeblock(&response) {
235            if last_codeblock.trim() == Self::NO_EDITS {
236                return Ok(no_edits);
237            }
238        }
239
240        if response.trim().ends_with(Self::NO_EDITS) {
241            return Ok(no_edits);
242        }
243
244        // Extract updated (new) editable region from the model response.
245        let new_editable_region = Self::extract_editable_region(&response)?;
246        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
247        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
248        let old_editable_region = Self::extract_editable_region(
249            &example
250                .prompt
251                .as_ref()
252                .context("example prompt missing")?
253                .input,
254        )?
255        .replace(Self::USER_CURSOR_MARKER, "");
256
257        let prompt_inputs = example
258            .prompt_inputs
259            .as_ref()
260            .context("example is missing prompt inputs")?;
261
262        // Normalize leading newlines: if old starts with newline but new doesn't,
263        // prepend newline to new to preserve whitespace structure.
264        // This handles the case where the model drops the leading blank line.
265        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
266            new_editable_region.insert(0, '\n');
267        }
268
269        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
270        let (editable_region_offset, _) = excerpt
271            .match_indices(&old_editable_region)
272            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
273            .context("editable region not found in prompt content")?;
274        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
275
276        let editable_region_lines = old_editable_region.lines().count() as u32;
277        let diff = language::unified_diff_with_context(
278            &old_editable_region,
279            &new_editable_region,
280            editable_region_start_line as u32,
281            editable_region_start_line as u32,
282            editable_region_lines,
283        );
284
285        let diff = indoc::formatdoc! {"
286            --- a/{path}
287            +++ b/{path}
288            {diff}",
289            path = example.spec.cursor_path.to_string_lossy(),
290            diff = diff,
291        };
292
293        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
294            ActualCursor::from_editable_region(
295                &example.spec.cursor_path,
296                editable_region_cursor_offset,
297                &new_editable_region,
298                excerpt,
299                editable_region_offset,
300                editable_region_start_line,
301            )
302        });
303
304        Ok((diff, actual_cursor))
305    }
306
307    fn format_edit_history(edit_history: &str) -> String {
308        let lines: Vec<&str> = edit_history.lines().collect();
309
310        if lines.is_empty() {
311            return "(No edit history)".to_string();
312        }
313
314        if lines.len() > Self::MAX_HISTORY_LINES {
315            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
316            format!("{truncated}\n[...truncated...]")
317        } else {
318            lines.join("\n")
319        }
320    }
321
322    pub fn format_context(example: &Example) -> String {
323        let related_files = example
324            .prompt_inputs
325            .as_ref()
326            .and_then(|pi| pi.related_files.as_deref());
327
328        let Some(related_files) = related_files else {
329            return "(No context)".to_string();
330        };
331
332        if related_files.is_empty() {
333            return "(No context)".to_string();
334        }
335
336        let prefix = "`````";
337        let suffix = "`````\n\n";
338        let max_tokens = 1024;
339        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
340    }
341
342    fn format_cursor_excerpt(
343        example: &Example,
344        editable_range: Range<usize>,
345        context_range: Range<usize>,
346    ) -> String {
347        let mut result = String::new();
348
349        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
350        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
351        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
352
353        let path_str = example.spec.cursor_path.to_string_lossy();
354        result.push_str(&format!("`````{path_str}\n"));
355        result.push_str(&excerpt[context_range.start..editable_range.start]);
356        result.push_str(Self::EDITABLE_REGION_START);
357        result.push_str(&excerpt[editable_range.start..cursor_offset]);
358        result.push_str(Self::USER_CURSOR_MARKER);
359        result.push_str(&excerpt[cursor_offset..editable_range.end]);
360        result.push_str(Self::EDITABLE_REGION_END);
361        result.push_str(&excerpt[editable_range.end..context_range.end]);
362        result.push_str("\n`````");
363
364        result
365    }
366
367    pub fn extract_editable_region(text: &str) -> Result<String> {
368        let start = text
369            .rfind(Self::EDITABLE_REGION_START)
370            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
371        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
372
373        if start >= end {
374            return Err(anyhow!("Invalid editable region markers"));
375        }
376
377        let region = &text[start..end];
378        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
379    }
380}
381
382pub struct TeacherMultiRegionPrompt;
383
384impl TeacherMultiRegionPrompt {
385    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
386    pub(crate) const NO_EDITS: &str = "NO_EDITS";
387
388    /// Truncate edit history to this number of last lines
389    const MAX_HISTORY_LINES: usize = 128;
390
391    pub fn format_prompt(
392        example: &Example,
393        editable_range: Range<usize>,
394        context_range: Range<usize>,
395    ) -> String {
396        let edit_history = Self::format_edit_history(&example.spec.edit_history);
397        let context = Self::format_context(example);
398        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
399
400        let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
401        let prompt = prompt_template
402            .replace("{{context}}", &context)
403            .replace("{{edit_history}}", &edit_history)
404            .replace("{{cursor_excerpt}}", &cursor_excerpt);
405
406        prompt
407    }
408
409    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
410        let no_edits = (String::new(), None);
411        if let Some(last_codeblock) = extract_last_codeblock(&response) {
412            if last_codeblock.trim() == Self::NO_EDITS {
413                return Ok(no_edits);
414            }
415        }
416
417        if response.trim().ends_with(Self::NO_EDITS) {
418            return Ok(no_edits);
419        }
420
421        let prompt_inputs = example
422            .prompt_inputs
423            .as_ref()
424            .context("example is missing prompt inputs")?;
425
426        let zeta_format = ZetaFormat::default();
427        let (editable_range, _) =
428            excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
429        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
430        let old_editable_region = &excerpt[editable_range.clone()];
431        let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
432
433        let codeblock =
434            extract_last_codeblock(&response).context("no codeblock found in model response")?;
435        let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
436
437        let start_idx = start_num
438            .checked_sub(1)
439            .context("marker numbers are 1-indexed")?;
440        let end_idx = end_num
441            .checked_sub(1)
442            .context("marker numbers are 1-indexed")?;
443        let start_byte = *marker_offsets
444            .get(start_idx)
445            .context("start marker number out of range")?;
446        let end_byte = *marker_offsets
447            .get(end_idx)
448            .context("end marker number out of range")?;
449
450        if start_byte > end_byte {
451            return Err(anyhow!("start marker must come before end marker"));
452        }
453
454        let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
455        let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
456
457        let old_span = &old_editable_region[start_byte..end_byte];
458        let mut new_span = new_span;
459        if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
460            new_span.push('\n');
461        }
462        if !old_span.ends_with('\n') && new_span.ends_with('\n') {
463            new_span.pop();
464        }
465
466        let mut new_editable_region = String::new();
467        new_editable_region.push_str(&old_editable_region[..start_byte]);
468        new_editable_region.push_str(&new_span);
469        new_editable_region.push_str(&old_editable_region[end_byte..]);
470
471        let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
472
473        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
474            new_editable_region.insert(0, '\n');
475        }
476
477        let editable_region_offset = editable_range.start;
478        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
479
480        let editable_region_lines = old_editable_region.lines().count() as u32;
481        let diff = language::unified_diff_with_context(
482            old_editable_region,
483            &new_editable_region,
484            editable_region_start_line as u32,
485            editable_region_start_line as u32,
486            editable_region_lines,
487        );
488
489        let diff = indoc::formatdoc! {"
490            --- a/{path}
491            +++ b/{path}
492            {diff}",
493            path = example.spec.cursor_path.to_string_lossy(),
494            diff = diff,
495        };
496
497        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
498            ActualCursor::from_editable_region(
499                &example.spec.cursor_path,
500                editable_region_cursor_offset,
501                &new_editable_region,
502                excerpt,
503                editable_region_offset,
504                editable_region_start_line,
505            )
506        });
507
508        Ok((diff, actual_cursor))
509    }
510
511    fn format_edit_history(edit_history: &str) -> String {
512        let lines: Vec<&str> = edit_history.lines().collect();
513
514        if lines.is_empty() {
515            return "(No edit history)".to_string();
516        }
517
518        if lines.len() > Self::MAX_HISTORY_LINES {
519            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
520            format!("{truncated}\n[...truncated...]")
521        } else {
522            lines.join("\n")
523        }
524    }
525
526    pub fn format_context(example: &Example) -> String {
527        let related_files = example
528            .prompt_inputs
529            .as_ref()
530            .and_then(|pi| pi.related_files.as_deref());
531        let Some(related_files) = related_files else {
532            return "(No context)".to_string();
533        };
534
535        if related_files.is_empty() {
536            return "(No context)".to_string();
537        }
538
539        let prefix = "`````";
540        let suffix = "`````\n\n";
541        let max_tokens = 1024;
542        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
543    }
544
545    fn format_cursor_excerpt(
546        example: &Example,
547        editable_range: Range<usize>,
548        context_range: Range<usize>,
549    ) -> String {
550        let mut result = String::new();
551
552        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
553        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
554        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
555
556        let editable_text = &excerpt[editable_range.clone()];
557        let cursor_in_editable = cursor_offset - editable_range.start;
558
559        let path_str = example.spec.cursor_path.to_string_lossy();
560        result.push_str(&format!("`````{path_str}\n"));
561
562        result.push_str(&excerpt[context_range.start..editable_range.start]);
563
564        multi_region::write_editable_with_markers(
565            &mut result,
566            editable_text,
567            cursor_in_editable,
568            Self::USER_CURSOR_MARKER,
569        );
570
571        result.push_str(&excerpt[editable_range.end..context_range.end]);
572        result.push_str("\n`````");
573
574        result
575    }
576}
577
578/// Extract the cursor excerpt from an example.
579/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
580pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
581    // If we have the original prompt, extract the cursor excerpt from it
582    if let Some(prompt) = &example.prompt {
583        // Find "# 3. Current File" section and extract the content
584        if let Some(start) = prompt.input.find("# 3. Current File") {
585            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
586            let backtick_count = prompt.input[content_start..]
587                .chars()
588                .take_while(|&c| c == '`')
589                .count();
590            let content_start = content_start + backtick_count;
591
592            // Find the path line and skip it
593            let newline_pos = prompt.input[content_start..].find('\n')?;
594            let text_start = content_start + newline_pos + 1;
595
596            // Find the closing backticks
597            let closing_pattern = "`".repeat(backtick_count);
598            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
599            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
600
601            let path_str = example.spec.cursor_path.to_string_lossy();
602            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
603        }
604    }
605
606    // Fallback: construct from prompt_inputs if available
607    let prompt_inputs = example.prompt_inputs.as_ref()?;
608    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
609    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
610
611    // Simple fallback: just show content around cursor with markers
612    let path_str = example.spec.cursor_path.to_string_lossy();
613    let mut result = format!("`````{path_str}\n");
614    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
615    result.push_str(&excerpt[..cursor_offset]);
616    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
617    result.push_str(&excerpt[cursor_offset..]);
618    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
619    result.push_str("\n`````");
620
621    Some(result)
622}
623
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, ignoring surrounding whitespace, consists of
/// three or more backticks and nothing else. The opening fence is the nearest preceding line
/// that, ignoring leading whitespace, starts with exactly the same number of backticks; it
/// may be followed by a language tag or other metadata. Fences with a different backtick
/// count in between are treated as content, which allows nested blocks.
///
/// Leading whitespace is ignored for both fences, so a block indented as a whole (for
/// example inside a list item) is recognized. The content lines themselves are returned
/// unchanged, joined with `\n` and followed by a trailing newline. An empty block yields an
/// empty string.
///
/// Returns `None` when there is no closing fence or no matching opening fence before it.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Search from the end for a closing fence (line containing only backticks, 3+).
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let fence = line.trim();
        let is_fence = fence.len() >= 3 && fence.bytes().all(|byte| byte == b'`');
        is_fence.then_some((idx, fence.len()))
    })?;

    // Search backwards for the matching opening fence: its leading run of backticks must be
    // exactly as long as the closing fence (not shorter, not longer).
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        let backtick_run = line
            .trim_start()
            .bytes()
            .take_while(|&byte| byte == b'`')
            .count();
        backtick_run == fence_len
    })?;

    // Extract content between opening and closing (exclusive).
    let content = &lines[opening_idx + 1..closing_idx];
    if content.is_empty() {
        return Some(String::new());
    }

    // Preserve trailing newline to match previous behavior.
    Some(format!("{}\n", content.join("\n")))
}
668
#[cfg(test)]
mod tests {
    use super::*;

    /// With several fenced blocks in the response, the content of the last one is returned.
    #[test]
    fn test_extract_last_code_block() {
        let response = concat!(
            "Some thinking\n",
            "\n",
            "```\n",
            "first block\n",
            "```\n",
            "\n",
            "`````path='something' lines=1:2\n",
            "last block\n",
            "`````\n",
        );
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, "last block\n");
    }

    /// Shorter fences inside a five-backtick block are part of the content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = concat!(
            "`````\n",
            "content with ``` inline\n",
            "and ```python nested\n",
            "more content\n",
            "`````\n",
        );
        let expected = concat!(
            "content with ``` inline\n",
            "and ```python nested\n",
            "more content\n",
        );
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, expected);
    }

    /// Backticks in the middle of a line never act as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = concat!(
            "`````\n",
            "here is some `code` with inline backticks\n",
            "and here```more```stuff\n",
            "`````\n",
        );
        let expected = concat!(
            "here is some `code` with inline backticks\n",
            "and here```more```stuff\n",
        );
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, expected);
    }

    /// The start/end marker format yields the text between the markers, without the blank
    /// line that precedes the end marker.
    #[test]
    fn test_extract_editable_region_old_format() {
        let prompt = concat!(
            "some lines\n",
            "are\n",
            "here\n",
            "<|editable_region_start|>\n",
            "one\n",
            "two three\n",
            "\n",
            "<|editable_region_end|>\n",
            "more\n",
            "lines here\n",
        );
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// Two numbered markers delimit the editable region.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let prompt = concat!(
            "some context\n",
            "<|marker_1|>\n",
            "one\n",
            "two three\n",
            "<|marker_2|>\n",
            "more context\n",
        );
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// With more than two markers the region spans from the first to the last one.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let prompt = concat!(
            "prefix\n",
            "<|marker_1|>\n",
            "aaa\n",
            "bbb\n",
            "<|marker_2|>\n",
            "ccc\n",
            "ddd\n",
            "<|marker_3|>\n",
            "suffix\n",
        );
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }

    /// A three-backtick bibtex block nested in the five-backtick answer stays intact.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let expected = concat!(
            "## Collaborations\n",
            "Our mission is to create a 4D generative model.\n",
            "\n",
            "## Citation\n",
            "\n",
            "If you found Unique3D helpful, please cite our report:\n",
            "```bibtex\n",
            "@misc{wu2024unique3d,\n",
            "      title={Unique3D},\n",
            "}\n",
            "```\n",
        );
        let response = format!(
            "{preamble}\n\n`````\n{expected}`````\n",
            preamble =
                "Looking at the edit history, I can see that a Citation section was just added.",
        );
        let extracted = extract_last_codeblock(&response).unwrap();
        assert_eq!(extracted, expected);
    }

    /// Without any markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = "one\ntwo three";
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// A code block holding only the sentinel is recognizable as "no edits".
    #[test]
    fn test_parse_no_edits_response() {
        let response = concat!(
            "The code is already complete. There is no clear next edit to make.\n",
            "\n",
            "`````\n",
            "NO_EDITS\n",
            "`````\n",
        );
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted.trim(), TeacherPrompt::NO_EDITS);
    }

    /// Missing or unclosed blocks give `None`; a lone inner block is still found.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(plain).is_none());

        // Unclosed code block should return None
        let unclosed = "```\nunclosed block\n";
        assert!(extract_last_codeblock(unclosed).is_none());

        // Analysis text with nested markdown but no proper outer block
        let analysis = concat!(
            "# Analysis\n",
            "Looking at this:\n",
            "```\n",
            "some code\n",
            "```\n",
            "But then more analysis without wrapping block\n",
        );
        // This should find the inner block
        let extracted = extract_last_codeblock(analysis).unwrap();
        assert_eq!(extracted, "some code\n");
    }

    /// The closing fence does not need a line break after it.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, "content here\n");
    }
}