format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use gpui::AsyncApp;
 10use similar::DiffableStr;
 11use std::ops::Range;
 12use std::sync::Arc;
 13use zeta_prompt::udiff;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    multi_region, output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 37            step_progress.set_substatus("formatting teacher prompt");
 38
 39            let zeta_format = ZetaFormat::default();
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: None,
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::TeacherMultiRegion(_)
 53        | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
 54            step_progress.set_substatus("formatting teacher multi-region prompt");
 55
 56            let zeta_format = ZetaFormat::default();
 57            let (editable_range, context_range) =
 58                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 59
 60            let prompt =
 61                TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
 62            example.prompt = Some(ExamplePrompt {
 63                input: prompt,
 64                expected_output: None,
 65                rejected_output: None,
 66                prefill: None,
 67                provider: args.provider,
 68            });
 69        }
 70        PredictionProvider::Zeta2(zeta_format) => {
 71            step_progress.set_substatus("formatting zeta2 prompt");
 72
 73            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 74            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 75            let expected_output = example
 76                .spec
 77                .expected_patches_with_cursor_positions()
 78                .into_iter()
 79                .next()
 80                .and_then(|(expected_patch, expected_cursor_offset)| {
 81                    zeta2_output_for_patch(
 82                        prompt_inputs,
 83                        &expected_patch,
 84                        expected_cursor_offset,
 85                        zeta_format,
 86                    )
 87                    .ok()
 88                });
 89
 90            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 91                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 92            });
 93
 94            example.prompt = prompt.map(|prompt| ExamplePrompt {
 95                input: prompt,
 96                expected_output,
 97                rejected_output,
 98                provider: args.provider,
 99                prefill: Some(prefill),
100            });
101        }
102        _ => {
103            panic!("Cannot format prompt for {:?}", args.provider);
104        }
105    };
106    Ok(())
107}
108
109pub fn zeta2_output_for_patch(
110    input: &zeta_prompt::ZetaPromptInput,
111    patch: &str,
112    cursor_offset: Option<usize>,
113    version: ZetaFormat,
114) -> Result<String> {
115    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
116    let mut old_editable_region = context[editable_range].to_string();
117
118    if !old_editable_region.ends_with_newline() {
119        old_editable_region.push('\n');
120    }
121
122    if let Some(encoded_output) =
123        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
124    {
125        return Ok(encoded_output);
126    }
127
128    let (result, first_hunk_offset) =
129        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
130            || {
131                format!(
132                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
133                    patch, old_editable_region
134                )
135            },
136        )?;
137
138    if version == ZetaFormat::V0317SeedMultiRegions {
139        let cursor_in_new = cursor_offset.map(|cursor_offset| {
140            let hunk_start = first_hunk_offset.unwrap_or(0);
141            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
142        });
143        return multi_region::encode_from_old_and_new_v0317(
144            &old_editable_region,
145            &result,
146            cursor_in_new,
147            zeta_prompt::CURSOR_MARKER,
148            multi_region::V0317_END_MARKER,
149        );
150    }
151
152    if version == ZetaFormat::V0318SeedMultiRegions {
153        let cursor_in_new = cursor_offset.map(|cursor_offset| {
154            let hunk_start = first_hunk_offset.unwrap_or(0);
155            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
156        });
157        return multi_region::encode_from_old_and_new_v0318(
158            &old_editable_region,
159            &result,
160            cursor_in_new,
161            zeta_prompt::CURSOR_MARKER,
162            multi_region::V0318_END_MARKER,
163        );
164    }
165
166    if version == ZetaFormat::V0316SeedMultiRegions {
167        let cursor_in_new = cursor_offset.map(|cursor_offset| {
168            let hunk_start = first_hunk_offset.unwrap_or(0);
169            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
170        });
171        return multi_region::encode_from_old_and_new_v0316(
172            &old_editable_region,
173            &result,
174            cursor_in_new,
175            zeta_prompt::CURSOR_MARKER,
176            multi_region::V0316_END_MARKER,
177        );
178    }
179
180    if version == ZetaFormat::V0306SeedMultiRegions {
181        let cursor_in_new = cursor_offset.map(|cursor_offset| {
182            let hunk_start = first_hunk_offset.unwrap_or(0);
183            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
184        });
185        return multi_region::encode_from_old_and_new(
186            &old_editable_region,
187            &result,
188            cursor_in_new,
189            zeta_prompt::CURSOR_MARKER,
190            zeta_prompt::seed_coder::END_MARKER,
191            zeta_prompt::seed_coder::NO_EDITS,
192        );
193    }
194
195    let mut result = result;
196    if let Some(cursor_offset) = cursor_offset {
197        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
198        // We need to add where the hunk context matched in the editable region to compute
199        // the actual cursor position in the result.
200        let hunk_start = first_hunk_offset.unwrap_or(0);
201        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
202        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
203    }
204
205    if let Some(end_marker) = output_end_marker_for_format(version) {
206        if !result.ends_with('\n') {
207            result.push('\n');
208        }
209        result.push_str(end_marker);
210    }
211
212    Ok(result)
213}
214
215pub struct TeacherPrompt;
216
217impl TeacherPrompt {
218    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
219    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
220    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
221    pub(crate) const NO_EDITS: &str = "NO_EDITS";
222
223    /// Truncate edit history to this number of last lines
224    const MAX_HISTORY_LINES: usize = 128;
225
226    pub fn format_prompt(
227        example: &Example,
228        editable_range: Range<usize>,
229        context_range: Range<usize>,
230    ) -> String {
231        let edit_history = Self::format_edit_history(&example.spec.edit_history);
232        let context = Self::format_context(example);
233        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
234
235        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
236        let prompt = prompt_template
237            .replace("{{context}}", &context)
238            .replace("{{edit_history}}", &edit_history)
239            .replace("{{cursor_excerpt}}", &cursor_excerpt);
240
241        prompt
242    }
243
244    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
245        // Check if the model indicated no edits are needed
246        let no_edits = (String::new(), None);
247        if let Some(last_codeblock) = extract_last_codeblock(&response) {
248            if last_codeblock.trim() == Self::NO_EDITS {
249                return Ok(no_edits);
250            }
251        }
252
253        if response
254            .trim_end_matches(&[' ', '\n', '`'])
255            .ends_with(Self::NO_EDITS)
256        {
257            return Ok(no_edits);
258        }
259
260        // Extract updated (new) editable region from the model response.
261        let new_editable_region = Self::extract_editable_region(&response)?;
262        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
263        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
264        let old_editable_region = Self::extract_editable_region(
265            &example
266                .prompt
267                .as_ref()
268                .context("example prompt missing")?
269                .input,
270        )?
271        .replace(Self::USER_CURSOR_MARKER, "");
272
273        let prompt_inputs = example
274            .prompt_inputs
275            .as_ref()
276            .context("example is missing prompt inputs")?;
277
278        // Normalize leading newlines: if old starts with newline but new doesn't,
279        // prepend newline to new to preserve whitespace structure.
280        // This handles the case where the model drops the leading blank line.
281        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
282            new_editable_region.insert(0, '\n');
283        }
284
285        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
286        let (editable_region_offset, _) = excerpt
287            .match_indices(&old_editable_region)
288            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
289            .context("editable region not found in prompt content")?;
290        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
291
292        let editable_region_lines = old_editable_region.lines().count() as u32;
293        let diff = language::unified_diff_with_context(
294            &old_editable_region,
295            &new_editable_region,
296            editable_region_start_line as u32,
297            editable_region_start_line as u32,
298            editable_region_lines,
299        );
300
301        let diff = indoc::formatdoc! {"
302            --- a/{path}
303            +++ b/{path}
304            {diff}",
305            path = example.spec.cursor_path.to_string_lossy(),
306            diff = diff,
307        };
308
309        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
310            ActualCursor::from_editable_region(
311                &example.spec.cursor_path,
312                editable_region_cursor_offset,
313                &new_editable_region,
314                excerpt,
315                editable_region_offset,
316                editable_region_start_line,
317            )
318        });
319
320        Ok((diff, actual_cursor))
321    }
322
323    fn format_edit_history(edit_history: &str) -> String {
324        let lines: Vec<&str> = edit_history.lines().collect();
325
326        if lines.is_empty() {
327            return "(No edit history)".to_string();
328        }
329
330        if lines.len() > Self::MAX_HISTORY_LINES {
331            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
332            format!("{truncated}\n[...truncated...]")
333        } else {
334            lines.join("\n")
335        }
336    }
337
338    pub fn format_context(example: &Example) -> String {
339        let related_files = example
340            .prompt_inputs
341            .as_ref()
342            .and_then(|pi| pi.related_files.as_deref());
343
344        let Some(related_files) = related_files else {
345            return "(No context)".to_string();
346        };
347
348        if related_files.is_empty() {
349            return "(No context)".to_string();
350        }
351
352        let prefix = "`````";
353        let suffix = "`````\n\n";
354        let max_tokens = 1024;
355        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
356    }
357
358    fn format_cursor_excerpt(
359        example: &Example,
360        editable_range: Range<usize>,
361        context_range: Range<usize>,
362    ) -> String {
363        let mut result = String::new();
364
365        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
366        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
367        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
368
369        let path_str = example.spec.cursor_path.to_string_lossy();
370        result.push_str(&format!("`````{path_str}\n"));
371        result.push_str(&excerpt[context_range.start..editable_range.start]);
372        result.push_str(Self::EDITABLE_REGION_START);
373        result.push_str(&excerpt[editable_range.start..cursor_offset]);
374        result.push_str(Self::USER_CURSOR_MARKER);
375        result.push_str(&excerpt[cursor_offset..editable_range.end]);
376        result.push_str(Self::EDITABLE_REGION_END);
377        result.push_str(&excerpt[editable_range.end..context_range.end]);
378        result.push_str("\n`````");
379
380        result
381    }
382
383    pub fn extract_editable_region(text: &str) -> Result<String> {
384        let start = text
385            .rfind(Self::EDITABLE_REGION_START)
386            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
387        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
388
389        if start >= end {
390            return Err(anyhow!("Invalid editable region markers"));
391        }
392
393        let region = &text[start..end];
394        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
395    }
396}
397
398pub struct TeacherMultiRegionPrompt;
399
400impl TeacherMultiRegionPrompt {
401    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
402    pub(crate) const NO_EDITS: &str = "NO_EDITS";
403
404    /// Truncate edit history to this number of last lines
405    const MAX_HISTORY_LINES: usize = 128;
406
407    pub fn format_prompt(
408        example: &Example,
409        editable_range: Range<usize>,
410        context_range: Range<usize>,
411    ) -> String {
412        let edit_history = Self::format_edit_history(&example.spec.edit_history);
413        let context = Self::format_context(example);
414        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
415
416        let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
417        let prompt = prompt_template
418            .replace("{{context}}", &context)
419            .replace("{{edit_history}}", &edit_history)
420            .replace("{{cursor_excerpt}}", &cursor_excerpt);
421
422        prompt
423    }
424
425    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
426        let no_edits = (String::new(), None);
427        if let Some(last_codeblock) = extract_last_codeblock(&response) {
428            if last_codeblock.trim() == Self::NO_EDITS {
429                return Ok(no_edits);
430            }
431        }
432
433        if response.trim().ends_with(Self::NO_EDITS) {
434            return Ok(no_edits);
435        }
436
437        let prompt_inputs = example
438            .prompt_inputs
439            .as_ref()
440            .context("example is missing prompt inputs")?;
441
442        let zeta_format = ZetaFormat::default();
443        let (editable_range, _) =
444            excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
445        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
446        let old_editable_region = &excerpt[editable_range.clone()];
447        let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
448
449        let codeblock =
450            extract_last_codeblock(&response).context("no codeblock found in model response")?;
451        let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
452
453        let start_idx = start_num
454            .checked_sub(1)
455            .context("marker numbers are 1-indexed")?;
456        let end_idx = end_num
457            .checked_sub(1)
458            .context("marker numbers are 1-indexed")?;
459        let start_byte = *marker_offsets
460            .get(start_idx)
461            .context("start marker number out of range")?;
462        let end_byte = *marker_offsets
463            .get(end_idx)
464            .context("end marker number out of range")?;
465
466        if start_byte > end_byte {
467            return Err(anyhow!("start marker must come before end marker"));
468        }
469
470        let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
471        let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
472
473        let old_span = &old_editable_region[start_byte..end_byte];
474        let mut new_span = new_span;
475        if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
476            new_span.push('\n');
477        }
478        if !old_span.ends_with('\n') && new_span.ends_with('\n') {
479            new_span.pop();
480        }
481
482        let mut new_editable_region = String::new();
483        new_editable_region.push_str(&old_editable_region[..start_byte]);
484        new_editable_region.push_str(&new_span);
485        new_editable_region.push_str(&old_editable_region[end_byte..]);
486
487        let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
488
489        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
490            new_editable_region.insert(0, '\n');
491        }
492
493        let editable_region_offset = editable_range.start;
494        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
495
496        let editable_region_lines = old_editable_region.lines().count() as u32;
497        let diff = language::unified_diff_with_context(
498            old_editable_region,
499            &new_editable_region,
500            editable_region_start_line as u32,
501            editable_region_start_line as u32,
502            editable_region_lines,
503        );
504
505        let diff = indoc::formatdoc! {"
506            --- a/{path}
507            +++ b/{path}
508            {diff}",
509            path = example.spec.cursor_path.to_string_lossy(),
510            diff = diff,
511        };
512
513        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
514            ActualCursor::from_editable_region(
515                &example.spec.cursor_path,
516                editable_region_cursor_offset,
517                &new_editable_region,
518                excerpt,
519                editable_region_offset,
520                editable_region_start_line,
521            )
522        });
523
524        Ok((diff, actual_cursor))
525    }
526
527    fn format_edit_history(edit_history: &str) -> String {
528        let lines: Vec<&str> = edit_history.lines().collect();
529
530        if lines.is_empty() {
531            return "(No edit history)".to_string();
532        }
533
534        if lines.len() > Self::MAX_HISTORY_LINES {
535            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
536            format!("{truncated}\n[...truncated...]")
537        } else {
538            lines.join("\n")
539        }
540    }
541
542    pub fn format_context(example: &Example) -> String {
543        let related_files = example
544            .prompt_inputs
545            .as_ref()
546            .and_then(|pi| pi.related_files.as_deref());
547        let Some(related_files) = related_files else {
548            return "(No context)".to_string();
549        };
550
551        if related_files.is_empty() {
552            return "(No context)".to_string();
553        }
554
555        let prefix = "`````";
556        let suffix = "`````\n\n";
557        let max_tokens = 1024;
558        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
559    }
560
561    fn format_cursor_excerpt(
562        example: &Example,
563        editable_range: Range<usize>,
564        context_range: Range<usize>,
565    ) -> String {
566        let mut result = String::new();
567
568        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
569        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
570        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
571
572        let editable_text = &excerpt[editable_range.clone()];
573        let cursor_in_editable = cursor_offset - editable_range.start;
574
575        let path_str = example.spec.cursor_path.to_string_lossy();
576        result.push_str(&format!("`````{path_str}\n"));
577
578        result.push_str(&excerpt[context_range.start..editable_range.start]);
579
580        multi_region::write_editable_with_markers(
581            &mut result,
582            editable_text,
583            cursor_in_editable,
584            Self::USER_CURSOR_MARKER,
585        );
586
587        result.push_str(&excerpt[editable_range.end..context_range.end]);
588        result.push_str("\n`````");
589
590        result
591    }
592}
593
594/// Extract the cursor excerpt from an example.
595/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
596pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
597    // If we have the original prompt, extract the cursor excerpt from it
598    if let Some(prompt) = &example.prompt {
599        // Find "# 3. Current File" section and extract the content
600        if let Some(start) = prompt.input.find("# 3. Current File") {
601            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
602            let backtick_count = prompt.input[content_start..]
603                .chars()
604                .take_while(|&c| c == '`')
605                .count();
606            let content_start = content_start + backtick_count;
607
608            // Find the path line and skip it
609            let newline_pos = prompt.input[content_start..].find('\n')?;
610            let text_start = content_start + newline_pos + 1;
611
612            // Find the closing backticks
613            let closing_pattern = "`".repeat(backtick_count);
614            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
615            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
616
617            let path_str = example.spec.cursor_path.to_string_lossy();
618            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
619        }
620    }
621
622    // Fallback: construct from prompt_inputs if available
623    let prompt_inputs = example.prompt_inputs.as_ref()?;
624    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
625    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
626
627    // Simple fallback: just show content around cursor with markers
628    let path_str = example.spec.cursor_path.to_string_lossy();
629    let mut result = format!("`````{path_str}\n");
630    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
631    result.push_str(&excerpt[..cursor_offset]);
632    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
633    result.push_str(&excerpt[cursor_offset..]);
634    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
635    result.push_str("\n`````");
636
637    Some(result)
638}
639
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists only of three or more
/// backticks. The opening fence is the nearest preceding line that starts with exactly that
/// many backticks, optionally followed by a language or other metadata. Shorter fences inside
/// the block are treated as content.
///
/// The content is returned with a trailing newline, or as an empty string when the block has
/// no lines. Returns `None` when no closing fence or no matching opening fence is found.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    let is_bare_fence = |line: &str| {
        let trimmed = line.trim();
        trimmed.len() >= 3 && trimmed.chars().all(|c| c == '`')
    };

    // The last backtick-only line closes the block we are looking for.
    let closing_idx = lines.iter().rposition(|&line| is_bare_fence(line))?;
    let fence = lines[closing_idx].trim();

    // Walk backwards to the nearest line that opens with the same fence; a longer run of
    // backticks belongs to a different fence and is skipped.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence)
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        Some(String::new())
    } else {
        // Preserve the trailing newline after the last content line.
        Some(format!("{}\n", body.join("\n")))
    }
}
684
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal example that has neither prompt inputs nor a prompt.
    fn example_without_prompt() -> Example {
        Example {
            spec: edit_prediction::example_spec::ExampleSpec {
                name: "test".to_string(),
                repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                revision: "HEAD".to_string(),
                tags: Vec::new(),
                reasoning: None,
                uncommitted_diff: String::new(),
                cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                cursor_position: "0:0".to_string(),
                edit_history: String::new(),
                expected_patches: Vec::new(),
                rejected_patch: None,
                telemetry: None,
                human_feedback: Vec::new(),
                rating: None,
            },
            prompt_inputs: None,
            prompt: None,
            predictions: Vec::new(),
            score: Vec::new(),
            qa: Vec::new(),
            zed_version: None,
            state: None,
        }
    }

    /// The last of several blocks is returned, and fence metadata is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    /// Shorter fences inside a longer fence are kept as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content with ``` inline\nand ```python nested\nmore content\n")
        );
    }

    /// Backticks in the middle of a line never act as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("here is some `code` with inline backticks\nand here```more```stuff\n")
        );
    }

    /// The region between the explicit start/end markers is extracted.
    #[test]
    fn test_extract_editable_region_old_format() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// The region between two numbered markers is extracted.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let prompt = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// Intermediate markers are removed from the extracted region.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let prompt = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }

    /// A nested ```bibtex block stays inside the outer five-backtick block.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, expected);
    }

    /// Without markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = indoc::indoc! {"
            one
            two three"};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// A code block holding only the sentinel is recognised after trimming.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain), None);

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }

    /// The sentinel is still recognised when stray backticks follow it.
    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let example = example_without_prompt();

        let (diff, cursor) = TeacherPrompt::parse(&example, "NO_EDITS```").unwrap();

        assert!(diff.is_empty());
        assert!(cursor.is_none());
    }
}