format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use gpui::AsyncApp;
 10use similar::DiffableStr;
 11use std::ops::Range;
 12use std::sync::Arc;
 13use zeta_prompt::udiff;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    multi_region, output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_, zeta_format)
 37        | PredictionProvider::TeacherNonBatching(_, zeta_format) => {
 38            step_progress.set_substatus("formatting teacher prompt");
 39
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: None,
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::TeacherMultiRegion(_)
 53        | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
 54            step_progress.set_substatus("formatting teacher multi-region prompt");
 55
 56            let zeta_format = ZetaFormat::default();
 57            let (editable_range, context_range) =
 58                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 59
 60            let prompt =
 61                TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
 62            example.prompt = Some(ExamplePrompt {
 63                input: prompt,
 64                expected_output: None,
 65                rejected_output: None,
 66                prefill: None,
 67                provider: args.provider,
 68            });
 69        }
 70        PredictionProvider::Zeta2(zeta_format) => {
 71            step_progress.set_substatus("formatting zeta2 prompt");
 72
 73            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 74            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 75            let expected_output = example
 76                .spec
 77                .expected_patches_with_cursor_positions()
 78                .into_iter()
 79                .next()
 80                .and_then(|(expected_patch, expected_cursor_offset)| {
 81                    zeta2_output_for_patch(
 82                        prompt_inputs,
 83                        &expected_patch,
 84                        expected_cursor_offset,
 85                        zeta_format,
 86                    )
 87                    .ok()
 88                });
 89
 90            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 91                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 92            });
 93
 94            example.prompt = prompt.map(|prompt| ExamplePrompt {
 95                input: prompt,
 96                expected_output,
 97                rejected_output,
 98                provider: args.provider,
 99                prefill: Some(prefill),
100            });
101        }
102        _ => {
103            panic!("Cannot format prompt for {:?}", args.provider);
104        }
105    };
106    Ok(())
107}
108
109pub fn zeta2_output_for_patch(
110    input: &zeta_prompt::ZetaPromptInput,
111    patch: &str,
112    cursor_offset: Option<usize>,
113    version: ZetaFormat,
114) -> Result<String> {
115    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
116    let mut old_editable_region = context[editable_range].to_string();
117
118    if !old_editable_region.ends_with_newline() {
119        old_editable_region.push('\n');
120    }
121
122    if let Some(encoded_output) =
123        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
124    {
125        return Ok(encoded_output);
126    }
127
128    let (result, first_hunk_offset) =
129        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
130            || {
131                format!(
132                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
133                    patch, old_editable_region
134                )
135            },
136        )?;
137
138    if version == ZetaFormat::V0317SeedMultiRegions {
139        let cursor_in_new = cursor_offset.map(|cursor_offset| {
140            let hunk_start = first_hunk_offset.unwrap_or(0);
141            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
142        });
143        return multi_region::encode_from_old_and_new_v0317(
144            &old_editable_region,
145            &result,
146            cursor_in_new,
147            zeta_prompt::CURSOR_MARKER,
148            multi_region::V0317_END_MARKER,
149        );
150    }
151
152    if version == ZetaFormat::V0318SeedMultiRegions {
153        let cursor_in_new = cursor_offset.map(|cursor_offset| {
154            let hunk_start = first_hunk_offset.unwrap_or(0);
155            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
156        });
157        return multi_region::encode_from_old_and_new_v0318(
158            &old_editable_region,
159            &result,
160            cursor_in_new,
161            zeta_prompt::CURSOR_MARKER,
162            multi_region::V0318_END_MARKER,
163        );
164    }
165
166    if version == ZetaFormat::V0327SingleFile {
167        let cursor_in_new = cursor_offset.map(|cursor_offset| {
168            let hunk_start = first_hunk_offset.unwrap_or(0);
169            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
170        });
171        return multi_region::encode_from_old_and_new_v0318(
172            &old_editable_region,
173            &result,
174            cursor_in_new,
175            zeta_prompt::CURSOR_MARKER,
176            multi_region::V0327_END_MARKER,
177        );
178    }
179
180    if version == ZetaFormat::V0316SeedMultiRegions {
181        let cursor_in_new = cursor_offset.map(|cursor_offset| {
182            let hunk_start = first_hunk_offset.unwrap_or(0);
183            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
184        });
185        return multi_region::encode_from_old_and_new_v0316(
186            &old_editable_region,
187            &result,
188            cursor_in_new,
189            zeta_prompt::CURSOR_MARKER,
190            multi_region::V0316_END_MARKER,
191        );
192    }
193
194    if version == ZetaFormat::V0306SeedMultiRegions {
195        let cursor_in_new = cursor_offset.map(|cursor_offset| {
196            let hunk_start = first_hunk_offset.unwrap_or(0);
197            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
198        });
199        return multi_region::encode_from_old_and_new(
200            &old_editable_region,
201            &result,
202            cursor_in_new,
203            zeta_prompt::CURSOR_MARKER,
204            zeta_prompt::seed_coder::END_MARKER,
205            zeta_prompt::seed_coder::NO_EDITS,
206        );
207    }
208
209    let mut result = result;
210    if let Some(cursor_offset) = cursor_offset {
211        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
212        // We need to add where the hunk context matched in the editable region to compute
213        // the actual cursor position in the result.
214        let hunk_start = first_hunk_offset.unwrap_or(0);
215        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
216        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
217    }
218
219    if let Some(end_marker) = output_end_marker_for_format(version) {
220        if !result.ends_with('\n') {
221            result.push('\n');
222        }
223        result.push_str(end_marker);
224    }
225
226    Ok(result)
227}
228
229pub struct TeacherPrompt;
230
231impl TeacherPrompt {
232    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
233    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
234    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
235    pub(crate) const NO_EDITS: &str = "NO_EDITS";
236
237    /// Truncate edit history to this number of last lines
238    const MAX_HISTORY_LINES: usize = 128;
239
240    pub fn format_prompt(
241        example: &Example,
242        editable_range: Range<usize>,
243        context_range: Range<usize>,
244    ) -> String {
245        let edit_history = Self::format_edit_history(&example.spec.edit_history);
246        let context = Self::format_context(example);
247        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
248
249        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
250        let prompt = prompt_template
251            .replace("{{context}}", &context)
252            .replace("{{edit_history}}", &edit_history)
253            .replace("{{cursor_excerpt}}", &cursor_excerpt);
254
255        prompt
256    }
257
258    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
259        // Check if the model indicated no edits are needed
260        let no_edits = (String::new(), None);
261        if let Some(last_codeblock) = extract_last_codeblock(&response) {
262            if last_codeblock.trim() == Self::NO_EDITS {
263                return Ok(no_edits);
264            }
265        }
266
267        if response
268            .trim_end_matches(&[' ', '\n', '`'])
269            .ends_with(Self::NO_EDITS)
270        {
271            return Ok(no_edits);
272        }
273
274        // Extract updated (new) editable region from the model response.
275        let new_editable_region = Self::extract_editable_region(&response)?;
276        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
277        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
278        let old_editable_region = Self::extract_editable_region(
279            &example
280                .prompt
281                .as_ref()
282                .context("example prompt missing")?
283                .input,
284        )?
285        .replace(Self::USER_CURSOR_MARKER, "");
286
287        let prompt_inputs = example
288            .prompt_inputs
289            .as_ref()
290            .context("example is missing prompt inputs")?;
291
292        // Normalize leading newlines: if old starts with newline but new doesn't,
293        // prepend newline to new to preserve whitespace structure.
294        // This handles the case where the model drops the leading blank line.
295        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
296            new_editable_region.insert(0, '\n');
297        }
298
299        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
300        let (editable_region_offset, _) = excerpt
301            .match_indices(&old_editable_region)
302            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
303            .context("editable region not found in prompt content")?;
304        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
305
306        let editable_region_lines = old_editable_region.lines().count() as u32;
307        let diff = language::unified_diff_with_context(
308            &old_editable_region,
309            &new_editable_region,
310            editable_region_start_line as u32,
311            editable_region_start_line as u32,
312            editable_region_lines,
313        );
314
315        let diff = indoc::formatdoc! {"
316            --- a/{path}
317            +++ b/{path}
318            {diff}",
319            path = example.spec.cursor_path.to_string_lossy(),
320            diff = diff,
321        };
322
323        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
324            ActualCursor::from_editable_region(
325                &example.spec.cursor_path,
326                editable_region_cursor_offset,
327                &new_editable_region,
328                excerpt,
329                editable_region_offset,
330                editable_region_start_line,
331            )
332        });
333
334        Ok((diff, actual_cursor))
335    }
336
337    fn format_edit_history(edit_history: &str) -> String {
338        let lines: Vec<&str> = edit_history.lines().collect();
339
340        if lines.is_empty() {
341            return "(No edit history)".to_string();
342        }
343
344        if lines.len() > Self::MAX_HISTORY_LINES {
345            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
346            format!("{truncated}\n[...truncated...]")
347        } else {
348            lines.join("\n")
349        }
350    }
351
352    pub fn format_context(example: &Example) -> String {
353        let related_files = example
354            .prompt_inputs
355            .as_ref()
356            .and_then(|pi| pi.related_files.as_deref());
357
358        let Some(related_files) = related_files else {
359            return "(No context)".to_string();
360        };
361
362        if related_files.is_empty() {
363            return "(No context)".to_string();
364        }
365
366        let prefix = "`````";
367        let suffix = "`````\n\n";
368        let max_tokens = 1024;
369        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
370    }
371
372    fn format_cursor_excerpt(
373        example: &Example,
374        editable_range: Range<usize>,
375        context_range: Range<usize>,
376    ) -> String {
377        let mut result = String::new();
378
379        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
380        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
381        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
382
383        let path_str = example.spec.cursor_path.to_string_lossy();
384        result.push_str(&format!("`````{path_str}\n"));
385        result.push_str(&excerpt[context_range.start..editable_range.start]);
386        result.push_str(Self::EDITABLE_REGION_START);
387        result.push_str(&excerpt[editable_range.start..cursor_offset]);
388        result.push_str(Self::USER_CURSOR_MARKER);
389        result.push_str(&excerpt[cursor_offset..editable_range.end]);
390        result.push_str(Self::EDITABLE_REGION_END);
391        result.push_str(&excerpt[editable_range.end..context_range.end]);
392        result.push_str("\n`````");
393
394        result
395    }
396
397    pub fn extract_editable_region(text: &str) -> Result<String> {
398        let start = text
399            .rfind(Self::EDITABLE_REGION_START)
400            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
401        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
402
403        if start >= end {
404            return Err(anyhow!("Invalid editable region markers"));
405        }
406
407        let region = &text[start..end];
408        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
409    }
410}
411
412pub struct TeacherMultiRegionPrompt;
413
414impl TeacherMultiRegionPrompt {
415    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
416    pub(crate) const NO_EDITS: &str = "NO_EDITS";
417
418    /// Truncate edit history to this number of last lines
419    const MAX_HISTORY_LINES: usize = 128;
420
421    pub fn format_prompt(
422        example: &Example,
423        editable_range: Range<usize>,
424        context_range: Range<usize>,
425    ) -> String {
426        let edit_history = Self::format_edit_history(&example.spec.edit_history);
427        let context = Self::format_context(example);
428        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
429
430        let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
431        let prompt = prompt_template
432            .replace("{{context}}", &context)
433            .replace("{{edit_history}}", &edit_history)
434            .replace("{{cursor_excerpt}}", &cursor_excerpt);
435
436        prompt
437    }
438
439    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
440        let no_edits = (String::new(), None);
441        if let Some(last_codeblock) = extract_last_codeblock(&response) {
442            if last_codeblock.trim() == Self::NO_EDITS {
443                return Ok(no_edits);
444            }
445        }
446
447        if response.trim().ends_with(Self::NO_EDITS) {
448            return Ok(no_edits);
449        }
450
451        let prompt_inputs = example
452            .prompt_inputs
453            .as_ref()
454            .context("example is missing prompt inputs")?;
455
456        let zeta_format = ZetaFormat::default();
457        let (editable_range, _) =
458            excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
459        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
460        let old_editable_region = &excerpt[editable_range.clone()];
461        let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
462
463        let codeblock =
464            extract_last_codeblock(&response).context("no codeblock found in model response")?;
465        let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
466
467        let start_idx = start_num
468            .checked_sub(1)
469            .context("marker numbers are 1-indexed")?;
470        let end_idx = end_num
471            .checked_sub(1)
472            .context("marker numbers are 1-indexed")?;
473        let start_byte = *marker_offsets
474            .get(start_idx)
475            .context("start marker number out of range")?;
476        let end_byte = *marker_offsets
477            .get(end_idx)
478            .context("end marker number out of range")?;
479
480        if start_byte > end_byte {
481            return Err(anyhow!("start marker must come before end marker"));
482        }
483
484        let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
485        let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
486
487        let old_span = &old_editable_region[start_byte..end_byte];
488        let mut new_span = new_span;
489        if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
490            new_span.push('\n');
491        }
492        if !old_span.ends_with('\n') && new_span.ends_with('\n') {
493            new_span.pop();
494        }
495
496        let mut new_editable_region = String::new();
497        new_editable_region.push_str(&old_editable_region[..start_byte]);
498        new_editable_region.push_str(&new_span);
499        new_editable_region.push_str(&old_editable_region[end_byte..]);
500
501        let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
502
503        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
504            new_editable_region.insert(0, '\n');
505        }
506
507        let editable_region_offset = editable_range.start;
508        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
509
510        let editable_region_lines = old_editable_region.lines().count() as u32;
511        let diff = language::unified_diff_with_context(
512            old_editable_region,
513            &new_editable_region,
514            editable_region_start_line as u32,
515            editable_region_start_line as u32,
516            editable_region_lines,
517        );
518
519        let diff = indoc::formatdoc! {"
520            --- a/{path}
521            +++ b/{path}
522            {diff}",
523            path = example.spec.cursor_path.to_string_lossy(),
524            diff = diff,
525        };
526
527        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
528            ActualCursor::from_editable_region(
529                &example.spec.cursor_path,
530                editable_region_cursor_offset,
531                &new_editable_region,
532                excerpt,
533                editable_region_offset,
534                editable_region_start_line,
535            )
536        });
537
538        Ok((diff, actual_cursor))
539    }
540
541    fn format_edit_history(edit_history: &str) -> String {
542        let lines: Vec<&str> = edit_history.lines().collect();
543
544        if lines.is_empty() {
545            return "(No edit history)".to_string();
546        }
547
548        if lines.len() > Self::MAX_HISTORY_LINES {
549            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
550            format!("{truncated}\n[...truncated...]")
551        } else {
552            lines.join("\n")
553        }
554    }
555
556    pub fn format_context(example: &Example) -> String {
557        let related_files = example
558            .prompt_inputs
559            .as_ref()
560            .and_then(|pi| pi.related_files.as_deref());
561        let Some(related_files) = related_files else {
562            return "(No context)".to_string();
563        };
564
565        if related_files.is_empty() {
566            return "(No context)".to_string();
567        }
568
569        let prefix = "`````";
570        let suffix = "`````\n\n";
571        let max_tokens = 1024;
572        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
573    }
574
575    fn format_cursor_excerpt(
576        example: &Example,
577        editable_range: Range<usize>,
578        context_range: Range<usize>,
579    ) -> String {
580        let mut result = String::new();
581
582        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
583        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
584        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
585
586        let editable_text = &excerpt[editable_range.clone()];
587        let cursor_in_editable = cursor_offset - editable_range.start;
588
589        let path_str = example.spec.cursor_path.to_string_lossy();
590        result.push_str(&format!("`````{path_str}\n"));
591
592        result.push_str(&excerpt[context_range.start..editable_range.start]);
593
594        multi_region::write_editable_with_markers(
595            &mut result,
596            editable_text,
597            cursor_in_editable,
598            Self::USER_CURSOR_MARKER,
599        );
600
601        result.push_str(&excerpt[editable_range.end..context_range.end]);
602        result.push_str("\n`````");
603
604        result
605    }
606}
607
608/// Extract the cursor excerpt from an example.
609/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
610pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
611    // If we have the original prompt, extract the cursor excerpt from it
612    if let Some(prompt) = &example.prompt {
613        // Find "# 3. Current File" section and extract the content
614        if let Some(start) = prompt.input.find("# 3. Current File") {
615            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
616            let backtick_count = prompt.input[content_start..]
617                .chars()
618                .take_while(|&c| c == '`')
619                .count();
620            let content_start = content_start + backtick_count;
621
622            // Find the path line and skip it
623            let newline_pos = prompt.input[content_start..].find('\n')?;
624            let text_start = content_start + newline_pos + 1;
625
626            // Find the closing backticks
627            let closing_pattern = "`".repeat(backtick_count);
628            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
629            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
630
631            let path_str = example.spec.cursor_path.to_string_lossy();
632            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
633        }
634    }
635
636    // Fallback: construct from prompt_inputs if available
637    let prompt_inputs = example.prompt_inputs.as_ref()?;
638    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
639    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
640
641    // Simple fallback: just show content around cursor with markers
642    let path_str = example.spec.cursor_path.to_string_lossy();
643    let mut result = format!("`````{path_str}\n");
644    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
645    result.push_str(&excerpt[..cursor_offset]);
646    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
647    result.push_str(&excerpt[cursor_offset..]);
648    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
649    result.push_str("\n`````");
650
651    Some(result)
652}
653
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts (without any leading
/// whitespace) with exactly that many backticks, optionally followed by an info string.
///
/// The content lines are joined with `\n` and a trailing `\n` is appended; a block with no
/// content lines yields an empty string. Returns `None` when no closing fence or no matching
/// opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Closing fence: scan from the bottom for a line made only of backticks (3 or more).
    let is_bare_fence = |line: &str| line.len() >= 3 && line.chars().all(|c| c == '`');
    let closing_idx = lines.iter().rposition(|line| is_bare_fence(line.trim()))?;
    let fence = "`".repeat(lines[closing_idx].trim().len());

    // Opening fence: nearest line above that begins with the same fence and is not a longer
    // run of backticks. Anything else after the fence (language, metadata) is allowed.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        // Empty block
        Some(String::new())
    } else {
        // Preserve trailing newline to match previous behavior
        Some(format!("{}\n", body.join("\n")))
    }
}
698
// Unit tests for the response-parsing helpers in this file: code-block extraction,
// editable-region extraction (marker-delimited and numbered-marker formats), and `NO_EDITS`
// detection.
#[cfg(test)]
mod tests {
    use super::*;

    // With several code blocks in the response, the last one is returned, and an info string
    // after the opening fence is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let text = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(last_block, "last block\n");
    }

    // Three-backtick sequences inside a five-backtick block are kept as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let text = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    // Backticks in the middle of a line never act as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let text = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    // Start/end marker format: the text between the markers is returned, without the blank
    // line that precedes the end marker.
    #[test]
    fn test_extract_editable_region_old_format() {
        let text = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // Numbered-marker format with a single pair of markers.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let text = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    // Numbered-marker format with an intermediate marker inside the region.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let text = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    // A complete three-backtick block nested inside the five-backtick block is returned
    // verbatim as part of the outer block's content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let text = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#}
        );
    }

    // Without any markers the whole text is treated as the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = indoc::indoc! {"
            one
            two three"};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // A response whose last code block contains only `NO_EDITS` is recognisable from the
    // extracted block. This exercises `extract_last_codeblock` only, not `parse`.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let codeblock = extract_last_codeblock(response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    // Covers inputs without a complete block (both return `None`) and, as a contrast, a
    // response whose only block is followed by trailing prose (the block is still found).
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let text = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(text).is_none());

        // Unclosed code block should return None
        let text = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert!(extract_last_codeblock(text).is_none());

        // Analysis text with nested markdown but no proper outer block
        let text = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "some code\n");
    }

    // The returned content ends with a newline even when the input has no newline after the
    // closing fence.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let text = "`````\ncontent here\n`````";
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "content here\n");
    }

    // `NO_EDITS` followed by stray backticks is still treated as "no edits". The example has
    // neither a prompt nor prompt inputs, so `parse` must return before it needs them.
    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let response = "NO_EDITS```";

        let parsed = TeacherPrompt::parse(
            &Example {
                spec: edit_prediction::example_spec::ExampleSpec {
                    name: "test".to_string(),
                    repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                    revision: "HEAD".to_string(),
                    tags: Vec::new(),
                    reasoning: None,
                    uncommitted_diff: String::new(),
                    cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                    cursor_position: "0:0".to_string(),
                    edit_history: String::new(),
                    expected_patches: Vec::new(),
                    rejected_patch: None,
                    telemetry: None,
                    human_feedback: Vec::new(),
                    rating: None,
                },
                prompt_inputs: None,
                prompt: None,
                predictions: Vec::new(),
                score: Vec::new(),
                qa: Vec::new(),
                zed_version: None,
                state: None,
            },
            response,
        )
        .unwrap();

        assert!(parsed.0.is_empty());
        assert!(parsed.1.is_none());
    }
}