format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::udiff;
 10use gpui::AsyncApp;
 11use similar::DiffableStr;
 12use std::ops::Range;
 13use std::sync::Arc;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    multi_region, output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 37            step_progress.set_substatus("formatting teacher prompt");
 38
 39            let zeta_format = ZetaFormat::default();
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: String::new(),
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::TeacherMultiRegion(_)
 53        | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
 54            step_progress.set_substatus("formatting teacher multi-region prompt");
 55
 56            let zeta_format = ZetaFormat::default();
 57            let (editable_range, context_range) =
 58                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 59
 60            let prompt =
 61                TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
 62            example.prompt = Some(ExamplePrompt {
 63                input: prompt,
 64                expected_output: String::new(),
 65                rejected_output: None,
 66                prefill: None,
 67                provider: args.provider,
 68            });
 69        }
 70        PredictionProvider::Zeta2(zeta_format) => {
 71            step_progress.set_substatus("formatting zeta2 prompt");
 72
 73            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 74            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 75            let expected_output = example
 76                .spec
 77                .expected_patches_with_cursor_positions()
 78                .into_iter()
 79                .next()
 80                .and_then(|(expected_patch, expected_cursor_offset)| {
 81                    zeta2_output_for_patch(
 82                        prompt_inputs,
 83                        &expected_patch,
 84                        expected_cursor_offset,
 85                        zeta_format,
 86                    )
 87                    .ok()
 88                })
 89                .unwrap_or_default();
 90
 91            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 92                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 93            });
 94
 95            example.prompt = prompt.map(|prompt| ExamplePrompt {
 96                input: prompt,
 97                expected_output,
 98                rejected_output,
 99                provider: args.provider,
100                prefill: Some(prefill),
101            });
102        }
103        _ => {
104            panic!("Cannot format prompt for {:?}", args.provider);
105        }
106    };
107    Ok(())
108}
109
110pub fn zeta2_output_for_patch(
111    input: &zeta_prompt::ZetaPromptInput,
112    patch: &str,
113    cursor_offset: Option<usize>,
114    version: ZetaFormat,
115) -> Result<String> {
116    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
117    let mut old_editable_region = context[editable_range].to_string();
118
119    if !old_editable_region.ends_with_newline() {
120        old_editable_region.push('\n');
121    }
122
123    if let Some(encoded_output) =
124        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
125    {
126        return Ok(encoded_output);
127    }
128
129    let (result, first_hunk_offset) =
130        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
131            || {
132                format!(
133                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
134                    patch, old_editable_region
135                )
136            },
137        )?;
138
139    if version == ZetaFormat::V0317SeedMultiRegions {
140        let cursor_in_new = cursor_offset.map(|cursor_offset| {
141            let hunk_start = first_hunk_offset.unwrap_or(0);
142            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
143        });
144        return multi_region::encode_from_old_and_new_v0317(
145            &old_editable_region,
146            &result,
147            cursor_in_new,
148            zeta_prompt::CURSOR_MARKER,
149            multi_region::V0317_END_MARKER,
150        );
151    }
152
153    if version == ZetaFormat::V0318SeedMultiRegions {
154        let cursor_in_new = cursor_offset.map(|cursor_offset| {
155            let hunk_start = first_hunk_offset.unwrap_or(0);
156            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
157        });
158        return multi_region::encode_from_old_and_new_v0318(
159            &old_editable_region,
160            &result,
161            cursor_in_new,
162            zeta_prompt::CURSOR_MARKER,
163            multi_region::V0318_END_MARKER,
164        );
165    }
166
167    if version == ZetaFormat::V0316SeedMultiRegions {
168        let cursor_in_new = cursor_offset.map(|cursor_offset| {
169            let hunk_start = first_hunk_offset.unwrap_or(0);
170            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
171        });
172        return multi_region::encode_from_old_and_new_v0316(
173            &old_editable_region,
174            &result,
175            cursor_in_new,
176            zeta_prompt::CURSOR_MARKER,
177            multi_region::V0316_END_MARKER,
178        );
179    }
180
181    if version == ZetaFormat::V0306SeedMultiRegions {
182        let cursor_in_new = cursor_offset.map(|cursor_offset| {
183            let hunk_start = first_hunk_offset.unwrap_or(0);
184            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
185        });
186        return multi_region::encode_from_old_and_new(
187            &old_editable_region,
188            &result,
189            cursor_in_new,
190            zeta_prompt::CURSOR_MARKER,
191            zeta_prompt::seed_coder::END_MARKER,
192            zeta_prompt::seed_coder::NO_EDITS,
193        );
194    }
195
196    let mut result = result;
197    if let Some(cursor_offset) = cursor_offset {
198        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
199        // We need to add where the hunk context matched in the editable region to compute
200        // the actual cursor position in the result.
201        let hunk_start = first_hunk_offset.unwrap_or(0);
202        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
203        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
204    }
205
206    if let Some(end_marker) = output_end_marker_for_format(version) {
207        if !result.ends_with('\n') {
208            result.push('\n');
209        }
210        result.push_str(end_marker);
211    }
212
213    Ok(result)
214}
215
216pub struct TeacherPrompt;
217
218impl TeacherPrompt {
219    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
220    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
221    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
222    pub(crate) const NO_EDITS: &str = "NO_EDITS";
223
224    /// Truncate edit history to this number of last lines
225    const MAX_HISTORY_LINES: usize = 128;
226
227    pub fn format_prompt(
228        example: &Example,
229        editable_range: Range<usize>,
230        context_range: Range<usize>,
231    ) -> String {
232        let edit_history = Self::format_edit_history(&example.spec.edit_history);
233        let context = Self::format_context(example);
234        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
235
236        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
237        let prompt = prompt_template
238            .replace("{{context}}", &context)
239            .replace("{{edit_history}}", &edit_history)
240            .replace("{{cursor_excerpt}}", &cursor_excerpt);
241
242        prompt
243    }
244
245    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
246        // Check if the model indicated no edits are needed
247        let no_edits = (String::new(), None);
248        if let Some(last_codeblock) = extract_last_codeblock(&response) {
249            if last_codeblock.trim() == Self::NO_EDITS {
250                return Ok(no_edits);
251            }
252        }
253
254        if response
255            .trim_end_matches(&[' ', '\n', '`'])
256            .ends_with(Self::NO_EDITS)
257        {
258            return Ok(no_edits);
259        }
260
261        // Extract updated (new) editable region from the model response.
262        let new_editable_region = Self::extract_editable_region(&response)?;
263        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
264        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
265        let old_editable_region = Self::extract_editable_region(
266            &example
267                .prompt
268                .as_ref()
269                .context("example prompt missing")?
270                .input,
271        )?
272        .replace(Self::USER_CURSOR_MARKER, "");
273
274        let prompt_inputs = example
275            .prompt_inputs
276            .as_ref()
277            .context("example is missing prompt inputs")?;
278
279        // Normalize leading newlines: if old starts with newline but new doesn't,
280        // prepend newline to new to preserve whitespace structure.
281        // This handles the case where the model drops the leading blank line.
282        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
283            new_editable_region.insert(0, '\n');
284        }
285
286        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
287        let (editable_region_offset, _) = excerpt
288            .match_indices(&old_editable_region)
289            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
290            .context("editable region not found in prompt content")?;
291        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
292
293        let editable_region_lines = old_editable_region.lines().count() as u32;
294        let diff = language::unified_diff_with_context(
295            &old_editable_region,
296            &new_editable_region,
297            editable_region_start_line as u32,
298            editable_region_start_line as u32,
299            editable_region_lines,
300        );
301
302        let diff = indoc::formatdoc! {"
303            --- a/{path}
304            +++ b/{path}
305            {diff}",
306            path = example.spec.cursor_path.to_string_lossy(),
307            diff = diff,
308        };
309
310        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
311            ActualCursor::from_editable_region(
312                &example.spec.cursor_path,
313                editable_region_cursor_offset,
314                &new_editable_region,
315                excerpt,
316                editable_region_offset,
317                editable_region_start_line,
318            )
319        });
320
321        Ok((diff, actual_cursor))
322    }
323
324    fn format_edit_history(edit_history: &str) -> String {
325        let lines: Vec<&str> = edit_history.lines().collect();
326
327        if lines.is_empty() {
328            return "(No edit history)".to_string();
329        }
330
331        if lines.len() > Self::MAX_HISTORY_LINES {
332            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
333            format!("{truncated}\n[...truncated...]")
334        } else {
335            lines.join("\n")
336        }
337    }
338
339    pub fn format_context(example: &Example) -> String {
340        let related_files = example
341            .prompt_inputs
342            .as_ref()
343            .and_then(|pi| pi.related_files.as_deref());
344
345        let Some(related_files) = related_files else {
346            return "(No context)".to_string();
347        };
348
349        if related_files.is_empty() {
350            return "(No context)".to_string();
351        }
352
353        let prefix = "`````";
354        let suffix = "`````\n\n";
355        let max_tokens = 1024;
356        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
357    }
358
359    fn format_cursor_excerpt(
360        example: &Example,
361        editable_range: Range<usize>,
362        context_range: Range<usize>,
363    ) -> String {
364        let mut result = String::new();
365
366        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
367        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
368        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
369
370        let path_str = example.spec.cursor_path.to_string_lossy();
371        result.push_str(&format!("`````{path_str}\n"));
372        result.push_str(&excerpt[context_range.start..editable_range.start]);
373        result.push_str(Self::EDITABLE_REGION_START);
374        result.push_str(&excerpt[editable_range.start..cursor_offset]);
375        result.push_str(Self::USER_CURSOR_MARKER);
376        result.push_str(&excerpt[cursor_offset..editable_range.end]);
377        result.push_str(Self::EDITABLE_REGION_END);
378        result.push_str(&excerpt[editable_range.end..context_range.end]);
379        result.push_str("\n`````");
380
381        result
382    }
383
384    pub fn extract_editable_region(text: &str) -> Result<String> {
385        let start = text
386            .rfind(Self::EDITABLE_REGION_START)
387            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
388        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
389
390        if start >= end {
391            return Err(anyhow!("Invalid editable region markers"));
392        }
393
394        let region = &text[start..end];
395        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
396    }
397}
398
399pub struct TeacherMultiRegionPrompt;
400
401impl TeacherMultiRegionPrompt {
402    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
403    pub(crate) const NO_EDITS: &str = "NO_EDITS";
404
405    /// Truncate edit history to this number of last lines
406    const MAX_HISTORY_LINES: usize = 128;
407
408    pub fn format_prompt(
409        example: &Example,
410        editable_range: Range<usize>,
411        context_range: Range<usize>,
412    ) -> String {
413        let edit_history = Self::format_edit_history(&example.spec.edit_history);
414        let context = Self::format_context(example);
415        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
416
417        let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
418        let prompt = prompt_template
419            .replace("{{context}}", &context)
420            .replace("{{edit_history}}", &edit_history)
421            .replace("{{cursor_excerpt}}", &cursor_excerpt);
422
423        prompt
424    }
425
426    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
427        let no_edits = (String::new(), None);
428        if let Some(last_codeblock) = extract_last_codeblock(&response) {
429            if last_codeblock.trim() == Self::NO_EDITS {
430                return Ok(no_edits);
431            }
432        }
433
434        if response.trim().ends_with(Self::NO_EDITS) {
435            return Ok(no_edits);
436        }
437
438        let prompt_inputs = example
439            .prompt_inputs
440            .as_ref()
441            .context("example is missing prompt inputs")?;
442
443        let zeta_format = ZetaFormat::default();
444        let (editable_range, _) =
445            excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
446        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
447        let old_editable_region = &excerpt[editable_range.clone()];
448        let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
449
450        let codeblock =
451            extract_last_codeblock(&response).context("no codeblock found in model response")?;
452        let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
453
454        let start_idx = start_num
455            .checked_sub(1)
456            .context("marker numbers are 1-indexed")?;
457        let end_idx = end_num
458            .checked_sub(1)
459            .context("marker numbers are 1-indexed")?;
460        let start_byte = *marker_offsets
461            .get(start_idx)
462            .context("start marker number out of range")?;
463        let end_byte = *marker_offsets
464            .get(end_idx)
465            .context("end marker number out of range")?;
466
467        if start_byte > end_byte {
468            return Err(anyhow!("start marker must come before end marker"));
469        }
470
471        let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
472        let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
473
474        let old_span = &old_editable_region[start_byte..end_byte];
475        let mut new_span = new_span;
476        if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
477            new_span.push('\n');
478        }
479        if !old_span.ends_with('\n') && new_span.ends_with('\n') {
480            new_span.pop();
481        }
482
483        let mut new_editable_region = String::new();
484        new_editable_region.push_str(&old_editable_region[..start_byte]);
485        new_editable_region.push_str(&new_span);
486        new_editable_region.push_str(&old_editable_region[end_byte..]);
487
488        let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
489
490        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
491            new_editable_region.insert(0, '\n');
492        }
493
494        let editable_region_offset = editable_range.start;
495        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
496
497        let editable_region_lines = old_editable_region.lines().count() as u32;
498        let diff = language::unified_diff_with_context(
499            old_editable_region,
500            &new_editable_region,
501            editable_region_start_line as u32,
502            editable_region_start_line as u32,
503            editable_region_lines,
504        );
505
506        let diff = indoc::formatdoc! {"
507            --- a/{path}
508            +++ b/{path}
509            {diff}",
510            path = example.spec.cursor_path.to_string_lossy(),
511            diff = diff,
512        };
513
514        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
515            ActualCursor::from_editable_region(
516                &example.spec.cursor_path,
517                editable_region_cursor_offset,
518                &new_editable_region,
519                excerpt,
520                editable_region_offset,
521                editable_region_start_line,
522            )
523        });
524
525        Ok((diff, actual_cursor))
526    }
527
528    fn format_edit_history(edit_history: &str) -> String {
529        let lines: Vec<&str> = edit_history.lines().collect();
530
531        if lines.is_empty() {
532            return "(No edit history)".to_string();
533        }
534
535        if lines.len() > Self::MAX_HISTORY_LINES {
536            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
537            format!("{truncated}\n[...truncated...]")
538        } else {
539            lines.join("\n")
540        }
541    }
542
543    pub fn format_context(example: &Example) -> String {
544        let related_files = example
545            .prompt_inputs
546            .as_ref()
547            .and_then(|pi| pi.related_files.as_deref());
548        let Some(related_files) = related_files else {
549            return "(No context)".to_string();
550        };
551
552        if related_files.is_empty() {
553            return "(No context)".to_string();
554        }
555
556        let prefix = "`````";
557        let suffix = "`````\n\n";
558        let max_tokens = 1024;
559        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
560    }
561
562    fn format_cursor_excerpt(
563        example: &Example,
564        editable_range: Range<usize>,
565        context_range: Range<usize>,
566    ) -> String {
567        let mut result = String::new();
568
569        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
570        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
571        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
572
573        let editable_text = &excerpt[editable_range.clone()];
574        let cursor_in_editable = cursor_offset - editable_range.start;
575
576        let path_str = example.spec.cursor_path.to_string_lossy();
577        result.push_str(&format!("`````{path_str}\n"));
578
579        result.push_str(&excerpt[context_range.start..editable_range.start]);
580
581        multi_region::write_editable_with_markers(
582            &mut result,
583            editable_text,
584            cursor_in_editable,
585            Self::USER_CURSOR_MARKER,
586        );
587
588        result.push_str(&excerpt[editable_range.end..context_range.end]);
589        result.push_str("\n`````");
590
591        result
592    }
593}
594
595/// Extract the cursor excerpt from an example.
596/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
597pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
598    // If we have the original prompt, extract the cursor excerpt from it
599    if let Some(prompt) = &example.prompt {
600        // Find "# 3. Current File" section and extract the content
601        if let Some(start) = prompt.input.find("# 3. Current File") {
602            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
603            let backtick_count = prompt.input[content_start..]
604                .chars()
605                .take_while(|&c| c == '`')
606                .count();
607            let content_start = content_start + backtick_count;
608
609            // Find the path line and skip it
610            let newline_pos = prompt.input[content_start..].find('\n')?;
611            let text_start = content_start + newline_pos + 1;
612
613            // Find the closing backticks
614            let closing_pattern = "`".repeat(backtick_count);
615            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
616            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
617
618            let path_str = example.spec.cursor_path.to_string_lossy();
619            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
620        }
621    }
622
623    // Fallback: construct from prompt_inputs if available
624    let prompt_inputs = example.prompt_inputs.as_ref()?;
625    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
626    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
627
628    // Simple fallback: just show content around cursor with markers
629    let path_str = example.spec.cursor_path.to_string_lossy();
630    let mut result = format!("`````{path_str}\n");
631    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
632    result.push_str(&excerpt[..cursor_offset]);
633    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
634    result.push_str(&excerpt[cursor_offset..]);
635    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
636    result.push_str("\n`````");
637
638    Some(result)
639}
640
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts with exactly that
/// many backticks, optionally followed by language/metadata. Lines in between are joined with
/// `\n` and terminated with a trailing `\n`; an empty block yields an empty string.
///
/// Returns `None` when there is no closing fence or no matching opening fence before it.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Last line made up of nothing but backticks (3+), together with the fence length.
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let candidate = line.trim();
        let is_fence = candidate.len() >= 3 && candidate.chars().all(|c| c == '`');
        is_fence.then_some((idx, candidate.len()))
    })?;

    // Nearest earlier line opening with the same number of backticks — not more.
    let fence = "`".repeat(fence_len);
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        return Some(String::new());
    }

    // Non-empty blocks always end with a newline.
    let mut block = body.join("\n");
    block.push('\n');
    Some(block)
}
685
// Unit tests for code-block extraction, editable-region extraction, and NO_EDITS handling.
#[cfg(test)]
mod tests {
    use super::*;

    // When several fenced blocks are present, the last one is returned, and metadata after the
    // opening fence is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let text = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(last_block, "last block\n");
    }

    // Shorter backtick runs inside a five-backtick block are kept as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let text = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    // Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let text = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    // With explicit start/end markers, the text between them is returned and one trailing
    // newline is stripped.
    #[test]
    fn test_extract_editable_region_old_format() {
        let text = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // The numbered-marker format is handled by `multi_region`: text between the first and last
    // marker is returned.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let text = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    // Markers between the first and last one are removed from the extracted text.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let text = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    // A three-backtick block nested inside the outer five-backtick block stays part of the
    // extracted content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let text = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#}
        );
    }

    // Without any markers the whole text is treated as the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = indoc::indoc! {"
            one
            two three"};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
            one
            two three"}
        );
    }

    // A response whose last code block contains only NO_EDITS is recognizable after trimming.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let codeblock = extract_last_codeblock(response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    // Covers inputs without a complete code block, plus a block followed by trailing prose.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let text = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(text).is_none());

        // Unclosed code block should return None
        let text = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert!(extract_last_codeblock(text).is_none());

        // Analysis text with nested markdown but no proper outer block
        let text = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "some code\n");
    }

    // The extracted content ends with a newline even when the input does not.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let text = "`````\ncontent here\n`````";
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "content here\n");
    }

    // NO_EDITS followed by stray backticks is still treated as "no edits"; the example needs
    // neither a prompt nor prompt inputs for this path.
    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let response = "NO_EDITS```";

        let parsed = TeacherPrompt::parse(
            &Example {
                spec: edit_prediction::example_spec::ExampleSpec {
                    name: "test".to_string(),
                    repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                    revision: "HEAD".to_string(),
                    tags: Vec::new(),
                    reasoning: None,
                    uncommitted_diff: String::new(),
                    cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                    cursor_position: "0:0".to_string(),
                    edit_history: String::new(),
                    expected_patches: Vec::new(),
                    rejected_patch: None,
                    telemetry: None,
                    human_feedback: Vec::new(),
                    rating: None,
                },
                prompt_inputs: None,
                prompt: None,
                predictions: Vec::new(),
                score: Vec::new(),
                qa: Vec::new(),
                zed_version: None,
                state: None,
            },
            response,
        )
        .unwrap();

        assert!(parsed.0.is_empty());
        assert!(parsed.1.is_none());
    }
}