format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::udiff;
 10use gpui::AsyncApp;
 11use similar::DiffableStr;
 12use std::ops::Range;
 13use std::sync::Arc;
 14use zeta_prompt::{
 15    ZetaFormat, excerpt_range_for_format, format_zeta_prompt, resolve_cursor_region,
 16};
 17
 18pub async fn run_format_prompt(
 19    example: &mut Example,
 20    args: &FormatPromptArgs,
 21    app_state: Arc<EpAppState>,
 22    example_progress: &ExampleProgress,
 23    cx: AsyncApp,
 24) -> Result<()> {
 25    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 26
 27    let step_progress = example_progress.start(Step::FormatPrompt);
 28
 29    let prompt_inputs = example
 30        .prompt_inputs
 31        .as_ref()
 32        .context("prompt_inputs must be set after context retrieval")?;
 33
 34    match args.provider {
 35        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 36            step_progress.set_substatus("formatting teacher prompt");
 37
 38            let zeta_format = ZetaFormat::default();
 39            let (editable_range, context_range) =
 40                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 41
 42            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 43            example.prompt = Some(ExamplePrompt {
 44                input: prompt,
 45                expected_output: String::new(),
 46                rejected_output: None,
 47                prefill: None,
 48                provider: args.provider,
 49            });
 50        }
 51        PredictionProvider::Zeta2(zeta_format) => {
 52            step_progress.set_substatus("formatting zeta2 prompt");
 53
 54            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 55            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 56            let (expected_patch, expected_cursor_offset) = example
 57                .spec
 58                .expected_patches_with_cursor_positions()
 59                .into_iter()
 60                .next()
 61                .context("expected patches is empty")?;
 62            let expected_output = zeta2_output_for_patch(
 63                prompt_inputs,
 64                &expected_patch,
 65                expected_cursor_offset,
 66                zeta_format,
 67            )?;
 68            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 69                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 70            });
 71
 72            example.prompt = Some(ExamplePrompt {
 73                input: prompt,
 74                expected_output,
 75                rejected_output,
 76                provider: args.provider,
 77                prefill: Some(prefill),
 78            });
 79        }
 80        _ => {
 81            panic!("Cannot format prompt for {:?}", args.provider);
 82        }
 83    };
 84    Ok(())
 85}
 86
 87pub fn zeta2_output_for_patch(
 88    input: &zeta_prompt::ZetaPromptInput,
 89    patch: &str,
 90    cursor_offset: Option<usize>,
 91    version: ZetaFormat,
 92) -> Result<String> {
 93    let (context, editable_range, _) = resolve_cursor_region(input, version);
 94    let mut old_editable_region = context[editable_range].to_string();
 95
 96    if !old_editable_region.ends_with_newline() {
 97        old_editable_region.push('\n');
 98    }
 99
100    let (mut result, first_hunk_offset) =
101        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
102            || {
103                format!(
104                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
105                    patch, old_editable_region
106                )
107            },
108        )?;
109
110    if let Some(cursor_offset) = cursor_offset {
111        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
112        // We need to add where the hunk context matched in the editable region to compute
113        // the actual cursor position in the result.
114        let hunk_start = first_hunk_offset.unwrap_or(0);
115        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
116        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
117    }
118
119    match version {
120        ZetaFormat::V0120GitMergeMarkers
121        | ZetaFormat::V0131GitMergeMarkersPrefix
122        | ZetaFormat::V0211SeedCoder => {
123            if !result.ends_with('\n') {
124                result.push('\n');
125            }
126            result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER);
127        }
128        _ => (),
129    }
130
131    Ok(result)
132}
133
134pub struct TeacherPrompt;
135
136impl TeacherPrompt {
137    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
138    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
139    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
140    pub(crate) const NO_EDITS: &str = "NO_EDITS";
141
142    /// Truncate edit history to this number of last lines
143    const MAX_HISTORY_LINES: usize = 128;
144
145    pub fn format_prompt(
146        example: &Example,
147        editable_range: Range<usize>,
148        context_range: Range<usize>,
149    ) -> String {
150        let edit_history = Self::format_edit_history(&example.spec.edit_history);
151        let context = Self::format_context(example);
152        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
153
154        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
155        let prompt = prompt_template
156            .replace("{{context}}", &context)
157            .replace("{{edit_history}}", &edit_history)
158            .replace("{{cursor_excerpt}}", &cursor_excerpt);
159
160        prompt
161    }
162
163    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
164        // Check if the model indicated no edits are needed
165        let no_edits = (String::new(), None);
166        if let Some(last_codeblock) = extract_last_codeblock(&response) {
167            if last_codeblock.trim() == Self::NO_EDITS {
168                return Ok(no_edits);
169            }
170        }
171
172        if response.trim().ends_with(Self::NO_EDITS) {
173            return Ok(no_edits);
174        }
175
176        // Extract updated (new) editable region from the model response.
177        let new_editable_region = Self::extract_editable_region(&response)?;
178        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
179        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
180        let old_editable_region = Self::extract_editable_region(
181            &example
182                .prompt
183                .as_ref()
184                .context("example prompt missing")?
185                .input,
186        )?
187        .replace(Self::USER_CURSOR_MARKER, "");
188
189        let prompt_inputs = example
190            .prompt_inputs
191            .as_ref()
192            .context("example is missing prompt inputs")?;
193
194        // Normalize leading newlines: if old starts with newline but new doesn't,
195        // prepend newline to new to preserve whitespace structure.
196        // This handles the case where the model drops the leading blank line.
197        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
198            new_editable_region.insert(0, '\n');
199        }
200
201        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
202        let (editable_region_offset, _) = excerpt
203            .match_indices(&old_editable_region)
204            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
205            .context("editable region not found in prompt content")?;
206        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
207
208        // Use full context so cursor offset (relative to editable region start) aligns with diff content
209        let editable_region_lines = old_editable_region.lines().count() as u32;
210        let diff = language::unified_diff_with_context(
211            &old_editable_region,
212            &new_editable_region,
213            editable_region_start_line as u32,
214            editable_region_start_line as u32,
215            editable_region_lines,
216        );
217
218        let diff = indoc::formatdoc! {"
219            --- a/{path}
220            +++ b/{path}
221            {diff}",
222            path = example.spec.cursor_path.to_string_lossy(),
223            diff = diff,
224        };
225
226        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
227            ActualCursor::from_editable_region(
228                &example.spec.cursor_path,
229                editable_region_cursor_offset,
230                &new_editable_region,
231                excerpt,
232                editable_region_offset,
233                editable_region_start_line,
234            )
235        });
236
237        Ok((diff, actual_cursor))
238    }
239
240    fn format_edit_history(edit_history: &str) -> String {
241        let lines: Vec<&str> = edit_history.lines().collect();
242
243        if lines.is_empty() {
244            return "(No edit history)".to_string();
245        }
246
247        if lines.len() > Self::MAX_HISTORY_LINES {
248            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
249            format!("{truncated}\n[...truncated...]")
250        } else {
251            lines.join("\n")
252        }
253    }
254
255    pub fn format_context(example: &Example) -> String {
256        let related_files = example.prompt_inputs.as_ref().map(|pi| &pi.related_files);
257        let Some(related_files) = related_files else {
258            return "(No context)".to_string();
259        };
260
261        if related_files.is_empty() {
262            return "(No context)".to_string();
263        }
264
265        let prefix = "`````";
266        let suffix = "`````\n\n";
267        let max_tokens = 1024;
268        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
269    }
270
271    fn format_cursor_excerpt(
272        example: &Example,
273        editable_range: Range<usize>,
274        context_range: Range<usize>,
275    ) -> String {
276        let mut result = String::new();
277
278        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
279        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
280        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
281
282        let path_str = example.spec.cursor_path.to_string_lossy();
283        result.push_str(&format!("`````{path_str}\n"));
284        result.push_str(&excerpt[context_range.start..editable_range.start]);
285        result.push_str(Self::EDITABLE_REGION_START);
286        result.push_str(&excerpt[editable_range.start..cursor_offset]);
287        result.push_str(Self::USER_CURSOR_MARKER);
288        result.push_str(&excerpt[cursor_offset..editable_range.end]);
289        result.push_str(Self::EDITABLE_REGION_END);
290        result.push_str(&excerpt[editable_range.end..context_range.end]);
291        result.push_str("\n`````");
292
293        result
294    }
295
296    pub fn extract_editable_region(text: &str) -> Result<String> {
297        let start = text
298            .rfind(Self::EDITABLE_REGION_START)
299            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
300        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
301
302        if start >= end {
303            return Err(anyhow!("Invalid editable region markers"));
304        }
305
306        let region = &text[start..end];
307        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
308    }
309}
310
311/// Extract the cursor excerpt from an example.
312/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
313pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
314    // If we have the original prompt, extract the cursor excerpt from it
315    if let Some(prompt) = &example.prompt {
316        // Find "# 3. Current File" section and extract the content
317        if let Some(start) = prompt.input.find("# 3. Current File") {
318            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
319            let backtick_count = prompt.input[content_start..]
320                .chars()
321                .take_while(|&c| c == '`')
322                .count();
323            let content_start = content_start + backtick_count;
324
325            // Find the path line and skip it
326            let newline_pos = prompt.input[content_start..].find('\n')?;
327            let text_start = content_start + newline_pos + 1;
328
329            // Find the closing backticks
330            let closing_pattern = "`".repeat(backtick_count);
331            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
332            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
333
334            let path_str = example.spec.cursor_path.to_string_lossy();
335            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
336        }
337    }
338
339    // Fallback: construct from prompt_inputs if available
340    let prompt_inputs = example.prompt_inputs.as_ref()?;
341    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
342    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
343
344    // Simple fallback: just show content around cursor with markers
345    let path_str = example.spec.cursor_path.to_string_lossy();
346    let mut result = format!("`````{path_str}\n");
347    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
348    result.push_str(&excerpt[..cursor_offset]);
349    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
350    result.push_str(&excerpt[cursor_offset..]);
351    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
352    result.push_str("\n`````");
353
354    Some(result)
355}
356
/// Returns the contents of the last fenced code block in `text`, if there is one.
///
/// The closing fence is the last line that, once surrounding whitespace is trimmed, consists
/// solely of three or more backticks. The opening fence is the nearest preceding line that,
/// ignoring leading whitespace, starts with exactly that many backticks, optionally followed
/// by an info string. Fences with a different number of backticks between the two are
/// therefore treated as content.
///
/// Leading whitespace is ignored on both fences so that an indented block (for example one
/// nested in a list item) is recognized; content lines are returned unmodified.
///
/// A non-empty block is returned with a trailing newline, an empty block as an empty string.
/// Returns `None` when no closing fence or no matching opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Scan from the end for the closing fence and remember how many backticks it uses.
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let candidate = line.trim();
        let is_fence = candidate.len() >= 3 && candidate.bytes().all(|b| b == b'`');
        is_fence.then_some((idx, candidate.len()))
    })?;

    let fence = "`".repeat(fence_len);

    // Walk backwards to the nearest line opening a fence of exactly the same length; a longer
    // run of backticks belongs to a different fence.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.trim_start()
            .strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        Some(String::new())
    } else {
        // Preserve trailing newline to match previous behavior
        Some(format!("{}\n", body.join("\n")))
    }
}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Of several fenced blocks, only the final one is returned.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        let extracted = extract_last_codeblock(response);
        assert_eq!(extracted.as_deref(), Some("last block\n"));
    }

    /// Three-backtick runs inside a five-backtick block are part of the content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let expected = "content with ``` inline\nand ```python nested\nmore content\n";
        let extracted = extract_last_codeblock(response);
        assert_eq!(extracted.as_deref(), Some(expected));
    }

    /// Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let expected = "here is some `code` with inline backticks\nand here```more```stuff\n";
        let extracted = extract_last_codeblock(response);
        assert_eq!(extracted.as_deref(), Some(expected));
    }

    /// The region between the markers is returned without its trailing newline.
    #[test]
    fn test_extract_editable_region() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// A fenced bibtex snippet nested in the outer block is kept verbatim.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#};
        let extracted = extract_last_codeblock(response);
        assert_eq!(extracted.as_deref(), Some(expected));
    }

    /// Without any markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = indoc::indoc! {"
            one
            two three"};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// A final block holding only the sentinel trims down to `NO_EDITS`.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted.trim(), TeacherPrompt::NO_EDITS);
    }

    /// Plain text and unclosed fences yield `None`; a closed inner block is still found.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain), None);

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        let extracted = extract_last_codeblock(analysis);
        assert_eq!(extracted.as_deref(), Some("some code\n"));
    }

    /// A closing fence that ends the text without a newline is still recognized.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        let extracted = extract_last_codeblock(response);
        assert_eq!(extracted.as_deref(), Some("content here\n"));
    }
}