format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::udiff;
 10use gpui::AsyncApp;
 11use similar::DiffableStr;
 12use std::ops::Range;
 13use std::sync::Arc;
 14use zeta_prompt::{
 15    ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
 16    output_end_marker_for_format, resolve_cursor_region,
 17};
 18
 19pub async fn run_format_prompt(
 20    example: &mut Example,
 21    args: &FormatPromptArgs,
 22    app_state: Arc<EpAppState>,
 23    example_progress: &ExampleProgress,
 24    cx: AsyncApp,
 25) -> Result<()> {
 26    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 27
 28    let step_progress = example_progress.start(Step::FormatPrompt);
 29
 30    let prompt_inputs = example
 31        .prompt_inputs
 32        .as_ref()
 33        .context("prompt_inputs must be set after context retrieval")?;
 34
 35    match args.provider {
 36        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 37            step_progress.set_substatus("formatting teacher prompt");
 38
 39            let zeta_format = ZetaFormat::default();
 40            let (editable_range, context_range) =
 41                excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
 42
 43            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 44            example.prompt = Some(ExamplePrompt {
 45                input: prompt,
 46                expected_output: String::new(),
 47                rejected_output: None,
 48                prefill: None,
 49                provider: args.provider,
 50            });
 51        }
 52        PredictionProvider::Zeta2(zeta_format) => {
 53            step_progress.set_substatus("formatting zeta2 prompt");
 54
 55            let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
 56            let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
 57            let expected_output = example
 58                .spec
 59                .expected_patches_with_cursor_positions()
 60                .into_iter()
 61                .next()
 62                .and_then(|(expected_patch, expected_cursor_offset)| {
 63                    zeta2_output_for_patch(
 64                        prompt_inputs,
 65                        &expected_patch,
 66                        expected_cursor_offset,
 67                        zeta_format,
 68                    )
 69                    .ok()
 70                })
 71                .unwrap_or_default();
 72
 73            let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
 74                zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
 75            });
 76
 77            example.prompt = Some(ExamplePrompt {
 78                input: prompt,
 79                expected_output,
 80                rejected_output,
 81                provider: args.provider,
 82                prefill: Some(prefill),
 83            });
 84        }
 85        _ => {
 86            panic!("Cannot format prompt for {:?}", args.provider);
 87        }
 88    };
 89    Ok(())
 90}
 91
 92pub fn zeta2_output_for_patch(
 93    input: &zeta_prompt::ZetaPromptInput,
 94    patch: &str,
 95    cursor_offset: Option<usize>,
 96    version: ZetaFormat,
 97) -> Result<String> {
 98    let (context, editable_range, _, _) = resolve_cursor_region(input, version);
 99    let mut old_editable_region = context[editable_range].to_string();
100
101    if !old_editable_region.ends_with_newline() {
102        old_editable_region.push('\n');
103    }
104
105    if let Some(encoded_output) =
106        encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
107    {
108        return Ok(encoded_output);
109    }
110
111    let (mut result, first_hunk_offset) =
112        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
113            || {
114                format!(
115                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
116                    patch, old_editable_region
117                )
118            },
119        )?;
120
121    if let Some(cursor_offset) = cursor_offset {
122        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
123        // We need to add where the hunk context matched in the editable region to compute
124        // the actual cursor position in the result.
125        let hunk_start = first_hunk_offset.unwrap_or(0);
126        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
127        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
128    }
129
130    if let Some(end_marker) = output_end_marker_for_format(version) {
131        if !result.ends_with('\n') {
132            result.push('\n');
133        }
134        result.push_str(end_marker);
135    }
136
137    Ok(result)
138}
139
140pub struct TeacherPrompt;
141
142impl TeacherPrompt {
143    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
144    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
145    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
146    pub(crate) const NO_EDITS: &str = "NO_EDITS";
147
148    /// Truncate edit history to this number of last lines
149    const MAX_HISTORY_LINES: usize = 128;
150
151    pub fn format_prompt(
152        example: &Example,
153        editable_range: Range<usize>,
154        context_range: Range<usize>,
155    ) -> String {
156        let edit_history = Self::format_edit_history(&example.spec.edit_history);
157        let context = Self::format_context(example);
158        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
159
160        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
161        let prompt = prompt_template
162            .replace("{{context}}", &context)
163            .replace("{{edit_history}}", &edit_history)
164            .replace("{{cursor_excerpt}}", &cursor_excerpt);
165
166        prompt
167    }
168
169    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
170        // Check if the model indicated no edits are needed
171        let no_edits = (String::new(), None);
172        if let Some(last_codeblock) = extract_last_codeblock(&response) {
173            if last_codeblock.trim() == Self::NO_EDITS {
174                return Ok(no_edits);
175            }
176        }
177
178        if response.trim().ends_with(Self::NO_EDITS) {
179            return Ok(no_edits);
180        }
181
182        // Extract updated (new) editable region from the model response.
183        let new_editable_region = Self::extract_editable_region(&response)?;
184        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
185        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
186        let old_editable_region = Self::extract_editable_region(
187            &example
188                .prompt
189                .as_ref()
190                .context("example prompt missing")?
191                .input,
192        )?
193        .replace(Self::USER_CURSOR_MARKER, "");
194
195        let prompt_inputs = example
196            .prompt_inputs
197            .as_ref()
198            .context("example is missing prompt inputs")?;
199
200        // Normalize leading newlines: if old starts with newline but new doesn't,
201        // prepend newline to new to preserve whitespace structure.
202        // This handles the case where the model drops the leading blank line.
203        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
204            new_editable_region.insert(0, '\n');
205        }
206
207        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
208        let (editable_region_offset, _) = excerpt
209            .match_indices(&old_editable_region)
210            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
211            .context("editable region not found in prompt content")?;
212        let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
213
214        // Use full context so cursor offset (relative to editable region start) aligns with diff content
215        let editable_region_lines = old_editable_region.lines().count() as u32;
216        let diff = language::unified_diff_with_context(
217            &old_editable_region,
218            &new_editable_region,
219            editable_region_start_line as u32,
220            editable_region_start_line as u32,
221            editable_region_lines,
222        );
223
224        let diff = indoc::formatdoc! {"
225            --- a/{path}
226            +++ b/{path}
227            {diff}",
228            path = example.spec.cursor_path.to_string_lossy(),
229            diff = diff,
230        };
231
232        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
233            ActualCursor::from_editable_region(
234                &example.spec.cursor_path,
235                editable_region_cursor_offset,
236                &new_editable_region,
237                excerpt,
238                editable_region_offset,
239                editable_region_start_line,
240            )
241        });
242
243        Ok((diff, actual_cursor))
244    }
245
246    fn format_edit_history(edit_history: &str) -> String {
247        let lines: Vec<&str> = edit_history.lines().collect();
248
249        if lines.is_empty() {
250            return "(No edit history)".to_string();
251        }
252
253        if lines.len() > Self::MAX_HISTORY_LINES {
254            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
255            format!("{truncated}\n[...truncated...]")
256        } else {
257            lines.join("\n")
258        }
259    }
260
261    pub fn format_context(example: &Example) -> String {
262        let related_files = example
263            .prompt_inputs
264            .as_ref()
265            .and_then(|pi| pi.related_files.as_deref());
266        let Some(related_files) = related_files else {
267            return "(No context)".to_string();
268        };
269
270        if related_files.is_empty() {
271            return "(No context)".to_string();
272        }
273
274        let prefix = "`````";
275        let suffix = "`````\n\n";
276        let max_tokens = 1024;
277        zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
278    }
279
280    fn format_cursor_excerpt(
281        example: &Example,
282        editable_range: Range<usize>,
283        context_range: Range<usize>,
284    ) -> String {
285        let mut result = String::new();
286
287        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
288        let excerpt = prompt_inputs.cursor_excerpt.as_ref();
289        let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
290
291        let path_str = example.spec.cursor_path.to_string_lossy();
292        result.push_str(&format!("`````{path_str}\n"));
293        result.push_str(&excerpt[context_range.start..editable_range.start]);
294        result.push_str(Self::EDITABLE_REGION_START);
295        result.push_str(&excerpt[editable_range.start..cursor_offset]);
296        result.push_str(Self::USER_CURSOR_MARKER);
297        result.push_str(&excerpt[cursor_offset..editable_range.end]);
298        result.push_str(Self::EDITABLE_REGION_END);
299        result.push_str(&excerpt[editable_range.end..context_range.end]);
300        result.push_str("\n`````");
301
302        result
303    }
304
305    pub fn extract_editable_region(text: &str) -> Result<String> {
306        let start = text
307            .rfind(Self::EDITABLE_REGION_START)
308            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
309        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
310
311        if start >= end {
312            return Err(anyhow!("Invalid editable region markers"));
313        }
314
315        let region = &text[start..end];
316        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
317    }
318}
319
320/// Extract the cursor excerpt from an example.
321/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
322pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
323    // If we have the original prompt, extract the cursor excerpt from it
324    if let Some(prompt) = &example.prompt {
325        // Find "# 3. Current File" section and extract the content
326        if let Some(start) = prompt.input.find("# 3. Current File") {
327            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
328            let backtick_count = prompt.input[content_start..]
329                .chars()
330                .take_while(|&c| c == '`')
331                .count();
332            let content_start = content_start + backtick_count;
333
334            // Find the path line and skip it
335            let newline_pos = prompt.input[content_start..].find('\n')?;
336            let text_start = content_start + newline_pos + 1;
337
338            // Find the closing backticks
339            let closing_pattern = "`".repeat(backtick_count);
340            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
341            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
342
343            let path_str = example.spec.cursor_path.to_string_lossy();
344            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
345        }
346    }
347
348    // Fallback: construct from prompt_inputs if available
349    let prompt_inputs = example.prompt_inputs.as_ref()?;
350    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
351    let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
352
353    // Simple fallback: just show content around cursor with markers
354    let path_str = example.spec.cursor_path.to_string_lossy();
355    let mut result = format!("`````{path_str}\n");
356    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
357    result.push_str(&excerpt[..cursor_offset]);
358    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
359    result.push_str(&excerpt[cursor_offset..]);
360    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
361    result.push_str("\n`````");
362
363    Some(result)
364}
365
/// Returns the content of the last fenced code block in `text`, or `None` if there is none.
///
/// The closing fence is the last line that, after trimming whitespace, consists solely of three
/// or more backticks. The opening fence is the nearest earlier line that starts (untrimmed) with
/// exactly that many backticks, optionally followed by an info string. Lines between the fences
/// are joined with `\n` and terminated with a trailing `\n`; an empty block yields an empty
/// string.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Closing fence: scan from the end for a line made up of nothing but 3+ backticks.
    let close_idx = lines.iter().rposition(|&line| {
        let fence = line.trim();
        fence.len() >= 3 && fence.chars().all(|c| c == '`')
    })?;
    let fence_len = lines[close_idx].trim().len();
    let fence = "`".repeat(fence_len);

    // Opening fence: nearest preceding line starting with the same number of backticks. A
    // further backtick right after the prefix means a longer fence, which does not match.
    let open_idx = lines[..close_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[open_idx + 1..close_idx];
    if body.is_empty() {
        return Some(String::new());
    }

    // Preserve trailing newline to match previous behavior
    let mut content = body.join("\n");
    content.push('\n');
    Some(content)
}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// With several fenced blocks in the text, the one that closes last is returned.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    /// Shorter fences inside a five-backtick block are part of the content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let expected = "content with ``` inline\nand ```python nested\nmore content\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let expected = "here is some `code` with inline backticks\nand here```more```stuff\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Only the text between the markers is returned, without one trailing newline.
    #[test]
    fn test_extract_editable_region() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// A three-backtick block nested in a five-backtick block stays inside the outer content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
                  title={Unique3D},
            }
            ```
            "#};
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Without any markers the whole text is treated as the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let input = indoc::indoc! {"
            one
            two three"};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(input).unwrap();
        assert_eq!(region, expected);
    }

    /// A response whose last code block holds only the sentinel is recognisable as "no edits".
    /// This checks the code-block extraction that `TeacherPrompt::parse` relies on; it does not
    /// call `parse` itself.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let last_block = extract_last_codeblock(response).unwrap();
        assert_eq!(last_block.trim(), TeacherPrompt::NO_EDITS);
    }

    /// Covers inputs with no block, an unclosed block, and a block followed by trailing prose.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain), None);

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    /// The closing fence does not need to be followed by a newline.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }
}