format_prompt.rs

  1use crate::{
  2    FormatPromptArgs, PredictionProvider,
  3    example::{ActualCursor, Example, ExamplePrompt},
  4    headless::EpAppState,
  5    progress::{ExampleProgress, Step},
  6    retrieve_context::run_context_retrieval,
  7};
  8use anyhow::{Context as _, Result, anyhow};
  9use edit_prediction::{cursor_excerpt::editable_and_context_ranges_for_cursor_position, udiff};
 10use gpui::{AppContext, AsyncApp};
 11use language::{Buffer, OffsetRangeExt, Point};
 12use similar::DiffableStr;
 13use std::sync::Arc;
 14use std::{fmt::Write as _, ops::Range};
 15use zeta_prompt::ZetaFormat;
 16use zeta_prompt::format_zeta_prompt;
 17
 18pub async fn run_format_prompt(
 19    example: &mut Example,
 20    args: &FormatPromptArgs,
 21    app_state: Arc<EpAppState>,
 22    example_progress: &ExampleProgress,
 23    cx: AsyncApp,
 24) -> Result<()> {
 25    run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
 26
 27    let step_progress = example_progress.start(Step::FormatPrompt);
 28
 29    let prompt_inputs = example
 30        .prompt_inputs
 31        .as_ref()
 32        .context("prompt_inputs must be set after context retrieval")?;
 33
 34    let language = app_state
 35        .languages
 36        .load_language_for_file_path(&example.spec.cursor_path)
 37        .await
 38        .ok();
 39    let snapshot_fut = cx.update(|cx| {
 40        Buffer::build_snapshot(
 41            prompt_inputs.content.as_str().into(),
 42            language,
 43            Some(app_state.languages.clone()),
 44            cx,
 45        )
 46    });
 47    let cursor_point = Point::new(prompt_inputs.cursor_row, prompt_inputs.cursor_column);
 48    let snapshot = cx.background_spawn(snapshot_fut).await;
 49
 50    match args.provider {
 51        PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
 52            step_progress.set_substatus("formatting teacher prompt");
 53
 54            let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
 55                cursor_point,
 56                &snapshot,
 57                edit_prediction::zeta2::max_editable_tokens(ZetaFormat::default()),
 58                edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
 59            );
 60            let editable_range = editable_range.to_offset(&snapshot);
 61            let context_range = context_range.to_offset(&snapshot);
 62
 63            let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
 64            example.prompt = Some(ExamplePrompt {
 65                input: prompt,
 66                expected_output: String::new(),
 67                rejected_output: None,
 68                prefill: None,
 69                provider: args.provider,
 70            });
 71        }
 72        PredictionProvider::Zeta2(version) => {
 73            step_progress.set_substatus("formatting zeta2 prompt");
 74
 75            let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
 76                cursor_point,
 77                &snapshot,
 78                edit_prediction::zeta2::max_editable_tokens(version),
 79                edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
 80            );
 81            let editable_range = editable_range.to_offset(&snapshot);
 82            let context_range = context_range.to_offset(&snapshot);
 83
 84            let context_start = context_range.start;
 85            let cursor_offset_in_excerpt = prompt_inputs.cursor_offset - context_start;
 86            let editable_range_in_excerpt =
 87                (editable_range.start - context_start)..(editable_range.end - context_start);
 88            let input = zeta_prompt::ZetaPromptInput {
 89                cursor_path: example.spec.cursor_path.clone(),
 90                cursor_excerpt: prompt_inputs.content[context_range].to_string().into(),
 91                editable_range_in_excerpt,
 92                cursor_offset_in_excerpt,
 93                excerpt_start_row: prompt_inputs.excerpt_start_row,
 94                events: prompt_inputs.edit_history.clone(),
 95                related_files: prompt_inputs.related_files.clone().unwrap_or_default(),
 96                excerpt_ranges: None,
 97                preferred_model: None,
 98                in_open_source_repo: example
 99                    .spec
100                    .captured_prompt_input
101                    .as_ref()
102                    .map_or(false, |input| input.in_open_source_repo),
103            };
104            let prompt = format_zeta_prompt(&input, version);
105            let prefill = zeta_prompt::get_prefill(&input, version);
106            let (expected_patch, expected_cursor_offset) = example
107                .spec
108                .expected_patches_with_cursor_positions()
109                .into_iter()
110                .next()
111                .context("expected patches is empty")?;
112            let expected_output =
113                zeta2_output_for_patch(&input, &expected_patch, expected_cursor_offset, version)?;
114            let rejected_output = example
115                .spec
116                .rejected_patch
117                .as_ref()
118                .and_then(|patch| zeta2_output_for_patch(&input, patch, None, version).ok());
119
120            example.prompt = Some(ExamplePrompt {
121                input: prompt,
122                expected_output,
123                rejected_output,
124                provider: args.provider,
125                prefill: Some(prefill),
126            });
127        }
128        _ => {
129            panic!("Cannot format prompt for {:?}", args.provider);
130        }
131    };
132    Ok(())
133}
134
135pub fn zeta2_output_for_patch(
136    input: &zeta_prompt::ZetaPromptInput,
137    patch: &str,
138    cursor_offset: Option<usize>,
139    version: ZetaFormat,
140) -> Result<String> {
141    let mut old_editable_region =
142        input.cursor_excerpt[input.editable_range_in_excerpt.clone()].to_string();
143
144    if !old_editable_region.ends_with_newline() {
145        old_editable_region.push('\n');
146    }
147
148    let (mut result, first_hunk_offset) =
149        udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
150            || {
151                format!(
152                    "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
153                    patch, old_editable_region
154                )
155            },
156        )?;
157
158    if let Some(cursor_offset) = cursor_offset {
159        // The cursor_offset is relative to the start of the hunk's new text (context + additions).
160        // We need to add where the hunk context matched in the editable region to compute
161        // the actual cursor position in the result.
162        let hunk_start = first_hunk_offset.unwrap_or(0);
163        let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
164        result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
165    }
166
167    match version {
168        ZetaFormat::V0120GitMergeMarkers
169        | ZetaFormat::V0131GitMergeMarkersPrefix
170        | ZetaFormat::V0211SeedCoder => {
171            if !result.ends_with('\n') {
172                result.push('\n');
173            }
174            result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER);
175        }
176        _ => (),
177    }
178
179    Ok(result)
180}
181
182pub struct TeacherPrompt;
183
184impl TeacherPrompt {
185    pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
186    pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
187    pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
188    pub(crate) const NO_EDITS: &str = "NO_EDITS";
189
190    /// Truncate edit history to this number of last lines
191    const MAX_HISTORY_LINES: usize = 128;
192
193    pub fn format_prompt(
194        example: &Example,
195        editable_range: Range<usize>,
196        context_range: Range<usize>,
197    ) -> String {
198        let edit_history = Self::format_edit_history(&example.spec.edit_history);
199        let context = Self::format_context(example);
200        let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
201
202        let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
203        let prompt = prompt_template
204            .replace("{{context}}", &context)
205            .replace("{{edit_history}}", &edit_history)
206            .replace("{{cursor_excerpt}}", &cursor_excerpt);
207
208        prompt
209    }
210
211    pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
212        // Check if the model indicated no edits are needed
213        if let Some(last_codeblock) = extract_last_codeblock(&response) {
214            if last_codeblock.trim() == Self::NO_EDITS {
215                return Ok((String::new(), None));
216            }
217        }
218
219        // Extract updated (new) editable region from the model response.
220        let new_editable_region = Self::extract_editable_region(&response)?;
221        let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
222        let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
223        let old_editable_region = Self::extract_editable_region(
224            &example
225                .prompt
226                .as_ref()
227                .context("example prompt missing")?
228                .input,
229        )?
230        .replace(Self::USER_CURSOR_MARKER, "");
231
232        let prompt_inputs = example
233            .prompt_inputs
234            .as_ref()
235            .context("example is missing prompt inputs")?;
236
237        // Normalize leading newlines: if old starts with newline but new doesn't,
238        // prepend newline to new to preserve whitespace structure.
239        // This handles the case where the model drops the leading blank line.
240        if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
241            new_editable_region.insert(0, '\n');
242        }
243
244        let (editable_region_offset, _) = prompt_inputs
245            .content
246            .match_indices(&old_editable_region)
247            .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
248            .context("editable region not found in prompt content")?;
249        let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
250            .matches('\n')
251            .count();
252
253        // Use full context so cursor offset (relative to editable region start) aligns with diff content
254        let editable_region_lines = old_editable_region.lines().count() as u32;
255        let diff = language::unified_diff_with_context(
256            &old_editable_region,
257            &new_editable_region,
258            editable_region_start_line as u32,
259            editable_region_start_line as u32,
260            editable_region_lines,
261        );
262
263        let diff = indoc::formatdoc! {"
264            --- a/{path}
265            +++ b/{path}
266            {diff}",
267            path = example.spec.cursor_path.to_string_lossy(),
268            diff = diff,
269        };
270
271        let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
272            ActualCursor::from_editable_region(
273                &example.spec.cursor_path,
274                editable_region_cursor_offset,
275                &new_editable_region,
276                &prompt_inputs.content,
277                editable_region_offset,
278                editable_region_start_line,
279            )
280        });
281
282        Ok((diff, actual_cursor))
283    }
284
285    fn format_edit_history(edit_history: &str) -> String {
286        // Strip comments ("garbage lines") from edit history
287        let lines = edit_history
288            .lines()
289            .filter(|&s| Self::is_udiff_content_line(s))
290            .collect::<Vec<_>>();
291
292        let history_lines = if lines.len() > Self::MAX_HISTORY_LINES {
293            &lines[lines.len() - Self::MAX_HISTORY_LINES..]
294        } else {
295            &lines
296        };
297
298        if history_lines.is_empty() {
299            return "(No edit history)".to_string();
300        }
301
302        history_lines.join("\n")
303    }
304
305    pub fn format_context(example: &Example) -> String {
306        let related_files = example
307            .prompt_inputs
308            .as_ref()
309            .and_then(|pi| pi.related_files.as_ref());
310
311        let Some(related_files) = related_files else {
312            return "(No context)".to_string();
313        };
314
315        if related_files.is_empty() {
316            return "(No context)".to_string();
317        }
318
319        let mut prompt = String::new();
320        for file in related_files {
321            let path_str = file.path.to_string_lossy();
322            writeln!(&mut prompt, "`````{path_str}").ok();
323
324            let mut prev_row = 0;
325            for excerpt in &file.excerpts {
326                if excerpt.row_range.start > prev_row {
327                    prompt.push_str("\n");
328                }
329                prompt.push_str(&excerpt.text);
330                prompt.push('\n');
331                prev_row = excerpt.row_range.end;
332            }
333            if prev_row < file.max_row {
334                prompt.push_str("\n");
335            }
336            prompt.push_str("\n`````\n");
337        }
338
339        prompt
340    }
341
342    fn format_cursor_excerpt(
343        example: &Example,
344        editable_range: Range<usize>,
345        context_range: Range<usize>,
346    ) -> String {
347        let mut result = String::new();
348
349        let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
350
351        let path_str = example.spec.cursor_path.to_string_lossy();
352        result.push_str(&format!("`````{path_str}\n"));
353        result.push_str(&prompt_inputs.content[context_range.start..editable_range.start]);
354        result.push_str(Self::EDITABLE_REGION_START);
355        result.push_str(&prompt_inputs.content[editable_range.start..prompt_inputs.cursor_offset]);
356        result.push_str(Self::USER_CURSOR_MARKER);
357        result.push_str(&prompt_inputs.content[prompt_inputs.cursor_offset..editable_range.end]);
358        result.push_str(Self::EDITABLE_REGION_END);
359        result.push_str(&prompt_inputs.content[editable_range.end..context_range.end]);
360        result.push_str("\n`````");
361
362        result
363    }
364
365    pub fn extract_editable_region(text: &str) -> Result<String> {
366        let start = text
367            .rfind(Self::EDITABLE_REGION_START)
368            .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
369        let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
370
371        if start >= end {
372            return Err(anyhow!("Invalid editable region markers"));
373        }
374
375        let region = &text[start..end];
376        Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
377    }
378
379    fn is_udiff_content_line(s: &str) -> bool {
380        s.starts_with("-")
381            || s.starts_with("+")
382            || s.starts_with(" ")
383            || s.starts_with("---")
384            || s.starts_with("+++")
385            || s.starts_with("@@")
386    }
387}
388
389/// Extract the cursor excerpt from an example.
390/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
391pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
392    // If we have the original prompt, extract the cursor excerpt from it
393    if let Some(prompt) = &example.prompt {
394        // Find "# 3. Current File" section and extract the content
395        if let Some(start) = prompt.input.find("# 3. Current File") {
396            let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
397            let backtick_count = prompt.input[content_start..]
398                .chars()
399                .take_while(|&c| c == '`')
400                .count();
401            let content_start = content_start + backtick_count;
402
403            // Find the path line and skip it
404            let newline_pos = prompt.input[content_start..].find('\n')?;
405            let text_start = content_start + newline_pos + 1;
406
407            // Find the closing backticks
408            let closing_pattern = "`".repeat(backtick_count);
409            let text_end = prompt.input[text_start..].find(&closing_pattern)?;
410            let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
411
412            let path_str = example.spec.cursor_path.to_string_lossy();
413            return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
414        }
415    }
416
417    // Fallback: construct from prompt_inputs if available
418    let prompt_inputs = example.prompt_inputs.as_ref()?;
419    let content = &prompt_inputs.content;
420    let cursor_offset = prompt_inputs.cursor_offset;
421
422    // Simple fallback: just show content around cursor with markers
423    let path_str = example.spec.cursor_path.to_string_lossy();
424    let mut result = format!("`````{path_str}\n");
425    result.push_str(TeacherPrompt::EDITABLE_REGION_START);
426    result.push_str(&content[..cursor_offset]);
427    result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
428    result.push_str(&content[cursor_offset..]);
429    result.push_str(TeacherPrompt::EDITABLE_REGION_END);
430    result.push_str("\n`````");
431
432    Some(result)
433}
434
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts (without any
/// indentation) with exactly that many backticks, optionally followed by language or
/// metadata text. Non-empty content is returned with a trailing newline; an empty block
/// yields an empty string. Returns `None` when no such pair of fences exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    /// A line made of nothing but backticks (at least three), ignoring surrounding whitespace.
    fn is_bare_fence(line: &str) -> bool {
        let trimmed = line.trim();
        trimmed.len() >= 3 && trimmed.bytes().all(|byte| byte == b'`')
    }

    let lines: Vec<&str> = text.lines().collect();

    // Scan from the end for the closing fence and remember how long it is.
    let closing_idx = lines.iter().rposition(|line| is_bare_fence(line))?;
    let fence = "`".repeat(lines[closing_idx].trim().len());

    // Walk backwards to the opening fence: same backtick count, not one backtick more.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        return Some(String::new());
    }

    // Non-empty content always ends with a newline.
    let mut block = body.join("\n");
    block.push('\n');
    Some(block)
}
479
#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures are written as explicit line lists joined with "\n"; a trailing empty
    // entry stands for the final newline of the text.

    #[test]
    fn test_extract_last_code_block() {
        let text = [
            "Some thinking",
            "",
            "```",
            "first block",
            "```",
            "",
            "`````path='something' lines=1:2",
            "last block",
            "`````",
            "",
        ]
        .join("\n");

        assert_eq!(extract_last_codeblock(&text).unwrap(), "last block\n");
    }

    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let text = [
            "`````",
            "content with ``` inline",
            "and ```python nested",
            "more content",
            "`````",
            "",
        ]
        .join("\n");

        assert_eq!(
            extract_last_codeblock(&text).unwrap(),
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let text = [
            "`````",
            "here is some `code` with inline backticks",
            "and here```more```stuff",
            "`````",
            "",
        ]
        .join("\n");

        assert_eq!(
            extract_last_codeblock(&text).unwrap(),
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    #[test]
    fn test_extract_editable_region() {
        let text = [
            "some lines",
            "are",
            "here",
            "<|editable_region_start|>",
            "one",
            "two three",
            "",
            "<|editable_region_end|>",
            "more",
            "lines here",
            "",
        ]
        .join("\n");

        let parsed = TeacherPrompt::extract_editable_region(&text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        // The expected block content, which itself contains a shorter nested fence.
        let block = [
            "## Collaborations",
            "Our mission is to create a 4D generative model.",
            "",
            "## Citation",
            "",
            "If you found Unique3D helpful, please cite our report:",
            "```bibtex",
            "@misc{wu2024unique3d,",
            "      title={Unique3D},",
            "}",
            "```",
            "",
        ]
        .join("\n");
        let intro =
            "Looking at the edit history, I can see that a Citation section was just added.";
        let text = format!("{intro}\n\n`````\n{block}`````\n");

        assert_eq!(extract_last_codeblock(&text).unwrap(), block);
    }

    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = "one\ntwo three";

        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_parse_no_edits_response() {
        let response = [
            "The code is already complete. There is no clear next edit to make.",
            "",
            "`````",
            "NO_EDITS",
            "`````",
            "",
        ]
        .join("\n");

        let codeblock = extract_last_codeblock(&response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(plain).is_none());

        // Unclosed code block should return None
        let unclosed = "```\nunclosed block\n";
        assert!(extract_last_codeblock(unclosed).is_none());

        // Analysis text with nested markdown but no proper outer block
        let analysis = [
            "# Analysis",
            "Looking at this:",
            "```",
            "some code",
            "```",
            "But then more analysis without wrapping block",
            "",
        ]
        .join("\n");
        // This should find the inner block
        assert_eq!(extract_last_codeblock(&analysis).unwrap(), "some code\n");
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let text = "`````\ncontent here\n`````";

        assert_eq!(extract_last_codeblock(text).unwrap(), "content here\n");
    }
}