1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::{cursor_excerpt::editable_and_context_ranges_for_cursor_position, udiff};
10use gpui::{AppContext, AsyncApp};
11use language::{Buffer, OffsetRangeExt, Point};
12use similar::DiffableStr;
13use std::sync::Arc;
14use std::{fmt::Write as _, ops::Range};
15use zeta_prompt::ZetaFormat;
16use zeta_prompt::format_zeta_prompt;
17
18pub async fn run_format_prompt(
19 example: &mut Example,
20 args: &FormatPromptArgs,
21 app_state: Arc<EpAppState>,
22 example_progress: &ExampleProgress,
23 cx: AsyncApp,
24) -> Result<()> {
25 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
26
27 let step_progress = example_progress.start(Step::FormatPrompt);
28
29 let prompt_inputs = example
30 .prompt_inputs
31 .as_ref()
32 .context("prompt_inputs must be set after context retrieval")?;
33
34 let language = app_state
35 .languages
36 .load_language_for_file_path(&example.spec.cursor_path)
37 .await
38 .ok();
39 let snapshot_fut = cx.update(|cx| {
40 Buffer::build_snapshot(
41 prompt_inputs.content.as_str().into(),
42 language,
43 Some(app_state.languages.clone()),
44 cx,
45 )
46 });
47 let cursor_point = Point::new(prompt_inputs.cursor_row, prompt_inputs.cursor_column);
48 let snapshot = cx.background_spawn(snapshot_fut).await;
49
50 match args.provider {
51 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
52 step_progress.set_substatus("formatting teacher prompt");
53
54 let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
55 cursor_point,
56 &snapshot,
57 edit_prediction::zeta2::max_editable_tokens(ZetaFormat::default()),
58 edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
59 );
60 let editable_range = editable_range.to_offset(&snapshot);
61 let context_range = context_range.to_offset(&snapshot);
62
63 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
64 example.prompt = Some(ExamplePrompt {
65 input: prompt,
66 expected_output: String::new(),
67 rejected_output: None,
68 prefill: None,
69 provider: args.provider,
70 });
71 }
72 PredictionProvider::Zeta2(version) => {
73 step_progress.set_substatus("formatting zeta2 prompt");
74
75 let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
76 cursor_point,
77 &snapshot,
78 edit_prediction::zeta2::max_editable_tokens(version),
79 edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
80 );
81 let editable_range = editable_range.to_offset(&snapshot);
82 let context_range = context_range.to_offset(&snapshot);
83
84 let context_start = context_range.start;
85 let cursor_offset_in_excerpt = prompt_inputs.cursor_offset - context_start;
86 let editable_range_in_excerpt =
87 (editable_range.start - context_start)..(editable_range.end - context_start);
88 let input = zeta_prompt::ZetaPromptInput {
89 cursor_path: example.spec.cursor_path.clone(),
90 cursor_excerpt: prompt_inputs.content[context_range].to_string().into(),
91 editable_range_in_excerpt,
92 cursor_offset_in_excerpt,
93 excerpt_start_row: prompt_inputs.excerpt_start_row,
94 events: prompt_inputs.edit_history.clone(),
95 related_files: prompt_inputs.related_files.clone().unwrap_or_default(),
96 excerpt_ranges: None,
97 preferred_model: None,
98 in_open_source_repo: example
99 .spec
100 .captured_prompt_input
101 .as_ref()
102 .map_or(false, |input| input.in_open_source_repo),
103 can_collect_data: false,
104 };
105 let prompt = format_zeta_prompt(&input, version);
106 let prefill = zeta_prompt::get_prefill(&input, version);
107 let (expected_patch, expected_cursor_offset) = example
108 .spec
109 .expected_patches_with_cursor_positions()
110 .into_iter()
111 .next()
112 .context("expected patches is empty")?;
113 let expected_output =
114 zeta2_output_for_patch(&input, &expected_patch, expected_cursor_offset, version)?;
115 let rejected_output = example
116 .spec
117 .rejected_patch
118 .as_ref()
119 .and_then(|patch| zeta2_output_for_patch(&input, patch, None, version).ok());
120
121 example.prompt = Some(ExamplePrompt {
122 input: prompt,
123 expected_output,
124 rejected_output,
125 provider: args.provider,
126 prefill: Some(prefill),
127 });
128 }
129 _ => {
130 panic!("Cannot format prompt for {:?}", args.provider);
131 }
132 };
133 Ok(())
134}
135
136pub fn zeta2_output_for_patch(
137 input: &zeta_prompt::ZetaPromptInput,
138 patch: &str,
139 cursor_offset: Option<usize>,
140 version: ZetaFormat,
141) -> Result<String> {
142 let mut old_editable_region =
143 input.cursor_excerpt[input.editable_range_in_excerpt.clone()].to_string();
144
145 if !old_editable_region.ends_with_newline() {
146 old_editable_region.push('\n');
147 }
148
149 let (mut result, first_hunk_offset) =
150 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
151 || {
152 format!(
153 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
154 patch, old_editable_region
155 )
156 },
157 )?;
158
159 if let Some(cursor_offset) = cursor_offset {
160 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
161 // We need to add where the hunk context matched in the editable region to compute
162 // the actual cursor position in the result.
163 let hunk_start = first_hunk_offset.unwrap_or(0);
164 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
165 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
166 }
167
168 match version {
169 ZetaFormat::V0120GitMergeMarkers
170 | ZetaFormat::V0131GitMergeMarkersPrefix
171 | ZetaFormat::V0211SeedCoder => {
172 if !result.ends_with('\n') {
173 result.push('\n');
174 }
175 result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER);
176 }
177 _ => (),
178 }
179
180 Ok(result)
181}
182
183pub struct TeacherPrompt;
184
185impl TeacherPrompt {
186 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
187 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
188 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
189 pub(crate) const NO_EDITS: &str = "NO_EDITS";
190
191 /// Truncate edit history to this number of last lines
192 const MAX_HISTORY_LINES: usize = 128;
193
194 pub fn format_prompt(
195 example: &Example,
196 editable_range: Range<usize>,
197 context_range: Range<usize>,
198 ) -> String {
199 let edit_history = Self::format_edit_history(&example.spec.edit_history);
200 let context = Self::format_context(example);
201 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
202
203 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
204 let prompt = prompt_template
205 .replace("{{context}}", &context)
206 .replace("{{edit_history}}", &edit_history)
207 .replace("{{cursor_excerpt}}", &cursor_excerpt);
208
209 prompt
210 }
211
212 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
213 // Check if the model indicated no edits are needed
214 if let Some(last_codeblock) = extract_last_codeblock(&response) {
215 if last_codeblock.trim() == Self::NO_EDITS {
216 return Ok((String::new(), None));
217 }
218 }
219
220 // Extract updated (new) editable region from the model response.
221 let new_editable_region = Self::extract_editable_region(&response)?;
222 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
223 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
224 let old_editable_region = Self::extract_editable_region(
225 &example
226 .prompt
227 .as_ref()
228 .context("example prompt missing")?
229 .input,
230 )?
231 .replace(Self::USER_CURSOR_MARKER, "");
232
233 let prompt_inputs = example
234 .prompt_inputs
235 .as_ref()
236 .context("example is missing prompt inputs")?;
237
238 // Normalize leading newlines: if old starts with newline but new doesn't,
239 // prepend newline to new to preserve whitespace structure.
240 // This handles the case where the model drops the leading blank line.
241 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
242 new_editable_region.insert(0, '\n');
243 }
244
245 let (editable_region_offset, _) = prompt_inputs
246 .content
247 .match_indices(&old_editable_region)
248 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
249 .context("editable region not found in prompt content")?;
250 let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
251 .matches('\n')
252 .count();
253
254 // Use full context so cursor offset (relative to editable region start) aligns with diff content
255 let editable_region_lines = old_editable_region.lines().count() as u32;
256 let diff = language::unified_diff_with_context(
257 &old_editable_region,
258 &new_editable_region,
259 editable_region_start_line as u32,
260 editable_region_start_line as u32,
261 editable_region_lines,
262 );
263
264 let diff = indoc::formatdoc! {"
265 --- a/{path}
266 +++ b/{path}
267 {diff}",
268 path = example.spec.cursor_path.to_string_lossy(),
269 diff = diff,
270 };
271
272 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
273 ActualCursor::from_editable_region(
274 &example.spec.cursor_path,
275 editable_region_cursor_offset,
276 &new_editable_region,
277 &prompt_inputs.content,
278 editable_region_offset,
279 editable_region_start_line,
280 )
281 });
282
283 Ok((diff, actual_cursor))
284 }
285
286 fn format_edit_history(edit_history: &str) -> String {
287 // Strip comments ("garbage lines") from edit history
288 let lines = edit_history
289 .lines()
290 .filter(|&s| Self::is_udiff_content_line(s))
291 .collect::<Vec<_>>();
292
293 let history_lines = if lines.len() > Self::MAX_HISTORY_LINES {
294 &lines[lines.len() - Self::MAX_HISTORY_LINES..]
295 } else {
296 &lines
297 };
298
299 if history_lines.is_empty() {
300 return "(No edit history)".to_string();
301 }
302
303 history_lines.join("\n")
304 }
305
306 pub fn format_context(example: &Example) -> String {
307 let related_files = example
308 .prompt_inputs
309 .as_ref()
310 .and_then(|pi| pi.related_files.as_ref());
311
312 let Some(related_files) = related_files else {
313 return "(No context)".to_string();
314 };
315
316 if related_files.is_empty() {
317 return "(No context)".to_string();
318 }
319
320 let mut prompt = String::new();
321 for file in related_files {
322 let path_str = file.path.to_string_lossy();
323 writeln!(&mut prompt, "`````{path_str}").ok();
324
325 let mut prev_row = 0;
326 for excerpt in &file.excerpts {
327 if excerpt.row_range.start > prev_row {
328 prompt.push_str("…\n");
329 }
330 prompt.push_str(&excerpt.text);
331 prompt.push('\n');
332 prev_row = excerpt.row_range.end;
333 }
334 if prev_row < file.max_row {
335 prompt.push_str("…\n");
336 }
337 prompt.push_str("\n`````\n");
338 }
339
340 prompt
341 }
342
343 fn format_cursor_excerpt(
344 example: &Example,
345 editable_range: Range<usize>,
346 context_range: Range<usize>,
347 ) -> String {
348 let mut result = String::new();
349
350 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
351
352 let path_str = example.spec.cursor_path.to_string_lossy();
353 result.push_str(&format!("`````{path_str}\n"));
354 result.push_str(&prompt_inputs.content[context_range.start..editable_range.start]);
355 result.push_str(Self::EDITABLE_REGION_START);
356 result.push_str(&prompt_inputs.content[editable_range.start..prompt_inputs.cursor_offset]);
357 result.push_str(Self::USER_CURSOR_MARKER);
358 result.push_str(&prompt_inputs.content[prompt_inputs.cursor_offset..editable_range.end]);
359 result.push_str(Self::EDITABLE_REGION_END);
360 result.push_str(&prompt_inputs.content[editable_range.end..context_range.end]);
361 result.push_str("\n`````");
362
363 result
364 }
365
366 pub fn extract_editable_region(text: &str) -> Result<String> {
367 let start = text
368 .rfind(Self::EDITABLE_REGION_START)
369 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
370 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
371
372 if start >= end {
373 return Err(anyhow!("Invalid editable region markers"));
374 }
375
376 let region = &text[start..end];
377 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
378 }
379
380 fn is_udiff_content_line(s: &str) -> bool {
381 s.starts_with("-")
382 || s.starts_with("+")
383 || s.starts_with(" ")
384 || s.starts_with("---")
385 || s.starts_with("+++")
386 || s.starts_with("@@")
387 }
388}
389
390/// Extract the cursor excerpt from an example.
391/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
392pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
393 // If we have the original prompt, extract the cursor excerpt from it
394 if let Some(prompt) = &example.prompt {
395 // Find "# 3. Current File" section and extract the content
396 if let Some(start) = prompt.input.find("# 3. Current File") {
397 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
398 let backtick_count = prompt.input[content_start..]
399 .chars()
400 .take_while(|&c| c == '`')
401 .count();
402 let content_start = content_start + backtick_count;
403
404 // Find the path line and skip it
405 let newline_pos = prompt.input[content_start..].find('\n')?;
406 let text_start = content_start + newline_pos + 1;
407
408 // Find the closing backticks
409 let closing_pattern = "`".repeat(backtick_count);
410 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
411 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
412
413 let path_str = example.spec.cursor_path.to_string_lossy();
414 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
415 }
416 }
417
418 // Fallback: construct from prompt_inputs if available
419 let prompt_inputs = example.prompt_inputs.as_ref()?;
420 let content = &prompt_inputs.content;
421 let cursor_offset = prompt_inputs.cursor_offset;
422
423 // Simple fallback: just show content around cursor with markers
424 let path_str = example.spec.cursor_path.to_string_lossy();
425 let mut result = format!("`````{path_str}\n");
426 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
427 result.push_str(&content[..cursor_offset]);
428 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
429 result.push_str(&content[cursor_offset..]);
430 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
431 result.push_str("\n`````");
432
433 Some(result)
434}
435
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or
/// more backticks. The opening fence is the nearest earlier line that starts with exactly
/// that many backticks, optionally followed by language or metadata text. Content lines
/// are joined with `\n` and terminated with a newline; a block with no content lines
/// yields an empty string. `None` is returned when no such pair of fences exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    /// A line made of nothing but backticks (at least three), ignoring outer whitespace.
    fn is_bare_fence(line: &str) -> bool {
        let trimmed = line.trim();
        trimmed.len() >= 3 && trimmed.bytes().all(|byte| byte == b'`')
    }

    let lines: Vec<&str> = text.lines().collect();

    // Last bare fence in the text: treated as the closing fence.
    let close_idx = lines.iter().rposition(|line| is_bare_fence(line))?;
    let fence = "`".repeat(lines[close_idx].trim().len());

    // Nearest earlier line opening with the same number of backticks, and not more.
    let open_idx = lines[..close_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[open_idx + 1..close_idx];
    if body.is_empty() {
        return Some(String::new());
    }

    // Keep a trailing newline after the last content line.
    let mut content = body.join("\n");
    content.push('\n');
    Some(content)
}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// The last fenced block is returned, even when its opening fence carries metadata.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
        "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    /// Shorter fences inside a five-backtick block are treated as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
        "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content with ``` inline\nand ```python nested\nmore content\n")
        );
    }

    /// Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
        "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("here is some `code` with inline backticks\nand here```more```stuff\n")
        );
    }

    /// Only the text between the markers is kept, minus one trailing newline.
    #[test]
    fn test_extract_editable_region() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
        "};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// A nested three-backtick block is preserved inside the outer five-backtick block.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
        "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
        "#};
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Without markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// A final block holding `NO_EDITS` is recognisable after trimming.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let trimmed_block = extract_last_codeblock(response).map(|block| block.trim().to_string());
        assert_eq!(trimmed_block.as_deref(), Some(TeacherPrompt::NO_EDITS));
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain), None);

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block:
        // the inner block is what gets found.
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    /// The closing fence does not need a newline after it.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }
}