1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::{cursor_excerpt::editable_and_context_ranges_for_cursor_position, udiff};
10use gpui::{AppContext, AsyncApp};
11use language::{Buffer, OffsetRangeExt, Point};
12use similar::DiffableStr;
13use std::sync::Arc;
14use std::{fmt::Write as _, ops::Range};
15use zeta_prompt::ZetaFormat;
16use zeta_prompt::format_zeta_prompt;
17
18pub async fn run_format_prompt(
19 example: &mut Example,
20 args: &FormatPromptArgs,
21 app_state: Arc<EpAppState>,
22 example_progress: &ExampleProgress,
23 cx: AsyncApp,
24) -> Result<()> {
25 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
26
27 let step_progress = example_progress.start(Step::FormatPrompt);
28
29 let prompt_inputs = example
30 .prompt_inputs
31 .as_ref()
32 .context("prompt_inputs must be set after context retrieval")?;
33
34 let language = app_state
35 .languages
36 .load_language_for_file_path(&example.spec.cursor_path)
37 .await
38 .ok();
39 let snapshot_fut = cx.update(|cx| {
40 Buffer::build_snapshot(
41 prompt_inputs.content.as_str().into(),
42 language,
43 Some(app_state.languages.clone()),
44 cx,
45 )
46 });
47 let cursor_point = Point::new(prompt_inputs.cursor_row, prompt_inputs.cursor_column);
48 let snapshot = cx.background_spawn(snapshot_fut).await;
49
50 match args.provider {
51 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
52 step_progress.set_substatus("formatting teacher prompt");
53
54 let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
55 cursor_point,
56 &snapshot,
57 edit_prediction::zeta2::max_editable_tokens(ZetaFormat::default()),
58 edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
59 );
60 let editable_range = editable_range.to_offset(&snapshot);
61 let context_range = context_range.to_offset(&snapshot);
62
63 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
64 example.prompt = Some(ExamplePrompt {
65 input: prompt,
66 expected_output: String::new(),
67 rejected_output: None,
68 prefill: None,
69 provider: args.provider,
70 });
71 }
72 PredictionProvider::Zeta2(version) => {
73 step_progress.set_substatus("formatting zeta2 prompt");
74
75 let (editable_range, context_range) = editable_and_context_ranges_for_cursor_position(
76 cursor_point,
77 &snapshot,
78 edit_prediction::zeta2::max_editable_tokens(version),
79 edit_prediction::zeta2::MAX_CONTEXT_TOKENS,
80 );
81 let editable_range = editable_range.to_offset(&snapshot);
82 let context_range = context_range.to_offset(&snapshot);
83
84 let context_start = context_range.start;
85 let cursor_offset_in_excerpt = prompt_inputs.cursor_offset - context_start;
86 let editable_range_in_excerpt =
87 (editable_range.start - context_start)..(editable_range.end - context_start);
88 let input = zeta_prompt::ZetaPromptInput {
89 cursor_path: example.spec.cursor_path.clone(),
90 cursor_excerpt: prompt_inputs.content[context_range].to_string().into(),
91 editable_range_in_excerpt,
92 cursor_offset_in_excerpt,
93 excerpt_start_row: prompt_inputs.excerpt_start_row,
94 events: prompt_inputs.edit_history.clone(),
95 related_files: prompt_inputs.related_files.clone().unwrap_or_default(),
96 excerpt_ranges: None,
97 preferred_model: None,
98 in_open_source_repo: example
99 .spec
100 .captured_prompt_input
101 .as_ref()
102 .map_or(false, |input| input.in_open_source_repo),
103 };
104 let prompt = format_zeta_prompt(&input, version);
105 let prefill = zeta_prompt::get_prefill(&input, version);
106 let (expected_patch, expected_cursor_offset) = example
107 .spec
108 .expected_patches_with_cursor_positions()
109 .into_iter()
110 .next()
111 .context("expected patches is empty")?;
112 let expected_output =
113 zeta2_output_for_patch(&input, &expected_patch, expected_cursor_offset, version)?;
114 let rejected_output = example
115 .spec
116 .rejected_patch
117 .as_ref()
118 .and_then(|patch| zeta2_output_for_patch(&input, patch, None, version).ok());
119
120 example.prompt = Some(ExamplePrompt {
121 input: prompt,
122 expected_output,
123 rejected_output,
124 provider: args.provider,
125 prefill: Some(prefill),
126 });
127 }
128 _ => {
129 panic!("Cannot format prompt for {:?}", args.provider);
130 }
131 };
132 Ok(())
133}
134
135pub fn zeta2_output_for_patch(
136 input: &zeta_prompt::ZetaPromptInput,
137 patch: &str,
138 cursor_offset: Option<usize>,
139 version: ZetaFormat,
140) -> Result<String> {
141 let mut old_editable_region =
142 input.cursor_excerpt[input.editable_range_in_excerpt.clone()].to_string();
143
144 if !old_editable_region.ends_with_newline() {
145 old_editable_region.push('\n');
146 }
147
148 let (mut result, first_hunk_offset) =
149 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
150 || {
151 format!(
152 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
153 patch, old_editable_region
154 )
155 },
156 )?;
157
158 if let Some(cursor_offset) = cursor_offset {
159 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
160 // We need to add where the hunk context matched in the editable region to compute
161 // the actual cursor position in the result.
162 let hunk_start = first_hunk_offset.unwrap_or(0);
163 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
164 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
165 }
166
167 match version {
168 ZetaFormat::V0120GitMergeMarkers
169 | ZetaFormat::V0131GitMergeMarkersPrefix
170 | ZetaFormat::V0211SeedCoder => {
171 if !result.ends_with('\n') {
172 result.push('\n');
173 }
174 result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER);
175 }
176 _ => (),
177 }
178
179 Ok(result)
180}
181
182pub struct TeacherPrompt;
183
184impl TeacherPrompt {
185 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
186 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
187 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
188 pub(crate) const NO_EDITS: &str = "NO_EDITS";
189
190 /// Truncate edit history to this number of last lines
191 const MAX_HISTORY_LINES: usize = 128;
192
193 pub fn format_prompt(
194 example: &Example,
195 editable_range: Range<usize>,
196 context_range: Range<usize>,
197 ) -> String {
198 let edit_history = Self::format_edit_history(&example.spec.edit_history);
199 let context = Self::format_context(example);
200 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
201
202 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
203 let prompt = prompt_template
204 .replace("{{context}}", &context)
205 .replace("{{edit_history}}", &edit_history)
206 .replace("{{cursor_excerpt}}", &cursor_excerpt);
207
208 prompt
209 }
210
211 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
212 // Check if the model indicated no edits are needed
213 if let Some(last_codeblock) = extract_last_codeblock(&response) {
214 if last_codeblock.trim() == Self::NO_EDITS {
215 return Ok((String::new(), None));
216 }
217 }
218
219 // Extract updated (new) editable region from the model response.
220 let new_editable_region = Self::extract_editable_region(&response)?;
221 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
222 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
223 let old_editable_region = Self::extract_editable_region(
224 &example
225 .prompt
226 .as_ref()
227 .context("example prompt missing")?
228 .input,
229 )?
230 .replace(Self::USER_CURSOR_MARKER, "");
231
232 let prompt_inputs = example
233 .prompt_inputs
234 .as_ref()
235 .context("example is missing prompt inputs")?;
236
237 // Normalize leading newlines: if old starts with newline but new doesn't,
238 // prepend newline to new to preserve whitespace structure.
239 // This handles the case where the model drops the leading blank line.
240 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
241 new_editable_region.insert(0, '\n');
242 }
243
244 let (editable_region_offset, _) = prompt_inputs
245 .content
246 .match_indices(&old_editable_region)
247 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset))
248 .context("editable region not found in prompt content")?;
249 let editable_region_start_line = prompt_inputs.content[..editable_region_offset]
250 .matches('\n')
251 .count();
252
253 // Use full context so cursor offset (relative to editable region start) aligns with diff content
254 let editable_region_lines = old_editable_region.lines().count() as u32;
255 let diff = language::unified_diff_with_context(
256 &old_editable_region,
257 &new_editable_region,
258 editable_region_start_line as u32,
259 editable_region_start_line as u32,
260 editable_region_lines,
261 );
262
263 let diff = indoc::formatdoc! {"
264 --- a/{path}
265 +++ b/{path}
266 {diff}",
267 path = example.spec.cursor_path.to_string_lossy(),
268 diff = diff,
269 };
270
271 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
272 ActualCursor::from_editable_region(
273 &example.spec.cursor_path,
274 editable_region_cursor_offset,
275 &new_editable_region,
276 &prompt_inputs.content,
277 editable_region_offset,
278 editable_region_start_line,
279 )
280 });
281
282 Ok((diff, actual_cursor))
283 }
284
285 fn format_edit_history(edit_history: &str) -> String {
286 // Strip comments ("garbage lines") from edit history
287 let lines = edit_history
288 .lines()
289 .filter(|&s| Self::is_udiff_content_line(s))
290 .collect::<Vec<_>>();
291
292 let history_lines = if lines.len() > Self::MAX_HISTORY_LINES {
293 &lines[lines.len() - Self::MAX_HISTORY_LINES..]
294 } else {
295 &lines
296 };
297
298 if history_lines.is_empty() {
299 return "(No edit history)".to_string();
300 }
301
302 history_lines.join("\n")
303 }
304
305 pub fn format_context(example: &Example) -> String {
306 let related_files = example
307 .prompt_inputs
308 .as_ref()
309 .and_then(|pi| pi.related_files.as_ref());
310
311 let Some(related_files) = related_files else {
312 return "(No context)".to_string();
313 };
314
315 if related_files.is_empty() {
316 return "(No context)".to_string();
317 }
318
319 let mut prompt = String::new();
320 for file in related_files {
321 let path_str = file.path.to_string_lossy();
322 writeln!(&mut prompt, "`````{path_str}").ok();
323
324 let mut prev_row = 0;
325 for excerpt in &file.excerpts {
326 if excerpt.row_range.start > prev_row {
327 prompt.push_str("…\n");
328 }
329 prompt.push_str(&excerpt.text);
330 prompt.push('\n');
331 prev_row = excerpt.row_range.end;
332 }
333 if prev_row < file.max_row {
334 prompt.push_str("…\n");
335 }
336 prompt.push_str("\n`````\n");
337 }
338
339 prompt
340 }
341
342 fn format_cursor_excerpt(
343 example: &Example,
344 editable_range: Range<usize>,
345 context_range: Range<usize>,
346 ) -> String {
347 let mut result = String::new();
348
349 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
350
351 let path_str = example.spec.cursor_path.to_string_lossy();
352 result.push_str(&format!("`````{path_str}\n"));
353 result.push_str(&prompt_inputs.content[context_range.start..editable_range.start]);
354 result.push_str(Self::EDITABLE_REGION_START);
355 result.push_str(&prompt_inputs.content[editable_range.start..prompt_inputs.cursor_offset]);
356 result.push_str(Self::USER_CURSOR_MARKER);
357 result.push_str(&prompt_inputs.content[prompt_inputs.cursor_offset..editable_range.end]);
358 result.push_str(Self::EDITABLE_REGION_END);
359 result.push_str(&prompt_inputs.content[editable_range.end..context_range.end]);
360 result.push_str("\n`````");
361
362 result
363 }
364
365 pub fn extract_editable_region(text: &str) -> Result<String> {
366 let start = text
367 .rfind(Self::EDITABLE_REGION_START)
368 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
369 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
370
371 if start >= end {
372 return Err(anyhow!("Invalid editable region markers"));
373 }
374
375 let region = &text[start..end];
376 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
377 }
378
379 fn is_udiff_content_line(s: &str) -> bool {
380 s.starts_with("-")
381 || s.starts_with("+")
382 || s.starts_with(" ")
383 || s.starts_with("---")
384 || s.starts_with("+++")
385 || s.starts_with("@@")
386 }
387}
388
389/// Extract the cursor excerpt from an example.
390/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
391pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
392 // If we have the original prompt, extract the cursor excerpt from it
393 if let Some(prompt) = &example.prompt {
394 // Find "# 3. Current File" section and extract the content
395 if let Some(start) = prompt.input.find("# 3. Current File") {
396 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
397 let backtick_count = prompt.input[content_start..]
398 .chars()
399 .take_while(|&c| c == '`')
400 .count();
401 let content_start = content_start + backtick_count;
402
403 // Find the path line and skip it
404 let newline_pos = prompt.input[content_start..].find('\n')?;
405 let text_start = content_start + newline_pos + 1;
406
407 // Find the closing backticks
408 let closing_pattern = "`".repeat(backtick_count);
409 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
410 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
411
412 let path_str = example.spec.cursor_path.to_string_lossy();
413 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
414 }
415 }
416
417 // Fallback: construct from prompt_inputs if available
418 let prompt_inputs = example.prompt_inputs.as_ref()?;
419 let content = &prompt_inputs.content;
420 let cursor_offset = prompt_inputs.cursor_offset;
421
422 // Simple fallback: just show content around cursor with markers
423 let path_str = example.spec.cursor_path.to_string_lossy();
424 let mut result = format!("`````{path_str}\n");
425 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
426 result.push_str(&content[..cursor_offset]);
427 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
428 result.push_str(&content[cursor_offset..]);
429 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
430 result.push_str("\n`````");
431
432 Some(result)
433}
434
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts (without trimming)
/// with exactly that many backticks, optionally followed by language or metadata text. Fences of
/// a different length in between are treated as content, which is what allows nested blocks.
///
/// A non-empty block is returned with a trailing newline; an empty block yields an empty string.
/// Returns `None` when no closing fence or no matching opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    let is_bare_fence = |line: &str| line.len() >= 3 && line.bytes().all(|byte| byte == b'`');

    // Last line made up of nothing but backticks (surrounding whitespace ignored).
    let closing_idx = lines
        .iter()
        .rposition(|line| is_bare_fence(line.trim()))?;
    let fence = "`".repeat(lines[closing_idx].trim().len());

    // Nearest earlier line opening with the same fence; one extra backtick means a longer,
    // unrelated fence.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        return Some(String::new());
    }

    // Preserve trailing newline to match previous behavior
    let mut content = body.join("\n");
    content.push('\n');
    Some(content)
}
479
#[cfg(test)]
mod tests {
    use super::*;

    /// The last block is returned even when an earlier block uses a shorter fence and the last
    /// opening fence carries metadata.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    /// Shorter fences inside a five-backtick block are part of its content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let expected = "content with ``` inline\nand ```python nested\nmore content\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Backticks in the middle of a line never act as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let expected = "here is some `code` with inline backticks\nand here```more```stuff\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Only the text between the markers is returned, without the blank line before the end
    /// marker.
    #[test]
    fn test_extract_editable_region() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// A nested three-backtick block stays inside the outer five-backtick block.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            "#};
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Without markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// A `NO_EDITS` block is recognised once surrounding whitespace is trimmed.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
            "};
        let last_block = extract_last_codeblock(response).unwrap();
        assert_eq!(last_block.trim(), TeacherPrompt::NO_EDITS);
    }

    /// Covers text without blocks, an unclosed block, and a block followed by more prose.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(plain).is_none());

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
            "};
        assert!(extract_last_codeblock(unclosed).is_none());

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
            "};
        // This should find the inner block
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    /// The closing fence does not need to be followed by a newline.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }
}