1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::udiff;
10use gpui::AsyncApp;
11use similar::DiffableStr;
12use std::ops::Range;
13use std::sync::Arc;
14use zeta_prompt::{
15 ZetaFormat, excerpt_range_for_format, format_zeta_prompt, resolve_cursor_region,
16};
17
18pub async fn run_format_prompt(
19 example: &mut Example,
20 args: &FormatPromptArgs,
21 app_state: Arc<EpAppState>,
22 example_progress: &ExampleProgress,
23 cx: AsyncApp,
24) -> Result<()> {
25 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
26
27 let step_progress = example_progress.start(Step::FormatPrompt);
28
29 let prompt_inputs = example
30 .prompt_inputs
31 .as_ref()
32 .context("prompt_inputs must be set after context retrieval")?;
33
34 match args.provider {
35 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
36 step_progress.set_substatus("formatting teacher prompt");
37
38 let zeta_format = ZetaFormat::default();
39 let (editable_range, context_range) =
40 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
41
42 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
43 example.prompt = Some(ExamplePrompt {
44 input: prompt,
45 expected_output: String::new(),
46 rejected_output: None,
47 prefill: None,
48 provider: args.provider,
49 });
50 }
51 PredictionProvider::Zeta2(zeta_format) => {
52 step_progress.set_substatus("formatting zeta2 prompt");
53
54 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
55 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
56 let (expected_patch, expected_cursor_offset) = example
57 .spec
58 .expected_patches_with_cursor_positions()
59 .into_iter()
60 .next()
61 .context("expected patches is empty")?;
62 let expected_output = zeta2_output_for_patch(
63 prompt_inputs,
64 &expected_patch,
65 expected_cursor_offset,
66 zeta_format,
67 )?;
68 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
69 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
70 });
71
72 example.prompt = Some(ExamplePrompt {
73 input: prompt,
74 expected_output,
75 rejected_output,
76 provider: args.provider,
77 prefill: Some(prefill),
78 });
79 }
80 _ => {
81 panic!("Cannot format prompt for {:?}", args.provider);
82 }
83 };
84 Ok(())
85}
86
87pub fn zeta2_output_for_patch(
88 input: &zeta_prompt::ZetaPromptInput,
89 patch: &str,
90 cursor_offset: Option<usize>,
91 version: ZetaFormat,
92) -> Result<String> {
93 let (context, editable_range, _) = resolve_cursor_region(input, version);
94 let mut old_editable_region = context[editable_range].to_string();
95
96 if !old_editable_region.ends_with_newline() {
97 old_editable_region.push('\n');
98 }
99
100 let (mut result, first_hunk_offset) =
101 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
102 || {
103 format!(
104 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
105 patch, old_editable_region
106 )
107 },
108 )?;
109
110 if let Some(cursor_offset) = cursor_offset {
111 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
112 // We need to add where the hunk context matched in the editable region to compute
113 // the actual cursor position in the result.
114 let hunk_start = first_hunk_offset.unwrap_or(0);
115 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
116 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
117 }
118
119 match version {
120 ZetaFormat::V0120GitMergeMarkers
121 | ZetaFormat::V0131GitMergeMarkersPrefix
122 | ZetaFormat::V0211SeedCoder => {
123 if !result.ends_with('\n') {
124 result.push('\n');
125 }
126 result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER);
127 }
128 _ => (),
129 }
130
131 Ok(result)
132}
133
134pub struct TeacherPrompt;
135
136impl TeacherPrompt {
137 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
138 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
139 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
140 pub(crate) const NO_EDITS: &str = "NO_EDITS";
141
142 /// Truncate edit history to this number of last lines
143 const MAX_HISTORY_LINES: usize = 128;
144
145 pub fn format_prompt(
146 example: &Example,
147 editable_range: Range<usize>,
148 context_range: Range<usize>,
149 ) -> String {
150 let edit_history = Self::format_edit_history(&example.spec.edit_history);
151 let context = Self::format_context(example);
152 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
153
154 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
155 let prompt = prompt_template
156 .replace("{{context}}", &context)
157 .replace("{{edit_history}}", &edit_history)
158 .replace("{{cursor_excerpt}}", &cursor_excerpt);
159
160 prompt
161 }
162
163 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
164 // Check if the model indicated no edits are needed
165 let no_edits = (String::new(), None);
166 if let Some(last_codeblock) = extract_last_codeblock(&response) {
167 if last_codeblock.trim() == Self::NO_EDITS {
168 return Ok(no_edits);
169 }
170 }
171
172 if response.trim().ends_with(Self::NO_EDITS) {
173 return Ok(no_edits);
174 }
175
176 // Extract updated (new) editable region from the model response.
177 let new_editable_region = Self::extract_editable_region(&response)?;
178 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
179 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
180 let old_editable_region = Self::extract_editable_region(
181 &example
182 .prompt
183 .as_ref()
184 .context("example prompt missing")?
185 .input,
186 )?
187 .replace(Self::USER_CURSOR_MARKER, "");
188
189 let prompt_inputs = example
190 .prompt_inputs
191 .as_ref()
192 .context("example is missing prompt inputs")?;
193
194 // Normalize leading newlines: if old starts with newline but new doesn't,
195 // prepend newline to new to preserve whitespace structure.
196 // This handles the case where the model drops the leading blank line.
197 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
198 new_editable_region.insert(0, '\n');
199 }
200
201 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
202 let (editable_region_offset, _) = excerpt
203 .match_indices(&old_editable_region)
204 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
205 .context("editable region not found in prompt content")?;
206 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
207
208 // Use full context so cursor offset (relative to editable region start) aligns with diff content
209 let editable_region_lines = old_editable_region.lines().count() as u32;
210 let diff = language::unified_diff_with_context(
211 &old_editable_region,
212 &new_editable_region,
213 editable_region_start_line as u32,
214 editable_region_start_line as u32,
215 editable_region_lines,
216 );
217
218 let diff = indoc::formatdoc! {"
219 --- a/{path}
220 +++ b/{path}
221 {diff}",
222 path = example.spec.cursor_path.to_string_lossy(),
223 diff = diff,
224 };
225
226 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
227 ActualCursor::from_editable_region(
228 &example.spec.cursor_path,
229 editable_region_cursor_offset,
230 &new_editable_region,
231 excerpt,
232 editable_region_offset,
233 editable_region_start_line,
234 )
235 });
236
237 Ok((diff, actual_cursor))
238 }
239
240 fn format_edit_history(edit_history: &str) -> String {
241 let lines: Vec<&str> = edit_history.lines().collect();
242
243 if lines.is_empty() {
244 return "(No edit history)".to_string();
245 }
246
247 if lines.len() > Self::MAX_HISTORY_LINES {
248 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
249 format!("{truncated}\n[...truncated...]")
250 } else {
251 lines.join("\n")
252 }
253 }
254
255 pub fn format_context(example: &Example) -> String {
256 let related_files = example.prompt_inputs.as_ref().map(|pi| &pi.related_files);
257 let Some(related_files) = related_files else {
258 return "(No context)".to_string();
259 };
260
261 if related_files.is_empty() {
262 return "(No context)".to_string();
263 }
264
265 let prefix = "`````";
266 let suffix = "`````\n\n";
267 let max_tokens = 1024;
268 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
269 }
270
271 fn format_cursor_excerpt(
272 example: &Example,
273 editable_range: Range<usize>,
274 context_range: Range<usize>,
275 ) -> String {
276 let mut result = String::new();
277
278 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
279 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
280 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
281
282 let path_str = example.spec.cursor_path.to_string_lossy();
283 result.push_str(&format!("`````{path_str}\n"));
284 result.push_str(&excerpt[context_range.start..editable_range.start]);
285 result.push_str(Self::EDITABLE_REGION_START);
286 result.push_str(&excerpt[editable_range.start..cursor_offset]);
287 result.push_str(Self::USER_CURSOR_MARKER);
288 result.push_str(&excerpt[cursor_offset..editable_range.end]);
289 result.push_str(Self::EDITABLE_REGION_END);
290 result.push_str(&excerpt[editable_range.end..context_range.end]);
291 result.push_str("\n`````");
292
293 result
294 }
295
296 pub fn extract_editable_region(text: &str) -> Result<String> {
297 let start = text
298 .rfind(Self::EDITABLE_REGION_START)
299 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
300 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
301
302 if start >= end {
303 return Err(anyhow!("Invalid editable region markers"));
304 }
305
306 let region = &text[start..end];
307 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
308 }
309}
310
311/// Extract the cursor excerpt from an example.
312/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
313pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
314 // If we have the original prompt, extract the cursor excerpt from it
315 if let Some(prompt) = &example.prompt {
316 // Find "# 3. Current File" section and extract the content
317 if let Some(start) = prompt.input.find("# 3. Current File") {
318 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
319 let backtick_count = prompt.input[content_start..]
320 .chars()
321 .take_while(|&c| c == '`')
322 .count();
323 let content_start = content_start + backtick_count;
324
325 // Find the path line and skip it
326 let newline_pos = prompt.input[content_start..].find('\n')?;
327 let text_start = content_start + newline_pos + 1;
328
329 // Find the closing backticks
330 let closing_pattern = "`".repeat(backtick_count);
331 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
332 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
333
334 let path_str = example.spec.cursor_path.to_string_lossy();
335 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
336 }
337 }
338
339 // Fallback: construct from prompt_inputs if available
340 let prompt_inputs = example.prompt_inputs.as_ref()?;
341 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
342 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
343
344 // Simple fallback: just show content around cursor with markers
345 let path_str = example.spec.cursor_path.to_string_lossy();
346 let mut result = format!("`````{path_str}\n");
347 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
348 result.push_str(&excerpt[..cursor_offset]);
349 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
350 result.push_str(&excerpt[cursor_offset..]);
351 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
352 result.push_str("\n`````");
353
354 Some(result)
355}
356
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists only of three or more
/// backticks. The opening fence is the nearest earlier line that starts with exactly that
/// many backticks, optionally followed by an info string. The lines in between are joined
/// with `\n` and a trailing newline is appended; an empty block yields an empty string.
///
/// Returns `None` when no closing fence or no matching opening fence is found.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Scan from the bottom for the closing fence and remember how long it is.
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let candidate = line.trim();
        let is_fence = candidate.len() >= 3 && candidate.chars().all(|c| c == '`');
        is_fence.then_some((idx, candidate.len()))
    })?;

    // The opening fence must have the same number of backticks: a longer run is a different
    // fence, while anything else after the backticks is treated as language/metadata.
    let fence = "`".repeat(fence_len);
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        Some(String::new())
    } else {
        // Non-empty content always ends with a newline.
        Some(format!("{}\n", body.join("\n")))
    }
}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// When a response has several fenced blocks, the last one is returned and the info
    /// string after its opening fence is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    /// Shorter fences inside a five-backtick block are kept as plain content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let expected = "content with ``` inline\nand ```python nested\nmore content\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let expected = "here is some `code` with inline backticks\nand here```more```stuff\n";
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// The editable region is the text between the markers, without the trailing newline.
    #[test]
    fn test_extract_editable_region() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    /// A complete triple-backtick block nested in the outer block stays part of its content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            "#};
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    /// Without any markers the whole text is treated as the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let bare = indoc::indoc! {"
            one
            two three"};
        let expected = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(bare).unwrap();
        assert_eq!(region, expected);
    }

    /// A final code block holding only the sentinel is recognizable as "no edits".
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
            "};
        let last_block = extract_last_codeblock(response).unwrap();
        assert_eq!(last_block.trim(), TeacherPrompt::NO_EDITS);
    }

    /// Covers responses without a complete block, and a block followed by more prose.
    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain), None);

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
            "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
            "};
        // This should find the inner block
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    /// The closing fence does not need to be followed by a newline.
    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }
}