1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::udiff;
10use gpui::AsyncApp;
11use similar::DiffableStr;
12use std::ops::Range;
13use std::sync::Arc;
14use zeta_prompt::{
15 ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
16 output_end_marker_for_format, resolve_cursor_region,
17};
18
19pub async fn run_format_prompt(
20 example: &mut Example,
21 args: &FormatPromptArgs,
22 app_state: Arc<EpAppState>,
23 example_progress: &ExampleProgress,
24 cx: AsyncApp,
25) -> Result<()> {
26 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
27
28 let step_progress = example_progress.start(Step::FormatPrompt);
29
30 let prompt_inputs = example
31 .prompt_inputs
32 .as_ref()
33 .context("prompt_inputs must be set after context retrieval")?;
34
35 match args.provider {
36 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
37 step_progress.set_substatus("formatting teacher prompt");
38
39 let zeta_format = ZetaFormat::default();
40 let (editable_range, context_range) =
41 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
42
43 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
44 example.prompt = Some(ExamplePrompt {
45 input: prompt,
46 expected_output: String::new(),
47 rejected_output: None,
48 prefill: None,
49 provider: args.provider,
50 });
51 }
52 PredictionProvider::Zeta2(zeta_format) => {
53 step_progress.set_substatus("formatting zeta2 prompt");
54
55 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
56 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
57 let expected_output = example
58 .spec
59 .expected_patches_with_cursor_positions()
60 .into_iter()
61 .next()
62 .and_then(|(expected_patch, expected_cursor_offset)| {
63 zeta2_output_for_patch(
64 prompt_inputs,
65 &expected_patch,
66 expected_cursor_offset,
67 zeta_format,
68 )
69 .ok()
70 })
71 .unwrap_or_default();
72
73 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
74 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
75 });
76
77 example.prompt = Some(ExamplePrompt {
78 input: prompt,
79 expected_output,
80 rejected_output,
81 provider: args.provider,
82 prefill: Some(prefill),
83 });
84 }
85 _ => {
86 panic!("Cannot format prompt for {:?}", args.provider);
87 }
88 };
89 Ok(())
90}
91
92pub fn zeta2_output_for_patch(
93 input: &zeta_prompt::ZetaPromptInput,
94 patch: &str,
95 cursor_offset: Option<usize>,
96 version: ZetaFormat,
97) -> Result<String> {
98 let (context, editable_range, _, _) = resolve_cursor_region(input, version);
99 let mut old_editable_region = context[editable_range].to_string();
100
101 if !old_editable_region.ends_with_newline() {
102 old_editable_region.push('\n');
103 }
104
105 if let Some(encoded_output) =
106 encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
107 {
108 return Ok(encoded_output);
109 }
110
111 let (mut result, first_hunk_offset) =
112 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
113 || {
114 format!(
115 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
116 patch, old_editable_region
117 )
118 },
119 )?;
120
121 if let Some(cursor_offset) = cursor_offset {
122 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
123 // We need to add where the hunk context matched in the editable region to compute
124 // the actual cursor position in the result.
125 let hunk_start = first_hunk_offset.unwrap_or(0);
126 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
127 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
128 }
129
130 if let Some(end_marker) = output_end_marker_for_format(version) {
131 if !result.ends_with('\n') {
132 result.push('\n');
133 }
134 result.push_str(end_marker);
135 }
136
137 Ok(result)
138}
139
/// Namespace type for building the teacher prompt (`format_prompt`) and for turning a
/// teacher response back into a diff (`parse`). It carries no data.
pub struct TeacherPrompt;
141
142impl TeacherPrompt {
/// Marker written right before the editable text in the cursor excerpt; includes the
/// trailing newline that separates it from the text.
pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
/// Marker written right after the editable text in the cursor excerpt; includes the
/// leading newline that separates it from the text.
pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
/// Marker inserted at the cursor offset inside the editable text.
pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
/// Response text that `parse` interprets as "the model proposes no edits".
pub(crate) const NO_EDITS: &str = "NO_EDITS";

/// Truncate edit history to this number of last lines
const MAX_HISTORY_LINES: usize = 128;
150
151 pub fn format_prompt(
152 example: &Example,
153 editable_range: Range<usize>,
154 context_range: Range<usize>,
155 ) -> String {
156 let edit_history = Self::format_edit_history(&example.spec.edit_history);
157 let context = Self::format_context(example);
158 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
159
160 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
161 let prompt = prompt_template
162 .replace("{{context}}", &context)
163 .replace("{{edit_history}}", &edit_history)
164 .replace("{{cursor_excerpt}}", &cursor_excerpt);
165
166 prompt
167 }
168
169 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
170 // Check if the model indicated no edits are needed
171 let no_edits = (String::new(), None);
172 if let Some(last_codeblock) = extract_last_codeblock(&response) {
173 if last_codeblock.trim() == Self::NO_EDITS {
174 return Ok(no_edits);
175 }
176 }
177
178 if response.trim().ends_with(Self::NO_EDITS) {
179 return Ok(no_edits);
180 }
181
182 // Extract updated (new) editable region from the model response.
183 let new_editable_region = Self::extract_editable_region(&response)?;
184 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
185 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
186 let old_editable_region = Self::extract_editable_region(
187 &example
188 .prompt
189 .as_ref()
190 .context("example prompt missing")?
191 .input,
192 )?
193 .replace(Self::USER_CURSOR_MARKER, "");
194
195 let prompt_inputs = example
196 .prompt_inputs
197 .as_ref()
198 .context("example is missing prompt inputs")?;
199
200 // Normalize leading newlines: if old starts with newline but new doesn't,
201 // prepend newline to new to preserve whitespace structure.
202 // This handles the case where the model drops the leading blank line.
203 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
204 new_editable_region.insert(0, '\n');
205 }
206
207 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
208 let (editable_region_offset, _) = excerpt
209 .match_indices(&old_editable_region)
210 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
211 .context("editable region not found in prompt content")?;
212 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
213
214 // Use full context so cursor offset (relative to editable region start) aligns with diff content
215 let editable_region_lines = old_editable_region.lines().count() as u32;
216 let diff = language::unified_diff_with_context(
217 &old_editable_region,
218 &new_editable_region,
219 editable_region_start_line as u32,
220 editable_region_start_line as u32,
221 editable_region_lines,
222 );
223
224 let diff = indoc::formatdoc! {"
225 --- a/{path}
226 +++ b/{path}
227 {diff}",
228 path = example.spec.cursor_path.to_string_lossy(),
229 diff = diff,
230 };
231
232 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
233 ActualCursor::from_editable_region(
234 &example.spec.cursor_path,
235 editable_region_cursor_offset,
236 &new_editable_region,
237 excerpt,
238 editable_region_offset,
239 editable_region_start_line,
240 )
241 });
242
243 Ok((diff, actual_cursor))
244 }
245
246 fn format_edit_history(edit_history: &str) -> String {
247 let lines: Vec<&str> = edit_history.lines().collect();
248
249 if lines.is_empty() {
250 return "(No edit history)".to_string();
251 }
252
253 if lines.len() > Self::MAX_HISTORY_LINES {
254 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
255 format!("{truncated}\n[...truncated...]")
256 } else {
257 lines.join("\n")
258 }
259 }
260
261 pub fn format_context(example: &Example) -> String {
262 let related_files = example
263 .prompt_inputs
264 .as_ref()
265 .and_then(|pi| pi.related_files.as_deref());
266 let Some(related_files) = related_files else {
267 return "(No context)".to_string();
268 };
269
270 if related_files.is_empty() {
271 return "(No context)".to_string();
272 }
273
274 let prefix = "`````";
275 let suffix = "`````\n\n";
276 let max_tokens = 1024;
277 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
278 }
279
280 fn format_cursor_excerpt(
281 example: &Example,
282 editable_range: Range<usize>,
283 context_range: Range<usize>,
284 ) -> String {
285 let mut result = String::new();
286
287 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
288 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
289 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
290
291 let path_str = example.spec.cursor_path.to_string_lossy();
292 result.push_str(&format!("`````{path_str}\n"));
293 result.push_str(&excerpt[context_range.start..editable_range.start]);
294 result.push_str(Self::EDITABLE_REGION_START);
295 result.push_str(&excerpt[editable_range.start..cursor_offset]);
296 result.push_str(Self::USER_CURSOR_MARKER);
297 result.push_str(&excerpt[cursor_offset..editable_range.end]);
298 result.push_str(Self::EDITABLE_REGION_END);
299 result.push_str(&excerpt[editable_range.end..context_range.end]);
300 result.push_str("\n`````");
301
302 result
303 }
304
305 pub fn extract_editable_region(text: &str) -> Result<String> {
306 let start = text
307 .rfind(Self::EDITABLE_REGION_START)
308 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
309 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
310
311 if start >= end {
312 return Err(anyhow!("Invalid editable region markers"));
313 }
314
315 let region = &text[start..end];
316 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
317 }
318}
319
320/// Extract the cursor excerpt from an example.
321/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
322pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
323 // If we have the original prompt, extract the cursor excerpt from it
324 if let Some(prompt) = &example.prompt {
325 // Find "# 3. Current File" section and extract the content
326 if let Some(start) = prompt.input.find("# 3. Current File") {
327 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
328 let backtick_count = prompt.input[content_start..]
329 .chars()
330 .take_while(|&c| c == '`')
331 .count();
332 let content_start = content_start + backtick_count;
333
334 // Find the path line and skip it
335 let newline_pos = prompt.input[content_start..].find('\n')?;
336 let text_start = content_start + newline_pos + 1;
337
338 // Find the closing backticks
339 let closing_pattern = "`".repeat(backtick_count);
340 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
341 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
342
343 let path_str = example.spec.cursor_path.to_string_lossy();
344 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
345 }
346 }
347
348 // Fallback: construct from prompt_inputs if available
349 let prompt_inputs = example.prompt_inputs.as_ref()?;
350 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
351 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
352
353 // Simple fallback: just show content around cursor with markers
354 let path_str = example.spec.cursor_path.to_string_lossy();
355 let mut result = format!("`````{path_str}\n");
356 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
357 result.push_str(&excerpt[..cursor_offset]);
358 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
359 result.push_str(&excerpt[cursor_offset..]);
360 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
361 result.push_str("\n`````");
362
363 Some(result)
364}
365
/// Extracts the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, after trimming, consists solely of three or
/// more backticks. The opening fence is the nearest preceding line that starts (untrimmed)
/// with exactly that many backticks, optionally followed by an info string. Fences with a
/// different backtick count in between are treated as content.
///
/// Returns the enclosed lines joined with `\n` plus a trailing `\n`, an empty string for
/// an empty block, or `None` when no closing fence or no matching opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Last line made up of nothing but backticks (3+), together with its fence length.
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, raw)| {
        let candidate = raw.trim();
        let is_fence = candidate.len() >= 3 && candidate.chars().all(|c| c == '`');
        is_fence.then_some((idx, candidate.len()))
    })?;

    // Nearest earlier line opening with exactly `fence_len` backticks; one extra backtick
    // means a longer fence, which does not match.
    let fence = "`".repeat(fence_len);
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        // Empty block
        Some(String::new())
    } else {
        // Preserve trailing newline to match previous behavior
        Some(format!("{}\n", body.join("\n")))
    }
}
410
411#[cfg(test)]
412mod tests {
413 use super::*;
414
#[test]
fn test_extract_last_code_block() {
    // Two fenced blocks with different fence lengths: only the final one is returned,
    // and the info string on its opening fence is not part of the content.
    let response = indoc::indoc! {"
        Some thinking

        ```
        first block
        ```

        `````path='something' lines=1:2
        last block
        `````
    "};
    assert_eq!(
        extract_last_codeblock(response).as_deref(),
        Some("last block\n")
    );
}
431
#[test]
fn test_extract_codeblock_with_nested_fences() {
    // Triple-backtick runs inside a five-backtick block are content, not fences.
    let response = indoc::indoc! {"
        `````
        content with ``` inline
        and ```python nested
        more content
        `````
    "};
    let expected = "content with ``` inline\nand ```python nested\nmore content\n";
    assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
}
447
#[test]
fn test_extract_codeblock_ignores_inline_backticks() {
    // Backticks in the middle of a line never count as a fence.
    let response = indoc::indoc! {"
        `````
        here is some `code` with inline backticks
        and here```more```stuff
        `````
    "};
    let expected = "here is some `code` with inline backticks\nand here```more```stuff\n";
    assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
}
462
#[test]
fn test_extract_editable_region() {
    // Text outside the markers is dropped, and so is the blank line that precedes the
    // end marker.
    let prompt = indoc::indoc! {"
        some lines
        are
        here
        <|editable_region_start|>
        one
        two three

        <|editable_region_end|>
        more
        lines here
    "};
    let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
    assert_eq!(region, "one\ntwo three");
}
485
#[test]
fn test_extract_last_codeblock_nested_bibtex() {
    // A five-backtick block whose content itself contains a complete ```bibtex block.
    // The inner triple-backtick fences must be returned as content of the outer block.
    let text = indoc::indoc! {r#"
        Looking at the edit history, I can see that a Citation section was just added.

        `````
        ## Collaborations
        Our mission is to create a 4D generative model.

        ## Citation

        If you found Unique3D helpful, please cite our report:
        ```bibtex
        @misc{wu2024unique3d,
        title={Unique3D},
        }
        ```
        `````
    "#};
    let last_block = extract_last_codeblock(text).unwrap();
    // Everything between the outer fences, including the nested fence lines.
    assert_eq!(
        last_block,
        indoc::indoc! {r#"
        ## Collaborations
        Our mission is to create a 4D generative model.

        ## Citation

        If you found Unique3D helpful, please cite our report:
        ```bibtex
        @misc{wu2024unique3d,
        title={Unique3D},
        }
        ```
        "#}
    );
}
523
#[test]
fn test_extract_editable_region_no_markers() {
    // Without any markers the whole input is treated as the editable region.
    let input = "one\ntwo three";
    let region = TeacherPrompt::extract_editable_region(input).unwrap();
    assert_eq!(region, input);
}
537
#[test]
fn test_parse_no_edits_response() {
    // The last code block of a "no edits" response trims down to the NO_EDITS sentinel.
    let response = indoc::indoc! {"
        The code is already complete. There is no clear next edit to make.

        `````
        NO_EDITS
        `````
    "};
    let trimmed = extract_last_codeblock(response).map(|block| block.trim().to_owned());
    assert_eq!(trimmed.as_deref(), Some(TeacherPrompt::NO_EDITS));
}
550
#[test]
fn test_extract_codeblock_no_valid_block() {
    // Text with no code blocks should return None
    let plain = "Just some plain text without any code blocks";
    assert_eq!(extract_last_codeblock(plain), None);

    // Unclosed code block should return None
    let unclosed = indoc::indoc! {"
        ```
        unclosed block
    "};
    assert_eq!(extract_last_codeblock(unclosed), None);

    // Analysis text with nested markdown but no proper outer block:
    // the inner block is the one that gets found.
    let analysis = indoc::indoc! {"
        # Analysis
        Looking at this:
        ```
        some code
        ```
        But then more analysis without wrapping block
    "};
    assert_eq!(
        extract_last_codeblock(analysis).as_deref(),
        Some("some code\n")
    );
}
577
#[test]
fn test_extract_codeblock_no_trailing_newline() {
    // The closing fence is the very last thing in the input, with no newline after it;
    // the extracted content still ends with a newline.
    let response = "`````\ncontent here\n`````";
    assert_eq!(
        extract_last_codeblock(response).as_deref(),
        Some("content here\n")
    );
}
585}