1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use gpui::AsyncApp;
10use similar::DiffableStr;
11use std::ops::Range;
12use std::sync::Arc;
13use zeta_prompt::udiff;
14use zeta_prompt::{
15 ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
16 multi_region, output_end_marker_for_format, resolve_cursor_region,
17};
18
19pub async fn run_format_prompt(
20 example: &mut Example,
21 args: &FormatPromptArgs,
22 app_state: Arc<EpAppState>,
23 example_progress: &ExampleProgress,
24 cx: AsyncApp,
25) -> Result<()> {
26 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
27
28 let step_progress = example_progress.start(Step::FormatPrompt);
29
30 let prompt_inputs = example
31 .prompt_inputs
32 .as_ref()
33 .context("prompt_inputs must be set after context retrieval")?;
34
35 match args.provider {
36 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
37 step_progress.set_substatus("formatting teacher prompt");
38
39 let zeta_format = ZetaFormat::default();
40 let (editable_range, context_range) =
41 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
42
43 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
44 example.prompt = Some(ExamplePrompt {
45 input: prompt,
46 expected_output: None,
47 rejected_output: None,
48 prefill: None,
49 provider: args.provider,
50 });
51 }
52 PredictionProvider::TeacherMultiRegion(_)
53 | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
54 step_progress.set_substatus("formatting teacher multi-region prompt");
55
56 let zeta_format = ZetaFormat::default();
57 let (editable_range, context_range) =
58 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
59
60 let prompt =
61 TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
62 example.prompt = Some(ExamplePrompt {
63 input: prompt,
64 expected_output: None,
65 rejected_output: None,
66 prefill: None,
67 provider: args.provider,
68 });
69 }
70 PredictionProvider::Zeta2(zeta_format) => {
71 step_progress.set_substatus("formatting zeta2 prompt");
72
73 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
74 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
75 let expected_output = example
76 .spec
77 .expected_patches_with_cursor_positions()
78 .into_iter()
79 .next()
80 .and_then(|(expected_patch, expected_cursor_offset)| {
81 zeta2_output_for_patch(
82 prompt_inputs,
83 &expected_patch,
84 expected_cursor_offset,
85 zeta_format,
86 )
87 .ok()
88 });
89
90 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
91 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
92 });
93
94 example.prompt = prompt.map(|prompt| ExamplePrompt {
95 input: prompt,
96 expected_output,
97 rejected_output,
98 provider: args.provider,
99 prefill: Some(prefill),
100 });
101 }
102 _ => {
103 panic!("Cannot format prompt for {:?}", args.provider);
104 }
105 };
106 Ok(())
107}
108
109pub fn zeta2_output_for_patch(
110 input: &zeta_prompt::ZetaPromptInput,
111 patch: &str,
112 cursor_offset: Option<usize>,
113 version: ZetaFormat,
114) -> Result<String> {
115 let (context, editable_range, _, _) = resolve_cursor_region(input, version);
116 let mut old_editable_region = context[editable_range].to_string();
117
118 if !old_editable_region.ends_with_newline() {
119 old_editable_region.push('\n');
120 }
121
122 if let Some(encoded_output) =
123 encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
124 {
125 return Ok(encoded_output);
126 }
127
128 let (result, first_hunk_offset) =
129 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
130 || {
131 format!(
132 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
133 patch, old_editable_region
134 )
135 },
136 )?;
137
138 if version == ZetaFormat::V0317SeedMultiRegions {
139 let cursor_in_new = cursor_offset.map(|cursor_offset| {
140 let hunk_start = first_hunk_offset.unwrap_or(0);
141 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
142 });
143 return multi_region::encode_from_old_and_new_v0317(
144 &old_editable_region,
145 &result,
146 cursor_in_new,
147 zeta_prompt::CURSOR_MARKER,
148 multi_region::V0317_END_MARKER,
149 );
150 }
151
152 if version == ZetaFormat::V0318SeedMultiRegions {
153 let cursor_in_new = cursor_offset.map(|cursor_offset| {
154 let hunk_start = first_hunk_offset.unwrap_or(0);
155 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
156 });
157 return multi_region::encode_from_old_and_new_v0318(
158 &old_editable_region,
159 &result,
160 cursor_in_new,
161 zeta_prompt::CURSOR_MARKER,
162 multi_region::V0318_END_MARKER,
163 );
164 }
165
166 if version == ZetaFormat::V0316SeedMultiRegions {
167 let cursor_in_new = cursor_offset.map(|cursor_offset| {
168 let hunk_start = first_hunk_offset.unwrap_or(0);
169 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
170 });
171 return multi_region::encode_from_old_and_new_v0316(
172 &old_editable_region,
173 &result,
174 cursor_in_new,
175 zeta_prompt::CURSOR_MARKER,
176 multi_region::V0316_END_MARKER,
177 );
178 }
179
180 if version == ZetaFormat::V0306SeedMultiRegions {
181 let cursor_in_new = cursor_offset.map(|cursor_offset| {
182 let hunk_start = first_hunk_offset.unwrap_or(0);
183 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
184 });
185 return multi_region::encode_from_old_and_new(
186 &old_editable_region,
187 &result,
188 cursor_in_new,
189 zeta_prompt::CURSOR_MARKER,
190 zeta_prompt::seed_coder::END_MARKER,
191 zeta_prompt::seed_coder::NO_EDITS,
192 );
193 }
194
195 let mut result = result;
196 if let Some(cursor_offset) = cursor_offset {
197 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
198 // We need to add where the hunk context matched in the editable region to compute
199 // the actual cursor position in the result.
200 let hunk_start = first_hunk_offset.unwrap_or(0);
201 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
202 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
203 }
204
205 if let Some(end_marker) = output_end_marker_for_format(version) {
206 if !result.ends_with('\n') {
207 result.push('\n');
208 }
209 result.push_str(end_marker);
210 }
211
212 Ok(result)
213}
214
215pub struct TeacherPrompt;
216
217impl TeacherPrompt {
218 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
219 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
220 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
221 pub(crate) const NO_EDITS: &str = "NO_EDITS";
222
223 /// Truncate edit history to this number of last lines
224 const MAX_HISTORY_LINES: usize = 128;
225
226 pub fn format_prompt(
227 example: &Example,
228 editable_range: Range<usize>,
229 context_range: Range<usize>,
230 ) -> String {
231 let edit_history = Self::format_edit_history(&example.spec.edit_history);
232 let context = Self::format_context(example);
233 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
234
235 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
236 let prompt = prompt_template
237 .replace("{{context}}", &context)
238 .replace("{{edit_history}}", &edit_history)
239 .replace("{{cursor_excerpt}}", &cursor_excerpt);
240
241 prompt
242 }
243
244 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
245 // Check if the model indicated no edits are needed
246 let no_edits = (String::new(), None);
247 if let Some(last_codeblock) = extract_last_codeblock(&response) {
248 if last_codeblock.trim() == Self::NO_EDITS {
249 return Ok(no_edits);
250 }
251 }
252
253 if response
254 .trim_end_matches(&[' ', '\n', '`'])
255 .ends_with(Self::NO_EDITS)
256 {
257 return Ok(no_edits);
258 }
259
260 // Extract updated (new) editable region from the model response.
261 let new_editable_region = Self::extract_editable_region(&response)?;
262 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
263 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
264 let old_editable_region = Self::extract_editable_region(
265 &example
266 .prompt
267 .as_ref()
268 .context("example prompt missing")?
269 .input,
270 )?
271 .replace(Self::USER_CURSOR_MARKER, "");
272
273 let prompt_inputs = example
274 .prompt_inputs
275 .as_ref()
276 .context("example is missing prompt inputs")?;
277
278 // Normalize leading newlines: if old starts with newline but new doesn't,
279 // prepend newline to new to preserve whitespace structure.
280 // This handles the case where the model drops the leading blank line.
281 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
282 new_editable_region.insert(0, '\n');
283 }
284
285 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
286 let (editable_region_offset, _) = excerpt
287 .match_indices(&old_editable_region)
288 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
289 .context("editable region not found in prompt content")?;
290 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
291
292 let editable_region_lines = old_editable_region.lines().count() as u32;
293 let diff = language::unified_diff_with_context(
294 &old_editable_region,
295 &new_editable_region,
296 editable_region_start_line as u32,
297 editable_region_start_line as u32,
298 editable_region_lines,
299 );
300
301 let diff = indoc::formatdoc! {"
302 --- a/{path}
303 +++ b/{path}
304 {diff}",
305 path = example.spec.cursor_path.to_string_lossy(),
306 diff = diff,
307 };
308
309 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
310 ActualCursor::from_editable_region(
311 &example.spec.cursor_path,
312 editable_region_cursor_offset,
313 &new_editable_region,
314 excerpt,
315 editable_region_offset,
316 editable_region_start_line,
317 )
318 });
319
320 Ok((diff, actual_cursor))
321 }
322
323 fn format_edit_history(edit_history: &str) -> String {
324 let lines: Vec<&str> = edit_history.lines().collect();
325
326 if lines.is_empty() {
327 return "(No edit history)".to_string();
328 }
329
330 if lines.len() > Self::MAX_HISTORY_LINES {
331 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
332 format!("{truncated}\n[...truncated...]")
333 } else {
334 lines.join("\n")
335 }
336 }
337
338 pub fn format_context(example: &Example) -> String {
339 let related_files = example
340 .prompt_inputs
341 .as_ref()
342 .and_then(|pi| pi.related_files.as_deref());
343
344 let Some(related_files) = related_files else {
345 return "(No context)".to_string();
346 };
347
348 if related_files.is_empty() {
349 return "(No context)".to_string();
350 }
351
352 let prefix = "`````";
353 let suffix = "`````\n\n";
354 let max_tokens = 1024;
355 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
356 }
357
358 fn format_cursor_excerpt(
359 example: &Example,
360 editable_range: Range<usize>,
361 context_range: Range<usize>,
362 ) -> String {
363 let mut result = String::new();
364
365 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
366 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
367 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
368
369 let path_str = example.spec.cursor_path.to_string_lossy();
370 result.push_str(&format!("`````{path_str}\n"));
371 result.push_str(&excerpt[context_range.start..editable_range.start]);
372 result.push_str(Self::EDITABLE_REGION_START);
373 result.push_str(&excerpt[editable_range.start..cursor_offset]);
374 result.push_str(Self::USER_CURSOR_MARKER);
375 result.push_str(&excerpt[cursor_offset..editable_range.end]);
376 result.push_str(Self::EDITABLE_REGION_END);
377 result.push_str(&excerpt[editable_range.end..context_range.end]);
378 result.push_str("\n`````");
379
380 result
381 }
382
383 pub fn extract_editable_region(text: &str) -> Result<String> {
384 let start = text
385 .rfind(Self::EDITABLE_REGION_START)
386 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
387 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
388
389 if start >= end {
390 return Err(anyhow!("Invalid editable region markers"));
391 }
392
393 let region = &text[start..end];
394 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
395 }
396}
397
398pub struct TeacherMultiRegionPrompt;
399
400impl TeacherMultiRegionPrompt {
401 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
402 pub(crate) const NO_EDITS: &str = "NO_EDITS";
403
404 /// Truncate edit history to this number of last lines
405 const MAX_HISTORY_LINES: usize = 128;
406
407 pub fn format_prompt(
408 example: &Example,
409 editable_range: Range<usize>,
410 context_range: Range<usize>,
411 ) -> String {
412 let edit_history = Self::format_edit_history(&example.spec.edit_history);
413 let context = Self::format_context(example);
414 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
415
416 let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
417 let prompt = prompt_template
418 .replace("{{context}}", &context)
419 .replace("{{edit_history}}", &edit_history)
420 .replace("{{cursor_excerpt}}", &cursor_excerpt);
421
422 prompt
423 }
424
425 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
426 let no_edits = (String::new(), None);
427 if let Some(last_codeblock) = extract_last_codeblock(&response) {
428 if last_codeblock.trim() == Self::NO_EDITS {
429 return Ok(no_edits);
430 }
431 }
432
433 if response.trim().ends_with(Self::NO_EDITS) {
434 return Ok(no_edits);
435 }
436
437 let prompt_inputs = example
438 .prompt_inputs
439 .as_ref()
440 .context("example is missing prompt inputs")?;
441
442 let zeta_format = ZetaFormat::default();
443 let (editable_range, _) =
444 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
445 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
446 let old_editable_region = &excerpt[editable_range.clone()];
447 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
448
449 let codeblock =
450 extract_last_codeblock(&response).context("no codeblock found in model response")?;
451 let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
452
453 let start_idx = start_num
454 .checked_sub(1)
455 .context("marker numbers are 1-indexed")?;
456 let end_idx = end_num
457 .checked_sub(1)
458 .context("marker numbers are 1-indexed")?;
459 let start_byte = *marker_offsets
460 .get(start_idx)
461 .context("start marker number out of range")?;
462 let end_byte = *marker_offsets
463 .get(end_idx)
464 .context("end marker number out of range")?;
465
466 if start_byte > end_byte {
467 return Err(anyhow!("start marker must come before end marker"));
468 }
469
470 let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
471 let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
472
473 let old_span = &old_editable_region[start_byte..end_byte];
474 let mut new_span = new_span;
475 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
476 new_span.push('\n');
477 }
478 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
479 new_span.pop();
480 }
481
482 let mut new_editable_region = String::new();
483 new_editable_region.push_str(&old_editable_region[..start_byte]);
484 new_editable_region.push_str(&new_span);
485 new_editable_region.push_str(&old_editable_region[end_byte..]);
486
487 let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
488
489 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
490 new_editable_region.insert(0, '\n');
491 }
492
493 let editable_region_offset = editable_range.start;
494 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
495
496 let editable_region_lines = old_editable_region.lines().count() as u32;
497 let diff = language::unified_diff_with_context(
498 old_editable_region,
499 &new_editable_region,
500 editable_region_start_line as u32,
501 editable_region_start_line as u32,
502 editable_region_lines,
503 );
504
505 let diff = indoc::formatdoc! {"
506 --- a/{path}
507 +++ b/{path}
508 {diff}",
509 path = example.spec.cursor_path.to_string_lossy(),
510 diff = diff,
511 };
512
513 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
514 ActualCursor::from_editable_region(
515 &example.spec.cursor_path,
516 editable_region_cursor_offset,
517 &new_editable_region,
518 excerpt,
519 editable_region_offset,
520 editable_region_start_line,
521 )
522 });
523
524 Ok((diff, actual_cursor))
525 }
526
527 fn format_edit_history(edit_history: &str) -> String {
528 let lines: Vec<&str> = edit_history.lines().collect();
529
530 if lines.is_empty() {
531 return "(No edit history)".to_string();
532 }
533
534 if lines.len() > Self::MAX_HISTORY_LINES {
535 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
536 format!("{truncated}\n[...truncated...]")
537 } else {
538 lines.join("\n")
539 }
540 }
541
542 pub fn format_context(example: &Example) -> String {
543 let related_files = example
544 .prompt_inputs
545 .as_ref()
546 .and_then(|pi| pi.related_files.as_deref());
547 let Some(related_files) = related_files else {
548 return "(No context)".to_string();
549 };
550
551 if related_files.is_empty() {
552 return "(No context)".to_string();
553 }
554
555 let prefix = "`````";
556 let suffix = "`````\n\n";
557 let max_tokens = 1024;
558 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
559 }
560
561 fn format_cursor_excerpt(
562 example: &Example,
563 editable_range: Range<usize>,
564 context_range: Range<usize>,
565 ) -> String {
566 let mut result = String::new();
567
568 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
569 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
570 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
571
572 let editable_text = &excerpt[editable_range.clone()];
573 let cursor_in_editable = cursor_offset - editable_range.start;
574
575 let path_str = example.spec.cursor_path.to_string_lossy();
576 result.push_str(&format!("`````{path_str}\n"));
577
578 result.push_str(&excerpt[context_range.start..editable_range.start]);
579
580 multi_region::write_editable_with_markers(
581 &mut result,
582 editable_text,
583 cursor_in_editable,
584 Self::USER_CURSOR_MARKER,
585 );
586
587 result.push_str(&excerpt[editable_range.end..context_range.end]);
588 result.push_str("\n`````");
589
590 result
591 }
592}
593
594/// Extract the cursor excerpt from an example.
595/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
596pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
597 // If we have the original prompt, extract the cursor excerpt from it
598 if let Some(prompt) = &example.prompt {
599 // Find "# 3. Current File" section and extract the content
600 if let Some(start) = prompt.input.find("# 3. Current File") {
601 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
602 let backtick_count = prompt.input[content_start..]
603 .chars()
604 .take_while(|&c| c == '`')
605 .count();
606 let content_start = content_start + backtick_count;
607
608 // Find the path line and skip it
609 let newline_pos = prompt.input[content_start..].find('\n')?;
610 let text_start = content_start + newline_pos + 1;
611
612 // Find the closing backticks
613 let closing_pattern = "`".repeat(backtick_count);
614 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
615 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
616
617 let path_str = example.spec.cursor_path.to_string_lossy();
618 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
619 }
620 }
621
622 // Fallback: construct from prompt_inputs if available
623 let prompt_inputs = example.prompt_inputs.as_ref()?;
624 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
625 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
626
627 // Simple fallback: just show content around cursor with markers
628 let path_str = example.spec.cursor_path.to_string_lossy();
629 let mut result = format!("`````{path_str}\n");
630 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
631 result.push_str(&excerpt[..cursor_offset]);
632 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
633 result.push_str(&excerpt[cursor_offset..]);
634 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
635 result.push_str("\n`````");
636
637 Some(result)
638}
639
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts (without leading
/// whitespace) with exactly that many backticks, optionally followed by language/metadata.
/// Shorter fences nested inside the block are therefore treated as content.
///
/// A non-empty block is returned with its lines joined by `\n` plus a trailing `\n`; an empty
/// block yields an empty string. `None` is returned when no closing fence or no matching
/// opening fence exists.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Last line made up of nothing but backticks (3+), ignoring surrounding whitespace.
    let closing_idx = lines.iter().rposition(|line| {
        let trimmed = line.trim();
        trimmed.len() >= 3 && trimmed.chars().all(|ch| ch == '`')
    })?;
    // The trimmed closing line is exactly the backtick run the opening fence must start with.
    let fence = lines[closing_idx].trim();

    // Nearest earlier line opening with that run and not continuing with another backtick,
    // i.e. a fence of exactly the same length.
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence)
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let content_lines = &lines[opening_idx + 1..closing_idx];
    if content_lines.is_empty() {
        // Empty block
        Some(String::new())
    } else {
        // Preserve trailing newline to match previous behavior
        Some(format!("{}\n", content_lines.join("\n")))
    }
}
684
#[cfg(test)]
mod tests {
    use super::*;

    /// The last of several code blocks wins, and fence metadata is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let response = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, "last block\n");
    }

    /// Shorter fences inside a five-backtick block are treated as content.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let response = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(
            extracted,
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    /// Backticks in the middle of a line never count as fences.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(
            extracted,
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    /// Explicit start/end markers delimit the region; one trailing newline is stripped.
    #[test]
    fn test_extract_editable_region_old_format() {
        let prompt = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        let expected = indoc::indoc! {"
            one
            two three"};
        assert_eq!(region, expected);
    }

    /// A single pair of numbered markers delimits the region.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let prompt = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// With more than two markers the region spans from the first to the last one.
    #[test]
    fn test_extract_editable_region_multi_markers() {
        let prompt = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }

    /// A nested three-backtick bibtex block stays inside the outer five-backtick block.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc::indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            "#};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, expected);
    }

    /// Without any markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let prompt = indoc::indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        let expected = indoc::indoc! {"
            one
            two three"};
        assert_eq!(region, expected);
    }

    /// A `NO_EDITS` code block is recognisable after trimming the extracted block.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
            "};
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(plain).is_none());

        // Unclosed code block should return None
        let unclosed = indoc::indoc! {"
            ```
            unclosed block
            "};
        assert!(extract_last_codeblock(unclosed).is_none());

        // Analysis text with nested markdown but no proper outer block
        let analysis = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
            "};
        // This should find the inner block
        let extracted = extract_last_codeblock(analysis).unwrap();
        assert_eq!(extracted, "some code\n");
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        let extracted = extract_last_codeblock(response).unwrap();
        assert_eq!(extracted, "content here\n");
    }

    /// `NO_EDITS` followed by stray backticks is still treated as "no edits", before the
    /// example's prompt or prompt inputs are ever consulted.
    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let example = Example {
            spec: edit_prediction::example_spec::ExampleSpec {
                name: "test".to_string(),
                repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                revision: "HEAD".to_string(),
                tags: Vec::new(),
                reasoning: None,
                uncommitted_diff: String::new(),
                cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                cursor_position: "0:0".to_string(),
                edit_history: String::new(),
                expected_patches: Vec::new(),
                rejected_patch: None,
                telemetry: None,
                human_feedback: Vec::new(),
                rating: None,
            },
            prompt_inputs: None,
            prompt: None,
            predictions: Vec::new(),
            score: Vec::new(),
            qa: Vec::new(),
            zed_version: None,
            state: None,
        };

        let (diff, cursor) = TeacherPrompt::parse(&example, "NO_EDITS```").unwrap();

        assert!(diff.is_empty());
        assert!(cursor.is_none());
    }
}