1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::udiff;
10use gpui::AsyncApp;
11use similar::DiffableStr;
12use std::ops::Range;
13use std::sync::Arc;
14use zeta_prompt::{
15 ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
16 multi_region, output_end_marker_for_format, resolve_cursor_region,
17};
18
19pub async fn run_format_prompt(
20 example: &mut Example,
21 args: &FormatPromptArgs,
22 app_state: Arc<EpAppState>,
23 example_progress: &ExampleProgress,
24 cx: AsyncApp,
25) -> Result<()> {
26 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
27
28 let step_progress = example_progress.start(Step::FormatPrompt);
29
30 let prompt_inputs = example
31 .prompt_inputs
32 .as_ref()
33 .context("prompt_inputs must be set after context retrieval")?;
34
35 match args.provider {
36 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
37 step_progress.set_substatus("formatting teacher prompt");
38
39 let zeta_format = ZetaFormat::default();
40 let (editable_range, context_range) =
41 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
42
43 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
44 example.prompt = Some(ExamplePrompt {
45 input: prompt,
46 expected_output: String::new(),
47 rejected_output: None,
48 prefill: None,
49 provider: args.provider,
50 });
51 }
52 PredictionProvider::TeacherMultiRegion(_)
53 | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
54 step_progress.set_substatus("formatting teacher multi-region prompt");
55
56 let zeta_format = ZetaFormat::default();
57 let (editable_range, context_range) =
58 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
59
60 let prompt =
61 TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
62 example.prompt = Some(ExamplePrompt {
63 input: prompt,
64 expected_output: String::new(),
65 rejected_output: None,
66 prefill: None,
67 provider: args.provider,
68 });
69 }
70 PredictionProvider::Zeta2(zeta_format) => {
71 step_progress.set_substatus("formatting zeta2 prompt");
72
73 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
74 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
75 let expected_output = example
76 .spec
77 .expected_patches_with_cursor_positions()
78 .into_iter()
79 .next()
80 .and_then(|(expected_patch, expected_cursor_offset)| {
81 zeta2_output_for_patch(
82 prompt_inputs,
83 &expected_patch,
84 expected_cursor_offset,
85 zeta_format,
86 )
87 .ok()
88 })
89 .unwrap_or_default();
90
91 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
92 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
93 });
94
95 example.prompt = prompt.map(|prompt| ExamplePrompt {
96 input: prompt,
97 expected_output,
98 rejected_output,
99 provider: args.provider,
100 prefill: Some(prefill),
101 });
102 }
103 _ => {
104 panic!("Cannot format prompt for {:?}", args.provider);
105 }
106 };
107 Ok(())
108}
109
110pub fn zeta2_output_for_patch(
111 input: &zeta_prompt::ZetaPromptInput,
112 patch: &str,
113 cursor_offset: Option<usize>,
114 version: ZetaFormat,
115) -> Result<String> {
116 let (context, editable_range, _, _) = resolve_cursor_region(input, version);
117 let mut old_editable_region = context[editable_range].to_string();
118
119 if !old_editable_region.ends_with_newline() {
120 old_editable_region.push('\n');
121 }
122
123 if let Some(encoded_output) =
124 encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
125 {
126 return Ok(encoded_output);
127 }
128
129 let (result, first_hunk_offset) =
130 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
131 || {
132 format!(
133 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
134 patch, old_editable_region
135 )
136 },
137 )?;
138
139 if version == ZetaFormat::V0317SeedMultiRegions {
140 let cursor_in_new = cursor_offset.map(|cursor_offset| {
141 let hunk_start = first_hunk_offset.unwrap_or(0);
142 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
143 });
144 return multi_region::encode_from_old_and_new_v0317(
145 &old_editable_region,
146 &result,
147 cursor_in_new,
148 zeta_prompt::CURSOR_MARKER,
149 multi_region::V0317_END_MARKER,
150 );
151 }
152
153 if version == ZetaFormat::V0318SeedMultiRegions {
154 let cursor_in_new = cursor_offset.map(|cursor_offset| {
155 let hunk_start = first_hunk_offset.unwrap_or(0);
156 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
157 });
158 return multi_region::encode_from_old_and_new_v0318(
159 &old_editable_region,
160 &result,
161 cursor_in_new,
162 zeta_prompt::CURSOR_MARKER,
163 multi_region::V0318_END_MARKER,
164 );
165 }
166
167 if version == ZetaFormat::V0316SeedMultiRegions {
168 let cursor_in_new = cursor_offset.map(|cursor_offset| {
169 let hunk_start = first_hunk_offset.unwrap_or(0);
170 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
171 });
172 return multi_region::encode_from_old_and_new_v0316(
173 &old_editable_region,
174 &result,
175 cursor_in_new,
176 zeta_prompt::CURSOR_MARKER,
177 multi_region::V0316_END_MARKER,
178 );
179 }
180
181 if version == ZetaFormat::V0306SeedMultiRegions {
182 let cursor_in_new = cursor_offset.map(|cursor_offset| {
183 let hunk_start = first_hunk_offset.unwrap_or(0);
184 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
185 });
186 return multi_region::encode_from_old_and_new(
187 &old_editable_region,
188 &result,
189 cursor_in_new,
190 zeta_prompt::CURSOR_MARKER,
191 zeta_prompt::seed_coder::END_MARKER,
192 zeta_prompt::seed_coder::NO_EDITS,
193 );
194 }
195
196 let mut result = result;
197 if let Some(cursor_offset) = cursor_offset {
198 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
199 // We need to add where the hunk context matched in the editable region to compute
200 // the actual cursor position in the result.
201 let hunk_start = first_hunk_offset.unwrap_or(0);
202 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
203 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
204 }
205
206 if let Some(end_marker) = output_end_marker_for_format(version) {
207 if !result.ends_with('\n') {
208 result.push('\n');
209 }
210 result.push_str(end_marker);
211 }
212
213 Ok(result)
214}
215
216pub struct TeacherPrompt;
217
218impl TeacherPrompt {
219 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
220 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
221 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
222 pub(crate) const NO_EDITS: &str = "NO_EDITS";
223
224 /// Truncate edit history to this number of last lines
225 const MAX_HISTORY_LINES: usize = 128;
226
227 pub fn format_prompt(
228 example: &Example,
229 editable_range: Range<usize>,
230 context_range: Range<usize>,
231 ) -> String {
232 let edit_history = Self::format_edit_history(&example.spec.edit_history);
233 let context = Self::format_context(example);
234 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
235
236 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
237 let prompt = prompt_template
238 .replace("{{context}}", &context)
239 .replace("{{edit_history}}", &edit_history)
240 .replace("{{cursor_excerpt}}", &cursor_excerpt);
241
242 prompt
243 }
244
245 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
246 // Check if the model indicated no edits are needed
247 let no_edits = (String::new(), None);
248 if let Some(last_codeblock) = extract_last_codeblock(&response) {
249 if last_codeblock.trim() == Self::NO_EDITS {
250 return Ok(no_edits);
251 }
252 }
253
254 if response
255 .trim_end_matches(&[' ', '\n', '`'])
256 .ends_with(Self::NO_EDITS)
257 {
258 return Ok(no_edits);
259 }
260
261 // Extract updated (new) editable region from the model response.
262 let new_editable_region = Self::extract_editable_region(&response)?;
263 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
264 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
265 let old_editable_region = Self::extract_editable_region(
266 &example
267 .prompt
268 .as_ref()
269 .context("example prompt missing")?
270 .input,
271 )?
272 .replace(Self::USER_CURSOR_MARKER, "");
273
274 let prompt_inputs = example
275 .prompt_inputs
276 .as_ref()
277 .context("example is missing prompt inputs")?;
278
279 // Normalize leading newlines: if old starts with newline but new doesn't,
280 // prepend newline to new to preserve whitespace structure.
281 // This handles the case where the model drops the leading blank line.
282 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
283 new_editable_region.insert(0, '\n');
284 }
285
286 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
287 let (editable_region_offset, _) = excerpt
288 .match_indices(&old_editable_region)
289 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
290 .context("editable region not found in prompt content")?;
291 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
292
293 let editable_region_lines = old_editable_region.lines().count() as u32;
294 let diff = language::unified_diff_with_context(
295 &old_editable_region,
296 &new_editable_region,
297 editable_region_start_line as u32,
298 editable_region_start_line as u32,
299 editable_region_lines,
300 );
301
302 let diff = indoc::formatdoc! {"
303 --- a/{path}
304 +++ b/{path}
305 {diff}",
306 path = example.spec.cursor_path.to_string_lossy(),
307 diff = diff,
308 };
309
310 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
311 ActualCursor::from_editable_region(
312 &example.spec.cursor_path,
313 editable_region_cursor_offset,
314 &new_editable_region,
315 excerpt,
316 editable_region_offset,
317 editable_region_start_line,
318 )
319 });
320
321 Ok((diff, actual_cursor))
322 }
323
324 fn format_edit_history(edit_history: &str) -> String {
325 let lines: Vec<&str> = edit_history.lines().collect();
326
327 if lines.is_empty() {
328 return "(No edit history)".to_string();
329 }
330
331 if lines.len() > Self::MAX_HISTORY_LINES {
332 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
333 format!("{truncated}\n[...truncated...]")
334 } else {
335 lines.join("\n")
336 }
337 }
338
339 pub fn format_context(example: &Example) -> String {
340 let related_files = example
341 .prompt_inputs
342 .as_ref()
343 .and_then(|pi| pi.related_files.as_deref());
344
345 let Some(related_files) = related_files else {
346 return "(No context)".to_string();
347 };
348
349 if related_files.is_empty() {
350 return "(No context)".to_string();
351 }
352
353 let prefix = "`````";
354 let suffix = "`````\n\n";
355 let max_tokens = 1024;
356 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
357 }
358
359 fn format_cursor_excerpt(
360 example: &Example,
361 editable_range: Range<usize>,
362 context_range: Range<usize>,
363 ) -> String {
364 let mut result = String::new();
365
366 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
367 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
368 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
369
370 let path_str = example.spec.cursor_path.to_string_lossy();
371 result.push_str(&format!("`````{path_str}\n"));
372 result.push_str(&excerpt[context_range.start..editable_range.start]);
373 result.push_str(Self::EDITABLE_REGION_START);
374 result.push_str(&excerpt[editable_range.start..cursor_offset]);
375 result.push_str(Self::USER_CURSOR_MARKER);
376 result.push_str(&excerpt[cursor_offset..editable_range.end]);
377 result.push_str(Self::EDITABLE_REGION_END);
378 result.push_str(&excerpt[editable_range.end..context_range.end]);
379 result.push_str("\n`````");
380
381 result
382 }
383
384 pub fn extract_editable_region(text: &str) -> Result<String> {
385 let start = text
386 .rfind(Self::EDITABLE_REGION_START)
387 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
388 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
389
390 if start >= end {
391 return Err(anyhow!("Invalid editable region markers"));
392 }
393
394 let region = &text[start..end];
395 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
396 }
397}
398
399pub struct TeacherMultiRegionPrompt;
400
401impl TeacherMultiRegionPrompt {
402 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
403 pub(crate) const NO_EDITS: &str = "NO_EDITS";
404
405 /// Truncate edit history to this number of last lines
406 const MAX_HISTORY_LINES: usize = 128;
407
408 pub fn format_prompt(
409 example: &Example,
410 editable_range: Range<usize>,
411 context_range: Range<usize>,
412 ) -> String {
413 let edit_history = Self::format_edit_history(&example.spec.edit_history);
414 let context = Self::format_context(example);
415 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
416
417 let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
418 let prompt = prompt_template
419 .replace("{{context}}", &context)
420 .replace("{{edit_history}}", &edit_history)
421 .replace("{{cursor_excerpt}}", &cursor_excerpt);
422
423 prompt
424 }
425
426 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
427 let no_edits = (String::new(), None);
428 if let Some(last_codeblock) = extract_last_codeblock(&response) {
429 if last_codeblock.trim() == Self::NO_EDITS {
430 return Ok(no_edits);
431 }
432 }
433
434 if response.trim().ends_with(Self::NO_EDITS) {
435 return Ok(no_edits);
436 }
437
438 let prompt_inputs = example
439 .prompt_inputs
440 .as_ref()
441 .context("example is missing prompt inputs")?;
442
443 let zeta_format = ZetaFormat::default();
444 let (editable_range, _) =
445 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
446 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
447 let old_editable_region = &excerpt[editable_range.clone()];
448 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
449
450 let codeblock =
451 extract_last_codeblock(&response).context("no codeblock found in model response")?;
452 let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
453
454 let start_idx = start_num
455 .checked_sub(1)
456 .context("marker numbers are 1-indexed")?;
457 let end_idx = end_num
458 .checked_sub(1)
459 .context("marker numbers are 1-indexed")?;
460 let start_byte = *marker_offsets
461 .get(start_idx)
462 .context("start marker number out of range")?;
463 let end_byte = *marker_offsets
464 .get(end_idx)
465 .context("end marker number out of range")?;
466
467 if start_byte > end_byte {
468 return Err(anyhow!("start marker must come before end marker"));
469 }
470
471 let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
472 let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
473
474 let old_span = &old_editable_region[start_byte..end_byte];
475 let mut new_span = new_span;
476 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
477 new_span.push('\n');
478 }
479 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
480 new_span.pop();
481 }
482
483 let mut new_editable_region = String::new();
484 new_editable_region.push_str(&old_editable_region[..start_byte]);
485 new_editable_region.push_str(&new_span);
486 new_editable_region.push_str(&old_editable_region[end_byte..]);
487
488 let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
489
490 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
491 new_editable_region.insert(0, '\n');
492 }
493
494 let editable_region_offset = editable_range.start;
495 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
496
497 let editable_region_lines = old_editable_region.lines().count() as u32;
498 let diff = language::unified_diff_with_context(
499 old_editable_region,
500 &new_editable_region,
501 editable_region_start_line as u32,
502 editable_region_start_line as u32,
503 editable_region_lines,
504 );
505
506 let diff = indoc::formatdoc! {"
507 --- a/{path}
508 +++ b/{path}
509 {diff}",
510 path = example.spec.cursor_path.to_string_lossy(),
511 diff = diff,
512 };
513
514 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
515 ActualCursor::from_editable_region(
516 &example.spec.cursor_path,
517 editable_region_cursor_offset,
518 &new_editable_region,
519 excerpt,
520 editable_region_offset,
521 editable_region_start_line,
522 )
523 });
524
525 Ok((diff, actual_cursor))
526 }
527
528 fn format_edit_history(edit_history: &str) -> String {
529 let lines: Vec<&str> = edit_history.lines().collect();
530
531 if lines.is_empty() {
532 return "(No edit history)".to_string();
533 }
534
535 if lines.len() > Self::MAX_HISTORY_LINES {
536 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
537 format!("{truncated}\n[...truncated...]")
538 } else {
539 lines.join("\n")
540 }
541 }
542
543 pub fn format_context(example: &Example) -> String {
544 let related_files = example
545 .prompt_inputs
546 .as_ref()
547 .and_then(|pi| pi.related_files.as_deref());
548 let Some(related_files) = related_files else {
549 return "(No context)".to_string();
550 };
551
552 if related_files.is_empty() {
553 return "(No context)".to_string();
554 }
555
556 let prefix = "`````";
557 let suffix = "`````\n\n";
558 let max_tokens = 1024;
559 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
560 }
561
562 fn format_cursor_excerpt(
563 example: &Example,
564 editable_range: Range<usize>,
565 context_range: Range<usize>,
566 ) -> String {
567 let mut result = String::new();
568
569 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
570 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
571 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
572
573 let editable_text = &excerpt[editable_range.clone()];
574 let cursor_in_editable = cursor_offset - editable_range.start;
575
576 let path_str = example.spec.cursor_path.to_string_lossy();
577 result.push_str(&format!("`````{path_str}\n"));
578
579 result.push_str(&excerpt[context_range.start..editable_range.start]);
580
581 multi_region::write_editable_with_markers(
582 &mut result,
583 editable_text,
584 cursor_in_editable,
585 Self::USER_CURSOR_MARKER,
586 );
587
588 result.push_str(&excerpt[editable_range.end..context_range.end]);
589 result.push_str("\n`````");
590
591 result
592 }
593}
594
595/// Extract the cursor excerpt from an example.
596/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
597pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
598 // If we have the original prompt, extract the cursor excerpt from it
599 if let Some(prompt) = &example.prompt {
600 // Find "# 3. Current File" section and extract the content
601 if let Some(start) = prompt.input.find("# 3. Current File") {
602 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
603 let backtick_count = prompt.input[content_start..]
604 .chars()
605 .take_while(|&c| c == '`')
606 .count();
607 let content_start = content_start + backtick_count;
608
609 // Find the path line and skip it
610 let newline_pos = prompt.input[content_start..].find('\n')?;
611 let text_start = content_start + newline_pos + 1;
612
613 // Find the closing backticks
614 let closing_pattern = "`".repeat(backtick_count);
615 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
616 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
617
618 let path_str = example.spec.cursor_path.to_string_lossy();
619 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
620 }
621 }
622
623 // Fallback: construct from prompt_inputs if available
624 let prompt_inputs = example.prompt_inputs.as_ref()?;
625 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
626 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
627
628 // Simple fallback: just show content around cursor with markers
629 let path_str = example.spec.cursor_path.to_string_lossy();
630 let mut result = format!("`````{path_str}\n");
631 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
632 result.push_str(&excerpt[..cursor_offset]);
633 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
634 result.push_str(&excerpt[cursor_offset..]);
635 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
636 result.push_str("\n`````");
637
638 Some(result)
639}
640
/// Returns the content of the last fenced code block in `text`, if any.
///
/// The closing fence is the last line that, once trimmed, consists solely of three or more
/// backticks. The opening fence is the nearest preceding line that starts with exactly that
/// many backticks, optionally followed by language or metadata text. Non-empty content is
/// returned with its lines joined by `\n` plus a trailing newline; an empty block yields an
/// empty string. Returns `None` when no closing fence or no matching opening fence is found.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Closing fence: the last line made up only of backticks (3+), ignoring surrounding space.
    let is_bare_fence = |line: &str| line.len() >= 3 && line.chars().all(|c| c == '`');
    let closing_idx = lines.iter().rposition(|line| is_bare_fence(line.trim()))?;
    let fence = lines[closing_idx].trim();

    // Opening fence: the nearest earlier line starting with the same run of backticks and not
    // followed by a further backtick (which would make it a longer fence).
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence)
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let body = &lines[opening_idx + 1..closing_idx];
    if body.is_empty() {
        Some(String::new())
    } else {
        // Preserve the trailing newline after the last content line.
        Some(format!("{}\n", body.join("\n")))
    }
}
685
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an example that has neither prompt inputs nor a prompt.
    fn example_without_prompt() -> Example {
        Example {
            spec: edit_prediction::example_spec::ExampleSpec {
                name: "test".to_string(),
                repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                revision: "HEAD".to_string(),
                tags: Vec::new(),
                reasoning: None,
                uncommitted_diff: String::new(),
                cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                cursor_position: "0:0".to_string(),
                edit_history: String::new(),
                expected_patches: Vec::new(),
                rejected_patch: None,
                telemetry: None,
                human_feedback: Vec::new(),
                rating: None,
            },
            prompt_inputs: None,
            prompt: None,
            predictions: Vec::new(),
            score: Vec::new(),
            qa: Vec::new(),
            zed_version: None,
            state: None,
        }
    }

    #[test]
    fn test_extract_last_code_block() {
        let text = "Some thinking\n\
                    \n\
                    ```\n\
                    first block\n\
                    ```\n\
                    \n\
                    `````path='something' lines=1:2\n\
                    last block\n\
                    `````\n";
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(last_block, "last block\n");
    }

    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let content = "content with ``` inline\nand ```python nested\nmore content\n";
        let text = format!("`````\n{content}`````\n");
        let last_block = extract_last_codeblock(&text).unwrap();
        assert_eq!(last_block, content);
    }

    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let content = "here is some `code` with inline backticks\nand here```more```stuff\n";
        let text = format!("`````\n{content}`````\n");
        let last_block = extract_last_codeblock(&text).unwrap();
        assert_eq!(last_block, content);
    }

    #[test]
    fn test_extract_editable_region_old_format() {
        let text = "some lines\n\
                    are\n\
                    here\n\
                    <|editable_region_start|>\n\
                    one\n\
                    two three\n\
                    \n\
                    <|editable_region_end|>\n\
                    more\n\
                    lines here\n";
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_extract_editable_region_marker_format() {
        let text = "some context\n\
                    <|marker_1|>\n\
                    one\n\
                    two three\n\
                    <|marker_2|>\n\
                    more context\n";
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_extract_editable_region_multi_markers() {
        let text = "prefix\n\
                    <|marker_1|>\n\
                    aaa\n\
                    bbb\n\
                    <|marker_2|>\n\
                    ccc\n\
                    ddd\n\
                    <|marker_3|>\n\
                    suffix\n";
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        // The intermediate marker and its trailing newline are stripped.
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        // The outer five-backtick block contains a nested three-backtick bibtex block.
        let expected = [
            "## Collaborations",
            "Our mission is to create a 4D generative model.",
            "",
            "## Citation",
            "",
            "If you found Unique3D helpful, please cite our report:",
            "```bibtex",
            "@misc{wu2024unique3d,",
            "    title={Unique3D},",
            "}",
            "```",
            "",
        ]
        .join("\n");
        let text = format!(
            "Looking at the edit history, I can see that a Citation section was just added.\n\
             \n\
             `````\n\
             {expected}`````\n"
        );
        let last_block = extract_last_codeblock(&text).unwrap();
        assert_eq!(last_block, expected);
    }

    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = "one\ntwo three";
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_parse_no_edits_response() {
        let response = "The code is already complete. There is no clear next edit to make.\n\
                        \n\
                        `````\n\
                        NO_EDITS\n\
                        `````\n";
        let codeblock = extract_last_codeblock(response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks yields nothing.
        let plain = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(plain).is_none());

        // An unclosed code block yields nothing.
        let unclosed = "```\nunclosed block\n";
        assert!(extract_last_codeblock(unclosed).is_none());

        // Analysis text with an inner block but no outer wrapping block: the inner block
        // is the one that gets found.
        let analysis = "# Analysis\n\
                        Looking at this:\n\
                        ```\n\
                        some code\n\
                        ```\n\
                        But then more analysis without wrapping block\n";
        let result = extract_last_codeblock(analysis).unwrap();
        assert_eq!(result, "some code\n");
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // The closing fence is the final line and is not followed by a newline.
        let text = "`````\ncontent here\n`````";
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "content here\n");
    }

    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let response = "NO_EDITS```";

        let (diff, cursor) = TeacherPrompt::parse(&example_without_prompt(), response).unwrap();

        assert!(diff.is_empty());
        assert!(cursor.is_none());
    }
}