1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use gpui::AsyncApp;
10use similar::DiffableStr;
11use std::ops::Range;
12use std::sync::Arc;
13use zeta_prompt::udiff;
14use zeta_prompt::{
15 ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
16 multi_region, output_end_marker_for_format, resolve_cursor_region,
17};
18
19pub async fn run_format_prompt(
20 example: &mut Example,
21 args: &FormatPromptArgs,
22 app_state: Arc<EpAppState>,
23 example_progress: &ExampleProgress,
24 cx: AsyncApp,
25) -> Result<()> {
26 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
27
28 let step_progress = example_progress.start(Step::FormatPrompt);
29
30 let prompt_inputs = example
31 .prompt_inputs
32 .as_ref()
33 .context("prompt_inputs must be set after context retrieval")?;
34
35 match args.provider {
36 PredictionProvider::Teacher(_, zeta_format)
37 | PredictionProvider::TeacherNonBatching(_, zeta_format) => {
38 step_progress.set_substatus("formatting teacher prompt");
39
40 let (editable_range, context_range) =
41 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
42
43 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
44 example.prompt = Some(ExamplePrompt {
45 input: prompt,
46 expected_output: None,
47 rejected_output: None,
48 prefill: None,
49 provider: args.provider,
50 });
51 }
52 PredictionProvider::TeacherMultiRegion(_)
53 | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
54 step_progress.set_substatus("formatting teacher multi-region prompt");
55
56 let zeta_format = ZetaFormat::default();
57 let (editable_range, context_range) =
58 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
59
60 let prompt =
61 TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
62 example.prompt = Some(ExamplePrompt {
63 input: prompt,
64 expected_output: None,
65 rejected_output: None,
66 prefill: None,
67 provider: args.provider,
68 });
69 }
70 PredictionProvider::Zeta2(zeta_format) => {
71 step_progress.set_substatus("formatting zeta2 prompt");
72
73 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
74 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
75 let expected_output = example
76 .spec
77 .expected_patches_with_cursor_positions()
78 .into_iter()
79 .next()
80 .and_then(|(expected_patch, expected_cursor_offset)| {
81 zeta2_output_for_patch(
82 prompt_inputs,
83 &expected_patch,
84 expected_cursor_offset,
85 zeta_format,
86 )
87 .ok()
88 });
89
90 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
91 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
92 });
93
94 example.prompt = prompt.map(|prompt| ExamplePrompt {
95 input: prompt,
96 expected_output,
97 rejected_output,
98 provider: args.provider,
99 prefill: Some(prefill),
100 });
101 }
102 _ => {
103 panic!("Cannot format prompt for {:?}", args.provider);
104 }
105 };
106 Ok(())
107}
108
109pub fn zeta2_output_for_patch(
110 input: &zeta_prompt::ZetaPromptInput,
111 patch: &str,
112 cursor_offset: Option<usize>,
113 version: ZetaFormat,
114) -> Result<String> {
115 let (context, editable_range, _, _) = resolve_cursor_region(input, version);
116 let mut old_editable_region = context[editable_range].to_string();
117
118 if !old_editable_region.ends_with_newline() {
119 old_editable_region.push('\n');
120 }
121
122 if let Some(encoded_output) =
123 encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
124 {
125 return Ok(encoded_output);
126 }
127
128 let (result, first_hunk_offset) =
129 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
130 || {
131 format!(
132 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
133 patch, old_editable_region
134 )
135 },
136 )?;
137
138 if version == ZetaFormat::V0317SeedMultiRegions {
139 let cursor_in_new = cursor_offset.map(|cursor_offset| {
140 let hunk_start = first_hunk_offset.unwrap_or(0);
141 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
142 });
143 return multi_region::encode_from_old_and_new_v0317(
144 &old_editable_region,
145 &result,
146 cursor_in_new,
147 zeta_prompt::CURSOR_MARKER,
148 multi_region::V0317_END_MARKER,
149 );
150 }
151
152 if version == ZetaFormat::V0318SeedMultiRegions {
153 let cursor_in_new = cursor_offset.map(|cursor_offset| {
154 let hunk_start = first_hunk_offset.unwrap_or(0);
155 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
156 });
157 return multi_region::encode_from_old_and_new_v0318(
158 &old_editable_region,
159 &result,
160 cursor_in_new,
161 zeta_prompt::CURSOR_MARKER,
162 multi_region::V0318_END_MARKER,
163 );
164 }
165
166 if version == ZetaFormat::V0327SingleFile {
167 let cursor_in_new = cursor_offset.map(|cursor_offset| {
168 let hunk_start = first_hunk_offset.unwrap_or(0);
169 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
170 });
171 return multi_region::encode_from_old_and_new_v0318(
172 &old_editable_region,
173 &result,
174 cursor_in_new,
175 zeta_prompt::CURSOR_MARKER,
176 multi_region::V0327_END_MARKER,
177 );
178 }
179
180 if version == ZetaFormat::V0316SeedMultiRegions {
181 let cursor_in_new = cursor_offset.map(|cursor_offset| {
182 let hunk_start = first_hunk_offset.unwrap_or(0);
183 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
184 });
185 return multi_region::encode_from_old_and_new_v0316(
186 &old_editable_region,
187 &result,
188 cursor_in_new,
189 zeta_prompt::CURSOR_MARKER,
190 multi_region::V0316_END_MARKER,
191 );
192 }
193
194 if version == ZetaFormat::V0306SeedMultiRegions {
195 let cursor_in_new = cursor_offset.map(|cursor_offset| {
196 let hunk_start = first_hunk_offset.unwrap_or(0);
197 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
198 });
199 return multi_region::encode_from_old_and_new(
200 &old_editable_region,
201 &result,
202 cursor_in_new,
203 zeta_prompt::CURSOR_MARKER,
204 zeta_prompt::seed_coder::END_MARKER,
205 zeta_prompt::seed_coder::NO_EDITS,
206 );
207 }
208
209 let mut result = result;
210 if let Some(cursor_offset) = cursor_offset {
211 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
212 // We need to add where the hunk context matched in the editable region to compute
213 // the actual cursor position in the result.
214 let hunk_start = first_hunk_offset.unwrap_or(0);
215 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
216 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
217 }
218
219 if let Some(end_marker) = output_end_marker_for_format(version) {
220 if !result.ends_with('\n') {
221 result.push('\n');
222 }
223 result.push_str(end_marker);
224 }
225
226 Ok(result)
227}
228
229pub struct TeacherPrompt;
230
231impl TeacherPrompt {
232 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
233 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
234 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
235 pub(crate) const NO_EDITS: &str = "NO_EDITS";
236
237 /// Truncate edit history to this number of last lines
238 const MAX_HISTORY_LINES: usize = 128;
239
240 pub fn format_prompt(
241 example: &Example,
242 editable_range: Range<usize>,
243 context_range: Range<usize>,
244 ) -> String {
245 let edit_history = Self::format_edit_history(&example.spec.edit_history);
246 let context = Self::format_context(example);
247 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
248
249 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
250 let prompt = prompt_template
251 .replace("{{context}}", &context)
252 .replace("{{edit_history}}", &edit_history)
253 .replace("{{cursor_excerpt}}", &cursor_excerpt);
254
255 prompt
256 }
257
258 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
259 // Check if the model indicated no edits are needed
260 let no_edits = (String::new(), None);
261 if let Some(last_codeblock) = extract_last_codeblock(&response) {
262 if last_codeblock.trim() == Self::NO_EDITS {
263 return Ok(no_edits);
264 }
265 }
266
267 if response
268 .trim_end_matches(&[' ', '\n', '`'])
269 .ends_with(Self::NO_EDITS)
270 {
271 return Ok(no_edits);
272 }
273
274 // Extract updated (new) editable region from the model response.
275 let new_editable_region = Self::extract_editable_region(&response)?;
276 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
277 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
278 let old_editable_region = Self::extract_editable_region(
279 &example
280 .prompt
281 .as_ref()
282 .context("example prompt missing")?
283 .input,
284 )?
285 .replace(Self::USER_CURSOR_MARKER, "");
286
287 let prompt_inputs = example
288 .prompt_inputs
289 .as_ref()
290 .context("example is missing prompt inputs")?;
291
292 // Normalize leading newlines: if old starts with newline but new doesn't,
293 // prepend newline to new to preserve whitespace structure.
294 // This handles the case where the model drops the leading blank line.
295 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
296 new_editable_region.insert(0, '\n');
297 }
298
299 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
300 let (editable_region_offset, _) = excerpt
301 .match_indices(&old_editable_region)
302 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
303 .context("editable region not found in prompt content")?;
304 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
305
306 let editable_region_lines = old_editable_region.lines().count() as u32;
307 let diff = language::unified_diff_with_context(
308 &old_editable_region,
309 &new_editable_region,
310 editable_region_start_line as u32,
311 editable_region_start_line as u32,
312 editable_region_lines,
313 );
314
315 let diff = indoc::formatdoc! {"
316 --- a/{path}
317 +++ b/{path}
318 {diff}",
319 path = example.spec.cursor_path.to_string_lossy(),
320 diff = diff,
321 };
322
323 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
324 ActualCursor::from_editable_region(
325 &example.spec.cursor_path,
326 editable_region_cursor_offset,
327 &new_editable_region,
328 excerpt,
329 editable_region_offset,
330 editable_region_start_line,
331 )
332 });
333
334 Ok((diff, actual_cursor))
335 }
336
337 fn format_edit_history(edit_history: &str) -> String {
338 let lines: Vec<&str> = edit_history.lines().collect();
339
340 if lines.is_empty() {
341 return "(No edit history)".to_string();
342 }
343
344 if lines.len() > Self::MAX_HISTORY_LINES {
345 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
346 format!("{truncated}\n[...truncated...]")
347 } else {
348 lines.join("\n")
349 }
350 }
351
352 pub fn format_context(example: &Example) -> String {
353 let related_files = example
354 .prompt_inputs
355 .as_ref()
356 .and_then(|pi| pi.related_files.as_deref());
357
358 let Some(related_files) = related_files else {
359 return "(No context)".to_string();
360 };
361
362 if related_files.is_empty() {
363 return "(No context)".to_string();
364 }
365
366 let prefix = "`````";
367 let suffix = "`````\n\n";
368 let max_tokens = 1024;
369 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
370 }
371
372 fn format_cursor_excerpt(
373 example: &Example,
374 editable_range: Range<usize>,
375 context_range: Range<usize>,
376 ) -> String {
377 let mut result = String::new();
378
379 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
380 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
381 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
382
383 let path_str = example.spec.cursor_path.to_string_lossy();
384 result.push_str(&format!("`````{path_str}\n"));
385 result.push_str(&excerpt[context_range.start..editable_range.start]);
386 result.push_str(Self::EDITABLE_REGION_START);
387 result.push_str(&excerpt[editable_range.start..cursor_offset]);
388 result.push_str(Self::USER_CURSOR_MARKER);
389 result.push_str(&excerpt[cursor_offset..editable_range.end]);
390 result.push_str(Self::EDITABLE_REGION_END);
391 result.push_str(&excerpt[editable_range.end..context_range.end]);
392 result.push_str("\n`````");
393
394 result
395 }
396
397 pub fn extract_editable_region(text: &str) -> Result<String> {
398 let start = text
399 .rfind(Self::EDITABLE_REGION_START)
400 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
401 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
402
403 if start >= end {
404 return Err(anyhow!("Invalid editable region markers"));
405 }
406
407 let region = &text[start..end];
408 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
409 }
410}
411
412pub struct TeacherMultiRegionPrompt;
413
414impl TeacherMultiRegionPrompt {
415 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
416 pub(crate) const NO_EDITS: &str = "NO_EDITS";
417
418 /// Truncate edit history to this number of last lines
419 const MAX_HISTORY_LINES: usize = 128;
420
421 pub fn format_prompt(
422 example: &Example,
423 editable_range: Range<usize>,
424 context_range: Range<usize>,
425 ) -> String {
426 let edit_history = Self::format_edit_history(&example.spec.edit_history);
427 let context = Self::format_context(example);
428 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
429
430 let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
431 let prompt = prompt_template
432 .replace("{{context}}", &context)
433 .replace("{{edit_history}}", &edit_history)
434 .replace("{{cursor_excerpt}}", &cursor_excerpt);
435
436 prompt
437 }
438
439 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
440 let no_edits = (String::new(), None);
441 if let Some(last_codeblock) = extract_last_codeblock(&response) {
442 if last_codeblock.trim() == Self::NO_EDITS {
443 return Ok(no_edits);
444 }
445 }
446
447 if response.trim().ends_with(Self::NO_EDITS) {
448 return Ok(no_edits);
449 }
450
451 let prompt_inputs = example
452 .prompt_inputs
453 .as_ref()
454 .context("example is missing prompt inputs")?;
455
456 let zeta_format = ZetaFormat::default();
457 let (editable_range, _) =
458 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
459 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
460 let old_editable_region = &excerpt[editable_range.clone()];
461 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
462
463 let codeblock =
464 extract_last_codeblock(&response).context("no codeblock found in model response")?;
465 let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
466
467 let start_idx = start_num
468 .checked_sub(1)
469 .context("marker numbers are 1-indexed")?;
470 let end_idx = end_num
471 .checked_sub(1)
472 .context("marker numbers are 1-indexed")?;
473 let start_byte = *marker_offsets
474 .get(start_idx)
475 .context("start marker number out of range")?;
476 let end_byte = *marker_offsets
477 .get(end_idx)
478 .context("end marker number out of range")?;
479
480 if start_byte > end_byte {
481 return Err(anyhow!("start marker must come before end marker"));
482 }
483
484 let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
485 let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
486
487 let old_span = &old_editable_region[start_byte..end_byte];
488 let mut new_span = new_span;
489 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
490 new_span.push('\n');
491 }
492 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
493 new_span.pop();
494 }
495
496 let mut new_editable_region = String::new();
497 new_editable_region.push_str(&old_editable_region[..start_byte]);
498 new_editable_region.push_str(&new_span);
499 new_editable_region.push_str(&old_editable_region[end_byte..]);
500
501 let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
502
503 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
504 new_editable_region.insert(0, '\n');
505 }
506
507 let editable_region_offset = editable_range.start;
508 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
509
510 let editable_region_lines = old_editable_region.lines().count() as u32;
511 let diff = language::unified_diff_with_context(
512 old_editable_region,
513 &new_editable_region,
514 editable_region_start_line as u32,
515 editable_region_start_line as u32,
516 editable_region_lines,
517 );
518
519 let diff = indoc::formatdoc! {"
520 --- a/{path}
521 +++ b/{path}
522 {diff}",
523 path = example.spec.cursor_path.to_string_lossy(),
524 diff = diff,
525 };
526
527 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
528 ActualCursor::from_editable_region(
529 &example.spec.cursor_path,
530 editable_region_cursor_offset,
531 &new_editable_region,
532 excerpt,
533 editable_region_offset,
534 editable_region_start_line,
535 )
536 });
537
538 Ok((diff, actual_cursor))
539 }
540
541 fn format_edit_history(edit_history: &str) -> String {
542 let lines: Vec<&str> = edit_history.lines().collect();
543
544 if lines.is_empty() {
545 return "(No edit history)".to_string();
546 }
547
548 if lines.len() > Self::MAX_HISTORY_LINES {
549 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
550 format!("{truncated}\n[...truncated...]")
551 } else {
552 lines.join("\n")
553 }
554 }
555
556 pub fn format_context(example: &Example) -> String {
557 let related_files = example
558 .prompt_inputs
559 .as_ref()
560 .and_then(|pi| pi.related_files.as_deref());
561 let Some(related_files) = related_files else {
562 return "(No context)".to_string();
563 };
564
565 if related_files.is_empty() {
566 return "(No context)".to_string();
567 }
568
569 let prefix = "`````";
570 let suffix = "`````\n\n";
571 let max_tokens = 1024;
572 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
573 }
574
575 fn format_cursor_excerpt(
576 example: &Example,
577 editable_range: Range<usize>,
578 context_range: Range<usize>,
579 ) -> String {
580 let mut result = String::new();
581
582 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
583 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
584 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
585
586 let editable_text = &excerpt[editable_range.clone()];
587 let cursor_in_editable = cursor_offset - editable_range.start;
588
589 let path_str = example.spec.cursor_path.to_string_lossy();
590 result.push_str(&format!("`````{path_str}\n"));
591
592 result.push_str(&excerpt[context_range.start..editable_range.start]);
593
594 multi_region::write_editable_with_markers(
595 &mut result,
596 editable_text,
597 cursor_in_editable,
598 Self::USER_CURSOR_MARKER,
599 );
600
601 result.push_str(&excerpt[editable_range.end..context_range.end]);
602 result.push_str("\n`````");
603
604 result
605 }
606}
607
608/// Extract the cursor excerpt from an example.
609/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
610pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
611 // If we have the original prompt, extract the cursor excerpt from it
612 if let Some(prompt) = &example.prompt {
613 // Find "# 3. Current File" section and extract the content
614 if let Some(start) = prompt.input.find("# 3. Current File") {
615 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
616 let backtick_count = prompt.input[content_start..]
617 .chars()
618 .take_while(|&c| c == '`')
619 .count();
620 let content_start = content_start + backtick_count;
621
622 // Find the path line and skip it
623 let newline_pos = prompt.input[content_start..].find('\n')?;
624 let text_start = content_start + newline_pos + 1;
625
626 // Find the closing backticks
627 let closing_pattern = "`".repeat(backtick_count);
628 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
629 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
630
631 let path_str = example.spec.cursor_path.to_string_lossy();
632 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
633 }
634 }
635
636 // Fallback: construct from prompt_inputs if available
637 let prompt_inputs = example.prompt_inputs.as_ref()?;
638 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
639 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
640
641 // Simple fallback: just show content around cursor with markers
642 let path_str = example.spec.cursor_path.to_string_lossy();
643 let mut result = format!("`````{path_str}\n");
644 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
645 result.push_str(&excerpt[..cursor_offset]);
646 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
647 result.push_str(&excerpt[cursor_offset..]);
648 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
649 result.push_str("\n`````");
650
651 Some(result)
652}
653
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, ignoring surrounding whitespace, consists only of
/// three or more backticks. The opening fence is the nearest earlier line that, ignoring
/// leading whitespace, starts with exactly that many backticks, optionally followed by
/// language/metadata. Shorter fences nested inside a longer one are therefore treated as
/// content.
///
/// The content lines are joined with `\n` and terminated by a trailing `\n`; an empty block
/// yields an empty string. Returns `None` when there is no closing fence or no matching opening
/// fence before it.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Search from the end for a closing fence (line containing only backticks, 3+).
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let trimmed = line.trim();
        let is_fence = trimmed.len() >= 3 && trimmed.chars().all(|c| c == '`');
        is_fence.then_some((idx, trimmed.len()))
    })?;

    // Search backwards for the matching opening fence. Leading whitespace is ignored, just as
    // it is for the closing fence, so an indented block (e.g. inside a list item) is found.
    let fence = "`".repeat(fence_len);
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.trim_start()
            .strip_prefix(fence.as_str())
            // A further backtick means this is a longer fence, not a match.
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let content_lines = &lines[opening_idx + 1..closing_idx];
    if content_lines.is_empty() {
        return Some(String::new());
    }

    // Preserve trailing newline to match previous behavior
    let mut content = content_lines.join("\n");
    content.push('\n');
    Some(content)
}
698
// Unit tests for code-block extraction, editable-region extraction and NO_EDITS handling.
#[cfg(test)]
mod tests {
    use super::*;

    // With several blocks in the response, the last one wins; metadata after the opening fence
    // is not part of the content.
    #[test]
    fn test_extract_last_code_block() {
        let text = indoc::indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
        "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(last_block, "last block\n");
    }

    // Shorter backtick runs inside a five-backtick block are content, not fences.
    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        let text = indoc::indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
        "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "content with ``` inline\nand ```python nested\nmore content\n"
        );
    }

    // Backticks in the middle of a line never open or close a block.
    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let text = indoc::indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
        "};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            "here is some `code` with inline backticks\nand here```more```stuff\n"
        );
    }

    // Start/end markers delimit the region; the blank line before the end marker is dropped.
    #[test]
    fn test_extract_editable_region_old_format() {
        let text = indoc::indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
        "};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
                one
                two three"}
        );
    }

    // Numbered markers: the region is the text between the first and the last marker.
    #[test]
    fn test_extract_editable_region_marker_format() {
        let text = indoc::indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
        "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    #[test]
    fn test_extract_editable_region_multi_markers() {
        let text = indoc::indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
        "};
        let parsed = multi_region::extract_editable_region_from_markers(text).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    // A complete three-backtick block nested in the five-backtick block stays part of the
    // extracted content.
    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let text = indoc::indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
        "#};
        let last_block = extract_last_codeblock(text).unwrap();
        assert_eq!(
            last_block,
            indoc::indoc! {r#"
                ## Collaborations
                Our mission is to create a 4D generative model.

                ## Citation

                If you found Unique3D helpful, please cite our report:
                ```bibtex
                @misc{wu2024unique3d,
                title={Unique3D},
                }
                ```
            "#}
        );
    }

    // Without any markers the whole text is the editable region.
    #[test]
    fn test_extract_editable_region_no_markers() {
        let text = indoc::indoc! {"
            one
            two three"};
        let parsed = TeacherPrompt::extract_editable_region(text).unwrap();
        assert_eq!(
            parsed,
            indoc::indoc! {"
                one
                two three"}
        );
    }

    // A final code block containing only NO_EDITS is recognizable after trimming.
    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc::indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
        "};
        let codeblock = extract_last_codeblock(response).unwrap();
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let text = "Just some plain text without any code blocks";
        assert!(extract_last_codeblock(text).is_none());

        // Unclosed code block should return None
        let text = indoc::indoc! {"
            ```
            unclosed block
        "};
        assert!(extract_last_codeblock(text).is_none());

        // Analysis text with nested markdown but no proper outer block
        let text = indoc::indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
        "};
        // This should find the inner block
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "some code\n");
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let text = "`````\ncontent here\n`````";
        let result = extract_last_codeblock(text).unwrap();
        assert_eq!(result, "content here\n");
    }

    // NO_EDITS followed by stray backticks is still a no-edit response. The check runs before
    // the prompt or prompt inputs are needed, so an otherwise empty example is enough.
    #[test]
    fn test_parse_no_edits_response_with_trailing_backticks() {
        let response = "NO_EDITS```";

        let parsed = TeacherPrompt::parse(
            &Example {
                spec: edit_prediction::example_spec::ExampleSpec {
                    name: "test".to_string(),
                    repository_url: "https://github.com/zed-industries/zed.git".to_string(),
                    revision: "HEAD".to_string(),
                    tags: Vec::new(),
                    reasoning: None,
                    uncommitted_diff: String::new(),
                    cursor_path: std::sync::Arc::from(std::path::Path::new("src/main.rs")),
                    cursor_position: "0:0".to_string(),
                    edit_history: String::new(),
                    expected_patches: Vec::new(),
                    rejected_patch: None,
                    telemetry: None,
                    human_feedback: Vec::new(),
                    rating: None,
                },
                prompt_inputs: None,
                prompt: None,
                predictions: Vec::new(),
                score: Vec::new(),
                qa: Vec::new(),
                zed_version: None,
                state: None,
            },
            response,
        )
        .unwrap();

        assert!(parsed.0.is_empty());
        assert!(parsed.1.is_none());
    }
}