1use crate::{
2 FormatPromptArgs, PredictionProvider,
3 example::{ActualCursor, Example, ExamplePrompt},
4 headless::EpAppState,
5 progress::{ExampleProgress, Step},
6 retrieve_context::run_context_retrieval,
7};
8use anyhow::{Context as _, Result, anyhow};
9use edit_prediction::udiff;
10use gpui::AsyncApp;
11use similar::DiffableStr;
12use std::ops::Range;
13use std::sync::Arc;
14use zeta_prompt::{
15 ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt,
16 multi_region, output_end_marker_for_format, resolve_cursor_region,
17};
18
19pub async fn run_format_prompt(
20 example: &mut Example,
21 args: &FormatPromptArgs,
22 app_state: Arc<EpAppState>,
23 example_progress: &ExampleProgress,
24 cx: AsyncApp,
25) -> Result<()> {
26 run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
27
28 let step_progress = example_progress.start(Step::FormatPrompt);
29
30 let prompt_inputs = example
31 .prompt_inputs
32 .as_ref()
33 .context("prompt_inputs must be set after context retrieval")?;
34
35 match args.provider {
36 PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
37 step_progress.set_substatus("formatting teacher prompt");
38
39 let zeta_format = ZetaFormat::default();
40 let (editable_range, context_range) =
41 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
42
43 let prompt = TeacherPrompt::format_prompt(example, editable_range, context_range);
44 example.prompt = Some(ExamplePrompt {
45 input: prompt,
46 expected_output: String::new(),
47 rejected_output: None,
48 prefill: None,
49 provider: args.provider,
50 });
51 }
52 PredictionProvider::TeacherMultiRegion(_)
53 | PredictionProvider::TeacherMultiRegionNonBatching(_) => {
54 step_progress.set_substatus("formatting teacher multi-region prompt");
55
56 let zeta_format = ZetaFormat::default();
57 let (editable_range, context_range) =
58 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
59
60 let prompt =
61 TeacherMultiRegionPrompt::format_prompt(example, editable_range, context_range);
62 example.prompt = Some(ExamplePrompt {
63 input: prompt,
64 expected_output: String::new(),
65 rejected_output: None,
66 prefill: None,
67 provider: args.provider,
68 });
69 }
70 PredictionProvider::Zeta2(zeta_format) => {
71 step_progress.set_substatus("formatting zeta2 prompt");
72
73 let prompt = format_zeta_prompt(prompt_inputs, zeta_format);
74 let prefill = zeta_prompt::get_prefill(prompt_inputs, zeta_format);
75 let expected_output = example
76 .spec
77 .expected_patches_with_cursor_positions()
78 .into_iter()
79 .next()
80 .and_then(|(expected_patch, expected_cursor_offset)| {
81 zeta2_output_for_patch(
82 prompt_inputs,
83 &expected_patch,
84 expected_cursor_offset,
85 zeta_format,
86 )
87 .ok()
88 })
89 .unwrap_or_default();
90
91 let rejected_output = example.spec.rejected_patch.as_ref().and_then(|patch| {
92 zeta2_output_for_patch(prompt_inputs, patch, None, zeta_format).ok()
93 });
94
95 example.prompt = prompt.map(|prompt| ExamplePrompt {
96 input: prompt,
97 expected_output,
98 rejected_output,
99 provider: args.provider,
100 prefill: Some(prefill),
101 });
102 }
103 _ => {
104 panic!("Cannot format prompt for {:?}", args.provider);
105 }
106 };
107 Ok(())
108}
109
110pub fn zeta2_output_for_patch(
111 input: &zeta_prompt::ZetaPromptInput,
112 patch: &str,
113 cursor_offset: Option<usize>,
114 version: ZetaFormat,
115) -> Result<String> {
116 let (context, editable_range, _, _) = resolve_cursor_region(input, version);
117 let mut old_editable_region = context[editable_range].to_string();
118
119 if !old_editable_region.ends_with_newline() {
120 old_editable_region.push('\n');
121 }
122
123 if let Some(encoded_output) =
124 encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)?
125 {
126 return Ok(encoded_output);
127 }
128
129 let (result, first_hunk_offset) =
130 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context(
131 || {
132 format!(
133 "Patch:\n```\n{}```\n\nEditable region:\n```\n{}```",
134 patch, old_editable_region
135 )
136 },
137 )?;
138
139 if version == ZetaFormat::V0317SeedMultiRegions {
140 let cursor_in_new = cursor_offset.map(|cursor_offset| {
141 let hunk_start = first_hunk_offset.unwrap_or(0);
142 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
143 });
144 return multi_region::encode_from_old_and_new_v0317(
145 &old_editable_region,
146 &result,
147 cursor_in_new,
148 zeta_prompt::CURSOR_MARKER,
149 multi_region::V0317_END_MARKER,
150 );
151 }
152
153 if version == ZetaFormat::V0316SeedMultiRegions {
154 let cursor_in_new = cursor_offset.map(|cursor_offset| {
155 let hunk_start = first_hunk_offset.unwrap_or(0);
156 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
157 });
158 return multi_region::encode_from_old_and_new_v0316(
159 &old_editable_region,
160 &result,
161 cursor_in_new,
162 zeta_prompt::CURSOR_MARKER,
163 multi_region::V0316_END_MARKER,
164 );
165 }
166
167 if version == ZetaFormat::V0306SeedMultiRegions {
168 let cursor_in_new = cursor_offset.map(|cursor_offset| {
169 let hunk_start = first_hunk_offset.unwrap_or(0);
170 result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
171 });
172 return multi_region::encode_from_old_and_new(
173 &old_editable_region,
174 &result,
175 cursor_in_new,
176 zeta_prompt::CURSOR_MARKER,
177 zeta_prompt::seed_coder::END_MARKER,
178 zeta_prompt::seed_coder::NO_EDITS,
179 );
180 }
181
182 let mut result = result;
183 if let Some(cursor_offset) = cursor_offset {
184 // The cursor_offset is relative to the start of the hunk's new text (context + additions).
185 // We need to add where the hunk context matched in the editable region to compute
186 // the actual cursor position in the result.
187 let hunk_start = first_hunk_offset.unwrap_or(0);
188 let offset = result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()));
189 result.insert_str(offset, zeta_prompt::CURSOR_MARKER);
190 }
191
192 if let Some(end_marker) = output_end_marker_for_format(version) {
193 if !result.ends_with('\n') {
194 result.push('\n');
195 }
196 result.push_str(end_marker);
197 }
198
199 Ok(result)
200}
201
202pub struct TeacherPrompt;
203
204impl TeacherPrompt {
205 pub(crate) const EDITABLE_REGION_START: &str = "<|editable_region_start|>\n";
206 pub(crate) const EDITABLE_REGION_END: &str = "\n<|editable_region_end|>";
207 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
208 pub(crate) const NO_EDITS: &str = "NO_EDITS";
209
210 /// Truncate edit history to this number of last lines
211 const MAX_HISTORY_LINES: usize = 128;
212
213 pub fn format_prompt(
214 example: &Example,
215 editable_range: Range<usize>,
216 context_range: Range<usize>,
217 ) -> String {
218 let edit_history = Self::format_edit_history(&example.spec.edit_history);
219 let context = Self::format_context(example);
220 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
221
222 let prompt_template = crate::prompt_assets::get_prompt("teacher.md");
223 let prompt = prompt_template
224 .replace("{{context}}", &context)
225 .replace("{{edit_history}}", &edit_history)
226 .replace("{{cursor_excerpt}}", &cursor_excerpt);
227
228 prompt
229 }
230
231 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
232 // Check if the model indicated no edits are needed
233 let no_edits = (String::new(), None);
234 if let Some(last_codeblock) = extract_last_codeblock(&response) {
235 if last_codeblock.trim() == Self::NO_EDITS {
236 return Ok(no_edits);
237 }
238 }
239
240 if response.trim().ends_with(Self::NO_EDITS) {
241 return Ok(no_edits);
242 }
243
244 // Extract updated (new) editable region from the model response.
245 let new_editable_region = Self::extract_editable_region(&response)?;
246 let cursor_offset = new_editable_region.find(Self::USER_CURSOR_MARKER);
247 let mut new_editable_region = new_editable_region.replace(Self::USER_CURSOR_MARKER, "");
248 let old_editable_region = Self::extract_editable_region(
249 &example
250 .prompt
251 .as_ref()
252 .context("example prompt missing")?
253 .input,
254 )?
255 .replace(Self::USER_CURSOR_MARKER, "");
256
257 let prompt_inputs = example
258 .prompt_inputs
259 .as_ref()
260 .context("example is missing prompt inputs")?;
261
262 // Normalize leading newlines: if old starts with newline but new doesn't,
263 // prepend newline to new to preserve whitespace structure.
264 // This handles the case where the model drops the leading blank line.
265 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
266 new_editable_region.insert(0, '\n');
267 }
268
269 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
270 let (editable_region_offset, _) = excerpt
271 .match_indices(&old_editable_region)
272 .min_by_key(|(index, _)| index.abs_diff(prompt_inputs.cursor_offset_in_excerpt))
273 .context("editable region not found in prompt content")?;
274 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
275
276 let editable_region_lines = old_editable_region.lines().count() as u32;
277 let diff = language::unified_diff_with_context(
278 &old_editable_region,
279 &new_editable_region,
280 editable_region_start_line as u32,
281 editable_region_start_line as u32,
282 editable_region_lines,
283 );
284
285 let diff = indoc::formatdoc! {"
286 --- a/{path}
287 +++ b/{path}
288 {diff}",
289 path = example.spec.cursor_path.to_string_lossy(),
290 diff = diff,
291 };
292
293 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
294 ActualCursor::from_editable_region(
295 &example.spec.cursor_path,
296 editable_region_cursor_offset,
297 &new_editable_region,
298 excerpt,
299 editable_region_offset,
300 editable_region_start_line,
301 )
302 });
303
304 Ok((diff, actual_cursor))
305 }
306
307 fn format_edit_history(edit_history: &str) -> String {
308 let lines: Vec<&str> = edit_history.lines().collect();
309
310 if lines.is_empty() {
311 return "(No edit history)".to_string();
312 }
313
314 if lines.len() > Self::MAX_HISTORY_LINES {
315 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
316 format!("{truncated}\n[...truncated...]")
317 } else {
318 lines.join("\n")
319 }
320 }
321
322 pub fn format_context(example: &Example) -> String {
323 let related_files = example
324 .prompt_inputs
325 .as_ref()
326 .and_then(|pi| pi.related_files.as_deref());
327
328 let Some(related_files) = related_files else {
329 return "(No context)".to_string();
330 };
331
332 if related_files.is_empty() {
333 return "(No context)".to_string();
334 }
335
336 let prefix = "`````";
337 let suffix = "`````\n\n";
338 let max_tokens = 1024;
339 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
340 }
341
342 fn format_cursor_excerpt(
343 example: &Example,
344 editable_range: Range<usize>,
345 context_range: Range<usize>,
346 ) -> String {
347 let mut result = String::new();
348
349 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
350 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
351 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
352
353 let path_str = example.spec.cursor_path.to_string_lossy();
354 result.push_str(&format!("`````{path_str}\n"));
355 result.push_str(&excerpt[context_range.start..editable_range.start]);
356 result.push_str(Self::EDITABLE_REGION_START);
357 result.push_str(&excerpt[editable_range.start..cursor_offset]);
358 result.push_str(Self::USER_CURSOR_MARKER);
359 result.push_str(&excerpt[cursor_offset..editable_range.end]);
360 result.push_str(Self::EDITABLE_REGION_END);
361 result.push_str(&excerpt[editable_range.end..context_range.end]);
362 result.push_str("\n`````");
363
364 result
365 }
366
367 pub fn extract_editable_region(text: &str) -> Result<String> {
368 let start = text
369 .rfind(Self::EDITABLE_REGION_START)
370 .map_or(0, |pos| pos + Self::EDITABLE_REGION_START.len());
371 let end = text.rfind(Self::EDITABLE_REGION_END).unwrap_or(text.len());
372
373 if start >= end {
374 return Err(anyhow!("Invalid editable region markers"));
375 }
376
377 let region = &text[start..end];
378 Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
379 }
380}
381
382pub struct TeacherMultiRegionPrompt;
383
384impl TeacherMultiRegionPrompt {
385 pub(crate) const USER_CURSOR_MARKER: &str = "<|user_cursor|>";
386 pub(crate) const NO_EDITS: &str = "NO_EDITS";
387
388 /// Truncate edit history to this number of last lines
389 const MAX_HISTORY_LINES: usize = 128;
390
391 pub fn format_prompt(
392 example: &Example,
393 editable_range: Range<usize>,
394 context_range: Range<usize>,
395 ) -> String {
396 let edit_history = Self::format_edit_history(&example.spec.edit_history);
397 let context = Self::format_context(example);
398 let cursor_excerpt = Self::format_cursor_excerpt(example, editable_range, context_range);
399
400 let prompt_template = crate::prompt_assets::get_prompt("teacher_multi_region.md");
401 let prompt = prompt_template
402 .replace("{{context}}", &context)
403 .replace("{{edit_history}}", &edit_history)
404 .replace("{{cursor_excerpt}}", &cursor_excerpt);
405
406 prompt
407 }
408
409 pub fn parse(example: &Example, response: &str) -> Result<(String, Option<ActualCursor>)> {
410 let no_edits = (String::new(), None);
411 if let Some(last_codeblock) = extract_last_codeblock(&response) {
412 if last_codeblock.trim() == Self::NO_EDITS {
413 return Ok(no_edits);
414 }
415 }
416
417 if response.trim().ends_with(Self::NO_EDITS) {
418 return Ok(no_edits);
419 }
420
421 let prompt_inputs = example
422 .prompt_inputs
423 .as_ref()
424 .context("example is missing prompt inputs")?;
425
426 let zeta_format = ZetaFormat::default();
427 let (editable_range, _) =
428 excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
429 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
430 let old_editable_region = &excerpt[editable_range.clone()];
431 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
432
433 let codeblock =
434 extract_last_codeblock(&response).context("no codeblock found in model response")?;
435 let (start_num, end_num, raw_new_span) = multi_region::extract_marker_span(&codeblock)?;
436
437 let start_idx = start_num
438 .checked_sub(1)
439 .context("marker numbers are 1-indexed")?;
440 let end_idx = end_num
441 .checked_sub(1)
442 .context("marker numbers are 1-indexed")?;
443 let start_byte = *marker_offsets
444 .get(start_idx)
445 .context("start marker number out of range")?;
446 let end_byte = *marker_offsets
447 .get(end_idx)
448 .context("end marker number out of range")?;
449
450 if start_byte > end_byte {
451 return Err(anyhow!("start marker must come before end marker"));
452 }
453
454 let cursor_in_span = raw_new_span.find(Self::USER_CURSOR_MARKER);
455 let new_span = raw_new_span.replace(Self::USER_CURSOR_MARKER, "");
456
457 let old_span = &old_editable_region[start_byte..end_byte];
458 let mut new_span = new_span;
459 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
460 new_span.push('\n');
461 }
462 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
463 new_span.pop();
464 }
465
466 let mut new_editable_region = String::new();
467 new_editable_region.push_str(&old_editable_region[..start_byte]);
468 new_editable_region.push_str(&new_span);
469 new_editable_region.push_str(&old_editable_region[end_byte..]);
470
471 let cursor_offset = cursor_in_span.map(|pos| start_byte + pos);
472
473 if old_editable_region.starts_with('\n') && !new_editable_region.starts_with('\n') {
474 new_editable_region.insert(0, '\n');
475 }
476
477 let editable_region_offset = editable_range.start;
478 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
479
480 let editable_region_lines = old_editable_region.lines().count() as u32;
481 let diff = language::unified_diff_with_context(
482 old_editable_region,
483 &new_editable_region,
484 editable_region_start_line as u32,
485 editable_region_start_line as u32,
486 editable_region_lines,
487 );
488
489 let diff = indoc::formatdoc! {"
490 --- a/{path}
491 +++ b/{path}
492 {diff}",
493 path = example.spec.cursor_path.to_string_lossy(),
494 diff = diff,
495 };
496
497 let actual_cursor = cursor_offset.map(|editable_region_cursor_offset| {
498 ActualCursor::from_editable_region(
499 &example.spec.cursor_path,
500 editable_region_cursor_offset,
501 &new_editable_region,
502 excerpt,
503 editable_region_offset,
504 editable_region_start_line,
505 )
506 });
507
508 Ok((diff, actual_cursor))
509 }
510
511 fn format_edit_history(edit_history: &str) -> String {
512 let lines: Vec<&str> = edit_history.lines().collect();
513
514 if lines.is_empty() {
515 return "(No edit history)".to_string();
516 }
517
518 if lines.len() > Self::MAX_HISTORY_LINES {
519 let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
520 format!("{truncated}\n[...truncated...]")
521 } else {
522 lines.join("\n")
523 }
524 }
525
526 pub fn format_context(example: &Example) -> String {
527 let related_files = example
528 .prompt_inputs
529 .as_ref()
530 .and_then(|pi| pi.related_files.as_deref());
531 let Some(related_files) = related_files else {
532 return "(No context)".to_string();
533 };
534
535 if related_files.is_empty() {
536 return "(No context)".to_string();
537 }
538
539 let prefix = "`````";
540 let suffix = "`````\n\n";
541 let max_tokens = 1024;
542 zeta_prompt::format_related_files_within_budget(related_files, &prefix, &suffix, max_tokens)
543 }
544
545 fn format_cursor_excerpt(
546 example: &Example,
547 editable_range: Range<usize>,
548 context_range: Range<usize>,
549 ) -> String {
550 let mut result = String::new();
551
552 let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
553 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
554 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
555
556 let editable_text = &excerpt[editable_range.clone()];
557 let cursor_in_editable = cursor_offset - editable_range.start;
558
559 let path_str = example.spec.cursor_path.to_string_lossy();
560 result.push_str(&format!("`````{path_str}\n"));
561
562 result.push_str(&excerpt[context_range.start..editable_range.start]);
563
564 multi_region::write_editable_with_markers(
565 &mut result,
566 editable_text,
567 cursor_in_editable,
568 Self::USER_CURSOR_MARKER,
569 );
570
571 result.push_str(&excerpt[editable_range.end..context_range.end]);
572 result.push_str("\n`````");
573
574 result
575 }
576}
577
578/// Extract the cursor excerpt from an example.
579/// First tries to extract from an existing prompt, then falls back to constructing from prompt_inputs.
580pub fn extract_cursor_excerpt_from_example(example: &Example) -> Option<String> {
581 // If we have the original prompt, extract the cursor excerpt from it
582 if let Some(prompt) = &example.prompt {
583 // Find "# 3. Current File" section and extract the content
584 if let Some(start) = prompt.input.find("# 3. Current File") {
585 let content_start = prompt.input[start..].find('`').map(|i| start + i)?;
586 let backtick_count = prompt.input[content_start..]
587 .chars()
588 .take_while(|&c| c == '`')
589 .count();
590 let content_start = content_start + backtick_count;
591
592 // Find the path line and skip it
593 let newline_pos = prompt.input[content_start..].find('\n')?;
594 let text_start = content_start + newline_pos + 1;
595
596 // Find the closing backticks
597 let closing_pattern = "`".repeat(backtick_count);
598 let text_end = prompt.input[text_start..].find(&closing_pattern)?;
599 let cursor_excerpt = &prompt.input[text_start..text_start + text_end];
600
601 let path_str = example.spec.cursor_path.to_string_lossy();
602 return Some(format!("`````{path_str}\n{cursor_excerpt}`````"));
603 }
604 }
605
606 // Fallback: construct from prompt_inputs if available
607 let prompt_inputs = example.prompt_inputs.as_ref()?;
608 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
609 let cursor_offset = prompt_inputs.cursor_offset_in_excerpt;
610
611 // Simple fallback: just show content around cursor with markers
612 let path_str = example.spec.cursor_path.to_string_lossy();
613 let mut result = format!("`````{path_str}\n");
614 result.push_str(TeacherPrompt::EDITABLE_REGION_START);
615 result.push_str(&excerpt[..cursor_offset]);
616 result.push_str(TeacherPrompt::USER_CURSOR_MARKER);
617 result.push_str(&excerpt[cursor_offset..]);
618 result.push_str(TeacherPrompt::EDITABLE_REGION_END);
619 result.push_str("\n`````");
620
621 Some(result)
622}
623
/// Returns the content of the last fenced code block in `text`.
///
/// The closing fence is the last line that, ignoring surrounding whitespace, consists of
/// nothing but three or more backticks. The opening fence is the nearest preceding line that
/// starts (without indentation) with exactly that many backticks, optionally followed by an
/// info string. Fences of a different length in between are treated as content.
///
/// The content lines are joined with `\n` and terminated by a final `\n`; an empty block
/// yields an empty string. Returns `None` when no closing fence or no matching opening fence
/// is found.
pub(crate) fn extract_last_codeblock(text: &str) -> Option<String> {
    let lines: Vec<&str> = text.lines().collect();

    // Scan from the end for the closing fence and remember how many backticks it has.
    let (closing_idx, fence_len) = lines.iter().enumerate().rev().find_map(|(idx, line)| {
        let candidate = line.trim();
        let is_bare_fence = candidate.len() >= 3 && candidate.bytes().all(|byte| byte == b'`');
        is_bare_fence.then_some((idx, candidate.len()))
    })?;

    // The opening fence has exactly as many backticks (a longer run is a different fence) and
    // may carry language/metadata after them.
    let fence = "`".repeat(fence_len);
    let opening_idx = lines[..closing_idx].iter().rposition(|line| {
        line.strip_prefix(fence.as_str())
            .is_some_and(|rest| !rest.starts_with('`'))
    })?;

    let content_lines = &lines[opening_idx + 1..closing_idx];
    if content_lines.is_empty() {
        Some(String::new())
    } else {
        // Non-empty content always ends with a newline.
        Some(format!("{}\n", content_lines.join("\n")))
    }
}
668
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    // --- extract_last_codeblock -------------------------------------------------------------

    #[test]
    fn test_extract_last_code_block() {
        let response = indoc! {"
            Some thinking

            ```
            first block
            ```

            `````path='something' lines=1:2
            last block
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("last block\n")
        );
    }

    #[test]
    fn test_extract_codeblock_with_nested_fences() {
        // Shorter fences inside a five-backtick block are part of the content.
        let response = indoc! {"
            `````
            content with ``` inline
            and ```python nested
            more content
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content with ``` inline\nand ```python nested\nmore content\n")
        );
    }

    #[test]
    fn test_extract_codeblock_ignores_inline_backticks() {
        let response = indoc! {"
            `````
            here is some `code` with inline backticks
            and here```more```stuff
            `````
            "};
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("here is some `code` with inline backticks\nand here```more```stuff\n")
        );
    }

    #[test]
    fn test_extract_last_codeblock_nested_bibtex() {
        let response = indoc! {r#"
            Looking at the edit history, I can see that a Citation section was just added.

            `````
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            `````
            "#};
        let expected = indoc! {r#"
            ## Collaborations
            Our mission is to create a 4D generative model.

            ## Citation

            If you found Unique3D helpful, please cite our report:
            ```bibtex
            @misc{wu2024unique3d,
            title={Unique3D},
            }
            ```
            "#};
        assert_eq!(extract_last_codeblock(response).as_deref(), Some(expected));
    }

    #[test]
    fn test_parse_no_edits_response() {
        let response = indoc! {"
            The code is already complete. There is no clear next edit to make.

            `````
            NO_EDITS
            `````
            "};
        let codeblock = extract_last_codeblock(response).expect("response has a code block");
        assert_eq!(codeblock.trim(), TeacherPrompt::NO_EDITS);
    }

    #[test]
    fn test_extract_codeblock_no_valid_block() {
        // Text with no code blocks should return None
        let plain_text = "Just some plain text without any code blocks";
        assert_eq!(extract_last_codeblock(plain_text), None);

        // Unclosed code block should return None
        let unclosed = indoc! {"
            ```
            unclosed block
            "};
        assert_eq!(extract_last_codeblock(unclosed), None);

        // Analysis text with nested markdown but no proper outer block:
        // the inner block is the one that gets found.
        let analysis = indoc! {"
            # Analysis
            Looking at this:
            ```
            some code
            ```
            But then more analysis without wrapping block
            "};
        assert_eq!(
            extract_last_codeblock(analysis).as_deref(),
            Some("some code\n")
        );
    }

    #[test]
    fn test_extract_codeblock_no_trailing_newline() {
        // Text ending without trailing newline after closing fence
        let response = "`````\ncontent here\n`````";
        assert_eq!(
            extract_last_codeblock(response).as_deref(),
            Some("content here\n")
        );
    }

    // --- editable region extraction ---------------------------------------------------------

    #[test]
    fn test_extract_editable_region_old_format() {
        let prompt = indoc! {"
            some lines
            are
            here
            <|editable_region_start|>
            one
            two three

            <|editable_region_end|>
            more
            lines here
            "};
        let expected = indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(prompt).unwrap();
        assert_eq!(region, expected);
    }

    #[test]
    fn test_extract_editable_region_no_markers() {
        // Without markers the whole text is the editable region.
        let whole_text = indoc! {"
            one
            two three"};
        let region = TeacherPrompt::extract_editable_region(whole_text).unwrap();
        assert_eq!(region, whole_text);
    }

    #[test]
    fn test_extract_editable_region_marker_format() {
        let prompt = indoc! {"
            some context
            <|marker_1|>
            one
            two three
            <|marker_2|>
            more context
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    #[test]
    fn test_extract_editable_region_multi_markers() {
        let prompt = indoc! {"
            prefix
            <|marker_1|>
            aaa
            bbb
            <|marker_2|>
            ccc
            ddd
            <|marker_3|>
            suffix
            "};
        let region = multi_region::extract_editable_region_from_markers(prompt).unwrap();
        // Intermediate marker and its trailing \n are stripped
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }
}