1pub mod excerpt_ranges;
2pub mod multi_region;
3pub mod udiff;
4
5use anyhow::{Result, anyhow};
6use serde::{Deserialize, Serialize};
7use std::fmt::Write;
8use std::ops::Range;
9use std::path::Path;
10use std::sync::Arc;
11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
12
13pub use crate::excerpt_ranges::{
14 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
15};
16
/// Marker text that denotes the user's cursor position inside prompt and
/// model-output text.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Default prompt budget, in estimated tokens, used by `format_zeta_prompt`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
24
/// Roughly converts a byte length into a token count.
///
/// Uses a fixed heuristic of three bytes per token, rounding down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
28
/// Shrinks a token budget to 90% of its value (rounded down), leaving some
/// slack to avoid overflow.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const USABLE_FRACTION: f64 = 0.9;
    let usable = max_tokens as f64 * USABLE_FRACTION;
    usable.floor() as usize
}
33
/// Everything needed to build a prompt for one prediction request and to
/// interpret the model's output afterwards.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file that contains the cursor.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor; the offsets and ranges below index into it.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, this row is added to excerpt-relative rows before
    /// comparing them with related-file excerpt rows (presumably the buffer
    /// row at which `cursor_excerpt` starts -- confirm with the producer).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit-history events that are rendered into the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files to include in the prompt; `None` is treated as an empty
    /// list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer (omitted from serialization when empty).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): not read in this part of the file; presumably marks the
    /// cursor file as belonging to an open-source repository -- confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): not read in this part of the file; presumably a data
    /// collection consent flag -- confirm.
    #[serde(default)]
    pub can_collect_data: bool,
    /// NOTE(review): not read in this part of the file; presumably the URL of
    /// the repository containing the cursor file -- confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
64
/// Identifies a prompt/output format version.
///
/// The variant name doubles as the format's textual name (via
/// `IntoStaticStr`), and `ZetaFormat::parse` resolves names back to variants.
/// The default format is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
100
101impl std::fmt::Display for ZetaFormat {
102 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103 write!(f, "{}", <&'static str>::from(self))
104 }
105}
106
107impl ZetaFormat {
108 pub fn parse(format_name: &str) -> Result<Self> {
109 let lower = format_name.to_lowercase();
110
111 // Exact case-insensitive match takes priority, bypassing ambiguity checks.
112 for variant in ZetaFormat::iter() {
113 if <&'static str>::from(&variant).to_lowercase() == lower {
114 return Ok(variant);
115 }
116 }
117
118 let mut results = ZetaFormat::iter().filter(|version| {
119 <&'static str>::from(version)
120 .to_lowercase()
121 .contains(&lower)
122 });
123 let Some(result) = results.next() else {
124 anyhow::bail!(
125 "`{format_name}` did not match any of:\n{}",
126 Self::options_as_string()
127 );
128 };
129 if results.next().is_some() {
130 anyhow::bail!(
131 "`{format_name}` matched more than one of:\n{}",
132 Self::options_as_string()
133 );
134 }
135 Ok(result)
136 }
137
138 pub fn options_as_string() -> String {
139 ZetaFormat::iter()
140 .map(|format| format!("- {}\n", <&'static str>::from(format)))
141 .collect::<Vec<_>>()
142 .concat()
143 }
144}
145
/// An edit-history event that can be rendered into a prompt (see `write_event`).
///
/// Serialized with an `event` tag field identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described by a diff.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
157
158impl Event {
159 pub fn in_open_source_repo(&self) -> bool {
160 match self {
161 Event::BufferChange {
162 in_open_source_repo,
163 ..
164 } => *in_open_source_repo,
165 }
166 }
167}
168
169pub fn write_event(prompt: &mut String, event: &Event) {
170 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
171 for component in path.components() {
172 prompt.push('/');
173 write!(prompt, "{}", component.as_os_str().display()).ok();
174 }
175 }
176 match event {
177 Event::BufferChange {
178 path,
179 old_path,
180 diff,
181 predicted,
182 in_open_source_repo: _,
183 } => {
184 if *predicted {
185 prompt.push_str("// User accepted prediction:\n");
186 }
187 prompt.push_str("--- a");
188 write_path_as_unix_str(prompt, old_path.as_ref());
189 prompt.push_str("\n+++ b");
190 write_path_as_unix_str(prompt, path.as_ref());
191 prompt.push('\n');
192 prompt.push_str(diff);
193 }
194 }
195}
196
/// A diagnostic attached to the active buffer, carried in
/// `ZetaPromptInput::active_buffer_diagnostics`.
///
/// NOTE(review): the fields are not read in this part of the file; the field
/// notes below are inferred from names and types -- confirm with the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity; the numbering scheme is not defined here.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Source snippet associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet`.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's range expressed as offsets into `snippet`.
    pub diagnostic_range_in_snippet: Range<usize>,
}
205
/// A file related to the cursor location, together with excerpts of its text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the related file; compared with the cursor path when filtering
    /// redundant excerpts.
    pub path: Arc<Path>,
    /// NOTE(review): not read in this part of the file; presumably the last
    /// row of the file -- confirm.
    pub max_row: u32,
    /// Excerpts taken from this file. Files left without excerpts are dropped
    /// by `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): not read in this part of the file; presumably marks the
    /// file as belonging to an open-source repository -- confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
214
/// A contiguous piece of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor excerpt's row
    /// range when filtering redundant excerpts.
    pub row_range: Range<u32>,
    /// The excerpt text.
    pub text: Arc<str>,
    /// NOTE(review): not read in this part of the file; presumably an ordering
    /// or priority key -- confirm. Defaults to 0 when absent from the input.
    #[serde(default)]
    pub order: usize,
}
222
223pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
224 special_tokens_for_format(format).iter().any(|token| {
225 if let Some(line_token) = token.strip_suffix('\n') {
226 input.cursor_excerpt.lines().any(|line| line == line_token)
227 } else {
228 input.cursor_excerpt.contains(token)
229 }
230 })
231}
232
/// Formats a prompt for `input` using the default `MAX_PROMPT_TOKENS` budget.
///
/// Delegates to `format_prompt_with_budget_for_format`, which returns `None`
/// when the estimated size of the assembled prompt exceeds the budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
236
237pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
238 match format {
239 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
240 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
241 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
242 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
243 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
244 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
245 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
246 ZetaFormat::v0226Hashline => hashline::special_tokens(),
247 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
248 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
249 ZetaFormat::V0316SeedMultiRegions => {
250 static TOKENS: &[&str] = &[
251 seed_coder::FIM_SUFFIX,
252 seed_coder::FIM_PREFIX,
253 seed_coder::FIM_MIDDLE,
254 seed_coder::FILE_MARKER,
255 multi_region::V0316_END_MARKER,
256 CURSOR_MARKER,
257 multi_region::MARKER_TAG_PREFIX,
258 ];
259 TOKENS
260 }
261 ZetaFormat::V0318SeedMultiRegions => {
262 static TOKENS: &[&str] = &[
263 seed_coder::FIM_SUFFIX,
264 seed_coder::FIM_PREFIX,
265 seed_coder::FIM_MIDDLE,
266 seed_coder::FILE_MARKER,
267 multi_region::V0318_END_MARKER,
268 CURSOR_MARKER,
269 multi_region::MARKER_TAG_PREFIX,
270 ];
271 TOKENS
272 }
273 ZetaFormat::V0317SeedMultiRegions => {
274 static TOKENS: &[&str] = &[
275 seed_coder::FIM_SUFFIX,
276 seed_coder::FIM_PREFIX,
277 seed_coder::FIM_MIDDLE,
278 seed_coder::FILE_MARKER,
279 multi_region::V0317_END_MARKER,
280 CURSOR_MARKER,
281 multi_region::RELATIVE_MARKER_TAG_PREFIX,
282 ];
283 TOKENS
284 }
285 ZetaFormat::V0306SeedMultiRegions => {
286 static TOKENS: &[&str] = &[
287 seed_coder::FIM_SUFFIX,
288 seed_coder::FIM_PREFIX,
289 seed_coder::FIM_MIDDLE,
290 seed_coder::FILE_MARKER,
291 seed_coder::START_MARKER,
292 seed_coder::SEPARATOR,
293 seed_coder::END_MARKER,
294 CURSOR_MARKER,
295 multi_region::MARKER_TAG_PREFIX,
296 ];
297 TOKENS
298 }
299 }
300}
301
302/// Returns the (editable_token_limit, context_token_limit) for a given format.
303pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
304 match format {
305 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
306 ZetaFormat::V0114180EditableRegion => (180, 350),
307 ZetaFormat::V0120GitMergeMarkers
308 | ZetaFormat::V0131GitMergeMarkersPrefix
309 | ZetaFormat::V0211Prefill
310 | ZetaFormat::V0211SeedCoder
311 | ZetaFormat::v0226Hashline
312 | ZetaFormat::V0306SeedMultiRegions
313 | ZetaFormat::V0316SeedMultiRegions
314 | ZetaFormat::V0318SeedMultiRegions
315 | ZetaFormat::V0317SeedMultiRegions
316 | ZetaFormat::V0304SeedNoEdits => (350, 150),
317 ZetaFormat::V0304VariableEdit => (1024, 0),
318 }
319}
320
321pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
322 match format {
323 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
324 ZetaFormat::V0112MiddleAtEnd
325 | ZetaFormat::V0113Ordered
326 | ZetaFormat::V0114180EditableRegion
327 | ZetaFormat::V0120GitMergeMarkers
328 | ZetaFormat::V0131GitMergeMarkersPrefix
329 | ZetaFormat::V0211Prefill
330 | ZetaFormat::V0211SeedCoder
331 | ZetaFormat::V0304VariableEdit
332 | ZetaFormat::V0306SeedMultiRegions
333 | ZetaFormat::V0304SeedNoEdits => &[],
334 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
335 ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
336 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
337 }
338}
339
340pub fn excerpt_ranges_for_format(
341 format: ZetaFormat,
342 ranges: &ExcerptRanges,
343) -> (Range<usize>, Range<usize>) {
344 match format {
345 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
346 ranges.editable_150.clone(),
347 ranges.editable_150_context_350.clone(),
348 ),
349 ZetaFormat::V0114180EditableRegion => (
350 ranges.editable_180.clone(),
351 ranges.editable_180_context_350.clone(),
352 ),
353 ZetaFormat::V0120GitMergeMarkers
354 | ZetaFormat::V0131GitMergeMarkersPrefix
355 | ZetaFormat::V0211Prefill
356 | ZetaFormat::V0211SeedCoder
357 | ZetaFormat::v0226Hashline
358 | ZetaFormat::V0304SeedNoEdits
359 | ZetaFormat::V0306SeedMultiRegions
360 | ZetaFormat::V0316SeedMultiRegions
361 | ZetaFormat::V0318SeedMultiRegions
362 | ZetaFormat::V0317SeedMultiRegions => (
363 ranges.editable_350.clone(),
364 ranges.editable_350_context_150.clone(),
365 ),
366 ZetaFormat::V0304VariableEdit => {
367 let context = ranges
368 .editable_350_context_1024
369 .clone()
370 .or(ranges.editable_350_context_512.clone())
371 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
372 (context.clone(), context)
373 }
374 }
375}
376
377pub fn write_cursor_excerpt_section_for_format(
378 format: ZetaFormat,
379 prompt: &mut String,
380 path: &Path,
381 context: &str,
382 editable_range: &Range<usize>,
383 cursor_offset: usize,
384) {
385 match format {
386 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
387 prompt,
388 path,
389 context,
390 editable_range,
391 cursor_offset,
392 ),
393 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
394 v0113_ordered::write_cursor_excerpt_section(
395 prompt,
396 path,
397 context,
398 editable_range,
399 cursor_offset,
400 )
401 }
402 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
403 prompt,
404 path,
405 context,
406 editable_range,
407 cursor_offset,
408 ),
409 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
410 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
411 prompt,
412 path,
413 context,
414 editable_range,
415 cursor_offset,
416 )
417 }
418 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
419 seed_coder::write_cursor_excerpt_section(
420 prompt,
421 path,
422 context,
423 editable_range,
424 cursor_offset,
425 )
426 }
427 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
428 prompt,
429 path,
430 context,
431 editable_range,
432 cursor_offset,
433 ),
434 ZetaFormat::V0304VariableEdit => {
435 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
436 }
437 ZetaFormat::V0306SeedMultiRegions => {
438 prompt.push_str(&build_v0306_cursor_prefix(
439 path,
440 context,
441 editable_range,
442 cursor_offset,
443 ));
444 }
445 ZetaFormat::V0316SeedMultiRegions => {
446 prompt.push_str(&build_v0316_cursor_prefix(
447 path,
448 context,
449 editable_range,
450 cursor_offset,
451 ));
452 }
453 ZetaFormat::V0318SeedMultiRegions => {
454 prompt.push_str(&build_v0318_cursor_prefix(
455 path,
456 context,
457 editable_range,
458 cursor_offset,
459 ));
460 }
461 ZetaFormat::V0317SeedMultiRegions => {
462 prompt.push_str(&build_v0317_cursor_prefix(
463 path,
464 context,
465 editable_range,
466 cursor_offset,
467 ));
468 }
469 }
470}
471
472fn build_v0306_cursor_prefix(
473 path: &Path,
474 context: &str,
475 editable_range: &Range<usize>,
476 cursor_offset: usize,
477) -> String {
478 let mut section = String::new();
479 let path_str = path.to_string_lossy();
480 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
481
482 section.push_str(&context[..editable_range.start]);
483 section.push_str(seed_coder::START_MARKER);
484
485 let editable_text = &context[editable_range.clone()];
486 let cursor_in_editable = cursor_offset - editable_range.start;
487 multi_region::write_editable_with_markers(
488 &mut section,
489 editable_text,
490 cursor_in_editable,
491 CURSOR_MARKER,
492 );
493
494 if !section.ends_with('\n') {
495 section.push('\n');
496 }
497 section.push_str(seed_coder::SEPARATOR);
498 section
499}
500
501fn build_v0316_cursor_prefix(
502 path: &Path,
503 context: &str,
504 editable_range: &Range<usize>,
505 cursor_offset: usize,
506) -> String {
507 let mut section = String::new();
508 let path_str = path.to_string_lossy();
509 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
510
511 section.push_str(&context[..editable_range.start]);
512
513 let editable_text = &context[editable_range.clone()];
514 let cursor_in_editable = cursor_offset - editable_range.start;
515 multi_region::write_editable_with_markers_v0316(
516 &mut section,
517 editable_text,
518 cursor_in_editable,
519 CURSOR_MARKER,
520 );
521
522 if !section.ends_with('\n') {
523 section.push('\n');
524 }
525 section
526}
527
528fn build_v0318_cursor_prefix(
529 path: &Path,
530 context: &str,
531 editable_range: &Range<usize>,
532 cursor_offset: usize,
533) -> String {
534 let mut section = String::new();
535 let path_str = path.to_string_lossy();
536 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
537
538 section.push_str(&context[..editable_range.start]);
539
540 let editable_text = &context[editable_range.clone()];
541 let cursor_in_editable = cursor_offset - editable_range.start;
542 multi_region::write_editable_with_markers_v0318(
543 &mut section,
544 editable_text,
545 cursor_in_editable,
546 CURSOR_MARKER,
547 );
548
549 if !section.ends_with('\n') {
550 section.push('\n');
551 }
552 section
553}
554
555fn build_v0317_cursor_prefix(
556 path: &Path,
557 context: &str,
558 editable_range: &Range<usize>,
559 cursor_offset: usize,
560) -> String {
561 let mut section = String::new();
562 let path_str = path.to_string_lossy();
563 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
564
565 section.push_str(&context[..editable_range.start]);
566
567 let editable_text = &context[editable_range.clone()];
568 let cursor_in_editable = cursor_offset - editable_range.start;
569 multi_region::write_editable_with_markers_v0317(
570 &mut section,
571 editable_text,
572 cursor_in_editable,
573 CURSOR_MARKER,
574 );
575
576 if !section.ends_with('\n') {
577 section.push('\n');
578 }
579 section
580}
581
/// Converts a byte-offset range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline (the last row is only partially
/// covered).
///
/// # Panics
///
/// Panics if the range is out of bounds or not on UTF-8 character boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.bytes().filter(|&byte| byte == b'\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let rows_spanned = newline_count(&text[range.start..range.end]);
    let partial_last_row = u32::from(!text[..range.end].ends_with('\n'));

    first_row..first_row + rows_spanned + partial_last_row
}
590
591pub fn format_prompt_with_budget_for_format(
592 input: &ZetaPromptInput,
593 format: ZetaFormat,
594 max_tokens: usize,
595) -> Option<String> {
596 let (context, editable_range, context_range, cursor_offset) =
597 resolve_cursor_region(input, format);
598 let path = &*input.cursor_path;
599
600 let empty_files = Vec::new();
601 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
602 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
603 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
604 let row_range = relative_row_range.start + cursor_excerpt_start_row
605 ..relative_row_range.end + cursor_excerpt_start_row;
606 &filter_redundant_excerpts(
607 input_related_files.to_vec(),
608 input.cursor_path.as_ref(),
609 row_range,
610 )
611 } else {
612 input_related_files
613 };
614
615 let prompt = match format {
616 ZetaFormat::V0211SeedCoder
617 | ZetaFormat::V0304SeedNoEdits
618 | ZetaFormat::V0306SeedMultiRegions
619 | ZetaFormat::V0316SeedMultiRegions
620 | ZetaFormat::V0318SeedMultiRegions
621 | ZetaFormat::V0317SeedMultiRegions => {
622 let mut cursor_section = String::new();
623 write_cursor_excerpt_section_for_format(
624 format,
625 &mut cursor_section,
626 path,
627 context,
628 &editable_range,
629 cursor_offset,
630 );
631
632 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
633 seed_coder::assemble_fim_prompt(
634 context,
635 &editable_range,
636 &cursor_section,
637 &input.events,
638 related_files,
639 budget_with_margin,
640 )
641 }
642 _ => {
643 let mut cursor_section = String::new();
644 write_cursor_excerpt_section_for_format(
645 format,
646 &mut cursor_section,
647 path,
648 context,
649 &editable_range,
650 cursor_offset,
651 );
652
653 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
654 let cursor_tokens = estimate_tokens(cursor_section.len());
655 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
656
657 let edit_history_section = format_edit_history_within_budget(
658 &input.events,
659 "<|file_sep|>",
660 "edit history",
661 remaining_budget,
662 max_edit_event_count_for_format(&format),
663 );
664 let edit_history_tokens = estimate_tokens(edit_history_section.len());
665 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
666
667 let related_files_section = format_related_files_within_budget(
668 &related_files,
669 "<|file_sep|>",
670 "",
671 remaining_budget,
672 );
673
674 let mut prompt = String::new();
675 prompt.push_str(&related_files_section);
676 prompt.push_str(&edit_history_section);
677 prompt.push_str(&cursor_section);
678 prompt
679 }
680 };
681 let prompt_tokens = estimate_tokens(prompt.len());
682 if prompt_tokens > max_tokens {
683 return None;
684 }
685 return Some(prompt);
686}
687
688pub fn filter_redundant_excerpts(
689 mut related_files: Vec<RelatedFile>,
690 cursor_path: &Path,
691 cursor_row_range: Range<u32>,
692) -> Vec<RelatedFile> {
693 for file in &mut related_files {
694 if file.path.as_ref() == cursor_path {
695 file.excerpts.retain(|excerpt| {
696 excerpt.row_range.start < cursor_row_range.start
697 || excerpt.row_range.end > cursor_row_range.end
698 });
699 }
700 }
701 related_files.retain(|file| !file.excerpts.is_empty());
702 related_files
703}
704
705pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
706 match format {
707 ZetaFormat::V0112MiddleAtEnd
708 | ZetaFormat::V0113Ordered
709 | ZetaFormat::V0114180EditableRegion
710 | ZetaFormat::V0120GitMergeMarkers
711 | ZetaFormat::V0131GitMergeMarkersPrefix
712 | ZetaFormat::V0211Prefill
713 | ZetaFormat::V0211SeedCoder
714 | ZetaFormat::v0226Hashline
715 | ZetaFormat::V0304SeedNoEdits
716 | ZetaFormat::V0304VariableEdit
717 | ZetaFormat::V0306SeedMultiRegions
718 | ZetaFormat::V0316SeedMultiRegions
719 | ZetaFormat::V0318SeedMultiRegions
720 | ZetaFormat::V0317SeedMultiRegions => 6,
721 }
722}
723
724pub fn get_prefill_for_format(
725 format: ZetaFormat,
726 context: &str,
727 editable_range: &Range<usize>,
728) -> String {
729 match format {
730 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
731 ZetaFormat::V0112MiddleAtEnd
732 | ZetaFormat::V0113Ordered
733 | ZetaFormat::V0114180EditableRegion
734 | ZetaFormat::V0120GitMergeMarkers
735 | ZetaFormat::V0131GitMergeMarkersPrefix
736 | ZetaFormat::V0211SeedCoder
737 | ZetaFormat::v0226Hashline
738 | ZetaFormat::V0304VariableEdit => String::new(),
739 ZetaFormat::V0304SeedNoEdits
740 | ZetaFormat::V0306SeedMultiRegions
741 | ZetaFormat::V0316SeedMultiRegions
742 | ZetaFormat::V0318SeedMultiRegions
743 | ZetaFormat::V0317SeedMultiRegions => String::new(),
744 }
745}
746
747pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
748 match format {
749 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
750 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
751 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
752 ZetaFormat::V0211SeedCoder
753 | ZetaFormat::V0304SeedNoEdits
754 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
755 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
756 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
757 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
758 ZetaFormat::V0112MiddleAtEnd
759 | ZetaFormat::V0113Ordered
760 | ZetaFormat::V0114180EditableRegion
761 | ZetaFormat::v0226Hashline
762 | ZetaFormat::V0304VariableEdit => None,
763 }
764}
765
766pub fn encode_patch_as_output_for_format(
767 format: ZetaFormat,
768 old_editable_region: &str,
769 patch: &str,
770 cursor_offset: Option<usize>,
771) -> Result<Option<String>> {
772 match format {
773 ZetaFormat::v0226Hashline => {
774 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
775 }
776 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
777 old_editable_region,
778 patch,
779 cursor_offset,
780 )
781 .map(Some),
782 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
783 Ok(seed_coder::no_edits(patch))
784 }
785 ZetaFormat::V0316SeedMultiRegions => {
786 let empty_patch = patch.lines().count() <= 3;
787 if empty_patch {
788 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
789 let marker_num =
790 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
791 let tag = multi_region::marker_tag(marker_num);
792 Ok(Some(format!(
793 "{tag}{tag}{}",
794 multi_region::V0316_END_MARKER
795 )))
796 } else {
797 Ok(None)
798 }
799 }
800 ZetaFormat::V0318SeedMultiRegions => {
801 let empty_patch = patch.lines().count() <= 3;
802 if empty_patch {
803 let marker_offsets =
804 multi_region::compute_marker_offsets_v0318(old_editable_region);
805 let marker_num =
806 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
807 let tag = multi_region::marker_tag(marker_num);
808 Ok(Some(format!(
809 "{tag}{tag}{}",
810 multi_region::V0318_END_MARKER
811 )))
812 } else {
813 Ok(None)
814 }
815 }
816 ZetaFormat::V0317SeedMultiRegions => {
817 let empty_patch = patch.lines().count() <= 3;
818 if empty_patch {
819 let tag = multi_region::marker_tag_relative(0);
820 Ok(Some(format!(
821 "{tag}{tag}{}",
822 multi_region::V0317_END_MARKER
823 )))
824 } else {
825 Ok(None)
826 }
827 }
828 _ => Ok(None),
829 }
830}
831
832/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
833/// extracted), produce the expected model output string for training.
834pub fn format_expected_output(
835 input: &ZetaPromptInput,
836 format: ZetaFormat,
837 patch: &str,
838 cursor_offset: Option<usize>,
839) -> Result<String> {
840 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
841 let mut old_editable = context[editable_range].to_string();
842 if !old_editable.is_empty() && !old_editable.ends_with('\n') {
843 old_editable.push('\n');
844 }
845
846 // Formats with their own output encoding (hashline, variable-edit,
847 // multi-region empty patches) are handled here.
848 if let Some(output) =
849 encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
850 {
851 return Ok(output);
852 }
853
854 let empty_patch = patch.lines().count() <= 3;
855
856 match format {
857 // Multi-region formats: non-empty patches need diff application
858 // then marker-span encoding.
859 ZetaFormat::V0316SeedMultiRegions => {
860 let (new_editable, first_hunk_offset) =
861 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
862 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
863 multi_region::encode_from_old_and_new_v0316(
864 &old_editable,
865 &new_editable,
866 cursor_in_new,
867 CURSOR_MARKER,
868 multi_region::V0316_END_MARKER,
869 )
870 }
871 ZetaFormat::V0318SeedMultiRegions => {
872 let (new_editable, first_hunk_offset) =
873 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
874 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
875 multi_region::encode_from_old_and_new_v0318(
876 &old_editable,
877 &new_editable,
878 cursor_in_new,
879 CURSOR_MARKER,
880 multi_region::V0318_END_MARKER,
881 )
882 }
883 ZetaFormat::V0317SeedMultiRegions => {
884 let (new_editable, first_hunk_offset) =
885 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
886 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
887 multi_region::encode_from_old_and_new_v0317(
888 &old_editable,
889 &new_editable,
890 cursor_in_new,
891 CURSOR_MARKER,
892 multi_region::V0317_END_MARKER,
893 )
894 }
895 // V0131-style formats and fallback: produce new editable text with
896 // cursor marker inserted, followed by the end marker.
897 _ => {
898 let (mut result, first_hunk_offset) = if empty_patch {
899 (old_editable.clone(), None)
900 } else {
901 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
902 };
903
904 if let Some(cursor) = cursor_offset {
905 let hunk_start = if !empty_patch {
906 first_hunk_offset.unwrap_or(0)
907 } else {
908 0
909 };
910 let offset = (hunk_start + cursor).min(result.len());
911 result.insert_str(offset, CURSOR_MARKER);
912 }
913
914 if !result.is_empty() && !result.ends_with('\n') {
915 result.push('\n');
916 }
917
918 if let Some(end_marker) = output_end_marker_for_format(format) {
919 result.push_str(end_marker);
920 }
921
922 Ok(result)
923 }
924 }
925}
926
/// Computes the cursor position within the new text after diff application.
///
/// The cursor offset is shifted by the first hunk's offset (0 when absent)
/// and clamped to the length of `new_text`. Returns `None` when there is no
/// cursor.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let shifted = first_hunk_offset.unwrap_or(0) + cursor;
    Some(shifted.min(new_text.len()))
}
938
/// The result of parsing model output: replacement text, the range of the
/// excerpt it replaces, and the cursor position if the output contained a
/// cursor marker.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
948
/// A predicted cursor position, as computed by
/// `cursor_position_from_parsed_output`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Path of the cursor file, converted to a string lossily.
    pub path: String,
    /// Zero-based row relative to the start of the cursor excerpt.
    pub row: usize,
    /// Byte offset of the cursor from the start of its line.
    pub column: usize,
    /// Start of the replaced range in the excerpt plus the cursor's offset
    /// within the new editable region.
    pub offset: usize,
    /// The cursor's byte offset within the new editable region.
    pub editable_region_offset: usize,
}
957
958pub fn parsed_output_from_editable_region(
959 range_in_excerpt: Range<usize>,
960 mut new_editable_region: String,
961) -> ParsedOutput {
962 let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
963 if let Some(offset) = cursor_offset_in_new_editable_region {
964 new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
965 }
966
967 ParsedOutput {
968 new_editable_region,
969 range_in_excerpt,
970 cursor_offset_in_new_editable_region,
971 }
972}
973
974/// Parse model output for the given zeta format
975pub fn parse_zeta2_model_output(
976 output: &str,
977 format: ZetaFormat,
978 prompt_inputs: &ZetaPromptInput,
979) -> Result<ParsedOutput> {
980 let output = match output_end_marker_for_format(format) {
981 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
982 None => output,
983 };
984
985 let (context, editable_range_in_context, context_range, cursor_offset) =
986 resolve_cursor_region(prompt_inputs, format);
987 let context_start = context_range.start;
988 let old_editable_region = &context[editable_range_in_context.clone()];
989 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
990
991 let (range_in_context, output) = match format {
992 ZetaFormat::v0226Hashline => (
993 editable_range_in_context,
994 if hashline::output_has_edit_commands(output) {
995 hashline::apply_edit_commands(old_editable_region, output)
996 } else {
997 output.to_string()
998 },
999 ),
1000 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1001 ZetaFormat::V0304SeedNoEdits => (
1002 editable_range_in_context,
1003 if output.starts_with(seed_coder::NO_EDITS) {
1004 old_editable_region.to_string()
1005 } else {
1006 output.to_string()
1007 },
1008 ),
1009 ZetaFormat::V0306SeedMultiRegions => (
1010 editable_range_in_context,
1011 if output.starts_with(seed_coder::NO_EDITS) {
1012 old_editable_region.to_string()
1013 } else {
1014 multi_region::apply_marker_span(old_editable_region, output)?
1015 },
1016 ),
1017 ZetaFormat::V0316SeedMultiRegions => (
1018 editable_range_in_context,
1019 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1020 ),
1021 ZetaFormat::V0318SeedMultiRegions => (
1022 editable_range_in_context,
1023 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1024 ),
1025 ZetaFormat::V0317SeedMultiRegions => (
1026 editable_range_in_context,
1027 multi_region::apply_marker_span_v0317(
1028 old_editable_region,
1029 output,
1030 Some(cursor_offset_in_editable),
1031 )?,
1032 ),
1033 _ => (editable_range_in_context, output.to_string()),
1034 };
1035
1036 let range_in_excerpt =
1037 range_in_context.start + context_start..range_in_context.end + context_start;
1038
1039 Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1040}
1041
1042pub fn parse_zeta2_model_output_as_patch(
1043 output: &str,
1044 format: ZetaFormat,
1045 prompt_inputs: &ZetaPromptInput,
1046) -> Result<String> {
1047 let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1048 parsed_output_to_patch(prompt_inputs, parsed)
1049}
1050
1051pub fn cursor_position_from_parsed_output(
1052 prompt_inputs: &ZetaPromptInput,
1053 parsed: &ParsedOutput,
1054) -> Option<CursorPosition> {
1055 let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1056 let editable_region_offset = parsed.range_in_excerpt.start;
1057 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1058
1059 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1060
1061 let new_editable_region = &parsed.new_editable_region;
1062 let prefix_end = cursor_offset.min(new_editable_region.len());
1063 let new_region_prefix = &new_editable_region[..prefix_end];
1064
1065 let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1066
1067 let column = match new_region_prefix.rfind('\n') {
1068 Some(last_newline) => cursor_offset - last_newline - 1,
1069 None => {
1070 let content_prefix = &excerpt[..editable_region_offset];
1071 let content_column = match content_prefix.rfind('\n') {
1072 Some(last_newline) => editable_region_offset - last_newline - 1,
1073 None => editable_region_offset,
1074 };
1075 content_column + cursor_offset
1076 }
1077 };
1078
1079 Some(CursorPosition {
1080 path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1081 row,
1082 column,
1083 offset: editable_region_offset + cursor_offset,
1084 editable_region_offset: cursor_offset,
1085 })
1086}
1087
1088pub fn parsed_output_to_patch(
1089 prompt_inputs: &ZetaPromptInput,
1090 parsed: ParsedOutput,
1091) -> Result<String> {
1092 let range_in_excerpt = parsed.range_in_excerpt;
1093 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1094 let old_text = excerpt[range_in_excerpt.clone()].to_string();
1095 let mut new_text = parsed.new_editable_region;
1096
1097 let mut old_text_normalized = old_text;
1098 if !new_text.is_empty() && !new_text.ends_with('\n') {
1099 new_text.push('\n');
1100 }
1101 if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1102 old_text_normalized.push('\n');
1103 }
1104
1105 let editable_region_offset = range_in_excerpt.start;
1106 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1107 let editable_region_lines = old_text_normalized.lines().count() as u32;
1108
1109 let diff = udiff::unified_diff_with_context(
1110 &old_text_normalized,
1111 &new_text,
1112 editable_region_start_line,
1113 editable_region_start_line,
1114 editable_region_lines,
1115 );
1116
1117 let path = prompt_inputs
1118 .cursor_path
1119 .to_string_lossy()
1120 .trim_start_matches('/')
1121 .to_string();
1122 let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1123
1124 Ok(udiff::encode_cursor_in_patch(
1125 &formatted_diff,
1126 parsed.cursor_offset_in_new_editable_region,
1127 ))
1128}
1129
/// Returns the `(editable, context)` ranges that `ranges` holds for `format`.
///
/// Public entry point that forwards to `excerpt_ranges_for_format` without
/// altering its result.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1136
1137pub fn resolve_cursor_region(
1138 input: &ZetaPromptInput,
1139 format: ZetaFormat,
1140) -> (&str, Range<usize>, Range<usize>, usize) {
1141 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
1142 let (editable_tokens, context_tokens) = token_limits_for_format(format);
1143 compute_editable_and_context_ranges(
1144 &input.cursor_excerpt,
1145 input.cursor_offset_in_excerpt,
1146 syntax_ranges,
1147 editable_tokens,
1148 context_tokens,
1149 )
1150 } else {
1151 excerpt_range_for_format(format, &input.excerpt_ranges)
1152 };
1153 let context_start = context_range.start;
1154 let context_text = &input.cursor_excerpt[context_range.clone()];
1155 let adjusted_editable =
1156 (editable_range.start - context_start)..(editable_range.end - context_start);
1157 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1158
1159 (
1160 context_text,
1161 adjusted_editable,
1162 context_range,
1163 adjusted_cursor,
1164 )
1165}
1166
1167pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1168 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1169 get_prefill_for_format(format, context, &editable_range)
1170}
1171
1172fn format_edit_history_within_budget(
1173 events: &[Arc<Event>],
1174 file_marker: &str,
1175 edit_history_name: &str,
1176 max_tokens: usize,
1177 max_edit_event_count: usize,
1178) -> String {
1179 let header = format!("{}{}\n", file_marker, edit_history_name);
1180 let header_tokens = estimate_tokens(header.len());
1181 if header_tokens >= max_tokens {
1182 return String::new();
1183 }
1184
1185 let mut event_strings: Vec<String> = Vec::new();
1186 let mut total_tokens = header_tokens;
1187
1188 for event in events.iter().rev().take(max_edit_event_count) {
1189 let mut event_str = String::new();
1190 write_event(&mut event_str, event);
1191 let event_tokens = estimate_tokens(event_str.len());
1192
1193 if total_tokens + event_tokens > max_tokens {
1194 break;
1195 }
1196 total_tokens += event_tokens;
1197 event_strings.push(event_str);
1198 }
1199
1200 if event_strings.is_empty() {
1201 return String::new();
1202 }
1203
1204 let mut result = header;
1205 for event_str in event_strings.iter().rev() {
1206 result.push_str(event_str);
1207 }
1208 result
1209}
1210
1211fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1212 let needs_newline = !excerpt.text.ends_with('\n');
1213 let needs_ellipsis = excerpt.row_range.end < file_max_row;
1214 let len = excerpt.text.len()
1215 + if needs_newline { "\n".len() } else { 0 }
1216 + if needs_ellipsis { "...\n".len() } else { 0 };
1217 estimate_tokens(len)
1218}
1219
1220pub fn format_related_files_within_budget(
1221 related_files: &[RelatedFile],
1222 file_prefix: &str,
1223 file_suffix: &str,
1224 max_tokens: usize,
1225) -> String {
1226 struct ExcerptCandidate {
1227 file_ix: usize,
1228 excerpt_ix: usize,
1229 order: usize,
1230 }
1231
1232 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1233 .iter()
1234 .enumerate()
1235 .flat_map(|(file_ix, file)| {
1236 file.excerpts
1237 .iter()
1238 .enumerate()
1239 .map(move |(excerpt_ix, e)| ExcerptCandidate {
1240 file_ix,
1241 excerpt_ix,
1242 order: e.order,
1243 })
1244 })
1245 .collect();
1246
1247 // Pre-compute file header strings and their token costs.
1248 let file_headers: Vec<String> = related_files
1249 .iter()
1250 .map(|file| {
1251 let path_str = file.path.to_string_lossy();
1252 format!("{}{}\n", file_prefix, path_str)
1253 })
1254 .collect();
1255
1256 // Sort the excerpts by their order and determine how many fit within the budget.
1257 let mut total_tokens = 0;
1258 let mut included_excerpt_count = 0_usize;
1259 let mut included_file_indices = vec![false; related_files.len()];
1260 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1261 for candidate in &excerpt_candidates {
1262 let file = &related_files[candidate.file_ix];
1263 let excerpt = &file.excerpts[candidate.excerpt_ix];
1264 let file_already_included = included_file_indices[candidate.file_ix];
1265 let header_cost = if file_already_included {
1266 0
1267 } else {
1268 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1269 };
1270 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1271 if total_tokens + header_cost + excerpt_cost > max_tokens {
1272 break;
1273 }
1274 total_tokens += header_cost + excerpt_cost;
1275 if !file_already_included {
1276 included_file_indices[candidate.file_ix] = true;
1277 }
1278 included_excerpt_count += 1;
1279 }
1280
1281 excerpt_candidates.truncate(included_excerpt_count);
1282 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1283
1284 // Render all of the files that fit within the token budget, in the original order.
1285 let mut result = String::new();
1286 let mut last_file_ix = None;
1287 for candidate in &excerpt_candidates {
1288 if last_file_ix != Some(candidate.file_ix) {
1289 if last_file_ix.is_some() {
1290 result.push_str(file_suffix);
1291 }
1292 result.push_str(&file_headers[candidate.file_ix]);
1293 last_file_ix = Some(candidate.file_ix);
1294 }
1295 let file = &related_files[candidate.file_ix];
1296 let excerpt = &file.excerpts[candidate.excerpt_ix];
1297 result.push_str(&excerpt.text);
1298 if !result.ends_with('\n') {
1299 result.push('\n');
1300 }
1301 if excerpt.row_range.end < file.max_row {
1302 result.push_str("...\n");
1303 }
1304 }
1305
1306 result
1307}
1308
1309pub fn write_related_files(
1310 prompt: &mut String,
1311 related_files: &[RelatedFile],
1312) -> Vec<Range<usize>> {
1313 let mut ranges = Vec::new();
1314 for file in related_files {
1315 let start = prompt.len();
1316 let path_str = file.path.to_string_lossy();
1317 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1318 for excerpt in &file.excerpts {
1319 prompt.push_str(&excerpt.text);
1320 if !prompt.ends_with('\n') {
1321 prompt.push('\n');
1322 }
1323 if excerpt.row_range.end < file.max_row {
1324 prompt.push_str("...\n");
1325 }
1326 }
1327 let end = prompt.len();
1328 ranges.push(start..end);
1329 }
1330 ranges
1331}
1332
1333mod v0112_middle_at_end {
1334 use super::*;
1335
1336 pub fn special_tokens() -> &'static [&'static str] {
1337 &[
1338 "<|fim_prefix|>",
1339 "<|fim_suffix|>",
1340 "<|fim_middle|>",
1341 "<|file_sep|>",
1342 CURSOR_MARKER,
1343 ]
1344 }
1345
1346 pub fn write_cursor_excerpt_section(
1347 prompt: &mut String,
1348 path: &Path,
1349 context: &str,
1350 editable_range: &Range<usize>,
1351 cursor_offset: usize,
1352 ) {
1353 let path_str = path.to_string_lossy();
1354 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1355
1356 prompt.push_str("<|fim_prefix|>\n");
1357 prompt.push_str(&context[..editable_range.start]);
1358
1359 prompt.push_str("<|fim_suffix|>\n");
1360 prompt.push_str(&context[editable_range.end..]);
1361 if !prompt.ends_with('\n') {
1362 prompt.push('\n');
1363 }
1364
1365 prompt.push_str("<|fim_middle|>current\n");
1366 prompt.push_str(&context[editable_range.start..cursor_offset]);
1367 prompt.push_str(CURSOR_MARKER);
1368 prompt.push_str(&context[cursor_offset..editable_range.end]);
1369 if !prompt.ends_with('\n') {
1370 prompt.push('\n');
1371 }
1372
1373 prompt.push_str("<|fim_middle|>updated\n");
1374 }
1375}
1376
1377mod v0113_ordered {
1378 use super::*;
1379
1380 pub fn special_tokens() -> &'static [&'static str] {
1381 &[
1382 "<|fim_prefix|>",
1383 "<|fim_suffix|>",
1384 "<|fim_middle|>",
1385 "<|file_sep|>",
1386 CURSOR_MARKER,
1387 ]
1388 }
1389
1390 pub fn write_cursor_excerpt_section(
1391 prompt: &mut String,
1392 path: &Path,
1393 context: &str,
1394 editable_range: &Range<usize>,
1395 cursor_offset: usize,
1396 ) {
1397 let path_str = path.to_string_lossy();
1398 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1399
1400 prompt.push_str("<|fim_prefix|>\n");
1401 prompt.push_str(&context[..editable_range.start]);
1402 if !prompt.ends_with('\n') {
1403 prompt.push('\n');
1404 }
1405
1406 prompt.push_str("<|fim_middle|>current\n");
1407 prompt.push_str(&context[editable_range.start..cursor_offset]);
1408 prompt.push_str(CURSOR_MARKER);
1409 prompt.push_str(&context[cursor_offset..editable_range.end]);
1410 if !prompt.ends_with('\n') {
1411 prompt.push('\n');
1412 }
1413
1414 prompt.push_str("<|fim_suffix|>\n");
1415 prompt.push_str(&context[editable_range.end..]);
1416 if !prompt.ends_with('\n') {
1417 prompt.push('\n');
1418 }
1419
1420 prompt.push_str("<|fim_middle|>updated\n");
1421 }
1422}
1423
mod v0114180_editable_region {
    //! Uses the same special-token list as `v0113_ordered`.

    use super::*;

    /// Returns the special tokens of this format, which are exactly those of
    /// `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1431
1432pub mod v0120_git_merge_markers {
1433 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1434 //!
1435 //! Example prompt:
1436 //!
1437 //! <|file_sep|>path/to/target_file.py
1438 //! <|fim_prefix|>
1439 //! code before editable region
1440 //! <|fim_suffix|>
1441 //! code after editable region
1442 //! <|fim_middle|>
1443 //! <<<<<<< CURRENT
1444 //! code that
1445 //! needs to<|user_cursor|>
1446 //! be rewritten
1447 //! =======
1448 //!
1449 //! Expected output (should be generated by the model):
1450 //!
1451 //! updated
1452 //! code with
1453 //! changes applied
1454 //! >>>>>>> UPDATED
1455
1456 use super::*;
1457
1458 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1459 pub const SEPARATOR: &str = "=======\n";
1460 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1461
1462 pub fn special_tokens() -> &'static [&'static str] {
1463 &[
1464 "<|fim_prefix|>",
1465 "<|fim_suffix|>",
1466 "<|fim_middle|>",
1467 "<|file_sep|>",
1468 START_MARKER,
1469 SEPARATOR,
1470 END_MARKER,
1471 CURSOR_MARKER,
1472 ]
1473 }
1474
1475 pub fn write_cursor_excerpt_section(
1476 prompt: &mut String,
1477 path: &Path,
1478 context: &str,
1479 editable_range: &Range<usize>,
1480 cursor_offset: usize,
1481 ) {
1482 let path_str = path.to_string_lossy();
1483 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1484
1485 prompt.push_str("<|fim_prefix|>");
1486 prompt.push_str(&context[..editable_range.start]);
1487
1488 prompt.push_str("<|fim_suffix|>");
1489 prompt.push_str(&context[editable_range.end..]);
1490 if !prompt.ends_with('\n') {
1491 prompt.push('\n');
1492 }
1493
1494 prompt.push_str("<|fim_middle|>");
1495 prompt.push_str(START_MARKER);
1496 prompt.push_str(&context[editable_range.start..cursor_offset]);
1497 prompt.push_str(CURSOR_MARKER);
1498 prompt.push_str(&context[cursor_offset..editable_range.end]);
1499 if !prompt.ends_with('\n') {
1500 prompt.push('\n');
1501 }
1502 prompt.push_str(SEPARATOR);
1503 }
1504}
1505
1506pub mod v0131_git_merge_markers_prefix {
1507 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1508 //!
1509 //! Example prompt:
1510 //!
1511 //! <|file_sep|>path/to/target_file.py
1512 //! <|fim_prefix|>
1513 //! code before editable region
1514 //! <<<<<<< CURRENT
1515 //! code that
1516 //! needs to<|user_cursor|>
1517 //! be rewritten
1518 //! =======
1519 //! <|fim_suffix|>
1520 //! code after editable region
1521 //! <|fim_middle|>
1522 //!
1523 //! Expected output (should be generated by the model):
1524 //!
1525 //! updated
1526 //! code with
1527 //! changes applied
1528 //! >>>>>>> UPDATED
1529
1530 use super::*;
1531
1532 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1533 pub const SEPARATOR: &str = "=======\n";
1534 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1535
1536 pub fn special_tokens() -> &'static [&'static str] {
1537 &[
1538 "<|fim_prefix|>",
1539 "<|fim_suffix|>",
1540 "<|fim_middle|>",
1541 "<|file_sep|>",
1542 START_MARKER,
1543 SEPARATOR,
1544 END_MARKER,
1545 CURSOR_MARKER,
1546 ]
1547 }
1548
1549 pub fn write_cursor_excerpt_section(
1550 prompt: &mut String,
1551 path: &Path,
1552 context: &str,
1553 editable_range: &Range<usize>,
1554 cursor_offset: usize,
1555 ) {
1556 let path_str = path.to_string_lossy();
1557 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1558
1559 prompt.push_str("<|fim_prefix|>");
1560 prompt.push_str(&context[..editable_range.start]);
1561 prompt.push_str(START_MARKER);
1562 prompt.push_str(&context[editable_range.start..cursor_offset]);
1563 prompt.push_str(CURSOR_MARKER);
1564 prompt.push_str(&context[cursor_offset..editable_range.end]);
1565 if !prompt.ends_with('\n') {
1566 prompt.push('\n');
1567 }
1568 prompt.push_str(SEPARATOR);
1569
1570 prompt.push_str("<|fim_suffix|>");
1571 prompt.push_str(&context[editable_range.end..]);
1572 if !prompt.ends_with('\n') {
1573 prompt.push('\n');
1574 }
1575
1576 prompt.push_str("<|fim_middle|>");
1577 }
1578}
1579
1580pub mod v0211_prefill {
1581 use super::*;
1582
1583 pub fn special_tokens() -> &'static [&'static str] {
1584 v0131_git_merge_markers_prefix::special_tokens()
1585 }
1586
1587 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1588 let editable_region = &context[editable_range.start..editable_range.end];
1589
1590 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1591 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1592
1593 // Find a token boundary to avoid splitting tokens in the prefill.
1594 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1595 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1596 // the \n and consume any consecutive \n characters after it.
1597 let prefill = &editable_region[..prefill_len];
1598 match prefill.rfind('\n') {
1599 Some(pos) => {
1600 let mut end = pos + 1;
1601 while end < editable_region.len()
1602 && editable_region.as_bytes().get(end) == Some(&b'\n')
1603 {
1604 end += 1;
1605 }
1606 editable_region[..end].to_string()
1607 }
1608 // No newline found. Fall back to splitting before the last space
1609 // (word-level boundary)
1610 None => match prefill.rfind(' ') {
1611 Some(pos) => prefill[..pos].to_string(),
1612 None => prefill.to_string(),
1613 },
1614 }
1615 }
1616}
1617
1618pub mod hashline {
1619
1620 use std::fmt::Display;
1621
    /// Written as the last line of the hashline cursor-excerpt section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Starts a command line that replaces a single line or an inclusive
    /// range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Starts a command line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// When the model output starts with this marker (ignoring leading
    /// whitespace), the editable region is left unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1630
1631 pub fn special_tokens() -> &'static [&'static str] {
1632 return &[
1633 SET_COMMAND_MARKER,
1634 "<|set_range|>",
1635 INSERT_COMMAND_MARKER,
1636 NO_EDITS_COMMAND_MARKER,
1637 CURSOR_MARKER,
1638 "<|file_sep|>",
1639 "<|fim_prefix|>",
1640 "<|fim_suffix|>",
1641 "<|fim_middle|>",
1642 ];
1643 }
1644
1645 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1646 #[derive(Debug, Clone, PartialEq, Eq)]
1647 struct LineRef {
1648 index: usize,
1649 hash: u8,
1650 }
1651
1652 impl Display for LineRef {
1653 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1654 write!(f, "{}:{:02x}", self.index, self.hash)
1655 }
1656 }
1657
1658 pub fn hash_line(line: &[u8]) -> u8 {
1659 let mut h: u8 = 0;
1660 for &byte in line {
1661 h = h.wrapping_add(byte);
1662 }
1663 return h;
1664 }
1665
1666 /// Write the hashline-encoded editable region into `out`. Each line of
1667 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1668 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1669 /// to the start of `editable_text`).
1670 pub fn write_hashline_editable_region(
1671 out: &mut String,
1672 editable_text: &str,
1673 cursor_offset_in_editable: usize,
1674 ) {
1675 let mut offset = 0;
1676 for (i, line) in editable_text.lines().enumerate() {
1677 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1678 && cursor_offset_in_editable < offset + line.len()
1679 {
1680 (
1681 &line[..cursor_offset_in_editable - offset],
1682 CURSOR_MARKER,
1683 &line[cursor_offset_in_editable - offset..],
1684 )
1685 } else {
1686 (line, "", "")
1687 };
1688 write!(
1689 out,
1690 "\n{}|{head}{cursor}{tail}",
1691 LineRef {
1692 index: i,
1693 hash: hash_line(line.as_bytes())
1694 }
1695 )
1696 .unwrap();
1697 offset += line.len() + 1;
1698 }
1699 }
1700
1701 pub fn write_cursor_excerpt_section(
1702 prompt: &mut String,
1703 path: &Path,
1704 context: &str,
1705 editable_range: &Range<usize>,
1706 cursor_offset: usize,
1707 ) {
1708 let path_str = path.to_string_lossy();
1709 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1710
1711 prompt.push_str("<|fim_prefix|>\n");
1712 prompt.push_str(&context[..editable_range.start]);
1713 prompt.push_str(START_MARKER);
1714
1715 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1716 let editable_region = &context[editable_range.clone()];
1717 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1718
1719 if !prompt.ends_with('\n') {
1720 prompt.push('\n');
1721 }
1722
1723 prompt.push_str("<|fim_suffix|>\n");
1724 prompt.push_str(&context[editable_range.end..]);
1725 if !prompt.ends_with('\n') {
1726 prompt.push('\n');
1727 }
1728
1729 prompt.push_str(END_MARKER);
1730 prompt.push('\n');
1731 }
1732
    /// A single edit command parsed from the model output.
    ///
    /// In both variants `content` borrows from the model output: it is the
    /// text between the end of the command line and the next command line
    /// (or the end of the output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line to replace.
            start: LineRef,
            /// Last line to replace (inclusive).
            end: LineRef,
            /// Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line after which to insert; `None` means before the first line.
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1750
1751 /// Parse a line reference like `3:c3` into a `LineRef`.
1752 fn parse_line_ref(s: &str) -> Option<LineRef> {
1753 let (idx_str, hash_str) = s.split_once(':')?;
1754 let index = idx_str.parse::<usize>().ok()?;
1755 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1756 Some(LineRef { index, hash })
1757 }
1758
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with `SET_COMMAND_MARKER` or `INSERT_COMMAND_MARKER` opens a command;
    /// the rest of that trimmed line is the command's specifier. The command's
    /// content is every following line up to (not including) the next line
    /// that opens a command, or up to the end of the output. Lines that
    /// precede the first command and open no command are skipped.
    ///
    /// A set specifier is either a single line reference or two references
    /// joined by `-`. A set command whose references do not parse is dropped,
    /// together with its content. An insert command is always kept: when its
    /// specifier does not parse as a line reference, `after` is `None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        // Byte offset of the start of the line currently being examined.
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Locate the end of the current line (exclusive of the newline).
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            // Offset just past the newline, or the end of the output when the
            // last line has no newline.
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command line: skip it.
                offset = line_end;
                continue;
            };

            // Extend the content line by line until the next command line.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    // Leave `scan` at the start of the next command line.
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    // Range form: `start-end`. Dropped unless both refs parse.
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    // Single-line form: start and end are the same line.
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                // NOTE(review): any specifier that fails to parse, not only an
                // empty one, yields `after: None` — confirm this is intended.
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command line (or the end of the output).
            offset = scan;
        }

        commands
    }
1838
1839 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1840 /// (as opposed to being a plain full-replacement output).
1841 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1842 /// editable region, returning the plain text content.
1843 pub fn strip_hashline_prefixes(region: &str) -> String {
1844 let mut decoded: String = region
1845 .lines()
1846 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1847 .collect::<Vec<_>>()
1848 .join("\n");
1849 if region.ends_with('\n') {
1850 decoded.push('\n');
1851 }
1852 decoded
1853 }
1854
1855 pub fn output_has_edit_commands(model_output: &str) -> bool {
1856 model_output.contains(SET_COMMAND_MARKER)
1857 || model_output.contains(INSERT_COMMAND_MARKER)
1858 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1859 }
1860
1861 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1862 /// original editable region text.
1863 ///
1864 /// `editable_region` is the original text of the editable region (without hash
1865 /// prefixes). `model_output` is the raw model response containing edit commands.
1866 ///
1867 /// Returns the full replacement text for the editable region.
1868 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1869 if model_output
1870 .trim_start()
1871 .starts_with(NO_EDITS_COMMAND_MARKER)
1872 {
1873 return editable_region.to_string();
1874 }
1875
1876 let original_lines: Vec<&str> = editable_region.lines().collect();
1877 let old_hashes: Vec<u8> = original_lines
1878 .iter()
1879 .map(|line| hash_line(line.as_bytes()))
1880 .collect();
1881
1882 let commands = parse_edit_commands(model_output);
1883
1884 // For set operations: indexed by start line → Some((end line index, content))
1885 // For insert operations: indexed by line index → vec of content to insert after
1886 // Insert-before-first is tracked separately.
1887 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1888 let mut insert_before_first: Vec<&str> = Vec::new();
1889 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1890
1891 for command in &commands {
1892 match command {
1893 EditCommand::Set {
1894 start,
1895 end,
1896 content,
1897 } => {
1898 if start.index < old_hashes.len()
1899 && end.index < old_hashes.len()
1900 && start.index <= end.index
1901 && old_hashes[start.index] == start.hash
1902 && old_hashes[end.index] == end.hash
1903 {
1904 set_ops[start.index] = Some((end.index, *content));
1905 }
1906 }
1907 EditCommand::Insert { after, content } => match after {
1908 None => insert_before_first.push(*content),
1909 Some(line_ref) => {
1910 if line_ref.index < old_hashes.len()
1911 && old_hashes[line_ref.index] == line_ref.hash
1912 {
1913 insert_after[line_ref.index].push(*content);
1914 }
1915 }
1916 },
1917 }
1918 }
1919
1920 let mut result = String::new();
1921
1922 // Emit any insertions before the first line
1923 for content in &insert_before_first {
1924 result.push_str(content);
1925 if !content.ends_with('\n') {
1926 result.push('\n');
1927 }
1928 }
1929
1930 let mut i = 0;
1931 while i < original_lines.len() {
1932 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1933 // Replace lines i..=end_index with the replacement content
1934 result.push_str(replacement);
1935 if !replacement.is_empty() && !replacement.ends_with('\n') {
1936 result.push('\n');
1937 }
1938 // Emit any insertions after the end of this set range
1939 if *end_index < insert_after.len() {
1940 for content in &insert_after[*end_index] {
1941 result.push_str(content);
1942 if !content.ends_with('\n') {
1943 result.push('\n');
1944 }
1945 }
1946 }
1947 i = end_index + 1;
1948 } else {
1949 // Keep the original line
1950 result.push_str(original_lines[i]);
1951 result.push('\n');
1952 // Emit any insertions after this line
1953 for content in &insert_after[i] {
1954 result.push_str(content);
1955 if !content.ends_with('\n') {
1956 result.push('\n');
1957 }
1958 }
1959 i += 1;
1960 }
1961 }
1962
1963 // Preserve trailing newline behavior: if the original ended with a
1964 // newline the result already has one; if it didn't, trim the extra one
1965 // we added.
1966 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1967 result.pop();
1968 }
1969
1970 result
1971 }
1972
1973 /// Convert a unified diff patch into hashline edit commands.
1974 ///
1975 /// Parses the unified diff `patch` directly to determine which lines of
1976 /// `old_text` are deleted/replaced and what new lines are added, then emits
1977 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1978 /// `{index}:{hash}` identifiers.
1979 ///
1980 /// `cursor_offset` is an optional byte offset into the first hunk's new
1981 /// text (context + additions) where the cursor marker should be placed.
1982 pub fn patch_to_edit_commands(
1983 old_text: &str,
1984 patch: &str,
1985 cursor_offset: Option<usize>,
1986 ) -> Result<String> {
1987 let old_lines: Vec<&str> = old_text.lines().collect();
1988 let old_hashes: Vec<u8> = old_lines
1989 .iter()
1990 .map(|line| hash_line(line.as_bytes()))
1991 .collect();
1992
1993 let mut result = String::new();
1994 let mut first_hunk = true;
1995
1996 struct Hunk<'a> {
1997 line_range: Range<usize>,
1998 new_text_lines: Vec<&'a str>,
1999 cursor_line_offset_in_new_text: Option<(usize, usize)>,
2000 }
2001
2002 // Parse the patch line by line. We only care about hunk headers,
2003 // context, deletions, and additions.
2004 let mut old_line_index: usize = 0;
2005 let mut current_hunk: Option<Hunk> = None;
2006 // Byte offset tracking within the hunk's new text for cursor placement.
2007 let mut new_text_byte_offset: usize = 0;
2008 // The line index of the last old line seen before/in the current hunk
2009 // (used for insert-after reference).
2010 let mut last_old_line_before_hunk: Option<usize> = None;
2011
2012 fn flush_hunk(
2013 hunk: Hunk,
2014 last_old_line: Option<usize>,
2015 result: &mut String,
2016 old_hashes: &[u8],
2017 ) {
2018 if hunk.line_range.is_empty() {
2019 // Pure insertion — reference the old line to insert after when in bounds.
2020 if let Some(after) = last_old_line
2021 && let Some(&hash) = old_hashes.get(after)
2022 {
2023 write!(
2024 result,
2025 "{INSERT_COMMAND_MARKER}{}\n",
2026 LineRef { index: after, hash }
2027 )
2028 .unwrap();
2029 } else {
2030 result.push_str(INSERT_COMMAND_MARKER);
2031 result.push('\n');
2032 }
2033 } else {
2034 let start = hunk.line_range.start;
2035 let end_exclusive = hunk.line_range.end;
2036 let deleted_line_count = end_exclusive.saturating_sub(start);
2037
2038 if deleted_line_count == 1 {
2039 if let Some(&hash) = old_hashes.get(start) {
2040 write!(
2041 result,
2042 "{SET_COMMAND_MARKER}{}\n",
2043 LineRef { index: start, hash }
2044 )
2045 .unwrap();
2046 } else {
2047 result.push_str(SET_COMMAND_MARKER);
2048 result.push('\n');
2049 }
2050 } else {
2051 let end_inclusive = end_exclusive - 1;
2052 match (
2053 old_hashes.get(start).copied(),
2054 old_hashes.get(end_inclusive).copied(),
2055 ) {
2056 (Some(start_hash), Some(end_hash)) => {
2057 write!(
2058 result,
2059 "{SET_COMMAND_MARKER}{}-{}\n",
2060 LineRef {
2061 index: start,
2062 hash: start_hash
2063 },
2064 LineRef {
2065 index: end_inclusive,
2066 hash: end_hash
2067 }
2068 )
2069 .unwrap();
2070 }
2071 _ => {
2072 result.push_str(SET_COMMAND_MARKER);
2073 result.push('\n');
2074 }
2075 }
2076 }
2077 }
2078 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2079 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2080 && line_offset == cursor_line_offset
2081 {
2082 result.push_str(&line[..char_offset]);
2083 result.push_str(CURSOR_MARKER);
2084 result.push_str(&line[char_offset..]);
2085 continue;
2086 }
2087
2088 result.push_str(line);
2089 }
2090 }
2091
2092 for raw_line in patch.split_inclusive('\n') {
2093 if raw_line.starts_with("@@") {
2094 // Flush any pending change hunk from a previous patch hunk.
2095 if let Some(hunk) = current_hunk.take() {
2096 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2097 }
2098
2099 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2100 // We intentionally do not trust old_start as a direct local index into `old_text`,
2101 // because some patches are produced against a larger file region and carry
2102 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2103 if first_hunk {
2104 new_text_byte_offset = 0;
2105 first_hunk = false;
2106 }
2107 continue;
2108 }
2109
2110 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2111 continue;
2112 }
2113 if raw_line.starts_with("\\ No newline") {
2114 continue;
2115 }
2116
2117 if raw_line.starts_with('-') {
2118 // Extend or start a change hunk with this deleted old line.
2119 match &mut current_hunk {
2120 Some(Hunk {
2121 line_range: range, ..
2122 }) => range.end = old_line_index + 1,
2123 None => {
2124 current_hunk = Some(Hunk {
2125 line_range: old_line_index..old_line_index + 1,
2126 new_text_lines: Vec::new(),
2127 cursor_line_offset_in_new_text: None,
2128 });
2129 }
2130 }
2131 old_line_index += 1;
2132 } else if let Some(added_content) = raw_line.strip_prefix('+') {
2133 // Place cursor marker if cursor_offset falls within this line.
2134 let mut cursor_line_offset = None;
2135 if let Some(cursor_off) = cursor_offset
2136 && (first_hunk
2137 || cursor_off >= new_text_byte_offset
2138 && cursor_off <= new_text_byte_offset + added_content.len())
2139 {
2140 let line_offset = added_content.floor_char_boundary(
2141 cursor_off
2142 .saturating_sub(new_text_byte_offset)
2143 .min(added_content.len()),
2144 );
2145 cursor_line_offset = Some(line_offset);
2146 }
2147
2148 new_text_byte_offset += added_content.len();
2149
2150 let hunk = current_hunk.get_or_insert(Hunk {
2151 line_range: old_line_index..old_line_index,
2152 new_text_lines: vec![],
2153 cursor_line_offset_in_new_text: None,
2154 });
2155 hunk.new_text_lines.push(added_content);
2156 hunk.cursor_line_offset_in_new_text = cursor_line_offset
2157 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2158 } else {
2159 // Context line (starts with ' ' or is empty).
2160 if let Some(hunk) = current_hunk.take() {
2161 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2162 }
2163 last_old_line_before_hunk = Some(old_line_index);
2164 old_line_index += 1;
2165 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2166 new_text_byte_offset += content.len();
2167 }
2168 }
2169
2170 // Flush final group.
2171 if let Some(hunk) = current_hunk.take() {
2172 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2173 }
2174
2175 // Trim a single trailing newline.
2176 if result.ends_with('\n') {
2177 result.pop();
2178 }
2179
2180 if result.is_empty() {
2181 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2182 }
2183
2184 Ok(result)
2185 }
2186
2187 #[cfg(test)]
2188 mod tests {
2189 use super::*;
2190 use indoc::indoc;
2191
/// Table-driven check of `hashline::write_cursor_excerpt_section`.
///
/// Every case renders `context` for the path `test.rs` and compares the
/// produced prompt text with `expected` verbatim.
2192 #[test]
2193 fn test_format_cursor_region() {
// One row of the table. `name` is only used to label a failing assertion.
2194 struct Case {
2195 name: &'static str,
2196 context: &'static str,
2197 editable_range: Range<usize>,
2198 cursor_offset: usize,
2199 expected: &'static str,
2200 }
2201
2202 let cases = [
2203 Case {
2204 name: "basic_cursor_placement",
2205 context: "hello world\n",
2206 editable_range: 0..12,
2207 cursor_offset: 5,
2208 expected: indoc! {"
2209 <|file_sep|>test.rs
2210 <|fim_prefix|>
2211 <|fim_middle|>current
2212 0:5c|hello<|user_cursor|> world
2213 <|fim_suffix|>
2214 <|fim_middle|>updated
2215 "},
2216 },
2217 Case {
2218 name: "multiline_cursor_on_second_line",
2219 context: "aaa\nbbb\nccc\n",
2220 editable_range: 0..12,
2221 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2222 expected: indoc! {"
2223 <|file_sep|>test.rs
2224 <|fim_prefix|>
2225 <|fim_middle|>current
2226 0:23|aaa
2227 1:26|b<|user_cursor|>bb
2228 2:29|ccc
2229 <|fim_suffix|>
2230 <|fim_middle|>updated
2231 "},
2232 },
2233 Case {
2234 name: "no_trailing_newline_in_context",
2235 context: "line1\nline2",
2236 editable_range: 0..11,
2237 cursor_offset: 3,
2238 expected: indoc! {"
2239 <|file_sep|>test.rs
2240 <|fim_prefix|>
2241 <|fim_middle|>current
2242 0:d9|lin<|user_cursor|>e1
2243 1:da|line2
2244 <|fim_suffix|>
2245 <|fim_middle|>updated
2246 "},
2247 },
2248 Case {
2249 name: "leading_newline_in_editable_region",
2250 context: "\nabc\n",
2251 editable_range: 0..5,
2252 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2253 expected: indoc! {"
2254 <|file_sep|>test.rs
2255 <|fim_prefix|>
2256 <|fim_middle|>current
2257 0:00|
2258 1:26|a<|user_cursor|>bc
2259 <|fim_suffix|>
2260 <|fim_middle|>updated
2261 "},
2262 },
2263 Case {
2264 name: "with_suffix",
2265 context: "abc\ndef",
2266 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2267 cursor_offset: 2,
2268 expected: indoc! {"
2269 <|file_sep|>test.rs
2270 <|fim_prefix|>
2271 <|fim_middle|>current
2272 0:26|ab<|user_cursor|>c
2273 <|fim_suffix|>
2274 def
2275 <|fim_middle|>updated
2276 "},
2277 },
2278 Case {
2279 name: "unicode_two_byte_chars",
2280 context: "héllo\n",
2281 editable_range: 0..7,
2282 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2283 expected: indoc! {"
2284 <|file_sep|>test.rs
2285 <|fim_prefix|>
2286 <|fim_middle|>current
2287 0:1b|hé<|user_cursor|>llo
2288 <|fim_suffix|>
2289 <|fim_middle|>updated
2290 "},
2291 },
2292 Case {
2293 name: "unicode_three_byte_chars",
2294 context: "日本語\n",
2295 editable_range: 0..10,
2296 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2297 expected: indoc! {"
2298 <|file_sep|>test.rs
2299 <|fim_prefix|>
2300 <|fim_middle|>current
2301 0:80|日本<|user_cursor|>語
2302 <|fim_suffix|>
2303 <|fim_middle|>updated
2304 "},
2305 },
2306 Case {
2307 name: "unicode_four_byte_chars",
2308 context: "a🌍b\n",
2309 editable_range: 0..7,
2310 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2311 expected: indoc! {"
2312 <|file_sep|>test.rs
2313 <|fim_prefix|>
2314 <|fim_middle|>current
2315 0:6b|a🌍<|user_cursor|>b
2316 <|fim_suffix|>
2317 <|fim_middle|>updated
2318 "},
2319 },
2320 Case {
2321 name: "cursor_at_start_of_region_not_placed",
2322 context: "abc\n",
2323 editable_range: 0..4,
2324 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2325 expected: indoc! {"
2326 <|file_sep|>test.rs
2327 <|fim_prefix|>
2328 <|fim_middle|>current
2329 0:26|abc
2330 <|fim_suffix|>
2331 <|fim_middle|>updated
2332 "},
2333 },
2334 Case {
2335 name: "cursor_at_end_of_line_not_placed",
2336 context: "abc\ndef\n",
2337 editable_range: 0..8,
2338 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2339 expected: indoc! {"
2340 <|file_sep|>test.rs
2341 <|fim_prefix|>
2342 <|fim_middle|>current
2343 0:26|abc
2344 1:2f|def
2345 <|fim_suffix|>
2346 <|fim_middle|>updated
2347 "},
2348 },
2349 Case {
2350 name: "cursor_offset_relative_to_context_not_editable_region",
2351 // cursor_offset is relative to `context`, so when editable_range.start > 0,
2352 // write_cursor_excerpt_section must subtract it before comparing against
2353 // per-line offsets within the editable region.
2354 context: "pre\naaa\nbbb\nsuf\n",
2355 editable_range: 4..12, // editable region = "aaa\nbbb\n"
2356 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
2357 expected: indoc! {"
2358 <|file_sep|>test.rs
2359 <|fim_prefix|>
2360 pre
2361 <|fim_middle|>current
2362 0:23|aaa
2363 1:26|b<|user_cursor|>bb
2364 <|fim_suffix|>
2365 suf
2366 <|fim_middle|>updated
2367 "},
2368 },
2369 ];
2370
// Render each case into a fresh buffer so cases cannot influence each other.
2371 for case in &cases {
2372 let mut prompt = String::new();
2373 hashline::write_cursor_excerpt_section(
2374 &mut prompt,
2375 Path::new("test.rs"),
2376 case.context,
2377 &case.editable_range,
2378 case.cursor_offset,
2379 );
2380 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2381 }
2382 }
2383
/// Table-driven check of `hashline::apply_edit_commands`.
///
/// Every case passes `original` and `model_output` to the function and
/// expects exactly `expected` back. Several cases pin down that invalid
/// commands leave the original text untouched.
2384 #[test]
2385 fn test_apply_edit_commands() {
// One row of the table. `name` is only used to label a failing assertion.
2386 struct Case {
2387 name: &'static str,
2388 original: &'static str,
2389 model_output: &'static str,
2390 expected: &'static str,
2391 }
2392
2393 let cases = vec![
2394 Case {
2395 name: "set_single_line",
2396 original: indoc! {"
2397 let mut total = 0;
2398 for product in products {
2399 total += ;
2400 }
2401 total
2402 "},
2403 model_output: indoc! {"
2404 <|set|>2:87
2405 total += product.price;
2406 "},
2407 expected: indoc! {"
2408 let mut total = 0;
2409 for product in products {
2410 total += product.price;
2411 }
2412 total
2413 "},
2414 },
2415 Case {
2416 name: "set_range",
2417 original: indoc! {"
2418 fn foo() {
2419 let x = 1;
2420 let y = 2;
2421 let z = 3;
2422 }
2423 "},
2424 model_output: indoc! {"
2425 <|set|>1:46-3:4a
2426 let sum = 6;
2427 "},
2428 expected: indoc! {"
2429 fn foo() {
2430 let sum = 6;
2431 }
2432 "},
2433 },
2434 Case {
2435 name: "insert_after_line",
2436 original: indoc! {"
2437 fn main() {
2438 let x = 1;
2439 }
2440 "},
2441 model_output: indoc! {"
2442 <|insert|>1:46
2443 let y = 2;
2444 "},
2445 expected: indoc! {"
2446 fn main() {
2447 let x = 1;
2448 let y = 2;
2449 }
2450 "},
2451 },
2452 Case {
2453 name: "insert_before_first",
2454 original: indoc! {"
2455 let x = 1;
2456 let y = 2;
2457 "},
2458 model_output: indoc! {"
2459 <|insert|>
2460 use std::io;
2461 "},
2462 expected: indoc! {"
2463 use std::io;
2464 let x = 1;
2465 let y = 2;
2466 "},
2467 },
2468 Case {
2469 name: "set_with_cursor_marker",
2470 original: indoc! {"
2471 fn main() {
2472 println!();
2473 }
2474 "},
2475 model_output: indoc! {"
2476 <|set|>1:34
2477 eprintln!(\"<|user_cursor|>\");
2478 "},
2479 expected: indoc! {"
2480 fn main() {
2481 eprintln!(\"<|user_cursor|>\");
2482 }
2483 "},
2484 },
2485 Case {
2486 name: "multiple_set_commands",
2487 original: indoc! {"
2488 aaa
2489 bbb
2490 ccc
2491 ddd
2492 "},
2493 model_output: indoc! {"
2494 <|set|>0:23
2495 AAA
2496 <|set|>2:29
2497 CCC
2498 "},
2499 expected: indoc! {"
2500 AAA
2501 bbb
2502 CCC
2503 ddd
2504 "},
2505 },
2506 Case {
2507 name: "set_range_multiline_replacement",
2508 original: indoc! {"
2509 fn handle_submit() {
2510 }
2511
2512 fn handle_keystroke() {
2513 "},
2514 model_output: indoc! {"
2515 <|set|>0:3f-1:7d
2516 fn handle_submit(modal_state: &mut ModalState) {
2517 <|user_cursor|>
2518 }
2519 "},
2520 expected: indoc! {"
2521 fn handle_submit(modal_state: &mut ModalState) {
2522 <|user_cursor|>
2523 }
2524
2525 fn handle_keystroke() {
2526 "},
2527 },
2528 Case {
2529 name: "no_edit_commands_returns_original",
2530 original: indoc! {"
2531 hello
2532 world
2533 "},
2534 model_output: "some random text with no commands",
2535 expected: indoc! {"
2536 hello
2537 world
2538 "},
2539 },
2540 Case {
2541 name: "no_edits_command_returns_original",
2542 original: indoc! {"
2543 hello
2544 world
2545 "},
2546 model_output: "<|no_edits|>",
2547 expected: indoc! {"
2548 hello
2549 world
2550 "},
2551 },
2552 Case {
2553 name: "wrong_hash_set_ignored",
2554 original: indoc! {"
2555 aaa
2556 bbb
2557 "},
2558 model_output: indoc! {"
2559 <|set|>0:ff
2560 ZZZ
2561 "},
2562 expected: indoc! {"
2563 aaa
2564 bbb
2565 "},
2566 },
2567 Case {
2568 name: "insert_and_set_combined",
2569 original: indoc! {"
2570 alpha
2571 beta
2572 gamma
2573 "},
2574 model_output: indoc! {"
2575 <|set|>0:06
2576 ALPHA
2577 <|insert|>1:9c
2578 beta_extra
2579 "},
2580 expected: indoc! {"
2581 ALPHA
2582 beta
2583 beta_extra
2584 gamma
2585 "},
2586 },
2587 Case {
2588 name: "no_trailing_newline_preserved",
2589 original: "hello\nworld",
2590 model_output: indoc! {"
2591 <|set|>0:14
2592 HELLO
2593 "},
2594 expected: "HELLO\nworld",
2595 },
2596 Case {
2597 name: "set_range_hash_mismatch_in_end_bound",
2598 original: indoc! {"
2599 one
2600 two
2601 three
2602 "},
2603 model_output: indoc! {"
2604 <|set|>0:42-2:ff
2605 ONE_TWO_THREE
2606 "},
2607 expected: indoc! {"
2608 one
2609 two
2610 three
2611 "},
2612 },
2613 Case {
2614 name: "set_range_start_greater_than_end_ignored",
2615 original: indoc! {"
2616 a
2617 b
2618 c
2619 "},
2620 model_output: indoc! {"
2621 <|set|>2:63-1:62
2622 X
2623 "},
2624 expected: indoc! {"
2625 a
2626 b
2627 c
2628 "},
2629 },
2630 Case {
2631 name: "insert_out_of_bounds_ignored",
2632 original: indoc! {"
2633 x
2634 y
2635 "},
2636 model_output: indoc! {"
2637 <|insert|>99:aa
2638 z
2639 "},
2640 expected: indoc! {"
2641 x
2642 y
2643 "},
2644 },
2645 Case {
2646 name: "set_out_of_bounds_ignored",
2647 original: indoc! {"
2648 x
2649 y
2650 "},
2651 model_output: indoc! {"
2652 <|set|>99:aa
2653 z
2654 "},
2655 expected: indoc! {"
2656 x
2657 y
2658 "},
2659 },
2660 Case {
2661 name: "malformed_set_command_ignored",
2662 original: indoc! {"
2663 alpha
2664 beta
2665 "},
2666 model_output: indoc! {"
2667 <|set|>not-a-line-ref
2668 UPDATED
2669 "},
2670 expected: indoc! {"
2671 alpha
2672 beta
2673 "},
2674 },
2675 Case {
2676 name: "malformed_insert_hash_treated_as_before_first",
2677 original: indoc! {"
2678 alpha
2679 beta
2680 "},
2681 model_output: indoc! {"
2682 <|insert|>1:nothex
2683 preamble
2684 "},
2685 expected: indoc! {"
2686 preamble
2687 alpha
2688 beta
2689 "},
2690 },
2691 Case {
2692 name: "set_then_insert_same_target_orders_insert_after_replacement",
2693 original: indoc! {"
2694 cat
2695 dog
2696 "},
2697 model_output: indoc! {"
2698 <|set|>0:38
2699 CAT
2700 <|insert|>0:38
2701 TAIL
2702 "},
2703 expected: indoc! {"
2704 CAT
2705 TAIL
2706 dog
2707 "},
2708 },
// NOTE(review): the case name says "last_wins", but the expected text keeps
// the FIRST replacement and drops "SECOND" — confirm which one is intended.
2709 Case {
2710 name: "overlapping_set_ranges_last_wins",
2711 original: indoc! {"
2712 a
2713 b
2714 c
2715 d
2716 "},
2717 model_output: indoc! {"
2718 <|set|>0:61-2:63
2719 FIRST
2720 <|set|>1:62-3:64
2721 SECOND
2722 "},
2723 expected: indoc! {"
2724 FIRST
2725 d
2726 "},
2727 },
2728 Case {
2729 name: "insert_before_first_and_after_line",
2730 original: indoc! {"
2731 a
2732 b
2733 "},
2734 model_output: indoc! {"
2735 <|insert|>
2736 HEAD
2737 <|insert|>0:61
2738 MID
2739 "},
2740 expected: indoc! {"
2741 HEAD
2742 a
2743 MID
2744 b
2745 "},
2746 },
2747 ];
2748
// Apply each command script to its original text and compare verbatim.
2749 for case in &cases {
2750 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2751 assert_eq!(result, case.expected, "failed case: {}", case.name);
2752 }
2753 }
2754
/// Checks which model outputs `hashline::output_has_edit_commands` recognizes.
///
/// Outputs containing a set or insert command marker, and the literal
/// `<|no_edits|>` marker, must be reported as containing commands; plain text
/// and the bare word `NO_EDITS` must not.
#[test]
fn test_output_has_edit_commands() {
    // (model output, whether it is expected to contain edit commands)
    let cases = [
        (format!("{SET_COMMAND_MARKER}0:ab\nnew"), true),
        (format!("{INSERT_COMMAND_MARKER}0:ab\nnew"), true),
        (format!("some text\n{SET_COMMAND_MARKER}1:cd\nstuff"), true),
        ("just plain text".to_string(), false),
        ("NO_EDITS".to_string(), false),
        ("<|no_edits|>".to_string(), true),
    ];

    for (output, expected) in &cases {
        assert_eq!(
            hashline::output_has_edit_commands(output),
            *expected,
            "output: {output:?}"
        );
    }
}
2773
2774 // ---- hashline::patch_to_edit_commands round-trip tests ----
2775
/// Round-trip check for `hashline::patch_to_edit_commands`.
///
/// Every case converts `patch` (a unified diff against `old`) into edit
/// commands, asserts that the result is recognized as edit commands, applies
/// it to `old` with `hashline::apply_edit_commands`, and expects
/// `expected_new` back verbatim.
2776 #[test]
2777 fn test_patch_to_edit_commands() {
// One row of the table. `name` is only used to label a failing assertion.
2778 struct Case {
2779 name: &'static str,
2780 old: &'static str,
2781 patch: &'static str,
2782 expected_new: &'static str,
2783 }
2784
2785 let cases = [
2786 Case {
2787 name: "single_line_replacement",
2788 old: indoc! {"
2789 let mut total = 0;
2790 for product in products {
2791 total += ;
2792 }
2793 total
2794 "},
2795 patch: indoc! {"
2796 @@ -1,5 +1,5 @@
2797 let mut total = 0;
2798 for product in products {
2799 - total += ;
2800 + total += product.price;
2801 }
2802 total
2803 "},
2804 expected_new: indoc! {"
2805 let mut total = 0;
2806 for product in products {
2807 total += product.price;
2808 }
2809 total
2810 "},
2811 },
2812 Case {
2813 name: "multiline_replacement",
2814 old: indoc! {"
2815 fn foo() {
2816 let x = 1;
2817 let y = 2;
2818 let z = 3;
2819 }
2820 "},
2821 patch: indoc! {"
2822 @@ -1,5 +1,3 @@
2823 fn foo() {
2824 - let x = 1;
2825 - let y = 2;
2826 - let z = 3;
2827 + let sum = 1 + 2 + 3;
2828 }
2829 "},
2830 expected_new: indoc! {"
2831 fn foo() {
2832 let sum = 1 + 2 + 3;
2833 }
2834 "},
2835 },
2836 Case {
2837 name: "insertion",
2838 old: indoc! {"
2839 fn main() {
2840 let x = 1;
2841 }
2842 "},
2843 patch: indoc! {"
2844 @@ -1,3 +1,4 @@
2845 fn main() {
2846 let x = 1;
2847 + let y = 2;
2848 }
2849 "},
2850 expected_new: indoc! {"
2851 fn main() {
2852 let x = 1;
2853 let y = 2;
2854 }
2855 "},
2856 },
2857 Case {
2858 name: "insertion_before_first",
2859 old: indoc! {"
2860 let x = 1;
2861 let y = 2;
2862 "},
2863 patch: indoc! {"
2864 @@ -1,2 +1,3 @@
2865 +use std::io;
2866 let x = 1;
2867 let y = 2;
2868 "},
2869 expected_new: indoc! {"
2870 use std::io;
2871 let x = 1;
2872 let y = 2;
2873 "},
2874 },
2875 Case {
2876 name: "deletion",
2877 old: indoc! {"
2878 aaa
2879 bbb
2880 ccc
2881 ddd
2882 "},
2883 patch: indoc! {"
2884 @@ -1,4 +1,2 @@
2885 aaa
2886 -bbb
2887 -ccc
2888 ddd
2889 "},
2890 expected_new: indoc! {"
2891 aaa
2892 ddd
2893 "},
2894 },
2895 Case {
2896 name: "multiple_changes",
2897 old: indoc! {"
2898 alpha
2899 beta
2900 gamma
2901 delta
2902 epsilon
2903 "},
2904 patch: indoc! {"
2905 @@ -1,5 +1,5 @@
2906 -alpha
2907 +ALPHA
2908 beta
2909 gamma
2910 -delta
2911 +DELTA
2912 epsilon
2913 "},
2914 expected_new: indoc! {"
2915 ALPHA
2916 beta
2917 gamma
2918 DELTA
2919 epsilon
2920 "},
2921 },
2922 Case {
2923 name: "replace_with_insertion",
2924 old: indoc! {r#"
2925 fn handle() {
2926 modal_state.close();
2927 modal_state.dismiss();
2928 "#},
2929 patch: indoc! {r#"
2930 @@ -1,3 +1,4 @@
2931 fn handle() {
2932 modal_state.close();
2933 + eprintln!("");
2934 modal_state.dismiss();
2935 "#},
2936 expected_new: indoc! {r#"
2937 fn handle() {
2938 modal_state.close();
2939 eprintln!("");
2940 modal_state.dismiss();
2941 "#},
2942 },
2943 Case {
2944 name: "complete_replacement",
2945 old: indoc! {"
2946 aaa
2947 bbb
2948 ccc
2949 "},
2950 patch: indoc! {"
2951 @@ -1,3 +1,3 @@
2952 -aaa
2953 -bbb
2954 -ccc
2955 +xxx
2956 +yyy
2957 +zzz
2958 "},
2959 expected_new: indoc! {"
2960 xxx
2961 yyy
2962 zzz
2963 "},
2964 },
2965 Case {
2966 name: "add_function_body",
2967 old: indoc! {"
2968 fn foo() {
2969 modal_state.dismiss();
2970 }
2971
2972 fn
2973
2974 fn handle_keystroke() {
2975 "},
2976 patch: indoc! {"
2977 @@ -1,6 +1,8 @@
2978 fn foo() {
2979 modal_state.dismiss();
2980 }
2981
2982 -fn
2983 +fn handle_submit() {
2984 + todo()
2985 +}
2986
2987 fn handle_keystroke() {
2988 "},
2989 expected_new: indoc! {"
2990 fn foo() {
2991 modal_state.dismiss();
2992 }
2993
2994 fn handle_submit() {
2995 todo()
2996 }
2997
2998 fn handle_keystroke() {
2999 "},
3000 },
// The only case whose expected text contains the cursor marker, so it is the
// only one for which the loop below passes `Some(cursor_offset)`.
3001 Case {
3002 name: "with_cursor_offset",
3003 old: indoc! {r#"
3004 fn main() {
3005 println!();
3006 }
3007 "#},
3008 patch: indoc! {r#"
3009 @@ -1,3 +1,3 @@
3010 fn main() {
3011 - println!();
3012 + eprintln!("");
3013 }
3014 "#},
3015 expected_new: indoc! {r#"
3016 fn main() {
3017 eprintln!("<|user_cursor|>");
3018 }
3019 "#},
3020 },
3021 Case {
3022 name: "non_local_hunk_header_pure_insertion_repro",
3023 old: indoc! {"
3024 aaa
3025 bbb
3026 "},
3027 patch: indoc! {"
3028 @@ -20,2 +20,3 @@
3029 aaa
3030 +xxx
3031 bbb
3032 "},
3033 expected_new: indoc! {"
3034 aaa
3035 xxx
3036 bbb
3037 "},
3038 },
3039 Case {
3040 name: "empty_patch_produces_no_edits_marker",
3041 old: indoc! {"
3042 aaa
3043 bbb
3044 "},
3045 patch: "@@ -20,2 +20,3 @@\n",
3046 expected_new: indoc! {"
3047 aaa
3048 bbb
3049 "},
3050 },
3051 ];
3052
3053 for case in &cases {
3054 // The cursor_offset for patch_to_edit_commands is relative to
3055 // the first hunk's new text (context + additions). We compute
3056 // it by finding where the marker sits in the expected output
3057 // (which mirrors the new text of the hunk).
3058 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3059
3060 let commands =
3061 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3062 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3063
// Every case, including the header-only patch, must yield output that is
// recognized as edit commands.
3064 assert!(
3065 hashline::output_has_edit_commands(&commands),
3066 "case {}: expected edit commands, got: {commands:?}",
3067 case.name,
3068 );
3069
// Applying the generated commands to `old` must reproduce `expected_new`.
3070 let applied = hashline::apply_edit_commands(case.old, &commands);
3071 assert_eq!(applied, case.expected_new, "case {}", case.name);
3072 }
3073 }
3074 }
3075}
3076
3077pub mod seed_coder {
3078 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3079 //!
3080 //! Seed-Coder uses different FIM tokens and order than Qwen:
3081 //! - SPM order: suffix comes FIRST, then prefix, then middle
3082 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3083 //! - File markers: StarCoder-style `<filename>path` (single token + path)
3084 //!
3085 //! All context (related files, edit history) goes in the PREFIX section.
3086 //! The suffix contains only code after the editable region.
3087 //!
3088 //! Example prompt:
3089 //!
3090 //! <[fim-suffix]>
3091 //! code after editable region
3092 //! <[fim-prefix]><filename>related/file.py
3093 //! related file content
3094 //!
3095 //! <filename>edit_history
3096 //! --- a/some_file.py
3097 //! +++ b/some_file.py
3098 //! -old
3099 //! +new
3100 //!
3101 //! <filename>path/to/target_file.py
3102 //! code before editable region
3103 //! <<<<<<< CURRENT
3104 //! code that
3105 //! needs to<|user_cursor|>
3106 //! be rewritten
3107 //! =======
3108 //! <[fim-middle]>
3109 //!
3110 //! Expected output (model generates):
3111 //!
3112 //! updated
3113 //! code with
3114 //! changes applied
3115 //! >>>>>>> UPDATED
3116
3117 use super::*;
3118
3119 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
3120 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
3121 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
3122 pub const FILE_MARKER: &str = "<filename>";
3123
3124 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
3125 pub const SEPARATOR: &str = "=======\n";
3126 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
3127
3128 pub const NO_EDITS: &str = "NO_EDITS\n";
3129
3130 pub fn special_tokens() -> &'static [&'static str] {
3131 &[
3132 FIM_SUFFIX,
3133 FIM_PREFIX,
3134 FIM_MIDDLE,
3135 FILE_MARKER,
3136 START_MARKER,
3137 SEPARATOR,
3138 END_MARKER,
3139 CURSOR_MARKER,
3140 ]
3141 }
3142
3143 pub fn write_cursor_excerpt_section(
3144 prompt: &mut String,
3145 path: &Path,
3146 context: &str,
3147 editable_range: &Range<usize>,
3148 cursor_offset: usize,
3149 ) {
3150 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3151 prompt.push_str(§ion);
3152 }
3153
3154 pub fn format_prompt_with_budget(
3155 path: &Path,
3156 context: &str,
3157 editable_range: &Range<usize>,
3158 cursor_offset: usize,
3159 events: &[Arc<Event>],
3160 related_files: &[RelatedFile],
3161 max_tokens: usize,
3162 ) -> String {
3163 let cursor_prefix_section =
3164 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3165 assemble_fim_prompt(
3166 context,
3167 editable_range,
3168 &cursor_prefix_section,
3169 events,
3170 related_files,
3171 max_tokens,
3172 )
3173 }
3174
3175 pub fn assemble_fim_prompt(
3176 context: &str,
3177 editable_range: &Range<usize>,
3178 cursor_prefix_section: &str,
3179 events: &[Arc<Event>],
3180 related_files: &[RelatedFile],
3181 max_tokens: usize,
3182 ) -> String {
3183 let suffix_section = build_suffix_section(context, editable_range);
3184
3185 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3186 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3187 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3188
3189 let edit_history_section = super::format_edit_history_within_budget(
3190 events,
3191 FILE_MARKER,
3192 "edit_history",
3193 budget_after_cursor,
3194 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3195 );
3196 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3197 let budget_after_edit_history =
3198 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3199
3200 let related_files_section = super::format_related_files_within_budget(
3201 related_files,
3202 FILE_MARKER,
3203 "",
3204 budget_after_edit_history,
3205 );
3206
3207 let mut prompt = String::new();
3208 prompt.push_str(&suffix_section);
3209 prompt.push_str(FIM_PREFIX);
3210 prompt.push_str(&related_files_section);
3211 if !related_files_section.is_empty() {
3212 prompt.push('\n');
3213 }
3214 prompt.push_str(&edit_history_section);
3215 if !edit_history_section.is_empty() {
3216 prompt.push('\n');
3217 }
3218 prompt.push_str(cursor_prefix_section);
3219 prompt.push_str(FIM_MIDDLE);
3220
3221 prompt
3222 }
3223
3224 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3225 let mut section = String::new();
3226 section.push_str(FIM_SUFFIX);
3227 section.push_str(&context[editable_range.end..]);
3228 if !section.ends_with('\n') {
3229 section.push('\n');
3230 }
3231 section
3232 }
3233
3234 fn build_cursor_prefix_section(
3235 path: &Path,
3236 context: &str,
3237 editable_range: &Range<usize>,
3238 cursor_offset: usize,
3239 ) -> String {
3240 let mut section = String::new();
3241 let path_str = path.to_string_lossy();
3242 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3243
3244 section.push_str(&context[..editable_range.start]);
3245 section.push_str(START_MARKER);
3246 section.push_str(&context[editable_range.start..cursor_offset]);
3247 section.push_str(CURSOR_MARKER);
3248 section.push_str(&context[cursor_offset..editable_range.end]);
3249 if !section.ends_with('\n') {
3250 section.push('\n');
3251 }
3252 section.push_str(SEPARATOR);
3253 section
3254 }
3255
3256 /// Format patch as containing no changes if it's empty; otherwise return None.
3257 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3258 // Count lines in the patch
3259 let empty_patch = patch.lines().count() <= 3;
3260 if empty_patch {
3261 Some(format!("{NO_EDITS}{END_MARKER}"))
3262 } else {
3263 None
3264 }
3265 }
3266}
3267
3268pub mod v0304_variable_edit {
3269 //! A prompt format with no fixed editable region. The entire context is shown
3270 //! to the model, and it chooses which text to replace by outputting surrounding
3271 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3272 //! text.
3273 //!
3274 //! Example prompt:
3275 //!
3276 //! <|file_sep|>path/to/file.py
3277 //! zero
3278 //! one
3279 //! two
3280 //! three<|user_cursor|>
3281 //! four
3282 //! five
3283 //! <|fim_prefix|>
3284 //!
3285 //! Expected output (model generates):
3286 //!
3287 //! two
3288 //! <|fim_middle|>
3289 //! THREE
3290 //! <|fim_suffix|>
3291 //! four
3292 //!
3293 //! The output means: find "two\n...\nfour" in the context, and replace
3294 //! everything between "two\n" and "four" with "THREE\n".
3295
3296 use super::*;
3297
3298 pub fn special_tokens() -> &'static [&'static str] {
3299 &[
3300 "<|fim_prefix|>",
3301 "<|fim_suffix|>",
3302 "<|fim_middle|>",
3303 "<|file_sep|>",
3304 CURSOR_MARKER,
3305 ]
3306 }
3307
3308 pub fn write_cursor_excerpt_section(
3309 prompt: &mut String,
3310 path: &Path,
3311 context: &str,
3312 cursor_offset: usize,
3313 ) {
3314 let path_str = path.to_string_lossy();
3315 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3316
3317 prompt.push_str(&context[..cursor_offset]);
3318 prompt.push_str(CURSOR_MARKER);
3319 prompt.push_str(&context[cursor_offset..]);
3320 if !prompt.ends_with('\n') {
3321 prompt.push('\n');
3322 }
3323 prompt.push_str("<|fim_prefix|>\n")
3324 }
3325
3326 /// Apply a variable-edit model output to the original context text.
3327 ///
3328 /// The model output has the form:
3329 ///
3330 /// - prefix context lines
3331 /// - `<|fim_middle|>`
3332 /// - new text
3333 /// - `<|fim_suffix|>`
3334 /// - suffix context lines
3335 ///
3336 /// We locate the prefix/suffix context lines in the original text and replace
3337 /// everything between them with the new text.
3338 pub fn apply_variable_edit(
3339 context: &str,
3340 model_output: &str,
3341 ) -> Result<(Range<usize>, String)> {
3342 let (prefix_context, rest) = model_output
3343 .split_once("<|fim_middle|>\n")
3344 .or_else(|| model_output.split_once("<|fim_middle|>"))
3345 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3346
3347 let (new_text, suffix_context) = rest
3348 .split_once("<|fim_suffix|>\n")
3349 .or_else(|| rest.split_once("<|fim_suffix|>"))
3350 .unwrap_or((rest, ""));
3351
3352 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3353 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3354 } else {
3355 suffix_context
3356 };
3357
3358 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3359 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3360 + prefix_context.len();
3361 let suffix_offset = if suffix_context.is_empty() {
3362 context.len()
3363 } else {
3364 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3365 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3366 + prefix_offset
3367 };
3368
3369 let edit_range = prefix_offset..suffix_offset;
3370 return Ok((edit_range, new_text.to_string()));
3371 }
3372
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that begins at the start of a line, i.e. at offset 0 or immediately after
/// a `\n`.
///
/// An empty `needle` matches at offset 0. Returns `None` when no line start
/// is followed by `needle`.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Probe every line start directly instead of filtering `match_indices`:
    // that iterator only yields non-overlapping matches, so a mid-line match
    // can swallow the beginning of a valid line-start match (for example
    // `"}\n}\n"` inside `"foo}\n}\n}\n"`).
    std::iter::once(0)
        .chain(haystack.match_indices('\n').map(|(newline, _)| newline + 1))
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
3383
3384 /// Convert a unified diff patch into the variable-edit output format.
3385 ///
3386 /// Parses `patch` as a unified diff against `old_text` and produces model
3387 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3388 /// delimiters. The diff is resolved by content matching rather than line
3389 /// numbers.
3390 pub fn patch_to_variable_edit_output(
3391 old_text: &str,
3392 patch: &str,
3393 cursor_offset: Option<usize>,
3394 ) -> Result<String> {
3395 // Parse the unified diff into hunks. Each hunk has an `old_context`
3396 // string (context + deleted lines interleaved in order) and a list of
3397 // edits expressed as byte ranges within that context plus replacement
3398 // text.
3399 let hunks = parse_hunks(patch);
3400 if hunks.is_empty() {
3401 return Ok(String::new());
3402 }
3403
3404 // Apply each hunk by finding its old_context in the text and
3405 // performing the edits. We search forward from where the previous
3406 // hunk ended so that hunks are applied in order.
3407 let mut new_text = old_text.to_string();
3408 let mut search_from: usize = 0;
3409 let mut first_hunk_pos: Option<usize> = None;
3410
3411 for hunk in &hunks {
3412 let context_pos = new_text[search_from..]
3413 .find(&hunk.old_context)
3414 .map(|pos| pos + search_from)
3415 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3416
3417 if first_hunk_pos.is_none() {
3418 first_hunk_pos = Some(context_pos);
3419 }
3420
3421 // Apply edits in reverse order so byte offsets remain valid.
3422 for edit in hunk.edits.iter().rev() {
3423 let abs_start = context_pos + edit.range.start;
3424 let abs_end = context_pos + edit.range.end;
3425 new_text.replace_range(abs_start..abs_end, &edit.text);
3426 }
3427
3428 // Advance past this hunk's region in the (now modified) text.
3429 let new_region_len: usize =
3430 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3431 len + edit.text.len() - (edit.range.end - edit.range.start)
3432 });
3433 search_from = context_pos + new_region_len;
3434 }
3435
3436 // Now we have old_text and new_text. Find the changed line range by
3437 // comparing them.
3438 let old_lines: Vec<&str> = old_text.lines().collect();
3439 let new_lines: Vec<&str> = new_text.lines().collect();
3440
3441 // Find first differing line.
3442 let first_changed_row = old_lines
3443 .iter()
3444 .zip(new_lines.iter())
3445 .position(|(a, b)| a != b)
3446 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3447
3448 // Find last differing line (from the end).
3449 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3450 let common_suffix = old_lines
3451 .iter()
3452 .rev()
3453 .zip(new_lines.iter().rev())
3454 .take(max_suffix)
3455 .take_while(|(a, b)| a == b)
3456 .count();
3457
3458 let old_end = old_lines.len() - common_suffix;
3459 let new_end = new_lines.len() - common_suffix;
3460
3461 if first_changed_row == old_end && first_changed_row == new_end {
3462 return Ok(String::new());
3463 }
3464
3465 // Build the replacement text from new_lines[first_diff..new_end].
3466 let mut merged_new_text = String::new();
3467 for line in &new_lines[first_changed_row..new_end] {
3468 merged_new_text.push_str(line);
3469 merged_new_text.push('\n');
3470 }
3471
3472 // cursor_offset is relative to the first hunk's new content in
3473 // new_text. Translate it to an offset within merged_new_text, which
3474 // only contains lines first_diff..new_end of new_text.
3475 if let Some(hunk_offset) = cursor_offset {
3476 let hunk_start = first_hunk_pos.unwrap_or(0);
3477 let absolute_pos = hunk_start + hunk_offset;
3478
3479 // Byte offset where first_diff starts in new_text.
3480 let merged_start: usize = new_lines[..first_changed_row]
3481 .iter()
3482 .map(|line| line.len() + 1)
3483 .sum();
3484
3485 if absolute_pos >= merged_start {
3486 let relative_offset = absolute_pos - merged_start;
3487 if relative_offset <= merged_new_text.len() {
3488 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3489 }
3490 }
3491 }
3492
3493 // Build output with 2 lines of context above and below.
3494 let context_lines_count = 2;
3495 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3496 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3497
/// Counts how many times the run of lines `lines[line_range]` occurs as a
/// contiguous run anywhere in `lines` (overlapping occurrences included).
///
/// The run is taken from `lines` itself, so the count is at least 1 for a
/// non-empty range. Panics if `line_range` is out of bounds for `lines`.
fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
    let needle = &lines[line_range];
    let needle_len = needle.len();
    // The needle is a sub-slice of `lines`, so this subtraction cannot underflow.
    let last_start = lines.len() - needle_len;

    (0..=last_start)
        .filter(|&start| &lines[start..start + needle_len] == needle)
        .count()
}
3510
3511 // Expand prefix and suffix until they are unique
3512 while prefix_start > 0 {
3513 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3514 prefix_start -= 1;
3515 } else {
3516 break;
3517 }
3518 }
3519 while suffix_end < old_lines.len() {
3520 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3521 suffix_end += 1;
3522 } else {
3523 break;
3524 }
3525 }
3526
3527 let mut output = String::new();
3528 for line in &old_lines[prefix_start..first_changed_row] {
3529 output.push_str(line);
3530 output.push('\n');
3531 }
3532 output.push_str("<|fim_middle|>\n");
3533 output.push_str(&merged_new_text);
3534 output.push_str("<|fim_suffix|>\n");
3535 for line in &old_lines[old_end..suffix_end] {
3536 output.push_str(line);
3537 output.push('\n');
3538 }
3539
3540 Ok(output)
3541 }
3542
/// One `@@` hunk of a unified diff, as produced by `parse_hunks`.
struct ParsedHunk {
    /// The hunk's context lines and removed lines, in order, each terminated
    /// by `\n`. This is the text that is searched for in the original.
    old_context: String,
    /// Edits to apply, expressed as byte ranges into `old_context`.
    edits: Vec<ParsedEdit>,
}
3547
/// A single replacement inside a `ParsedHunk`.
struct ParsedEdit {
    /// Byte range within the hunk's `old_context` that is replaced. Empty
    /// (`pos..pos`) for a pure insertion.
    range: Range<usize>,
    /// Replacement text: the added lines, each terminated by `\n`. Empty for
    /// a pure deletion.
    text: String,
}
3552
3553 /// Parse a unified diff into content-based hunks. Each hunk contains an
3554 /// `old_context` string (context lines + deleted lines, which together
3555 /// form the text that should be found in the original) and a list of edits
3556 /// expressed as byte ranges within that context.
3557 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3558 let mut hunks = Vec::new();
3559 let mut current: Option<ParsedHunk> = None;
3560
3561 for line in patch.lines() {
3562 if line.starts_with("@@") {
3563 if let Some(hunk) = current.take() {
3564 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3565 hunks.push(hunk);
3566 }
3567 }
3568 current = Some(ParsedHunk {
3569 old_context: String::new(),
3570 edits: Vec::new(),
3571 });
3572 } else if line.starts_with("---") || line.starts_with("+++") {
3573 continue;
3574 } else if let Some(hunk) = &mut current {
3575 if let Some(added) = line.strip_prefix('+') {
3576 let pos = hunk.old_context.len();
3577 if let Some(last_edit) = hunk.edits.last_mut() {
3578 if last_edit.range.end == pos {
3579 writeln!(&mut last_edit.text, "{added}").ok();
3580 continue;
3581 }
3582 }
3583 hunk.edits.push(ParsedEdit {
3584 range: pos..pos,
3585 text: format!("{added}\n"),
3586 });
3587 } else if let Some(removed) = line.strip_prefix('-') {
3588 let start = hunk.old_context.len();
3589 writeln!(&mut hunk.old_context, "{removed}").ok();
3590 let end = hunk.old_context.len();
3591 if let Some(last_edit) = hunk.edits.last_mut() {
3592 if last_edit.range.end == start {
3593 last_edit.range.end = end;
3594 continue;
3595 }
3596 }
3597 hunk.edits.push(ParsedEdit {
3598 range: start..end,
3599 text: String::new(),
3600 });
3601 } else {
3602 let ctx = line.strip_prefix(' ').unwrap_or(line);
3603 writeln!(&mut hunk.old_context, "{ctx}").ok();
3604 }
3605 }
3606 }
3607
3608 if let Some(hunk) = current {
3609 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3610 hunks.push(hunk);
3611 }
3612 }
3613
3614 hunks
3615 }
3616
3617 #[cfg(test)]
3618 mod tests {
3619 use super::*;
3620 use indoc::indoc;
3621
/// Table-driven check of `apply_variable_edit`: each case applies a model
/// output (prefix context, `<|fim_middle|>` replacement, `<|fim_suffix|>`
/// context) to `original` and compares the edited text with `expected`.
#[test]
fn test_apply_variable_edit() {
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for Case {
        name,
        original,
        model_output,
        expected,
    } in cases
    {
        let (edit_range, replacement) = apply_variable_edit(original, model_output).unwrap();
        // Splice the returned replacement into a copy of the original text.
        let mut edited = String::from(original);
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, expected, "{}", name);
    }
}
3838
/// Table-driven round-trip check for `patch_to_variable_edit_output`.
///
/// For every case the patch is converted to the variable-edit format and
/// compared with `expected_variable_edit`; then both the produced output and
/// the expected output are applied to `old` with `apply_variable_edit` and
/// must yield `expected_after_apply`.
///
/// `cursor_offset` is relative to the start of the first hunk's context, so
/// e.g. `Some(4)` with a leading ` two` context line points right after
/// `two\n`.
#[test]
fn test_patch_to_variable_edit() {
    struct Case {
        // Must be unique: it is the only thing identifying the case in
        // assertion and panic messages.
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        cursor_offset: Option<usize>,
        expected_variable_edit: &'static str,
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                 b
                +X
                 c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                 b
                -c
                 d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                 two
                 a
                 b
                -c
                +C
                 d
                 three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            // Same idea as the previous case, but with repeated nested brace
            // blocks, where both sides need several extra lines to be unique.
            name: "expands_context_when_nested_blocks_not_unique_before_and_after",
            old: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        two();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                     {
                -        two();
                +        TWO();
                     }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                        one();
                    }
                }
                {
                    {
                <|fim_middle|>
                        TWO();
                <|fim_suffix|>
                    }
                }
                {
                    {
                        three();
            "},
            expected_after_apply: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        TWO();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
        },
    ];

    for case in cases {
        // 1. patch -> variable edit
        let output = patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
            .unwrap_or_else(|error| {
                panic!("failed converting patch for {}: {error}", case.name)
            });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // 2. The produced variable edit must apply cleanly to the old text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // 3. The expected variable edit itself must also round-trip, which
        //    guards against a fixture that is wrong in the same way as the code.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
4255
/// Checks that `write_cursor_excerpt_section` emits the file separator and
/// path, the excerpt with the cursor marker spliced in at `cursor_offset`,
/// and a trailing `<|fim_prefix|>` line.
#[test]
fn test_write_cursor_excerpt_section() {
    let path = Path::new("test.rs");
    // The body line is indented by four spaces, so byte offset 17
    // (12 bytes of "fn main() {\n" + 4 spaces + 'h') lands right after the
    // `h` of `hello`, as the expected prompt below requires.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;
    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
4268 }
4269}
4270
4271/// The zeta1 prompt format
4272pub mod zeta1 {
4273 use super::*;
4274 use std::fmt::Write;
4275
/// Cursor marker used by the zeta1 prompt format. This shadows the
/// crate-level `CURSOR_MARKER` (`<|user_cursor|>`) inside this module.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Written at the top of the excerpt when it starts at the beginning of the file.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Opens the editable region of the excerpt.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Closes the editable region of the excerpt.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

/// Fixed preamble of a zeta1 prompt; the formatted user edits follow it.
const INSTRUCTION_HEADER: &str = concat!(
    "### Instruction:\n",
    "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
    "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
    "into account the cursor location.\n\n",
    "### User Edits:\n\n"
);
/// Separates the user edits from the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
/// Terminates the prompt; the model's response follows it.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4290
4291 /// Formats a complete zeta1 prompt from the input events and excerpt.
4292 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4293 let mut prompt = String::with_capacity(
4294 INSTRUCTION_HEADER.len()
4295 + input_events.len()
4296 + EXCERPT_HEADER.len()
4297 + input_excerpt.len()
4298 + RESPONSE_HEADER.len(),
4299 );
4300 prompt.push_str(INSTRUCTION_HEADER);
4301 prompt.push_str(input_events);
4302 prompt.push_str(EXCERPT_HEADER);
4303 prompt.push_str(input_excerpt);
4304 prompt.push_str(RESPONSE_HEADER);
4305 prompt
4306 }
4307
4308 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4309 /// editable and context byte-offset ranges within `cursor_excerpt`.
4310 pub fn format_zeta1_from_input(
4311 input: &ZetaPromptInput,
4312 editable_range: Range<usize>,
4313 context_range: Range<usize>,
4314 ) -> String {
4315 let events = format_zeta1_events(&input.events);
4316 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4317 format_zeta1_prompt(&events, &excerpt)
4318 }
4319
4320 /// Formats events in zeta1 style (oldest first).
4321 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4322 let mut result = String::new();
4323 for event in
4324 events
4325 .iter()
4326 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4327 &ZetaFormat::V0114180EditableRegion,
4328 )))
4329 {
4330 let event_string = format_zeta1_event(event);
4331 if event_string.is_empty() {
4332 continue;
4333 }
4334 if !result.is_empty() {
4335 result.push_str("\n\n");
4336 }
4337 result.push_str(&event_string);
4338 }
4339 result
4340 }
4341
4342 fn format_zeta1_event(event: &Event) -> String {
4343 match event {
4344 Event::BufferChange {
4345 path,
4346 old_path,
4347 diff,
4348 ..
4349 } => {
4350 let mut prompt = String::new();
4351 if old_path != path {
4352 writeln!(
4353 prompt,
4354 "User renamed {} to {}\n",
4355 old_path.display(),
4356 path.display()
4357 )
4358 .ok();
4359 }
4360 if !diff.is_empty() {
4361 write!(
4362 prompt,
4363 "User edited {}:\n```diff\n{}\n```",
4364 path.display(),
4365 diff
4366 )
4367 .ok();
4368 }
4369 prompt
4370 }
4371 }
4372 }
4373
4374 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4375 /// within `cursor_excerpt`.
4376 fn format_zeta1_excerpt(
4377 input: &ZetaPromptInput,
4378 editable_range: Range<usize>,
4379 context_range: Range<usize>,
4380 ) -> String {
4381 let path_str = input.cursor_path.to_string_lossy();
4382 let excerpt = &*input.cursor_excerpt;
4383 let cursor_offset = input.cursor_offset_in_excerpt;
4384
4385 let mut prompt = String::new();
4386 writeln!(&mut prompt, "```{path_str}").ok();
4387
4388 let starts_at_file_beginning =
4389 input.excerpt_start_row == Some(0) && context_range.start == 0;
4390 if starts_at_file_beginning {
4391 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4392 }
4393
4394 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4395
4396 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4397 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4398 prompt.push_str(CURSOR_MARKER);
4399 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4400 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4401
4402 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4403 write!(prompt, "\n```").ok();
4404
4405 prompt
4406 }
4407
4408 /// Cleans zeta1 model output by extracting content between editable region
4409 /// markers and converting the zeta1 cursor marker to the universal one.
4410 /// Returns `None` if the output doesn't contain the expected markers.
4411 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4412 let content = output.replace(CURSOR_MARKER, "");
4413
4414 let content_start = content
4415 .find(EDITABLE_REGION_START_MARKER)
4416 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4417 .map(|pos| {
4418 if content.as_bytes().get(pos) == Some(&b'\n') {
4419 pos + 1
4420 } else {
4421 pos
4422 }
4423 })
4424 .unwrap_or(0);
4425
4426 let content_end = content
4427 .find(EDITABLE_REGION_END_MARKER)
4428 .map(|pos| {
4429 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4430 pos - 1
4431 } else {
4432 pos
4433 }
4434 })
4435 .unwrap_or(content.len());
4436
4437 if content_start > content_end {
4438 return Some(String::new());
4439 }
4440
4441 let extracted = &content[content_start..content_end];
4442
4443 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4444 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4445 let text_before_cursor = text_before_cursor
4446 .find(EDITABLE_REGION_START_MARKER)
4447 .map(|pos| {
4448 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4449 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4450 after_marker + 1
4451 } else {
4452 after_marker
4453 }
4454 })
4455 .unwrap_or(0);
4456 let offset_in_extracted = zeta1_cursor_pos
4457 .saturating_sub(text_before_cursor)
4458 .min(extracted.len());
4459 offset_in_extracted
4460 });
4461
4462 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4463 if let Some(offset) = cursor_offset {
4464 result.push_str(&extracted[..offset]);
4465 result.push_str(super::CURSOR_MARKER);
4466 result.push_str(&extracted[offset..]);
4467 } else {
4468 result.push_str(extracted);
4469 }
4470
4471 Some(result)
4472 }
4473}
4474
4475#[cfg(test)]
4476mod tests {
4477 use super::*;
4478 use indoc::indoc;
4479
4480 fn make_input(
4481 cursor_excerpt: &str,
4482 editable_range: Range<usize>,
4483 cursor_offset: usize,
4484 events: Vec<Event>,
4485 related_files: Vec<RelatedFile>,
4486 ) -> ZetaPromptInput {
4487 let context_range = 0..cursor_excerpt.len();
4488 ZetaPromptInput {
4489 cursor_path: Path::new("test.rs").into(),
4490 cursor_excerpt: cursor_excerpt.into(),
4491 cursor_offset_in_excerpt: cursor_offset,
4492 excerpt_start_row: None,
4493 events: events.into_iter().map(Arc::new).collect(),
4494 related_files: Some(related_files),
4495 active_buffer_diagnostics: vec![],
4496 excerpt_ranges: ExcerptRanges {
4497 editable_150: editable_range.clone(),
4498 editable_180: editable_range.clone(),
4499 editable_350: editable_range,
4500 editable_150_context_350: context_range.clone(),
4501 editable_180_context_350: context_range.clone(),
4502 editable_350_context_150: context_range,
4503 ..Default::default()
4504 },
4505 syntax_ranges: None,
4506 experiment: None,
4507 in_open_source_repo: false,
4508 can_collect_data: false,
4509 repo_url: None,
4510 }
4511 }
4512
4513 fn make_input_with_context_range(
4514 excerpt: &str,
4515 editable_range: Range<usize>,
4516 context_range: Range<usize>,
4517 cursor_offset: usize,
4518 ) -> ZetaPromptInput {
4519 ZetaPromptInput {
4520 cursor_path: Path::new("test.rs").into(),
4521 cursor_excerpt: excerpt.into(),
4522 cursor_offset_in_excerpt: cursor_offset,
4523 excerpt_start_row: None,
4524 events: vec![],
4525 related_files: Some(vec![]),
4526 active_buffer_diagnostics: vec![],
4527 excerpt_ranges: ExcerptRanges {
4528 editable_150: editable_range.clone(),
4529 editable_180: editable_range.clone(),
4530 editable_350: editable_range,
4531 editable_150_context_350: context_range.clone(),
4532 editable_180_context_350: context_range.clone(),
4533 editable_350_context_150: context_range,
4534 ..Default::default()
4535 },
4536 syntax_ranges: None,
4537 experiment: None,
4538 in_open_source_repo: false,
4539 can_collect_data: false,
4540 repo_url: None,
4541 }
4542 }
4543
4544 fn make_event(path: &str, diff: &str) -> Event {
4545 Event::BufferChange {
4546 path: Path::new(path).into(),
4547 old_path: Path::new(path).into(),
4548 diff: diff.to_string(),
4549 predicted: false,
4550 in_open_source_repo: false,
4551 }
4552 }
4553
4554 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4555 RelatedFile {
4556 path: Path::new(path).into(),
4557 max_row: content.lines().count() as u32,
4558 excerpts: vec![RelatedExcerpt {
4559 row_range: 0..content.lines().count() as u32,
4560 text: content.into(),
4561 order: 0,
4562 }],
4563 in_open_source_repo: false,
4564 }
4565 }
4566
4567 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4568 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4569 }
4570
/// Scales `requested_tokens` up by 1/0.9 (rounding up), so that a budget
/// which is later reduced to 90% still leaves at least `requested_tokens`.
fn budget_with_margin(requested_tokens: usize) -> usize {
    let scaled = requested_tokens as f64 / 0.9;
    scaled.ceil() as usize
}
4574
/// With a generous budget the prompt contains every section: related files,
/// edit history, and the cursor excerpt with its prefix/suffix context.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    let prompt = format_with_budget(&input, 10000).unwrap();
    assert_eq!(prompt, expected);
}
4608
/// Compares the full prompt with the prompt produced under a tight budget.
///
/// NOTE(review): despite the test name, the tight-budget expectation below
/// keeps the edit history and drops the two related files.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "aaaaaaa\n"),
        make_related_file("r2.rs", "bbbbbbb\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Generous budget: everything is present.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        aaaaaaa
        <|file_sep|>r2.rs
        bbbbbbb
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), untruncated);

    // Tight budget: the related files no longer fit.
    let truncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)),
        Some(truncated.to_string())
    );
}
4664
/// A related file with several excerpts may be included only partially when
/// the budget is tight: here only the first excerpt of `big.rs` survives.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpt = |row_range, text: &str| RelatedExcerpt {
        row_range,
        text: text.into(),
        order: 0,
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Generous budget: all three excerpts, separated by `...`.
    let untruncated = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), untruncated);

    // Tight budget: only the first excerpt, followed by `...`.
    let truncated = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(50)).unwrap(),
        truncated
    );
}
4731
/// Two files: file_a has a high-order excerpt, file_b has a low-order one.
/// With a tight budget, only the lower-order excerpt from file_b should be
/// included.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("file_a.rs", "low priority content\n", 5),
        single_excerpt_file("file_b.rs", "high priority content\n", 1),
    ];
    let input = make_input("x", 0..1, 0, vec![], related_files);

    // With large budget, both files included; rendered in stable lexicographic order.
    let untruncated = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), untruncated);

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let truncated = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(52)).unwrap(),
        truncated
    );
}
4802
/// A single file has excerpts at order 1 and order 3. With a tight budget,
/// only the order-1 excerpts are included while the order-3 excerpt is
/// dropped, even though they belong to the same file. This also preserves
/// the parent invariant: parent outline items have order <= their best
/// child, so they're always included when any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let excerpt = |row_range, text: &str, order| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..5, "mod header\n", 1),
            excerpt(5..15, "important fn\n", 1),
            excerpt(15..30, "less important fn\n", 3),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With large budget, all three excerpts included.
    let untruncated = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), untruncated);

    // With tight budget, only order<=1 excerpts included (header + important fn).
    let truncated = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)).unwrap(),
        truncated
    );
}
4878
/// When the edit history does not fit, the oldest event (`old.rs`) is
/// dropped and the most recent one (`new.rs`) is kept.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Generous budget: both events, oldest first.
    let untruncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), untruncated);

    // Budget of 60 (passed as-is, not via `budget_with_margin`): only the
    // newest event remains.
    let truncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 60).unwrap(), truncated);
}
4926
4927 #[test]
4928 fn test_cursor_excerpt_always_included_with_minimal_budget() {
4929 let input = make_input(
4930 "fn main() {}",
4931 0..12,
4932 3,
4933 vec![make_event("a.rs", "-old\n+new\n")],
4934 vec![make_related_file("related.rs", "helper\n")],
4935 );
4936
4937 assert!(format_with_budget(&input, 30).is_none())
4938 }
4939
4940 #[track_caller]
4941 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4942 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4943 .expect("seed coder prompt formatting should succeed")
4944 }
4945
4946 #[track_caller]
4947 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4948 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4949 .expect("seed coder prompt formatting should succeed")
4950 }
4951
4952 #[test]
4953 fn test_seed_coder_basic_format() {
4954 let input = make_input(
4955 "prefix\neditable\nsuffix",
4956 7..15,
4957 10,
4958 vec![make_event("a.rs", "-old\n+new\n")],
4959 vec![make_related_file("related.rs", "fn helper() {}\n")],
4960 );
4961
4962 assert_eq!(
4963 format_seed_coder(&input),
4964 indoc! {r#"
4965 <[fim-suffix]>
4966 suffix
4967 <[fim-prefix]><filename>related.rs
4968 fn helper() {}
4969
4970 <filename>edit_history
4971 --- a/a.rs
4972 +++ b/a.rs
4973 -old
4974 +new
4975
4976 <filename>test.rs
4977 prefix
4978 <<<<<<< CURRENT
4979 edi<|user_cursor|>table
4980 =======
4981 <[fim-middle]>"#}
4982 );
4983 }
4984
4985 #[test]
4986 fn test_v0317_formats_prompt_with_many_related_files() {
4987 let related_files = (0..900)
4988 .map(|index| {
4989 make_related_file(
4990 &format!("related_{index}.rs"),
4991 "fn helper() {\n let value = 1;\n}\n",
4992 )
4993 })
4994 .collect();
4995
4996 let input = make_input(
4997 "code",
4998 0..4,
4999 2,
5000 vec![make_event("a.rs", "-x\n+y\n")],
5001 related_files,
5002 );
5003
5004 let prompt =
5005 format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5006
5007 assert!(prompt.is_some());
5008 let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5009 assert!(prompt.contains("test.rs"));
5010 assert!(prompt.contains(CURSOR_MARKER));
5011 }
5012
5013 #[test]
5014 fn test_seed_coder_no_context() {
5015 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5016
5017 assert_eq!(
5018 format_seed_coder(&input),
5019 indoc! {r#"
5020 <[fim-suffix]>
5021 after
5022 <[fim-prefix]><filename>test.rs
5023 before
5024 <<<<<<< CURRENT
5025 mid<|user_cursor|>dle
5026 =======
5027 <[fim-middle]>"#}
5028 );
5029 }
5030
5031 #[test]
5032 fn test_seed_coder_truncation_drops_context() {
5033 let input = make_input(
5034 "code",
5035 0..4,
5036 2,
5037 vec![make_event("a.rs", "-x\n+y\n")],
5038 vec![make_related_file("r1.rs", "content\n")],
5039 );
5040
5041 // With large budget, everything is included
5042 assert_eq!(
5043 format_seed_coder(&input),
5044 indoc! {r#"
5045 <[fim-suffix]>
5046 <[fim-prefix]><filename>r1.rs
5047 content
5048
5049 <filename>edit_history
5050 --- a/a.rs
5051 +++ b/a.rs
5052 -x
5053 +y
5054
5055 <filename>test.rs
5056 <<<<<<< CURRENT
5057 co<|user_cursor|>de
5058 =======
5059 <[fim-middle]>"#}
5060 );
5061
5062 assert_eq!(
5063 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5064 None
5065 );
5066
5067 assert_eq!(
5068 format_seed_coder_with_budget(&input, 40),
5069 indoc! {r#"
5070 <[fim-suffix]>
5071 <[fim-prefix]><filename>test.rs
5072 <<<<<<< CURRENT
5073 co<|user_cursor|>de
5074 =======
5075 <[fim-middle]>"#
5076 }
5077 )
5078 }
5079
5080 #[test]
5081 fn test_seed_coder_truncation_prioritizes_lower_order() {
5082 let input = make_input(
5083 "code",
5084 0..4,
5085 2,
5086 vec![],
5087 vec![
5088 RelatedFile {
5089 path: Path::new("low_prio.rs").into(),
5090 max_row: 5,
5091 in_open_source_repo: false,
5092 excerpts: vec![RelatedExcerpt {
5093 row_range: 0..5,
5094 text: "low prio\n".into(),
5095 order: 10,
5096 }],
5097 },
5098 RelatedFile {
5099 path: Path::new("high_prio.rs").into(),
5100 max_row: 5,
5101 in_open_source_repo: false,
5102 excerpts: vec![RelatedExcerpt {
5103 row_range: 0..5,
5104 text: "high prio\n".into(),
5105 order: 1,
5106 }],
5107 },
5108 ],
5109 );
5110
5111 // With large budget, both included; rendered in stable lexicographic order.
5112 assert_eq!(
5113 format_seed_coder(&input),
5114 indoc! {r#"
5115 <[fim-suffix]>
5116 <[fim-prefix]><filename>low_prio.rs
5117 low prio
5118 <filename>high_prio.rs
5119 high prio
5120
5121 <filename>test.rs
5122 <<<<<<< CURRENT
5123 co<|user_cursor|>de
5124 =======
5125 <[fim-middle]>"#}
5126 );
5127
5128 // With tight budget under the generic heuristic, context is dropped but the
5129 // minimal cursor section still fits.
5130 assert_eq!(
5131 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5132 Some(
5133 indoc! {r#"
5134 <[fim-suffix]>
5135 <[fim-prefix]><filename>test.rs
5136 <<<<<<< CURRENT
5137 co<|user_cursor|>de
5138 =======
5139 <[fim-middle]>"#}
5140 .to_string()
5141 )
5142 );
5143 }
5144
5145 #[test]
5146 fn test_format_zeta1_from_input_basic() {
5147 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
5148 let input = ZetaPromptInput {
5149 cursor_path: Path::new("src/main.rs").into(),
5150 cursor_excerpt: excerpt.into(),
5151 cursor_offset_in_excerpt: 30,
5152 excerpt_start_row: Some(0),
5153 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5154 related_files: Some(vec![]),
5155 active_buffer_diagnostics: vec![],
5156 excerpt_ranges: ExcerptRanges {
5157 editable_150: 15..41,
5158 editable_180: 15..41,
5159 editable_350: 15..41,
5160 editable_150_context_350: 0..excerpt.len(),
5161 editable_180_context_350: 0..excerpt.len(),
5162 editable_350_context_150: 0..excerpt.len(),
5163 ..Default::default()
5164 },
5165 syntax_ranges: None,
5166 experiment: None,
5167 in_open_source_repo: false,
5168 can_collect_data: false,
5169 repo_url: None,
5170 };
5171
5172 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5173
5174 assert_eq!(
5175 prompt,
5176 concat!(
5177 "### Instruction:\n",
5178 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5179 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5180 "into account the cursor location.\n",
5181 "\n",
5182 "### User Edits:\n",
5183 "\n",
5184 "User edited other.rs:\n",
5185 "```diff\n",
5186 "-old\n",
5187 "+new\n",
5188 "\n",
5189 "```\n",
5190 "\n",
5191 "### User Excerpt:\n",
5192 "\n",
5193 "```src/main.rs\n",
5194 "<|start_of_file|>\n",
5195 "fn before() {}\n",
5196 "<|editable_region_start|>\n",
5197 "fn foo() {\n",
5198 " <|user_cursor_is_here|>let x = 1;\n",
5199 "\n",
5200 "<|editable_region_end|>}\n",
5201 "fn after() {}\n",
5202 "\n",
5203 "```\n",
5204 "\n",
5205 "### Response:\n",
5206 ),
5207 );
5208 }
5209
5210 #[test]
5211 fn test_format_zeta1_from_input_no_start_of_file() {
5212 let excerpt = "fn foo() {\n let x = 1;\n}\n";
5213 let input = ZetaPromptInput {
5214 cursor_path: Path::new("src/main.rs").into(),
5215 cursor_excerpt: excerpt.into(),
5216 cursor_offset_in_excerpt: 15,
5217 excerpt_start_row: Some(10),
5218 events: vec![],
5219 related_files: Some(vec![]),
5220 active_buffer_diagnostics: vec![],
5221 excerpt_ranges: ExcerptRanges {
5222 editable_150: 0..28,
5223 editable_180: 0..28,
5224 editable_350: 0..28,
5225 editable_150_context_350: 0..28,
5226 editable_180_context_350: 0..28,
5227 editable_350_context_150: 0..28,
5228 ..Default::default()
5229 },
5230 syntax_ranges: None,
5231 experiment: None,
5232 in_open_source_repo: false,
5233 can_collect_data: false,
5234 repo_url: None,
5235 };
5236
5237 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5238
5239 assert_eq!(
5240 prompt,
5241 concat!(
5242 "### Instruction:\n",
5243 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5244 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5245 "into account the cursor location.\n",
5246 "\n",
5247 "### User Edits:\n",
5248 "\n",
5249 "\n",
5250 "\n",
5251 "### User Excerpt:\n",
5252 "\n",
5253 "```src/main.rs\n",
5254 "<|editable_region_start|>\n",
5255 "fn foo() {\n",
5256 " <|user_cursor_is_here|>let x = 1;\n",
5257 "}\n",
5258 "\n",
5259 "<|editable_region_end|>\n",
5260 "```\n",
5261 "\n",
5262 "### Response:\n",
5263 ),
5264 );
5265 }
5266
5267 #[test]
5268 fn test_format_zeta1_from_input_with_sub_ranges() {
5269 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
5270 let editable_range = 10..37;
5271 let context_range = 0..excerpt.len();
5272
5273 let input = ZetaPromptInput {
5274 cursor_path: Path::new("test.rs").into(),
5275 cursor_excerpt: excerpt.into(),
5276 cursor_offset_in_excerpt: 25,
5277 excerpt_start_row: Some(0),
5278 events: vec![],
5279 related_files: Some(vec![]),
5280 active_buffer_diagnostics: vec![],
5281 excerpt_ranges: ExcerptRanges {
5282 editable_150: editable_range.clone(),
5283 editable_180: editable_range.clone(),
5284 editable_350: editable_range.clone(),
5285 editable_150_context_350: context_range.clone(),
5286 editable_180_context_350: context_range.clone(),
5287 editable_350_context_150: context_range.clone(),
5288 ..Default::default()
5289 },
5290 syntax_ranges: None,
5291 experiment: None,
5292 in_open_source_repo: false,
5293 can_collect_data: false,
5294 repo_url: None,
5295 };
5296
5297 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5298
5299 assert_eq!(
5300 prompt,
5301 concat!(
5302 "### Instruction:\n",
5303 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5304 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5305 "into account the cursor location.\n",
5306 "\n",
5307 "### User Edits:\n",
5308 "\n",
5309 "\n",
5310 "\n",
5311 "### User Excerpt:\n",
5312 "\n",
5313 "```test.rs\n",
5314 "<|start_of_file|>\n",
5315 "// prefix\n",
5316 "<|editable_region_start|>\n",
5317 "fn foo() {\n",
5318 " <|user_cursor_is_here|>let x = 1;\n",
5319 "}\n",
5320 "<|editable_region_end|>\n",
5321 "// suffix\n",
5322 "\n",
5323 "```\n",
5324 "\n",
5325 "### Response:\n",
5326 ),
5327 );
5328 }
5329
5330 #[test]
5331 fn test_max_event_count() {
5332 fn make_numbered_event(index: usize) -> Event {
5333 return make_event(
5334 &format!("event-{index}.rs"),
5335 &format!("-old-{index}\n+new-{index}\n"),
5336 );
5337 }
5338 let input = make_input(
5339 "x",
5340 0..1,
5341 0,
5342 (0..3).map(make_numbered_event).collect(),
5343 vec![],
5344 );
5345
5346 let edit_history_section = format_edit_history_within_budget(
5347 &input.events,
5348 "<|file_sep|>",
5349 "edit history",
5350 usize::MAX,
5351 5,
5352 );
5353
5354 assert_eq!(
5355 &edit_history_section,
5356 indoc!(
5357 "
5358 <|file_sep|>edit history
5359 --- a/event-0.rs
5360 +++ b/event-0.rs
5361 -old-0
5362 +new-0
5363 --- a/event-1.rs
5364 +++ b/event-1.rs
5365 -old-1
5366 +new-1
5367 --- a/event-2.rs
5368 +++ b/event-2.rs
5369 -old-2
5370 +new-2
5371 "
5372 )
5373 );
5374
5375 let edit_history_section = format_edit_history_within_budget(
5376 &input.events,
5377 "<|file_sep|>",
5378 "edit history",
5379 usize::MAX,
5380 2,
5381 );
5382
5383 assert_eq!(
5384 &edit_history_section,
5385 indoc!(
5386 "
5387 <|file_sep|>edit history
5388 --- a/event-1.rs
5389 +++ b/event-1.rs
5390 -old-1
5391 +new-1
5392 --- a/event-2.rs
5393 +++ b/event-2.rs
5394 -old-2
5395 +new-2
5396 "
5397 )
5398 );
5399
5400 let edit_history_section = format_edit_history_within_budget(
5401 &input.events,
5402 "<|file_sep|>",
5403 "edit history",
5404 usize::MAX,
5405 0,
5406 );
5407
5408 assert_eq!(&edit_history_section, "");
5409 }
5410
5411 #[test]
5412 fn test_clean_zeta1_model_output_basic() {
5413 let output = indoc! {"
5414 <|editable_region_start|>
5415 fn main() {
5416 println!(\"hello\");
5417 }
5418 <|editable_region_end|>
5419 "};
5420
5421 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5422 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
5423 }
5424
5425 #[test]
5426 fn test_clean_zeta1_model_output_with_cursor() {
5427 let output = indoc! {"
5428 <|editable_region_start|>
5429 fn main() {
5430 <|user_cursor_is_here|>println!(\"hello\");
5431 }
5432 <|editable_region_end|>
5433 "};
5434
5435 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5436 assert_eq!(
5437 cleaned,
5438 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
5439 );
5440 }
5441
5442 #[test]
5443 fn test_clean_zeta1_model_output_no_markers() {
5444 let output = "fn main() {}\n";
5445 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5446 assert_eq!(cleaned, "fn main() {}\n");
5447 }
5448
5449 #[test]
5450 fn test_clean_zeta1_model_output_empty_region() {
5451 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5452 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5453 assert_eq!(cleaned, "");
5454 }
5455
5456 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5457 let mut result = excerpt.to_string();
5458 result.replace_range(
5459 parsed_output.range_in_excerpt.clone(),
5460 &parsed_output.new_editable_region,
5461 );
5462 result
5463 }
5464
5465 #[test]
5466 fn test_parse_zeta2_model_output() {
5467 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5468 let context_start = excerpt.find("ctx start").unwrap();
5469 let context_end = excerpt.find("after ctx").unwrap();
5470 let editable_start = excerpt.find("editable old").unwrap();
5471 let editable_end = editable_start + "editable old\n".len();
5472 let input = make_input_with_context_range(
5473 excerpt,
5474 editable_start..editable_end,
5475 context_start..context_end,
5476 editable_start,
5477 );
5478
5479 let output = parse_zeta2_model_output(
5480 "editable new\n>>>>>>> UPDATED\n",
5481 ZetaFormat::V0131GitMergeMarkersPrefix,
5482 &input,
5483 )
5484 .unwrap();
5485
5486 assert_eq!(
5487 apply_edit(excerpt, &output),
5488 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5489 );
5490 }
5491
5492 #[test]
5493 fn test_parse_zeta2_model_output_identity() {
5494 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5495 let editable_start = excerpt.find("bbb").unwrap();
5496 let editable_end = excerpt.find("ddd").unwrap();
5497 let input = make_input_with_context_range(
5498 excerpt,
5499 editable_start..editable_end,
5500 0..excerpt.len(),
5501 editable_start,
5502 );
5503
5504 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5505 let output =
5506 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5507
5508 assert_eq!(apply_edit(excerpt, &output), excerpt);
5509 }
5510
5511 #[test]
5512 fn test_parse_zeta2_model_output_strips_end_marker() {
5513 let excerpt = "hello\nworld\n";
5514 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5515
5516 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5517 let output1 =
5518 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5519 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5520
5521 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5522 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5523 }
5524
5525 #[test]
5526 fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5527 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5528 let context_start = excerpt.find("ctx start").unwrap();
5529 let context_end = excerpt.find("after ctx").unwrap();
5530 let editable_start = excerpt.find("editable old").unwrap();
5531 let editable_end = editable_start + "editable old\n".len();
5532 let input = make_input_with_context_range(
5533 excerpt,
5534 editable_start..editable_end,
5535 context_start..context_end,
5536 editable_start,
5537 );
5538
5539 let parsed = parse_zeta2_model_output(
5540 "editable new\n>>>>>>> UPDATED\n",
5541 ZetaFormat::V0131GitMergeMarkersPrefix,
5542 &input,
5543 )
5544 .unwrap();
5545 let expected = apply_edit(excerpt, &parsed);
5546 let patch = parsed_output_to_patch(&input, parsed).unwrap();
5547 let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5548
5549 assert_eq!(patched, expected);
5550 }
5551
5552 #[test]
5553 fn test_special_tokens_not_triggered_by_comment_separator() {
5554 // Regression test for https://github.com/zed-industries/zed/issues/52489
5555 let excerpt = "fn main() {\n // =======\n println!(\"hello\");\n}\n";
5556 let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5557 assert!(
5558 !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5559 "comment containing ======= should not trigger special token detection"
5560 );
5561 }
5562}