1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker string handed to the cursor-section writers to denote the user's
/// cursor; it also appears in the special-token lists visible in this file.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";

/// Token budget that [`format_zeta_prompt`] passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for `bytes` bytes of text: three bytes per token,
/// rounded down by integer division.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Leave some slack to avoid overflow.
///
/// Returns 90% of `max_tokens`, rounded down.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const BUDGET_FRACTION: f64 = 0.9;
    let scaled = max_tokens as f64 * BUDGET_FRACTION;
    scaled.floor() as usize
}
32
33#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
34pub struct ZetaPromptInput {
35 pub cursor_path: Arc<Path>,
36 pub cursor_excerpt: Arc<str>,
37 pub cursor_offset_in_excerpt: usize,
38 #[serde(default, skip_serializing_if = "Option::is_none")]
39 pub excerpt_start_row: Option<u32>,
40 pub events: Vec<Arc<Event>>,
41 #[serde(default)]
42 pub related_files: Option<Vec<RelatedFile>>,
43 #[serde(default, skip_serializing_if = "Vec::is_empty")]
44 pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
45 /// These ranges let the server select model-appropriate subsets.
46 pub excerpt_ranges: ExcerptRanges,
47 /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
48 /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
49 /// When present, the server uses these to compute editable/context ranges
50 /// instead of `excerpt_ranges`.
51 #[serde(default, skip_serializing_if = "Option::is_none")]
52 pub syntax_ranges: Option<Vec<Range<usize>>>,
53 /// The name of the edit prediction model experiment to use.
54 #[serde(default, skip_serializing_if = "Option::is_none")]
55 pub experiment: Option<String>,
56 #[serde(default)]
57 pub in_open_source_repo: bool,
58 #[serde(default)]
59 pub can_collect_data: bool,
60 #[serde(default, skip_serializing_if = "Option::is_none")]
61 pub repo_url: Option<String>,
62}
63
64#[derive(
65 Default,
66 Clone,
67 Copy,
68 Debug,
69 PartialEq,
70 Eq,
71 Hash,
72 EnumIter,
73 IntoStaticStr,
74 Serialize,
75 Deserialize,
76)]
77#[allow(non_camel_case_types)]
78pub enum ZetaFormat {
79 V0112MiddleAtEnd,
80 V0113Ordered,
81 V0114180EditableRegion,
82 V0120GitMergeMarkers,
83 #[default]
84 V0131GitMergeMarkersPrefix,
85 V0211Prefill,
86 V0211SeedCoder,
87 v0226Hashline,
88 V0304VariableEdit,
89 V0304SeedNoEdits,
90 /// Multi-block marker spans with NO_EDITS sentinel.
91 V0306SeedMultiRegions,
92 /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
93 V0316SeedMultiRegions,
94 /// V0316 with larger block sizes.
95 V0318SeedMultiRegions,
96 /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
97 V0317SeedMultiRegions,
98}
99
100impl std::fmt::Display for ZetaFormat {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 write!(f, "{}", <&'static str>::from(self))
103 }
104}
105
106impl ZetaFormat {
107 pub fn parse(format_name: &str) -> Result<Self> {
108 let mut results = ZetaFormat::iter().filter(|version| {
109 <&'static str>::from(version)
110 .to_lowercase()
111 .contains(&format_name.to_lowercase())
112 });
113 let Some(result) = results.next() else {
114 anyhow::bail!(
115 "`{format_name}` did not match any of:\n{}",
116 Self::options_as_string()
117 );
118 };
119 if results.next().is_some() {
120 anyhow::bail!(
121 "`{format_name}` matched more than one of:\n{}",
122 Self::options_as_string()
123 );
124 }
125 Ok(result)
126 }
127
128 pub fn options_as_string() -> String {
129 ZetaFormat::iter()
130 .map(|format| format!("- {}\n", <&'static str>::from(format)))
131 .collect::<Vec<_>>()
132 .concat()
133 }
134}
135
136#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
137#[serde(tag = "event")]
138pub enum Event {
139 BufferChange {
140 path: Arc<Path>,
141 old_path: Arc<Path>,
142 diff: String,
143 predicted: bool,
144 in_open_source_repo: bool,
145 },
146}
147
148impl Event {
149 pub fn in_open_source_repo(&self) -> bool {
150 match self {
151 Event::BufferChange {
152 in_open_source_repo,
153 ..
154 } => *in_open_source_repo,
155 }
156 }
157}
158
159pub fn write_event(prompt: &mut String, event: &Event) {
160 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
161 for component in path.components() {
162 prompt.push('/');
163 write!(prompt, "{}", component.as_os_str().display()).ok();
164 }
165 }
166 match event {
167 Event::BufferChange {
168 path,
169 old_path,
170 diff,
171 predicted,
172 in_open_source_repo: _,
173 } => {
174 if *predicted {
175 prompt.push_str("// User accepted prediction:\n");
176 }
177 prompt.push_str("--- a");
178 write_path_as_unix_str(prompt, old_path.as_ref());
179 prompt.push_str("\n+++ b");
180 write_path_as_unix_str(prompt, path.as_ref());
181 prompt.push('\n');
182 prompt.push_str(diff);
183 }
184 }
185}
186
187#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
188pub struct ActiveBufferDiagnostic {
189 pub severity: Option<i32>,
190 pub message: String,
191 pub snippet: String,
192 pub snippet_buffer_row_range: Range<u32>,
193 pub diagnostic_range_in_snippet: Range<usize>,
194}
195
196#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
197pub struct RelatedFile {
198 pub path: Arc<Path>,
199 pub max_row: u32,
200 pub excerpts: Vec<RelatedExcerpt>,
201 #[serde(default)]
202 pub in_open_source_repo: bool,
203}
204
205#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
206pub struct RelatedExcerpt {
207 pub row_range: Range<u32>,
208 pub text: Arc<str>,
209 #[serde(default)]
210 pub order: usize,
211}
212
213pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
214 special_tokens_for_format(format)
215 .iter()
216 .any(|token| input.cursor_excerpt.contains(token))
217}
218
219pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
220 format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
221}
222
223pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
224 match format {
225 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
226 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
227 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
228 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
229 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
230 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
231 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
232 ZetaFormat::v0226Hashline => hashline::special_tokens(),
233 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
234 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
235 ZetaFormat::V0316SeedMultiRegions => {
236 static TOKENS: &[&str] = &[
237 seed_coder::FIM_SUFFIX,
238 seed_coder::FIM_PREFIX,
239 seed_coder::FIM_MIDDLE,
240 seed_coder::FILE_MARKER,
241 multi_region::V0316_END_MARKER,
242 CURSOR_MARKER,
243 multi_region::MARKER_TAG_PREFIX,
244 ];
245 TOKENS
246 }
247 ZetaFormat::V0318SeedMultiRegions => {
248 static TOKENS: &[&str] = &[
249 seed_coder::FIM_SUFFIX,
250 seed_coder::FIM_PREFIX,
251 seed_coder::FIM_MIDDLE,
252 seed_coder::FILE_MARKER,
253 multi_region::V0318_END_MARKER,
254 CURSOR_MARKER,
255 multi_region::MARKER_TAG_PREFIX,
256 ];
257 TOKENS
258 }
259 ZetaFormat::V0317SeedMultiRegions => {
260 static TOKENS: &[&str] = &[
261 seed_coder::FIM_SUFFIX,
262 seed_coder::FIM_PREFIX,
263 seed_coder::FIM_MIDDLE,
264 seed_coder::FILE_MARKER,
265 multi_region::V0317_END_MARKER,
266 CURSOR_MARKER,
267 multi_region::RELATIVE_MARKER_TAG_PREFIX,
268 ];
269 TOKENS
270 }
271 ZetaFormat::V0306SeedMultiRegions => {
272 static TOKENS: &[&str] = &[
273 seed_coder::FIM_SUFFIX,
274 seed_coder::FIM_PREFIX,
275 seed_coder::FIM_MIDDLE,
276 seed_coder::FILE_MARKER,
277 seed_coder::START_MARKER,
278 seed_coder::SEPARATOR,
279 seed_coder::END_MARKER,
280 CURSOR_MARKER,
281 multi_region::MARKER_TAG_PREFIX,
282 ];
283 TOKENS
284 }
285 }
286}
287
288/// Returns the (editable_token_limit, context_token_limit) for a given format.
289pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
290 match format {
291 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
292 ZetaFormat::V0114180EditableRegion => (180, 350),
293 ZetaFormat::V0120GitMergeMarkers
294 | ZetaFormat::V0131GitMergeMarkersPrefix
295 | ZetaFormat::V0211Prefill
296 | ZetaFormat::V0211SeedCoder
297 | ZetaFormat::v0226Hashline
298 | ZetaFormat::V0306SeedMultiRegions
299 | ZetaFormat::V0316SeedMultiRegions
300 | ZetaFormat::V0318SeedMultiRegions
301 | ZetaFormat::V0317SeedMultiRegions
302 | ZetaFormat::V0304SeedNoEdits => (350, 150),
303 ZetaFormat::V0304VariableEdit => (1024, 0),
304 }
305}
306
307pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
308 match format {
309 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
310 ZetaFormat::V0112MiddleAtEnd
311 | ZetaFormat::V0113Ordered
312 | ZetaFormat::V0114180EditableRegion
313 | ZetaFormat::V0120GitMergeMarkers
314 | ZetaFormat::V0131GitMergeMarkersPrefix
315 | ZetaFormat::V0211Prefill
316 | ZetaFormat::V0211SeedCoder
317 | ZetaFormat::V0304VariableEdit
318 | ZetaFormat::V0306SeedMultiRegions
319 | ZetaFormat::V0304SeedNoEdits => &[],
320 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
321 ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
322 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
323 }
324}
325
326pub fn excerpt_ranges_for_format(
327 format: ZetaFormat,
328 ranges: &ExcerptRanges,
329) -> (Range<usize>, Range<usize>) {
330 match format {
331 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
332 ranges.editable_150.clone(),
333 ranges.editable_150_context_350.clone(),
334 ),
335 ZetaFormat::V0114180EditableRegion => (
336 ranges.editable_180.clone(),
337 ranges.editable_180_context_350.clone(),
338 ),
339 ZetaFormat::V0120GitMergeMarkers
340 | ZetaFormat::V0131GitMergeMarkersPrefix
341 | ZetaFormat::V0211Prefill
342 | ZetaFormat::V0211SeedCoder
343 | ZetaFormat::v0226Hashline
344 | ZetaFormat::V0304SeedNoEdits
345 | ZetaFormat::V0306SeedMultiRegions
346 | ZetaFormat::V0316SeedMultiRegions
347 | ZetaFormat::V0318SeedMultiRegions
348 | ZetaFormat::V0317SeedMultiRegions => (
349 ranges.editable_350.clone(),
350 ranges.editable_350_context_150.clone(),
351 ),
352 ZetaFormat::V0304VariableEdit => {
353 let context = ranges
354 .editable_350_context_1024
355 .clone()
356 .or(ranges.editable_350_context_512.clone())
357 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
358 (context.clone(), context)
359 }
360 }
361}
362
363pub fn write_cursor_excerpt_section_for_format(
364 format: ZetaFormat,
365 prompt: &mut String,
366 path: &Path,
367 context: &str,
368 editable_range: &Range<usize>,
369 cursor_offset: usize,
370) {
371 match format {
372 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
373 prompt,
374 path,
375 context,
376 editable_range,
377 cursor_offset,
378 ),
379 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
380 v0113_ordered::write_cursor_excerpt_section(
381 prompt,
382 path,
383 context,
384 editable_range,
385 cursor_offset,
386 )
387 }
388 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
389 prompt,
390 path,
391 context,
392 editable_range,
393 cursor_offset,
394 ),
395 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
396 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
397 prompt,
398 path,
399 context,
400 editable_range,
401 cursor_offset,
402 )
403 }
404 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
405 seed_coder::write_cursor_excerpt_section(
406 prompt,
407 path,
408 context,
409 editable_range,
410 cursor_offset,
411 )
412 }
413 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
414 prompt,
415 path,
416 context,
417 editable_range,
418 cursor_offset,
419 ),
420 ZetaFormat::V0304VariableEdit => {
421 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
422 }
423 ZetaFormat::V0306SeedMultiRegions => {
424 prompt.push_str(&build_v0306_cursor_prefix(
425 path,
426 context,
427 editable_range,
428 cursor_offset,
429 ));
430 }
431 ZetaFormat::V0316SeedMultiRegions => {
432 prompt.push_str(&build_v0316_cursor_prefix(
433 path,
434 context,
435 editable_range,
436 cursor_offset,
437 ));
438 }
439 ZetaFormat::V0318SeedMultiRegions => {
440 prompt.push_str(&build_v0318_cursor_prefix(
441 path,
442 context,
443 editable_range,
444 cursor_offset,
445 ));
446 }
447 ZetaFormat::V0317SeedMultiRegions => {
448 prompt.push_str(&build_v0317_cursor_prefix(
449 path,
450 context,
451 editable_range,
452 cursor_offset,
453 ));
454 }
455 }
456}
457
458fn build_v0306_cursor_prefix(
459 path: &Path,
460 context: &str,
461 editable_range: &Range<usize>,
462 cursor_offset: usize,
463) -> String {
464 let mut section = String::new();
465 let path_str = path.to_string_lossy();
466 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
467
468 section.push_str(&context[..editable_range.start]);
469 section.push_str(seed_coder::START_MARKER);
470
471 let editable_text = &context[editable_range.clone()];
472 let cursor_in_editable = cursor_offset - editable_range.start;
473 multi_region::write_editable_with_markers(
474 &mut section,
475 editable_text,
476 cursor_in_editable,
477 CURSOR_MARKER,
478 );
479
480 if !section.ends_with('\n') {
481 section.push('\n');
482 }
483 section.push_str(seed_coder::SEPARATOR);
484 section
485}
486
487fn build_v0316_cursor_prefix(
488 path: &Path,
489 context: &str,
490 editable_range: &Range<usize>,
491 cursor_offset: usize,
492) -> String {
493 let mut section = String::new();
494 let path_str = path.to_string_lossy();
495 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
496
497 section.push_str(&context[..editable_range.start]);
498
499 let editable_text = &context[editable_range.clone()];
500 let cursor_in_editable = cursor_offset - editable_range.start;
501 multi_region::write_editable_with_markers_v0316(
502 &mut section,
503 editable_text,
504 cursor_in_editable,
505 CURSOR_MARKER,
506 );
507
508 if !section.ends_with('\n') {
509 section.push('\n');
510 }
511 section
512}
513
514fn build_v0318_cursor_prefix(
515 path: &Path,
516 context: &str,
517 editable_range: &Range<usize>,
518 cursor_offset: usize,
519) -> String {
520 let mut section = String::new();
521 let path_str = path.to_string_lossy();
522 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
523
524 section.push_str(&context[..editable_range.start]);
525
526 let editable_text = &context[editable_range.clone()];
527 let cursor_in_editable = cursor_offset - editable_range.start;
528 multi_region::write_editable_with_markers_v0318(
529 &mut section,
530 editable_text,
531 cursor_in_editable,
532 CURSOR_MARKER,
533 );
534
535 if !section.ends_with('\n') {
536 section.push('\n');
537 }
538 section
539}
540
541fn build_v0317_cursor_prefix(
542 path: &Path,
543 context: &str,
544 editable_range: &Range<usize>,
545 cursor_offset: usize,
546) -> String {
547 let mut section = String::new();
548 let path_str = path.to_string_lossy();
549 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
550
551 section.push_str(&context[..editable_range.start]);
552
553 let editable_text = &context[editable_range.clone()];
554 let cursor_in_editable = cursor_offset - editable_range.start;
555 multi_region::write_editable_with_markers_v0317(
556 &mut section,
557 editable_text,
558 cursor_in_editable,
559 CURSOR_MARKER,
560 );
561
562 if !section.ends_with('\n') {
563 section.push('\n');
564 }
565 section
566}
567
/// Converts a byte range within `text` into a half-open range of rows.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline (so a partially covered last row
/// is included).
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.bytes().filter(|&byte| byte == b'\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let rows_spanned = newline_count(&text[range.start..range.end]);
    let partial_last_row = u32::from(!text[..range.end].ends_with('\n'));

    first_row..first_row + rows_spanned + partial_last_row
}
576
577pub fn format_prompt_with_budget_for_format(
578 input: &ZetaPromptInput,
579 format: ZetaFormat,
580 max_tokens: usize,
581) -> Option<String> {
582 let (context, editable_range, context_range, cursor_offset) =
583 resolve_cursor_region(input, format);
584 let path = &*input.cursor_path;
585
586 let empty_files = Vec::new();
587 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
588 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
589 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
590 let row_range = relative_row_range.start + cursor_excerpt_start_row
591 ..relative_row_range.end + cursor_excerpt_start_row;
592 &filter_redundant_excerpts(
593 input_related_files.to_vec(),
594 input.cursor_path.as_ref(),
595 row_range,
596 )
597 } else {
598 input_related_files
599 };
600
601 let prompt = match format {
602 ZetaFormat::V0211SeedCoder
603 | ZetaFormat::V0304SeedNoEdits
604 | ZetaFormat::V0306SeedMultiRegions
605 | ZetaFormat::V0316SeedMultiRegions
606 | ZetaFormat::V0318SeedMultiRegions
607 | ZetaFormat::V0317SeedMultiRegions => {
608 let mut cursor_section = String::new();
609 write_cursor_excerpt_section_for_format(
610 format,
611 &mut cursor_section,
612 path,
613 context,
614 &editable_range,
615 cursor_offset,
616 );
617
618 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
619 seed_coder::assemble_fim_prompt(
620 context,
621 &editable_range,
622 &cursor_section,
623 &input.events,
624 related_files,
625 budget_with_margin,
626 )
627 }
628 _ => {
629 let mut cursor_section = String::new();
630 write_cursor_excerpt_section_for_format(
631 format,
632 &mut cursor_section,
633 path,
634 context,
635 &editable_range,
636 cursor_offset,
637 );
638
639 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
640 let cursor_tokens = estimate_tokens(cursor_section.len());
641 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
642
643 let edit_history_section = format_edit_history_within_budget(
644 &input.events,
645 "<|file_sep|>",
646 "edit history",
647 remaining_budget,
648 max_edit_event_count_for_format(&format),
649 );
650 let edit_history_tokens = estimate_tokens(edit_history_section.len());
651 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
652
653 let related_files_section = format_related_files_within_budget(
654 &related_files,
655 "<|file_sep|>",
656 "",
657 remaining_budget,
658 );
659
660 let mut prompt = String::new();
661 prompt.push_str(&related_files_section);
662 prompt.push_str(&edit_history_section);
663 prompt.push_str(&cursor_section);
664 prompt
665 }
666 };
667 let prompt_tokens = estimate_tokens(prompt.len());
668 if prompt_tokens > max_tokens {
669 return None;
670 }
671 return Some(prompt);
672}
673
674pub fn filter_redundant_excerpts(
675 mut related_files: Vec<RelatedFile>,
676 cursor_path: &Path,
677 cursor_row_range: Range<u32>,
678) -> Vec<RelatedFile> {
679 for file in &mut related_files {
680 if file.path.as_ref() == cursor_path {
681 file.excerpts.retain(|excerpt| {
682 excerpt.row_range.start < cursor_row_range.start
683 || excerpt.row_range.end > cursor_row_range.end
684 });
685 }
686 }
687 related_files.retain(|file| !file.excerpts.is_empty());
688 related_files
689}
690
691pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
692 match format {
693 ZetaFormat::V0112MiddleAtEnd
694 | ZetaFormat::V0113Ordered
695 | ZetaFormat::V0114180EditableRegion
696 | ZetaFormat::V0120GitMergeMarkers
697 | ZetaFormat::V0131GitMergeMarkersPrefix
698 | ZetaFormat::V0211Prefill
699 | ZetaFormat::V0211SeedCoder
700 | ZetaFormat::v0226Hashline
701 | ZetaFormat::V0304SeedNoEdits
702 | ZetaFormat::V0304VariableEdit
703 | ZetaFormat::V0306SeedMultiRegions
704 | ZetaFormat::V0316SeedMultiRegions
705 | ZetaFormat::V0318SeedMultiRegions
706 | ZetaFormat::V0317SeedMultiRegions => 6,
707 }
708}
709
710pub fn get_prefill_for_format(
711 format: ZetaFormat,
712 context: &str,
713 editable_range: &Range<usize>,
714) -> String {
715 match format {
716 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
717 ZetaFormat::V0112MiddleAtEnd
718 | ZetaFormat::V0113Ordered
719 | ZetaFormat::V0114180EditableRegion
720 | ZetaFormat::V0120GitMergeMarkers
721 | ZetaFormat::V0131GitMergeMarkersPrefix
722 | ZetaFormat::V0211SeedCoder
723 | ZetaFormat::v0226Hashline
724 | ZetaFormat::V0304VariableEdit => String::new(),
725 ZetaFormat::V0304SeedNoEdits
726 | ZetaFormat::V0306SeedMultiRegions
727 | ZetaFormat::V0316SeedMultiRegions
728 | ZetaFormat::V0318SeedMultiRegions
729 | ZetaFormat::V0317SeedMultiRegions => String::new(),
730 }
731}
732
733pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
734 match format {
735 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
736 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
737 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
738 ZetaFormat::V0211SeedCoder
739 | ZetaFormat::V0304SeedNoEdits
740 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
741 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
742 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
743 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
744 ZetaFormat::V0112MiddleAtEnd
745 | ZetaFormat::V0113Ordered
746 | ZetaFormat::V0114180EditableRegion
747 | ZetaFormat::v0226Hashline
748 | ZetaFormat::V0304VariableEdit => None,
749 }
750}
751
752pub fn encode_patch_as_output_for_format(
753 format: ZetaFormat,
754 old_editable_region: &str,
755 patch: &str,
756 cursor_offset: Option<usize>,
757) -> Result<Option<String>> {
758 match format {
759 ZetaFormat::v0226Hashline => {
760 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
761 }
762 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
763 old_editable_region,
764 patch,
765 cursor_offset,
766 )
767 .map(Some),
768 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
769 Ok(seed_coder::no_edits(patch))
770 }
771 ZetaFormat::V0316SeedMultiRegions => {
772 let empty_patch = patch.lines().count() <= 3;
773 if empty_patch {
774 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
775 let marker_num =
776 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
777 let tag = multi_region::marker_tag(marker_num);
778 Ok(Some(format!(
779 "{tag}{tag}{}",
780 multi_region::V0316_END_MARKER
781 )))
782 } else {
783 Ok(None)
784 }
785 }
786 ZetaFormat::V0318SeedMultiRegions => {
787 let empty_patch = patch.lines().count() <= 3;
788 if empty_patch {
789 let marker_offsets =
790 multi_region::compute_marker_offsets_v0318(old_editable_region);
791 let marker_num =
792 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
793 let tag = multi_region::marker_tag(marker_num);
794 Ok(Some(format!(
795 "{tag}{tag}{}",
796 multi_region::V0318_END_MARKER
797 )))
798 } else {
799 Ok(None)
800 }
801 }
802 ZetaFormat::V0317SeedMultiRegions => {
803 let empty_patch = patch.lines().count() <= 3;
804 if empty_patch {
805 let tag = multi_region::marker_tag_relative(0);
806 Ok(Some(format!(
807 "{tag}{tag}{}",
808 multi_region::V0317_END_MARKER
809 )))
810 } else {
811 Ok(None)
812 }
813 }
814 _ => Ok(None),
815 }
816}
817
/// Result of parsing model output with `parse_zeta2_model_output`.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
824
825/// Parse model output for the given zeta format
826pub fn parse_zeta2_model_output(
827 output: &str,
828 format: ZetaFormat,
829 prompt_inputs: &ZetaPromptInput,
830) -> Result<ParsedOutput> {
831 let output = match output_end_marker_for_format(format) {
832 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
833 None => output,
834 };
835
836 let (context, editable_range_in_context, context_range, cursor_offset) =
837 resolve_cursor_region(prompt_inputs, format);
838 let context_start = context_range.start;
839 let old_editable_region = &context[editable_range_in_context.clone()];
840 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
841
842 let (range_in_context, output) = match format {
843 ZetaFormat::v0226Hashline => (
844 editable_range_in_context,
845 if hashline::output_has_edit_commands(output) {
846 hashline::apply_edit_commands(old_editable_region, output)
847 } else {
848 output.to_string()
849 },
850 ),
851 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
852 ZetaFormat::V0304SeedNoEdits => (
853 editable_range_in_context,
854 if output.starts_with(seed_coder::NO_EDITS) {
855 old_editable_region.to_string()
856 } else {
857 output.to_string()
858 },
859 ),
860 ZetaFormat::V0306SeedMultiRegions => (
861 editable_range_in_context,
862 if output.starts_with(seed_coder::NO_EDITS) {
863 old_editable_region.to_string()
864 } else {
865 multi_region::apply_marker_span(old_editable_region, output)?
866 },
867 ),
868 ZetaFormat::V0316SeedMultiRegions => (
869 editable_range_in_context,
870 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
871 ),
872 ZetaFormat::V0318SeedMultiRegions => (
873 editable_range_in_context,
874 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
875 ),
876 ZetaFormat::V0317SeedMultiRegions => (
877 editable_range_in_context,
878 multi_region::apply_marker_span_v0317(
879 old_editable_region,
880 output,
881 Some(cursor_offset_in_editable),
882 )?,
883 ),
884 _ => (editable_range_in_context, output.to_string()),
885 };
886
887 let range_in_excerpt =
888 range_in_context.start + context_start..range_in_context.end + context_start;
889
890 Ok(ParsedOutput {
891 new_editable_region: output,
892 range_in_excerpt,
893 })
894}
895
896pub fn excerpt_range_for_format(
897 format: ZetaFormat,
898 ranges: &ExcerptRanges,
899) -> (Range<usize>, Range<usize>) {
900 excerpt_ranges_for_format(format, ranges)
901}
902
903pub fn resolve_cursor_region(
904 input: &ZetaPromptInput,
905 format: ZetaFormat,
906) -> (&str, Range<usize>, Range<usize>, usize) {
907 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
908 let (editable_tokens, context_tokens) = token_limits_for_format(format);
909 compute_editable_and_context_ranges(
910 &input.cursor_excerpt,
911 input.cursor_offset_in_excerpt,
912 syntax_ranges,
913 editable_tokens,
914 context_tokens,
915 )
916 } else {
917 excerpt_range_for_format(format, &input.excerpt_ranges)
918 };
919 let context_start = context_range.start;
920 let context_text = &input.cursor_excerpt[context_range.clone()];
921 let adjusted_editable =
922 (editable_range.start - context_start)..(editable_range.end - context_start);
923 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
924
925 (
926 context_text,
927 adjusted_editable,
928 context_range,
929 adjusted_cursor,
930 )
931}
932
933pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
934 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
935 get_prefill_for_format(format, context, &editable_range)
936}
937
938fn format_edit_history_within_budget(
939 events: &[Arc<Event>],
940 file_marker: &str,
941 edit_history_name: &str,
942 max_tokens: usize,
943 max_edit_event_count: usize,
944) -> String {
945 let header = format!("{}{}\n", file_marker, edit_history_name);
946 let header_tokens = estimate_tokens(header.len());
947 if header_tokens >= max_tokens {
948 return String::new();
949 }
950
951 let mut event_strings: Vec<String> = Vec::new();
952 let mut total_tokens = header_tokens;
953
954 for event in events.iter().rev().take(max_edit_event_count) {
955 let mut event_str = String::new();
956 write_event(&mut event_str, event);
957 let event_tokens = estimate_tokens(event_str.len());
958
959 if total_tokens + event_tokens > max_tokens {
960 break;
961 }
962 total_tokens += event_tokens;
963 event_strings.push(event_str);
964 }
965
966 if event_strings.is_empty() {
967 return String::new();
968 }
969
970 let mut result = header;
971 for event_str in event_strings.iter().rev() {
972 result.push_str(event_str);
973 }
974 result
975}
976
977fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
978 let needs_newline = !excerpt.text.ends_with('\n');
979 let needs_ellipsis = excerpt.row_range.end < file_max_row;
980 let len = excerpt.text.len()
981 + if needs_newline { "\n".len() } else { 0 }
982 + if needs_ellipsis { "...\n".len() } else { 0 };
983 estimate_tokens(len)
984}
985
986pub fn format_related_files_within_budget(
987 related_files: &[RelatedFile],
988 file_prefix: &str,
989 file_suffix: &str,
990 max_tokens: usize,
991) -> String {
992 struct ExcerptCandidate {
993 file_ix: usize,
994 excerpt_ix: usize,
995 order: usize,
996 }
997
998 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
999 .iter()
1000 .enumerate()
1001 .flat_map(|(file_ix, file)| {
1002 file.excerpts
1003 .iter()
1004 .enumerate()
1005 .map(move |(excerpt_ix, e)| ExcerptCandidate {
1006 file_ix,
1007 excerpt_ix,
1008 order: e.order,
1009 })
1010 })
1011 .collect();
1012
1013 // Pre-compute file header strings and their token costs.
1014 let file_headers: Vec<String> = related_files
1015 .iter()
1016 .map(|file| {
1017 let path_str = file.path.to_string_lossy();
1018 format!("{}{}\n", file_prefix, path_str)
1019 })
1020 .collect();
1021
1022 // Sort the excerpts by their order and determine how many fit within the budget.
1023 let mut total_tokens = 0;
1024 let mut included_excerpt_count = 0_usize;
1025 let mut included_file_indices = vec![false; related_files.len()];
1026 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1027 for candidate in &excerpt_candidates {
1028 let file = &related_files[candidate.file_ix];
1029 let excerpt = &file.excerpts[candidate.excerpt_ix];
1030 let file_already_included = included_file_indices[candidate.file_ix];
1031 let header_cost = if file_already_included {
1032 0
1033 } else {
1034 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1035 };
1036 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1037 if total_tokens + header_cost + excerpt_cost > max_tokens {
1038 break;
1039 }
1040 total_tokens += header_cost + excerpt_cost;
1041 if !file_already_included {
1042 included_file_indices[candidate.file_ix] = true;
1043 }
1044 included_excerpt_count += 1;
1045 }
1046
1047 excerpt_candidates.truncate(included_excerpt_count);
1048 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1049
1050 // Render all of the files that fit within the token budget, in the original order.
1051 let mut result = String::new();
1052 let mut last_file_ix = None;
1053 for candidate in &excerpt_candidates {
1054 if last_file_ix != Some(candidate.file_ix) {
1055 if last_file_ix.is_some() {
1056 result.push_str(file_suffix);
1057 }
1058 result.push_str(&file_headers[candidate.file_ix]);
1059 last_file_ix = Some(candidate.file_ix);
1060 }
1061 let file = &related_files[candidate.file_ix];
1062 let excerpt = &file.excerpts[candidate.excerpt_ix];
1063 result.push_str(&excerpt.text);
1064 if !result.ends_with('\n') {
1065 result.push('\n');
1066 }
1067 if excerpt.row_range.end < file.max_row {
1068 result.push_str("...\n");
1069 }
1070 }
1071
1072 result
1073}
1074
1075pub fn write_related_files(
1076 prompt: &mut String,
1077 related_files: &[RelatedFile],
1078) -> Vec<Range<usize>> {
1079 let mut ranges = Vec::new();
1080 for file in related_files {
1081 let start = prompt.len();
1082 let path_str = file.path.to_string_lossy();
1083 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1084 for excerpt in &file.excerpts {
1085 prompt.push_str(&excerpt.text);
1086 if !prompt.ends_with('\n') {
1087 prompt.push('\n');
1088 }
1089 if excerpt.row_range.end < file.max_row {
1090 prompt.push_str("...\n");
1091 }
1092 }
1093 let end = prompt.len();
1094 ranges.push(start..end);
1095 }
1096 ranges
1097}
1098
1099mod v0112_middle_at_end {
1100 use super::*;
1101
1102 pub fn special_tokens() -> &'static [&'static str] {
1103 &[
1104 "<|fim_prefix|>",
1105 "<|fim_suffix|>",
1106 "<|fim_middle|>",
1107 "<|file_sep|>",
1108 CURSOR_MARKER,
1109 ]
1110 }
1111
1112 pub fn write_cursor_excerpt_section(
1113 prompt: &mut String,
1114 path: &Path,
1115 context: &str,
1116 editable_range: &Range<usize>,
1117 cursor_offset: usize,
1118 ) {
1119 let path_str = path.to_string_lossy();
1120 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1121
1122 prompt.push_str("<|fim_prefix|>\n");
1123 prompt.push_str(&context[..editable_range.start]);
1124
1125 prompt.push_str("<|fim_suffix|>\n");
1126 prompt.push_str(&context[editable_range.end..]);
1127 if !prompt.ends_with('\n') {
1128 prompt.push('\n');
1129 }
1130
1131 prompt.push_str("<|fim_middle|>current\n");
1132 prompt.push_str(&context[editable_range.start..cursor_offset]);
1133 prompt.push_str(CURSOR_MARKER);
1134 prompt.push_str(&context[cursor_offset..editable_range.end]);
1135 if !prompt.ends_with('\n') {
1136 prompt.push('\n');
1137 }
1138
1139 prompt.push_str("<|fim_middle|>updated\n");
1140 }
1141}
1142
1143mod v0113_ordered {
1144 use super::*;
1145
1146 pub fn special_tokens() -> &'static [&'static str] {
1147 &[
1148 "<|fim_prefix|>",
1149 "<|fim_suffix|>",
1150 "<|fim_middle|>",
1151 "<|file_sep|>",
1152 CURSOR_MARKER,
1153 ]
1154 }
1155
1156 pub fn write_cursor_excerpt_section(
1157 prompt: &mut String,
1158 path: &Path,
1159 context: &str,
1160 editable_range: &Range<usize>,
1161 cursor_offset: usize,
1162 ) {
1163 let path_str = path.to_string_lossy();
1164 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1165
1166 prompt.push_str("<|fim_prefix|>\n");
1167 prompt.push_str(&context[..editable_range.start]);
1168 if !prompt.ends_with('\n') {
1169 prompt.push('\n');
1170 }
1171
1172 prompt.push_str("<|fim_middle|>current\n");
1173 prompt.push_str(&context[editable_range.start..cursor_offset]);
1174 prompt.push_str(CURSOR_MARKER);
1175 prompt.push_str(&context[cursor_offset..editable_range.end]);
1176 if !prompt.ends_with('\n') {
1177 prompt.push('\n');
1178 }
1179
1180 prompt.push_str("<|fim_suffix|>\n");
1181 prompt.push_str(&context[editable_range.end..]);
1182 if !prompt.ends_with('\n') {
1183 prompt.push('\n');
1184 }
1185
1186 prompt.push_str("<|fim_middle|>updated\n");
1187 }
1188}
1189
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; identical to those of `v0113_ordered`,
    /// to which this simply delegates.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1197
1198pub mod v0120_git_merge_markers {
1199 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1200 //!
1201 //! Example prompt:
1202 //!
1203 //! <|file_sep|>path/to/target_file.py
1204 //! <|fim_prefix|>
1205 //! code before editable region
1206 //! <|fim_suffix|>
1207 //! code after editable region
1208 //! <|fim_middle|>
1209 //! <<<<<<< CURRENT
1210 //! code that
1211 //! needs to<|user_cursor|>
1212 //! be rewritten
1213 //! =======
1214 //!
1215 //! Expected output (should be generated by the model):
1216 //!
1217 //! updated
1218 //! code with
1219 //! changes applied
1220 //! >>>>>>> UPDATED
1221
1222 use super::*;
1223
1224 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1225 pub const SEPARATOR: &str = "=======\n";
1226 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1227
1228 pub fn special_tokens() -> &'static [&'static str] {
1229 &[
1230 "<|fim_prefix|>",
1231 "<|fim_suffix|>",
1232 "<|fim_middle|>",
1233 "<|file_sep|>",
1234 START_MARKER,
1235 SEPARATOR,
1236 END_MARKER,
1237 CURSOR_MARKER,
1238 ]
1239 }
1240
1241 pub fn write_cursor_excerpt_section(
1242 prompt: &mut String,
1243 path: &Path,
1244 context: &str,
1245 editable_range: &Range<usize>,
1246 cursor_offset: usize,
1247 ) {
1248 let path_str = path.to_string_lossy();
1249 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1250
1251 prompt.push_str("<|fim_prefix|>");
1252 prompt.push_str(&context[..editable_range.start]);
1253
1254 prompt.push_str("<|fim_suffix|>");
1255 prompt.push_str(&context[editable_range.end..]);
1256 if !prompt.ends_with('\n') {
1257 prompt.push('\n');
1258 }
1259
1260 prompt.push_str("<|fim_middle|>");
1261 prompt.push_str(START_MARKER);
1262 prompt.push_str(&context[editable_range.start..cursor_offset]);
1263 prompt.push_str(CURSOR_MARKER);
1264 prompt.push_str(&context[cursor_offset..editable_range.end]);
1265 if !prompt.ends_with('\n') {
1266 prompt.push('\n');
1267 }
1268 prompt.push_str(SEPARATOR);
1269 }
1270}
1271
1272pub mod v0131_git_merge_markers_prefix {
1273 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1274 //!
1275 //! Example prompt:
1276 //!
1277 //! <|file_sep|>path/to/target_file.py
1278 //! <|fim_prefix|>
1279 //! code before editable region
1280 //! <<<<<<< CURRENT
1281 //! code that
1282 //! needs to<|user_cursor|>
1283 //! be rewritten
1284 //! =======
1285 //! <|fim_suffix|>
1286 //! code after editable region
1287 //! <|fim_middle|>
1288 //!
1289 //! Expected output (should be generated by the model):
1290 //!
1291 //! updated
1292 //! code with
1293 //! changes applied
1294 //! >>>>>>> UPDATED
1295
1296 use super::*;
1297
1298 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1299 pub const SEPARATOR: &str = "=======\n";
1300 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1301
1302 pub fn special_tokens() -> &'static [&'static str] {
1303 &[
1304 "<|fim_prefix|>",
1305 "<|fim_suffix|>",
1306 "<|fim_middle|>",
1307 "<|file_sep|>",
1308 START_MARKER,
1309 SEPARATOR,
1310 END_MARKER,
1311 CURSOR_MARKER,
1312 ]
1313 }
1314
1315 pub fn write_cursor_excerpt_section(
1316 prompt: &mut String,
1317 path: &Path,
1318 context: &str,
1319 editable_range: &Range<usize>,
1320 cursor_offset: usize,
1321 ) {
1322 let path_str = path.to_string_lossy();
1323 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1324
1325 prompt.push_str("<|fim_prefix|>");
1326 prompt.push_str(&context[..editable_range.start]);
1327 prompt.push_str(START_MARKER);
1328 prompt.push_str(&context[editable_range.start..cursor_offset]);
1329 prompt.push_str(CURSOR_MARKER);
1330 prompt.push_str(&context[cursor_offset..editable_range.end]);
1331 if !prompt.ends_with('\n') {
1332 prompt.push('\n');
1333 }
1334 prompt.push_str(SEPARATOR);
1335
1336 prompt.push_str("<|fim_suffix|>");
1337 prompt.push_str(&context[editable_range.end..]);
1338 if !prompt.ends_with('\n') {
1339 prompt.push('\n');
1340 }
1341
1342 prompt.push_str("<|fim_middle|>");
1343 }
1344}
1345
1346pub mod v0211_prefill {
1347 use super::*;
1348
1349 pub fn special_tokens() -> &'static [&'static str] {
1350 v0131_git_merge_markers_prefix::special_tokens()
1351 }
1352
1353 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1354 let editable_region = &context[editable_range.start..editable_range.end];
1355
1356 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1357 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1358
1359 // Find a token boundary to avoid splitting tokens in the prefill.
1360 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1361 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1362 // the \n and consume any consecutive \n characters after it.
1363 let prefill = &editable_region[..prefill_len];
1364 match prefill.rfind('\n') {
1365 Some(pos) => {
1366 let mut end = pos + 1;
1367 while end < editable_region.len()
1368 && editable_region.as_bytes().get(end) == Some(&b'\n')
1369 {
1370 end += 1;
1371 }
1372 editable_region[..end].to_string()
1373 }
1374 // No newline found. Fall back to splitting before the last space
1375 // (word-level boundary)
1376 None => match prefill.rfind(' ') {
1377 Some(pos) => prefill[..pos].to_string(),
1378 None => prefill.to_string(),
1379 },
1380 }
1381 }
1382}
1383
1384pub mod hashline {
1385
1386 use std::fmt::Display;
1387
    /// Marker written (followed by a newline) at the very end of the cursor
    /// excerpt section, after the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written right after the prefix context, immediately before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1390
1391 use super::*;
1392
    /// Prefix of a command line that replaces one line or an inclusive range
    /// of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts new lines after a referenced
    /// line, or before the first line when no reference is given.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Output that means "leave the editable region unchanged".
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1396
1397 pub fn special_tokens() -> &'static [&'static str] {
1398 return &[
1399 SET_COMMAND_MARKER,
1400 "<|set_range|>",
1401 INSERT_COMMAND_MARKER,
1402 NO_EDITS_COMMAND_MARKER,
1403 CURSOR_MARKER,
1404 "<|file_sep|>",
1405 "<|fim_prefix|>",
1406 "<|fim_suffix|>",
1407 "<|fim_middle|>",
1408 ];
1409 }
1410
1411 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1412 #[derive(Debug, Clone, PartialEq, Eq)]
1413 struct LineRef {
1414 index: usize,
1415 hash: u8,
1416 }
1417
1418 impl Display for LineRef {
1419 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1420 write!(f, "{}:{:02x}", self.index, self.hash)
1421 }
1422 }
1423
1424 pub fn hash_line(line: &[u8]) -> u8 {
1425 let mut h: u8 = 0;
1426 for &byte in line {
1427 h = h.wrapping_add(byte);
1428 }
1429 return h;
1430 }
1431
1432 /// Write the hashline-encoded editable region into `out`. Each line of
1433 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1434 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1435 /// to the start of `editable_text`).
1436 pub fn write_hashline_editable_region(
1437 out: &mut String,
1438 editable_text: &str,
1439 cursor_offset_in_editable: usize,
1440 ) {
1441 let mut offset = 0;
1442 for (i, line) in editable_text.lines().enumerate() {
1443 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1444 && cursor_offset_in_editable < offset + line.len()
1445 {
1446 (
1447 &line[..cursor_offset_in_editable - offset],
1448 CURSOR_MARKER,
1449 &line[cursor_offset_in_editable - offset..],
1450 )
1451 } else {
1452 (line, "", "")
1453 };
1454 write!(
1455 out,
1456 "\n{}|{head}{cursor}{tail}",
1457 LineRef {
1458 index: i,
1459 hash: hash_line(line.as_bytes())
1460 }
1461 )
1462 .unwrap();
1463 offset += line.len() + 1;
1464 }
1465 }
1466
1467 pub fn write_cursor_excerpt_section(
1468 prompt: &mut String,
1469 path: &Path,
1470 context: &str,
1471 editable_range: &Range<usize>,
1472 cursor_offset: usize,
1473 ) {
1474 let path_str = path.to_string_lossy();
1475 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1476
1477 prompt.push_str("<|fim_prefix|>\n");
1478 prompt.push_str(&context[..editable_range.start]);
1479 prompt.push_str(START_MARKER);
1480
1481 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1482 let editable_region = &context[editable_range.clone()];
1483 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1484
1485 if !prompt.ends_with('\n') {
1486 prompt.push('\n');
1487 }
1488
1489 prompt.push_str("<|fim_suffix|>\n");
1490 prompt.push_str(&context[editable_range.end..]);
1491 if !prompt.ends_with('\n') {
1492 prompt.push('\n');
1493 }
1494
1495 prompt.push_str(END_MARKER);
1496 prompt.push('\n');
1497 }
1498
    /// A single edit command parsed from the model output.
    ///
    /// `content` is the command's body, borrowed directly from the model
    /// output text.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1516
1517 /// Parse a line reference like `3:c3` into a `LineRef`.
1518 fn parse_line_ref(s: &str) -> Option<LineRef> {
1519 let (idx_str, hash_str) = s.split_once(':')?;
1520 let index = idx_str.parse::<usize>().ok()?;
1521 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1522 Some(LineRef { index, hash })
1523 }
1524
1525 /// Parse the model output into a list of `EditCommand`s.
1526 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1527 let mut commands = Vec::new();
1528 let mut offset = 0usize;
1529
1530 while offset < model_output.len() {
1531 let next_nl = model_output[offset..]
1532 .find('\n')
1533 .map(|i| offset + i)
1534 .unwrap_or(model_output.len());
1535 let line = &model_output[offset..next_nl];
1536 let line_end = if next_nl < model_output.len() {
1537 next_nl + 1
1538 } else {
1539 next_nl
1540 };
1541
1542 let trimmed = line.trim();
1543 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1544 (true, spec)
1545 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1546 (false, spec)
1547 } else {
1548 offset = line_end;
1549 continue;
1550 };
1551
1552 let mut content_end = line_end;
1553 let mut scan = line_end;
1554
1555 while scan < model_output.len() {
1556 let body_nl = model_output[scan..]
1557 .find('\n')
1558 .map(|i| scan + i)
1559 .unwrap_or(model_output.len());
1560 let body_line = &model_output[scan..body_nl];
1561 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1562 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1563 {
1564 break;
1565 }
1566 scan = if body_nl < model_output.len() {
1567 body_nl + 1
1568 } else {
1569 body_nl
1570 };
1571 content_end = scan;
1572 }
1573
1574 let content = &model_output[line_end..content_end];
1575
1576 if is_set {
1577 if let Some((start_str, end_str)) = specifier.split_once('-') {
1578 if let (Some(start), Some(end)) =
1579 (parse_line_ref(start_str), parse_line_ref(end_str))
1580 {
1581 commands.push(EditCommand::Set {
1582 start,
1583 end,
1584 content,
1585 });
1586 }
1587 } else if let Some(target) = parse_line_ref(specifier) {
1588 commands.push(EditCommand::Set {
1589 start: target.clone(),
1590 end: target,
1591 content,
1592 });
1593 }
1594 } else {
1595 let after = parse_line_ref(specifier);
1596 commands.push(EditCommand::Insert { after, content });
1597 }
1598
1599 offset = scan;
1600 }
1601
1602 commands
1603 }
1604
1605 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1606 /// (as opposed to being a plain full-replacement output).
1607 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1608 /// editable region, returning the plain text content.
1609 pub fn strip_hashline_prefixes(region: &str) -> String {
1610 let mut decoded: String = region
1611 .lines()
1612 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1613 .collect::<Vec<_>>()
1614 .join("\n");
1615 if region.ends_with('\n') {
1616 decoded.push('\n');
1617 }
1618 decoded
1619 }
1620
1621 pub fn output_has_edit_commands(model_output: &str) -> bool {
1622 model_output.contains(SET_COMMAND_MARKER)
1623 || model_output.contains(INSERT_COMMAND_MARKER)
1624 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1625 }
1626
1627 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1628 /// original editable region text.
1629 ///
1630 /// `editable_region` is the original text of the editable region (without hash
1631 /// prefixes). `model_output` is the raw model response containing edit commands.
1632 ///
1633 /// Returns the full replacement text for the editable region.
1634 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1635 if model_output
1636 .trim_start()
1637 .starts_with(NO_EDITS_COMMAND_MARKER)
1638 {
1639 return editable_region.to_string();
1640 }
1641
1642 let original_lines: Vec<&str> = editable_region.lines().collect();
1643 let old_hashes: Vec<u8> = original_lines
1644 .iter()
1645 .map(|line| hash_line(line.as_bytes()))
1646 .collect();
1647
1648 let commands = parse_edit_commands(model_output);
1649
1650 // For set operations: indexed by start line → Some((end line index, content))
1651 // For insert operations: indexed by line index → vec of content to insert after
1652 // Insert-before-first is tracked separately.
1653 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1654 let mut insert_before_first: Vec<&str> = Vec::new();
1655 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1656
1657 for command in &commands {
1658 match command {
1659 EditCommand::Set {
1660 start,
1661 end,
1662 content,
1663 } => {
1664 if start.index < old_hashes.len()
1665 && end.index < old_hashes.len()
1666 && start.index <= end.index
1667 && old_hashes[start.index] == start.hash
1668 && old_hashes[end.index] == end.hash
1669 {
1670 set_ops[start.index] = Some((end.index, *content));
1671 }
1672 }
1673 EditCommand::Insert { after, content } => match after {
1674 None => insert_before_first.push(*content),
1675 Some(line_ref) => {
1676 if line_ref.index < old_hashes.len()
1677 && old_hashes[line_ref.index] == line_ref.hash
1678 {
1679 insert_after[line_ref.index].push(*content);
1680 }
1681 }
1682 },
1683 }
1684 }
1685
1686 let mut result = String::new();
1687
1688 // Emit any insertions before the first line
1689 for content in &insert_before_first {
1690 result.push_str(content);
1691 if !content.ends_with('\n') {
1692 result.push('\n');
1693 }
1694 }
1695
1696 let mut i = 0;
1697 while i < original_lines.len() {
1698 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1699 // Replace lines i..=end_index with the replacement content
1700 result.push_str(replacement);
1701 if !replacement.is_empty() && !replacement.ends_with('\n') {
1702 result.push('\n');
1703 }
1704 // Emit any insertions after the end of this set range
1705 if *end_index < insert_after.len() {
1706 for content in &insert_after[*end_index] {
1707 result.push_str(content);
1708 if !content.ends_with('\n') {
1709 result.push('\n');
1710 }
1711 }
1712 }
1713 i = end_index + 1;
1714 } else {
1715 // Keep the original line
1716 result.push_str(original_lines[i]);
1717 result.push('\n');
1718 // Emit any insertions after this line
1719 for content in &insert_after[i] {
1720 result.push_str(content);
1721 if !content.ends_with('\n') {
1722 result.push('\n');
1723 }
1724 }
1725 i += 1;
1726 }
1727 }
1728
1729 // Preserve trailing newline behavior: if the original ended with a
1730 // newline the result already has one; if it didn't, trim the extra one
1731 // we added.
1732 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1733 result.pop();
1734 }
1735
1736 result
1737 }
1738
1739 /// Convert a unified diff patch into hashline edit commands.
1740 ///
1741 /// Parses the unified diff `patch` directly to determine which lines of
1742 /// `old_text` are deleted/replaced and what new lines are added, then emits
1743 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1744 /// `{index}:{hash}` identifiers.
1745 ///
1746 /// `cursor_offset` is an optional byte offset into the first hunk's new
1747 /// text (context + additions) where the cursor marker should be placed.
1748 pub fn patch_to_edit_commands(
1749 old_text: &str,
1750 patch: &str,
1751 cursor_offset: Option<usize>,
1752 ) -> Result<String> {
1753 let old_lines: Vec<&str> = old_text.lines().collect();
1754 let old_hashes: Vec<u8> = old_lines
1755 .iter()
1756 .map(|line| hash_line(line.as_bytes()))
1757 .collect();
1758
1759 let mut result = String::new();
1760 let mut first_hunk = true;
1761
1762 struct Hunk<'a> {
1763 line_range: Range<usize>,
1764 new_text_lines: Vec<&'a str>,
1765 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1766 }
1767
1768 // Parse the patch line by line. We only care about hunk headers,
1769 // context, deletions, and additions.
1770 let mut old_line_index: usize = 0;
1771 let mut current_hunk: Option<Hunk> = None;
1772 // Byte offset tracking within the hunk's new text for cursor placement.
1773 let mut new_text_byte_offset: usize = 0;
1774 // The line index of the last old line seen before/in the current hunk
1775 // (used for insert-after reference).
1776 let mut last_old_line_before_hunk: Option<usize> = None;
1777
1778 fn flush_hunk(
1779 hunk: Hunk,
1780 last_old_line: Option<usize>,
1781 result: &mut String,
1782 old_hashes: &[u8],
1783 ) {
1784 if hunk.line_range.is_empty() {
1785 // Pure insertion — reference the old line to insert after when in bounds.
1786 if let Some(after) = last_old_line
1787 && let Some(&hash) = old_hashes.get(after)
1788 {
1789 write!(
1790 result,
1791 "{INSERT_COMMAND_MARKER}{}\n",
1792 LineRef { index: after, hash }
1793 )
1794 .unwrap();
1795 } else {
1796 result.push_str(INSERT_COMMAND_MARKER);
1797 result.push('\n');
1798 }
1799 } else {
1800 let start = hunk.line_range.start;
1801 let end_exclusive = hunk.line_range.end;
1802 let deleted_line_count = end_exclusive.saturating_sub(start);
1803
1804 if deleted_line_count == 1 {
1805 if let Some(&hash) = old_hashes.get(start) {
1806 write!(
1807 result,
1808 "{SET_COMMAND_MARKER}{}\n",
1809 LineRef { index: start, hash }
1810 )
1811 .unwrap();
1812 } else {
1813 result.push_str(SET_COMMAND_MARKER);
1814 result.push('\n');
1815 }
1816 } else {
1817 let end_inclusive = end_exclusive - 1;
1818 match (
1819 old_hashes.get(start).copied(),
1820 old_hashes.get(end_inclusive).copied(),
1821 ) {
1822 (Some(start_hash), Some(end_hash)) => {
1823 write!(
1824 result,
1825 "{SET_COMMAND_MARKER}{}-{}\n",
1826 LineRef {
1827 index: start,
1828 hash: start_hash
1829 },
1830 LineRef {
1831 index: end_inclusive,
1832 hash: end_hash
1833 }
1834 )
1835 .unwrap();
1836 }
1837 _ => {
1838 result.push_str(SET_COMMAND_MARKER);
1839 result.push('\n');
1840 }
1841 }
1842 }
1843 }
1844 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1845 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1846 && line_offset == cursor_line_offset
1847 {
1848 result.push_str(&line[..char_offset]);
1849 result.push_str(CURSOR_MARKER);
1850 result.push_str(&line[char_offset..]);
1851 continue;
1852 }
1853
1854 result.push_str(line);
1855 }
1856 }
1857
1858 for raw_line in patch.split_inclusive('\n') {
1859 if raw_line.starts_with("@@") {
1860 // Flush any pending change hunk from a previous patch hunk.
1861 if let Some(hunk) = current_hunk.take() {
1862 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1863 }
1864
1865 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1866 // We intentionally do not trust old_start as a direct local index into `old_text`,
1867 // because some patches are produced against a larger file region and carry
1868 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1869 if first_hunk {
1870 new_text_byte_offset = 0;
1871 first_hunk = false;
1872 }
1873 continue;
1874 }
1875
1876 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1877 continue;
1878 }
1879 if raw_line.starts_with("\\ No newline") {
1880 continue;
1881 }
1882
1883 if raw_line.starts_with('-') {
1884 // Extend or start a change hunk with this deleted old line.
1885 match &mut current_hunk {
1886 Some(Hunk {
1887 line_range: range, ..
1888 }) => range.end = old_line_index + 1,
1889 None => {
1890 current_hunk = Some(Hunk {
1891 line_range: old_line_index..old_line_index + 1,
1892 new_text_lines: Vec::new(),
1893 cursor_line_offset_in_new_text: None,
1894 });
1895 }
1896 }
1897 old_line_index += 1;
1898 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1899 // Place cursor marker if cursor_offset falls within this line.
1900 let mut cursor_line_offset = None;
1901 if let Some(cursor_off) = cursor_offset
1902 && (first_hunk
1903 || cursor_off >= new_text_byte_offset
1904 && cursor_off <= new_text_byte_offset + added_content.len())
1905 {
1906 let line_offset = added_content.floor_char_boundary(
1907 cursor_off
1908 .saturating_sub(new_text_byte_offset)
1909 .min(added_content.len()),
1910 );
1911 cursor_line_offset = Some(line_offset);
1912 }
1913
1914 new_text_byte_offset += added_content.len();
1915
1916 let hunk = current_hunk.get_or_insert(Hunk {
1917 line_range: old_line_index..old_line_index,
1918 new_text_lines: vec![],
1919 cursor_line_offset_in_new_text: None,
1920 });
1921 hunk.new_text_lines.push(added_content);
1922 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1923 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1924 } else {
1925 // Context line (starts with ' ' or is empty).
1926 if let Some(hunk) = current_hunk.take() {
1927 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1928 }
1929 last_old_line_before_hunk = Some(old_line_index);
1930 old_line_index += 1;
1931 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1932 new_text_byte_offset += content.len();
1933 }
1934 }
1935
1936 // Flush final group.
1937 if let Some(hunk) = current_hunk.take() {
1938 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1939 }
1940
1941 // Trim a single trailing newline.
1942 if result.ends_with('\n') {
1943 result.pop();
1944 }
1945
1946 if result.is_empty() {
1947 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1948 }
1949
1950 Ok(result)
1951 }
1952
1953 #[cfg(test)]
1954 mod tests {
1955 use super::*;
1956 use indoc::indoc;
1957
        // Table-driven check of `hashline::write_cursor_excerpt_section`: each
        // case renders one context/editable-range/cursor combination and
        // compares the complete prompt text, including the per-line
        // `{index}:{hash}|` prefixes and the cursor marker placement.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                name: &'static str,
                context: &'static str,
                editable_range: Range<usize>,
                cursor_offset: usize,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:5c|hello<|user_cursor|> world
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        2:29|ccc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:d9|lin<|user_cursor|>e1
                        1:da|line2
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = just after 'a' in "abc" (byte 0 is the leading \n)
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:00|
                        1:26|a<|user_cursor|>bc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|ab<|user_cursor|>c
                        <|fim_suffix|>
                        def
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:1b|hé<|user_cursor|>llo
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:80|日本<|user_cursor|>語
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:6b|a🌍<|user_cursor|>b
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        1:2f|def
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        pre
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        <|fim_suffix|>
                        suf
                        <|fim_middle|>updated
                    "},
                },
            ];

            // Render each case into a fresh prompt and compare the full text.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
2149
        /// Table-driven check of `hashline::apply_edit_commands`.
        ///
        /// Each case feeds `original` plus a `model_output` made of `<|set|>` /
        /// `<|insert|>` commands and asserts the exact resulting text. The commands
        /// address lines as `index:hash` (optionally `start-end` for a range), and
        /// the fixtures cover both well-formed commands and ones that are expected
        /// to be ignored (wrong hash, out-of-bounds index, malformed reference).
        #[test]
        fn test_apply_edit_commands() {
            struct Case {
                // Used only in the assertion message to identify a failing case.
                name: &'static str,
                // Text the commands are applied to.
                original: &'static str,
                // Raw model output containing zero or more edit commands.
                model_output: &'static str,
                // Exact text expected after applying the commands.
                expected: &'static str,
            }

            let cases = vec![
                // ---- Well-formed commands ----
                Case {
                    name: "set_single_line",
                    original: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    model_output: indoc! {"
                        <|set|>2:87
                            total += product.price;
                    "},
                    expected: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "set_range",
                    original: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:46-3:4a
                            let sum = 6;
                    "},
                    expected: indoc! {"
                        fn foo() {
                            let sum = 6;
                        }
                    "},
                },
                Case {
                    name: "insert_after_line",
                    original: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    model_output: indoc! {"
                        <|insert|>1:46
                            let y = 2;
                    "},
                    expected: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                // An `<|insert|>` with no line reference inserts before the first line.
                Case {
                    name: "insert_before_first",
                    original: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        use std::io;
                    "},
                    expected: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                // The cursor marker in the replacement text is kept verbatim.
                Case {
                    name: "set_with_cursor_marker",
                    original: indoc! {"
                        fn main() {
                            println!();
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:34
                            eprintln!(\"<|user_cursor|>\");
                    "},
                    expected: indoc! {"
                        fn main() {
                            eprintln!(\"<|user_cursor|>\");
                        }
                    "},
                },
                Case {
                    name: "multiple_set_commands",
                    original: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    model_output: indoc! {"
                        <|set|>0:23
                        AAA
                        <|set|>2:29
                        CCC
                    "},
                    expected: indoc! {"
                        AAA
                        bbb
                        CCC
                        ddd
                    "},
                },
                Case {
                    name: "set_range_multiline_replacement",
                    original: indoc! {"
                        fn handle_submit() {
                        }

                        fn handle_keystroke() {
                    "},
                    model_output: indoc! {"
                        <|set|>0:3f-1:7d
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }
                    "},
                    expected: indoc! {"
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }

                        fn handle_keystroke() {
                    "},
                },
                // ---- Outputs without effective commands leave the text unchanged ----
                Case {
                    name: "no_edit_commands_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "some random text with no commands",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "no_edits_command_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "<|no_edits|>",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "wrong_hash_set_ignored",
                    original: indoc! {"
                        aaa
                        bbb
                    "},
                    model_output: indoc! {"
                        <|set|>0:ff
                        ZZZ
                    "},
                    expected: indoc! {"
                        aaa
                        bbb
                    "},
                },
                Case {
                    name: "insert_and_set_combined",
                    original: indoc! {"
                        alpha
                        beta
                        gamma
                    "},
                    model_output: indoc! {"
                        <|set|>0:06
                        ALPHA
                        <|insert|>1:9c
                        beta_extra
                    "},
                    expected: indoc! {"
                        ALPHA
                        beta
                        beta_extra
                        gamma
                    "},
                },
                // The original has no trailing newline and neither does the result.
                Case {
                    name: "no_trailing_newline_preserved",
                    original: "hello\nworld",
                    model_output: indoc! {"
                        <|set|>0:14
                        HELLO
                    "},
                    expected: "HELLO\nworld",
                },
                // ---- Invalid references ----
                Case {
                    name: "set_range_hash_mismatch_in_end_bound",
                    original: indoc! {"
                        one
                        two
                        three
                    "},
                    model_output: indoc! {"
                        <|set|>0:42-2:ff
                        ONE_TWO_THREE
                    "},
                    expected: indoc! {"
                        one
                        two
                        three
                    "},
                },
                Case {
                    name: "set_range_start_greater_than_end_ignored",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|set|>2:63-1:62
                        X
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                    "},
                },
                Case {
                    name: "insert_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|insert|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "set_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|set|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "malformed_set_command_ignored",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|set|>not-a-line-ref
                        UPDATED
                    "},
                    expected: indoc! {"
                        alpha
                        beta
                    "},
                },
                // Unlike a malformed `<|set|>`, a malformed `<|insert|>` reference is
                // expected to behave like an insert before the first line.
                Case {
                    name: "malformed_insert_hash_treated_as_before_first",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|insert|>1:nothex
                        preamble
                    "},
                    expected: indoc! {"
                        preamble
                        alpha
                        beta
                    "},
                },
                // ---- Interactions between commands ----
                Case {
                    name: "set_then_insert_same_target_orders_insert_after_replacement",
                    original: indoc! {"
                        cat
                        dog
                    "},
                    model_output: indoc! {"
                        <|set|>0:38
                        CAT
                        <|insert|>0:38
                        TAIL
                    "},
                    expected: indoc! {"
                        CAT
                        TAIL
                        dog
                    "},
                },
                // NOTE(review): despite the case name, the expected output keeps the
                // text of the FIRST command and drops the second; confirm which of the
                // name or the fixture reflects the intended behavior.
                Case {
                    name: "overlapping_set_ranges_last_wins",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        <|set|>0:61-2:63
                        FIRST
                        <|set|>1:62-3:64
                        SECOND
                    "},
                    expected: indoc! {"
                        FIRST
                        d
                    "},
                },
                Case {
                    name: "insert_before_first_and_after_line",
                    original: indoc! {"
                        a
                        b
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        HEAD
                        <|insert|>0:61
                        MID
                    "},
                    expected: indoc! {"
                        HEAD
                        a
                        MID
                        b
                    "},
                },
            ];

            // Apply every case and compare against the exact expected text.
            for case in &cases {
                let result = hashline::apply_edit_commands(case.original, &case.model_output);
                assert_eq!(result, case.expected, "failed case: {}", case.name);
            }
        }
2520
2521 #[test]
2522 fn test_output_has_edit_commands() {
2523 assert!(hashline::output_has_edit_commands(&format!(
2524 "{}0:ab\nnew",
2525 SET_COMMAND_MARKER
2526 )));
2527 assert!(hashline::output_has_edit_commands(&format!(
2528 "{}0:ab\nnew",
2529 INSERT_COMMAND_MARKER
2530 )));
2531 assert!(hashline::output_has_edit_commands(&format!(
2532 "some text\n{}1:cd\nstuff",
2533 SET_COMMAND_MARKER
2534 )));
2535 assert!(!hashline::output_has_edit_commands("just plain text"));
2536 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2537 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2538 }
2539
2540 // ---- hashline::patch_to_edit_commands round-trip tests ----
2541
        /// Round-trip check for `hashline::patch_to_edit_commands`.
        ///
        /// For each case a unified-diff `patch` against `old` is converted into edit
        /// commands, the commands are applied back to `old` with
        /// `hashline::apply_edit_commands`, and the result must equal `expected_new`.
        /// If `expected_new` contains the cursor marker, its byte position is passed
        /// as the cursor offset for the conversion.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Used only in failure messages.
                name: &'static str,
                // Text the patch applies to.
                old: &'static str,
                // Unified diff (hunk header plus ` `/`-`/`+` lines).
                patch: &'static str,
                // Expected text after converting the patch and applying the commands.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                // Two separate changes inside a single hunk.
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn
                        
                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                // The only case whose expected output contains the cursor marker, so
                // the only one that passes `Some(offset)` to the conversion.
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                // The hunk header's line numbers (20) do not match where the context
                // sits in `old`; the patch is still expected to apply.
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                // A hunk header with no body lines must leave the text unchanged.
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every conversion result, including the empty-patch one, must be
                // recognized as containing edit commands.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Round trip: applying the generated commands reproduces the patch.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2840 }
2841}
2842
2843pub mod seed_coder {
2844 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2845 //!
2846 //! Seed-Coder uses different FIM tokens and order than Qwen:
2847 //! - SPM order: suffix comes FIRST, then prefix, then middle
2848 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2849 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2850 //!
2851 //! All context (related files, edit history) goes in the PREFIX section.
2852 //! The suffix contains only code after the editable region.
2853 //!
2854 //! Example prompt:
2855 //!
2856 //! <[fim-suffix]>
2857 //! code after editable region
2858 //! <[fim-prefix]><filename>related/file.py
2859 //! related file content
2860 //!
2861 //! <filename>edit_history
2862 //! --- a/some_file.py
2863 //! +++ b/some_file.py
2864 //! -old
2865 //! +new
2866 //!
2867 //! <filename>path/to/target_file.py
2868 //! code before editable region
2869 //! <<<<<<< CURRENT
2870 //! code that
2871 //! needs to<|user_cursor|>
2872 //! be rewritten
2873 //! =======
2874 //! <[fim-middle]>
2875 //!
2876 //! Expected output (model generates):
2877 //!
2878 //! updated
2879 //! code with
2880 //! changes applied
2881 //! >>>>>>> UPDATED
2882
2883 use super::*;
2884
    /// Token that opens the suffix section, which comes first in SPM order.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, cursor file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written immediately before a file path (or section name).
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the current text of the editable region in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that closes the current text of the editable region in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the updated text in the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used by [`no_edits`] to express "no changes".
    pub const NO_EDITS: &str = "NO_EDITS\n";
2895
2896 pub fn special_tokens() -> &'static [&'static str] {
2897 &[
2898 FIM_SUFFIX,
2899 FIM_PREFIX,
2900 FIM_MIDDLE,
2901 FILE_MARKER,
2902 START_MARKER,
2903 SEPARATOR,
2904 END_MARKER,
2905 CURSOR_MARKER,
2906 ]
2907 }
2908
2909 pub fn write_cursor_excerpt_section(
2910 prompt: &mut String,
2911 path: &Path,
2912 context: &str,
2913 editable_range: &Range<usize>,
2914 cursor_offset: usize,
2915 ) {
2916 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2917 prompt.push_str(§ion);
2918 }
2919
2920 pub fn format_prompt_with_budget(
2921 path: &Path,
2922 context: &str,
2923 editable_range: &Range<usize>,
2924 cursor_offset: usize,
2925 events: &[Arc<Event>],
2926 related_files: &[RelatedFile],
2927 max_tokens: usize,
2928 ) -> String {
2929 let cursor_prefix_section =
2930 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2931 assemble_fim_prompt(
2932 context,
2933 editable_range,
2934 &cursor_prefix_section,
2935 events,
2936 related_files,
2937 max_tokens,
2938 )
2939 }
2940
2941 pub fn assemble_fim_prompt(
2942 context: &str,
2943 editable_range: &Range<usize>,
2944 cursor_prefix_section: &str,
2945 events: &[Arc<Event>],
2946 related_files: &[RelatedFile],
2947 max_tokens: usize,
2948 ) -> String {
2949 let suffix_section = build_suffix_section(context, editable_range);
2950
2951 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2952 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2953 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2954
2955 let edit_history_section = super::format_edit_history_within_budget(
2956 events,
2957 FILE_MARKER,
2958 "edit_history",
2959 budget_after_cursor,
2960 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2961 );
2962 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2963 let budget_after_edit_history =
2964 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2965
2966 let related_files_section = super::format_related_files_within_budget(
2967 related_files,
2968 FILE_MARKER,
2969 "",
2970 budget_after_edit_history,
2971 );
2972
2973 let mut prompt = String::new();
2974 prompt.push_str(&suffix_section);
2975 prompt.push_str(FIM_PREFIX);
2976 prompt.push_str(&related_files_section);
2977 if !related_files_section.is_empty() {
2978 prompt.push('\n');
2979 }
2980 prompt.push_str(&edit_history_section);
2981 if !edit_history_section.is_empty() {
2982 prompt.push('\n');
2983 }
2984 prompt.push_str(cursor_prefix_section);
2985 prompt.push_str(FIM_MIDDLE);
2986
2987 prompt
2988 }
2989
2990 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2991 let mut section = String::new();
2992 section.push_str(FIM_SUFFIX);
2993 section.push_str(&context[editable_range.end..]);
2994 if !section.ends_with('\n') {
2995 section.push('\n');
2996 }
2997 section
2998 }
2999
3000 fn build_cursor_prefix_section(
3001 path: &Path,
3002 context: &str,
3003 editable_range: &Range<usize>,
3004 cursor_offset: usize,
3005 ) -> String {
3006 let mut section = String::new();
3007 let path_str = path.to_string_lossy();
3008 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3009
3010 section.push_str(&context[..editable_range.start]);
3011 section.push_str(START_MARKER);
3012 section.push_str(&context[editable_range.start..cursor_offset]);
3013 section.push_str(CURSOR_MARKER);
3014 section.push_str(&context[cursor_offset..editable_range.end]);
3015 if !section.ends_with('\n') {
3016 section.push('\n');
3017 }
3018 section.push_str(SEPARATOR);
3019 section
3020 }
3021
3022 /// Format patch as containing no changes if it's empty; otherwise return None.
3023 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3024 // Count lines in the patch
3025 let empty_patch = patch.lines().count() <= 3;
3026 if empty_patch {
3027 Some(format!("{NO_EDITS}{END_MARKER}"))
3028 } else {
3029 None
3030 }
3031 }
3032}
3033
3034pub mod v0304_variable_edit {
3035 //! A prompt format with no fixed editable region. The entire context is shown
3036 //! to the model, and it chooses which text to replace by outputting surrounding
3037 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3038 //! text.
3039 //!
3040 //! Example prompt:
3041 //!
3042 //! <|file_sep|>path/to/file.py
3043 //! zero
3044 //! one
3045 //! two
3046 //! three<|user_cursor|>
3047 //! four
3048 //! five
3049 //! <|fim_prefix|>
    //!
3051 //! Expected output (model generates):
3052 //!
3053 //! two
3054 //! <|fim_middle|>
3055 //! THREE
3056 //! <|fim_suffix|>
3057 //! four
3058 //!
3059 //! The output means: find "two\n...\nfour" in the context, and replace
3060 //! everything between "two\n" and "four" with "THREE\n".
3061
3062 use super::*;
3063
3064 pub fn special_tokens() -> &'static [&'static str] {
3065 &[
3066 "<|fim_prefix|>",
3067 "<|fim_suffix|>",
3068 "<|fim_middle|>",
3069 "<|file_sep|>",
3070 CURSOR_MARKER,
3071 ]
3072 }
3073
3074 pub fn write_cursor_excerpt_section(
3075 prompt: &mut String,
3076 path: &Path,
3077 context: &str,
3078 cursor_offset: usize,
3079 ) {
3080 let path_str = path.to_string_lossy();
3081 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3082
3083 prompt.push_str(&context[..cursor_offset]);
3084 prompt.push_str(CURSOR_MARKER);
3085 prompt.push_str(&context[cursor_offset..]);
3086 if !prompt.ends_with('\n') {
3087 prompt.push('\n');
3088 }
3089 prompt.push_str("<|fim_prefix|>\n")
3090 }
3091
3092 /// Apply a variable-edit model output to the original context text.
3093 ///
3094 /// The model output has the form:
3095 ///
3096 /// - prefix context lines
3097 /// - `<|fim_middle|>`
3098 /// - new text
3099 /// - `<|fim_suffix|>`
3100 /// - suffix context lines
3101 ///
3102 /// We locate the prefix/suffix context lines in the original text and replace
3103 /// everything between them with the new text.
3104 pub fn apply_variable_edit(
3105 context: &str,
3106 model_output: &str,
3107 ) -> Result<(Range<usize>, String)> {
3108 let (prefix_context, rest) = model_output
3109 .split_once("<|fim_middle|>\n")
3110 .or_else(|| model_output.split_once("<|fim_middle|>"))
3111 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3112
3113 let (new_text, suffix_context) = rest
3114 .split_once("<|fim_suffix|>\n")
3115 .or_else(|| rest.split_once("<|fim_suffix|>"))
3116 .unwrap_or((rest, ""));
3117
3118 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3119 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3120 } else {
3121 suffix_context
3122 };
3123
3124 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3125 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3126 + prefix_context.len();
3127 let suffix_offset = if suffix_context.is_empty() {
3128 context.len()
3129 } else {
3130 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3131 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3132 + prefix_offset
3133 };
3134
3135 let edit_range = prefix_offset..suffix_offset;
3136 return Ok((edit_range, new_text.to_string()));
3137 }
3138
3139 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3140 if needle.is_empty() {
3141 return Some(0);
3142 }
3143
3144 haystack.match_indices(needle).find_map(|(offset, _)| {
3145 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3146 matched_line_start.then_some(offset)
3147 })
3148 }
3149
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// All hunks are applied first; the output then describes a single edit
    /// spanning from the first changed line to the last changed line, so
    /// unchanged lines between two hunks end up inside the replacement text.
    ///
    /// `cursor_offset`, when given, is a byte offset relative to where the first
    /// hunk's context was found; the cursor marker is inserted into the
    /// replacement text only if that position falls inside it.
    ///
    /// Returns an empty string when the patch contains no hunks or when applying
    /// it does not change any line.
    ///
    /// # Errors
    ///
    /// Fails when a hunk's old context cannot be found in the text at or after
    /// the end of the previously applied hunk.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        // Byte position where the first hunk's context was found; the anchor
        // for `cursor_offset`.
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text: the
            // old context length adjusted by each edit's size delta.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, the change starts right after the shorter one.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). The common suffix is capped
        // so it never overlaps the lines before `first_changed_row`.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        // Exclusive end rows of the changed region in the old and new text.
        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Both changed regions are empty: the patch was a no-op.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_changed_row..new_end].
        // Every line is newline-terminated, regardless of how new_text ended.
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_changed_row..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_changed_row starts in new_text (assumes
            // one-byte `\n` line terminators).
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // A cursor that falls outside the replacement text is dropped.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // anywhere in `lines` (including the run itself).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique in the old text, or
        // until the start / end of the text is reached.
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context, middle marker, replacement text, suffix marker,
        // suffix context.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3308
    /// One hunk of a unified diff, expressed by content rather than line numbers.
    struct ParsedHunk {
        /// The hunk's context and deleted lines in order, each newline-terminated;
        /// this is the text that is searched for in the original.
        old_context: String,
        /// Replacements to perform, as byte ranges within `old_context`.
        edits: Vec<ParsedEdit>,
    }
3313
    /// A single replacement inside a [`ParsedHunk`].
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` to replace; empty for a
        /// pure insertion.
        range: Range<usize>,
        /// Replacement text (added lines, each newline-terminated); empty for a
        /// pure deletion.
        text: String,
    }
3318
3319 /// Parse a unified diff into content-based hunks. Each hunk contains an
3320 /// `old_context` string (context lines + deleted lines, which together
3321 /// form the text that should be found in the original) and a list of edits
3322 /// expressed as byte ranges within that context.
3323 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3324 let mut hunks = Vec::new();
3325 let mut current: Option<ParsedHunk> = None;
3326
3327 for line in patch.lines() {
3328 if line.starts_with("@@") {
3329 if let Some(hunk) = current.take() {
3330 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3331 hunks.push(hunk);
3332 }
3333 }
3334 current = Some(ParsedHunk {
3335 old_context: String::new(),
3336 edits: Vec::new(),
3337 });
3338 } else if line.starts_with("---") || line.starts_with("+++") {
3339 continue;
3340 } else if let Some(hunk) = &mut current {
3341 if let Some(added) = line.strip_prefix('+') {
3342 let pos = hunk.old_context.len();
3343 if let Some(last_edit) = hunk.edits.last_mut() {
3344 if last_edit.range.end == pos {
3345 writeln!(&mut last_edit.text, "{added}").ok();
3346 continue;
3347 }
3348 }
3349 hunk.edits.push(ParsedEdit {
3350 range: pos..pos,
3351 text: format!("{added}\n"),
3352 });
3353 } else if let Some(removed) = line.strip_prefix('-') {
3354 let start = hunk.old_context.len();
3355 writeln!(&mut hunk.old_context, "{removed}").ok();
3356 let end = hunk.old_context.len();
3357 if let Some(last_edit) = hunk.edits.last_mut() {
3358 if last_edit.range.end == start {
3359 last_edit.range.end = end;
3360 continue;
3361 }
3362 }
3363 hunk.edits.push(ParsedEdit {
3364 range: start..end,
3365 text: String::new(),
3366 });
3367 } else {
3368 let ctx = line.strip_prefix(' ').unwrap_or(line);
3369 writeln!(&mut hunk.old_context, "{ctx}").ok();
3370 }
3371 }
3372 }
3373
3374 if let Some(hunk) = current {
3375 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3376 hunks.push(hunk);
3377 }
3378 }
3379
3380 hunks
3381 }
3382
3383 #[cfg(test)]
3384 mod tests {
3385 use super::*;
3386 use indoc::indoc;
3387
        /// Table-driven check of `apply_variable_edit`.
        ///
        /// Each case resolves `model_output` against `original`, applies the
        /// returned range/replacement pair to a copy of `original`, and compares
        /// the result with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Shown in the assertion message of a failing case.
                name: &'static str,
                // Context text the model output refers to.
                original: &'static str,
                // Prefix lines, `<|fim_middle|>`, new text, `<|fim_suffix|>`, suffix lines.
                model_output: &'static str,
                // Text expected after applying the resolved edit.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                // Prefix and suffix are adjacent in the original: pure insertion.
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                // Empty new text removes everything between prefix and suffix.
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                // No prefix lines: the edit starts at the beginning of the text.
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                // No suffix lines: the edit extends to the end of the text.
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                // The cursor marker is part of the new text and is not interpreted.
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                // Every case here must resolve; a resolution error fails the test.
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3604
#[test]
fn test_patch_to_variable_edit() {
    // Table-driven round-trip test. For every case:
    //   1. `patch_to_variable_edit_output` converts the unified-diff `patch`
    //      (plus an optional cursor offset) into variable-edit text, which
    //      must equal `expected_variable_edit`.
    //   2. That output is applied to `old` with `apply_variable_edit` and the
    //      result must equal `expected_after_apply`.
    //   3. The hand-written `expected_variable_edit` is applied as well, so
    //      the expectation itself is checked for self-consistency.
    struct Case {
        // Unique label used in assertion and panic messages.
        name: &'static str,
        // Text before the patch is applied.
        old: &'static str,
        // Unified-diff hunk describing the change.
        patch: &'static str,
        // Optional cursor offset forwarded to `patch_to_variable_edit_output`.
        cursor_offset: Option<usize>,
        // Expected variable-edit text produced from `patch`.
        expected_variable_edit: &'static str,
        // Expected text after applying the variable edit to `old`.
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
 zero
 one
 two
 three
 four
 five
 "},
            patch: indoc! {"
 @@ -3,3 +3,3 @@
 two
 -three
 +THREE
 four
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 one
 two
 <|fim_middle|>
 THREE
 <|fim_suffix|>
 four
 five
 "},
            expected_after_apply: indoc! {"
 zero
 one
 two
 THREE
 four
 five
 "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
 a
 b
 c
 d
 e
 "},
            patch: indoc! {"
 @@ -2,0 +3,1 @@
 b
 +X
 c
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 a
 b
 <|fim_middle|>
 X
 <|fim_suffix|>
 c
 d
 "},
            expected_after_apply: indoc! {"
 a
 b
 X
 c
 d
 e
 "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
 a
 b
 c
 d
 e
 "},
            patch: indoc! {"
 @@ -2,3 +2,2 @@
 b
 -c
 d
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 a
 b
 <|fim_middle|>
 <|fim_suffix|>
 d
 e
 "},
            expected_after_apply: indoc! {"
 a
 b
 d
 e
 "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
 first
 second
 third
 fourth
 "},
            patch: indoc! {"
 @@ -1,1 +1,1 @@
 -first
 +FIRST
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 <|fim_middle|>
 FIRST
 <|fim_suffix|>
 second
 third
 "},
            expected_after_apply: indoc! {"
 FIRST
 second
 third
 fourth
 "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
 first
 second
 third
 fourth
 "},
            patch: indoc! {"
 @@ -4,1 +4,1 @@
 -fourth
 +FOURTH
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 second
 third
 <|fim_middle|>
 FOURTH
 <|fim_suffix|>
 "},
            expected_after_apply: indoc! {"
 first
 second
 third
 FOURTH
 "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
 zero
 one
 two
 three
 four
 five
 "},
            patch: indoc! {"
 @@ -3,3 +3,3 @@
 two
 -three
 +THREE
 four
 "},
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
 one
 two
 <|fim_middle|>
 <|user_cursor|>THREE
 <|fim_suffix|>
 four
 five
 "},
            expected_after_apply: indoc! {"
 zero
 one
 two
 <|user_cursor|>THREE
 four
 five
 "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
 zero
 one
 two
 three
 four
 five
 "},
            patch: indoc! {"
 @@ -3,3 +3,3 @@
 two
 -three
 +THREE
 four
 "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
 one
 two
 <|fim_middle|>
 TH<|user_cursor|>REE
 <|fim_suffix|>
 four
 five
 "},
            expected_after_apply: indoc! {"
 zero
 one
 two
 TH<|user_cursor|>REE
 four
 five
 "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
 one
 a
 b
 c
 d
 two
 a
 b
 c
 d
 three
 a
 b
 c
 d
 four
 "},
            patch: indoc! {"
 @@ -4,5 +4,5 @@
 two
 a
 b
 -c
 +C
 d
 three
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 two
 a
 b
 <|fim_middle|>
 C
 <|fim_suffix|>
 d
 three
 "},
            expected_after_apply: indoc! {"
 one
 a
 b
 c
 d
 two
 a
 b
 C
 d
 three
 a
 b
 c
 d
 four
 "},
        },
        Case {
            // Previously shared its name with the case above, which made
            // failure messages ambiguous; every case name is now unique.
            name: "expands_context_for_repeated_nested_braces",
            old: indoc! {"
 {
 {
 one();
 }
 }
 {
 {
 two();
 }
 }
 {
 {
 three();
 }
 }
 {
 {
 four();
 }
 }
 "},
            patch: indoc! {"
 @@ -4,5 +4,5 @@
 {
 - two();
 + TWO();
 }
 "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
 one();
 }
 }
 {
 {
 <|fim_middle|>
 TWO();
 <|fim_suffix|>
 }
 }
 {
 {
 three();
 "},
            expected_after_apply: indoc! {"
 {
 {
 one();
 }
 }
 {
 {
 TWO();
 }
 }
 {
 {
 three();
 }
 }
 {
 {
 four();
 }
 }
 "},
        },
    ];

    for case in cases {
        // Step 1: patch -> variable-edit text.
        let output =
            patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
                .unwrap_or_else(|error| {
                    panic!("failed converting patch for {}: {error}", case.name)
                });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // Step 2: apply the generated variable edit to the original text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // Step 3: apply the hand-written expectation too, so a wrong
        // expectation cannot hide behind a matching generated output.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
4021
#[test]
fn test_write_cursor_excerpt_section() {
    // Checks that `write_cursor_excerpt_section` emits the file header, the
    // excerpt with the cursor marker spliced in at `cursor_offset`, and the
    // trailing `<|fim_prefix|>` line.
    let path = Path::new("test.rs");
    // "fn main() {\n" is 12 bytes, so offset 17 = 12 + 4 (indent) + 1 lands
    // right after the `h` of `hello`; the four-space indent is required for
    // the offset and the expected output below to agree.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;
    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
4034 }
4035}
4036
4037/// The zeta1 prompt format
4038pub mod zeta1 {
4039 use super::*;
4040 use std::fmt::Write;
4041
4042 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4043 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4044 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4045 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4046
4047 const INSTRUCTION_HEADER: &str = concat!(
4048 "### Instruction:\n",
4049 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4050 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4051 "into account the cursor location.\n\n",
4052 "### User Edits:\n\n"
4053 );
4054 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4055 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4056
4057 /// Formats a complete zeta1 prompt from the input events and excerpt.
4058 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4059 let mut prompt = String::with_capacity(
4060 INSTRUCTION_HEADER.len()
4061 + input_events.len()
4062 + EXCERPT_HEADER.len()
4063 + input_excerpt.len()
4064 + RESPONSE_HEADER.len(),
4065 );
4066 prompt.push_str(INSTRUCTION_HEADER);
4067 prompt.push_str(input_events);
4068 prompt.push_str(EXCERPT_HEADER);
4069 prompt.push_str(input_excerpt);
4070 prompt.push_str(RESPONSE_HEADER);
4071 prompt
4072 }
4073
4074 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4075 /// editable and context byte-offset ranges within `cursor_excerpt`.
4076 pub fn format_zeta1_from_input(
4077 input: &ZetaPromptInput,
4078 editable_range: Range<usize>,
4079 context_range: Range<usize>,
4080 ) -> String {
4081 let events = format_zeta1_events(&input.events);
4082 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4083 format_zeta1_prompt(&events, &excerpt)
4084 }
4085
4086 /// Formats events in zeta1 style (oldest first).
4087 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4088 let mut result = String::new();
4089 for event in
4090 events
4091 .iter()
4092 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4093 &ZetaFormat::V0114180EditableRegion,
4094 )))
4095 {
4096 let event_string = format_zeta1_event(event);
4097 if event_string.is_empty() {
4098 continue;
4099 }
4100 if !result.is_empty() {
4101 result.push_str("\n\n");
4102 }
4103 result.push_str(&event_string);
4104 }
4105 result
4106 }
4107
4108 fn format_zeta1_event(event: &Event) -> String {
4109 match event {
4110 Event::BufferChange {
4111 path,
4112 old_path,
4113 diff,
4114 ..
4115 } => {
4116 let mut prompt = String::new();
4117 if old_path != path {
4118 writeln!(
4119 prompt,
4120 "User renamed {} to {}\n",
4121 old_path.display(),
4122 path.display()
4123 )
4124 .ok();
4125 }
4126 if !diff.is_empty() {
4127 write!(
4128 prompt,
4129 "User edited {}:\n```diff\n{}\n```",
4130 path.display(),
4131 diff
4132 )
4133 .ok();
4134 }
4135 prompt
4136 }
4137 }
4138 }
4139
4140 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4141 /// within `cursor_excerpt`.
4142 fn format_zeta1_excerpt(
4143 input: &ZetaPromptInput,
4144 editable_range: Range<usize>,
4145 context_range: Range<usize>,
4146 ) -> String {
4147 let path_str = input.cursor_path.to_string_lossy();
4148 let excerpt = &*input.cursor_excerpt;
4149 let cursor_offset = input.cursor_offset_in_excerpt;
4150
4151 let mut prompt = String::new();
4152 writeln!(&mut prompt, "```{path_str}").ok();
4153
4154 let starts_at_file_beginning =
4155 input.excerpt_start_row == Some(0) && context_range.start == 0;
4156 if starts_at_file_beginning {
4157 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4158 }
4159
4160 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4161
4162 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4163 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4164 prompt.push_str(CURSOR_MARKER);
4165 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4166 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4167
4168 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4169 write!(prompt, "\n```").ok();
4170
4171 prompt
4172 }
4173
4174 /// Cleans zeta1 model output by extracting content between editable region
4175 /// markers and converting the zeta1 cursor marker to the universal one.
4176 /// Returns `None` if the output doesn't contain the expected markers.
4177 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4178 let content = output.replace(CURSOR_MARKER, "");
4179
4180 let content_start = content
4181 .find(EDITABLE_REGION_START_MARKER)
4182 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4183 .map(|pos| {
4184 if content.as_bytes().get(pos) == Some(&b'\n') {
4185 pos + 1
4186 } else {
4187 pos
4188 }
4189 })
4190 .unwrap_or(0);
4191
4192 let content_end = content
4193 .find(EDITABLE_REGION_END_MARKER)
4194 .map(|pos| {
4195 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4196 pos - 1
4197 } else {
4198 pos
4199 }
4200 })
4201 .unwrap_or(content.len());
4202
4203 if content_start > content_end {
4204 return Some(String::new());
4205 }
4206
4207 let extracted = &content[content_start..content_end];
4208
4209 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4210 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4211 let text_before_cursor = text_before_cursor
4212 .find(EDITABLE_REGION_START_MARKER)
4213 .map(|pos| {
4214 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4215 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4216 after_marker + 1
4217 } else {
4218 after_marker
4219 }
4220 })
4221 .unwrap_or(0);
4222 let offset_in_extracted = zeta1_cursor_pos
4223 .saturating_sub(text_before_cursor)
4224 .min(extracted.len());
4225 offset_in_extracted
4226 });
4227
4228 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4229 if let Some(offset) = cursor_offset {
4230 result.push_str(&extracted[..offset]);
4231 result.push_str(super::CURSOR_MARKER);
4232 result.push_str(&extracted[offset..]);
4233 } else {
4234 result.push_str(extracted);
4235 }
4236
4237 Some(result)
4238 }
4239}
4240
4241#[cfg(test)]
4242mod tests {
4243 use super::*;
4244 use indoc::indoc;
4245
4246 fn make_input(
4247 cursor_excerpt: &str,
4248 editable_range: Range<usize>,
4249 cursor_offset: usize,
4250 events: Vec<Event>,
4251 related_files: Vec<RelatedFile>,
4252 ) -> ZetaPromptInput {
4253 let context_range = 0..cursor_excerpt.len();
4254 ZetaPromptInput {
4255 cursor_path: Path::new("test.rs").into(),
4256 cursor_excerpt: cursor_excerpt.into(),
4257 cursor_offset_in_excerpt: cursor_offset,
4258 excerpt_start_row: None,
4259 events: events.into_iter().map(Arc::new).collect(),
4260 related_files: Some(related_files),
4261 active_buffer_diagnostics: vec![],
4262 excerpt_ranges: ExcerptRanges {
4263 editable_150: editable_range.clone(),
4264 editable_180: editable_range.clone(),
4265 editable_350: editable_range,
4266 editable_150_context_350: context_range.clone(),
4267 editable_180_context_350: context_range.clone(),
4268 editable_350_context_150: context_range,
4269 ..Default::default()
4270 },
4271 syntax_ranges: None,
4272 experiment: None,
4273 in_open_source_repo: false,
4274 can_collect_data: false,
4275 repo_url: None,
4276 }
4277 }
4278
4279 fn make_input_with_context_range(
4280 excerpt: &str,
4281 editable_range: Range<usize>,
4282 context_range: Range<usize>,
4283 cursor_offset: usize,
4284 ) -> ZetaPromptInput {
4285 ZetaPromptInput {
4286 cursor_path: Path::new("test.rs").into(),
4287 cursor_excerpt: excerpt.into(),
4288 cursor_offset_in_excerpt: cursor_offset,
4289 excerpt_start_row: None,
4290 events: vec![],
4291 related_files: Some(vec![]),
4292 active_buffer_diagnostics: vec![],
4293 excerpt_ranges: ExcerptRanges {
4294 editable_150: editable_range.clone(),
4295 editable_180: editable_range.clone(),
4296 editable_350: editable_range,
4297 editable_150_context_350: context_range.clone(),
4298 editable_180_context_350: context_range.clone(),
4299 editable_350_context_150: context_range,
4300 ..Default::default()
4301 },
4302 syntax_ranges: None,
4303 experiment: None,
4304 in_open_source_repo: false,
4305 can_collect_data: false,
4306 repo_url: None,
4307 }
4308 }
4309
4310 fn make_event(path: &str, diff: &str) -> Event {
4311 Event::BufferChange {
4312 path: Path::new(path).into(),
4313 old_path: Path::new(path).into(),
4314 diff: diff.to_string(),
4315 predicted: false,
4316 in_open_source_repo: false,
4317 }
4318 }
4319
4320 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4321 RelatedFile {
4322 path: Path::new(path).into(),
4323 max_row: content.lines().count() as u32,
4324 excerpts: vec![RelatedExcerpt {
4325 row_range: 0..content.lines().count() as u32,
4326 text: content.into(),
4327 order: 0,
4328 }],
4329 in_open_source_repo: false,
4330 }
4331 }
4332
4333 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4334 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4335 }
4336
/// Returns `requested_tokens / 0.9`, rounded up to the next whole token.
fn budget_with_margin(requested_tokens: usize) -> usize {
    let scaled = requested_tokens as f64 / 0.9;
    scaled.ceil() as usize
}
4340
#[test]
fn test_no_truncation_when_within_budget() {
    // With a 10000-token budget nothing is dropped: the expected prompt
    // contains the related file, the edit history and the cursor file with
    // its prefix / editable / suffix parts.
    let input = make_input(
        "prefix\neditable\nsuffix",
        7..15,
        10,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "fn helper() {}\n")],
    );

    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>related.rs
 fn helper() {}
 <|file_sep|>edit history
 --- a/a.rs
 +++ b/a.rs
 -old
 +new
 <|file_sep|>test.rs
 <|fim_prefix|>
 prefix
 <|fim_middle|>current
 edi<|user_cursor|>table
 <|fim_suffix|>

 suffix
 <|fim_middle|>updated
 "#}
        .to_string()
    );
}
4374
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    // Compares the prompt for the same input under a generous budget and
    // under `budget_with_margin(55)`.
    // NOTE(review): despite the test name, the tight-budget expectation below
    // keeps the edit history and omits both related files.
    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![
            make_related_file("r1.rs", "aaaaaaa\n"),
            make_related_file("r2.rs", "bbbbbbb\n"),
        ],
    );

    // Generous budget: related files, edit history and cursor section.
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>r1.rs
 aaaaaaa
 <|file_sep|>r2.rs
 bbbbbbb
 <|file_sep|>edit history
 --- a/a.rs
 +++ b/a.rs
 -x
 +y
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 co<|user_cursor|>de
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );

    // Tight budget: only the edit history and cursor section remain.
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)),
        Some(
            indoc! {r#"
 <|file_sep|>edit history
 --- a/a.rs
 +++ b/a.rs
 -x
 +y
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 co<|user_cursor|>de
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
            .to_string()
        )
    );
}
4430
#[test]
fn test_truncation_includes_partial_excerpts() {
    // One related file with three excerpts of equal order. A generous budget
    // renders all three separated by `...`; with `budget_with_margin(50)`
    // the expected prompt keeps only the first excerpt (followed by `...`).
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![RelatedFile {
            path: Path::new("big.rs").into(),
            max_row: 30,
            in_open_source_repo: false,
            excerpts: vec![
                RelatedExcerpt {
                    row_range: 0..10,
                    text: "first excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 10..20,
                    text: "second excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 20..30,
                    text: "third excerpt\n".into(),
                    order: 0,
                },
            ],
        }],
    );

    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>big.rs
 first excerpt
 ...
 second excerpt
 ...
 third excerpt
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );

    assert_eq!(
        format_with_budget(&input, budget_with_margin(50)).unwrap(),
        indoc! {r#"
 <|file_sep|>big.rs
 first excerpt
 ...
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );
}
4497
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Two files: file_a has a high-order excerpt, file_b has a low-order one.
    // With tight budget, only the lower-order excerpt from file_b should be included.
    // (The excerpt texts label priority, which is the inverse of `order`:
    // order 5 is "low priority", order 1 is "high priority".)
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![
            RelatedFile {
                path: Path::new("file_a.rs").into(),
                max_row: 10,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..10,
                    text: "low priority content\n".into(),
                    order: 5,
                }],
            },
            RelatedFile {
                path: Path::new("file_b.rs").into(),
                max_row: 10,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..10,
                    text: "high priority content\n".into(),
                    order: 1,
                }],
            },
        ],
    );

    // With large budget, both files included; rendered in stable lexicographic order.
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>file_a.rs
 low priority content
 <|file_sep|>file_b.rs
 high priority content
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    assert_eq!(
        format_with_budget(&input, budget_with_margin(52)).unwrap(),
        indoc! {r#"
 <|file_sep|>file_b.rs
 high priority content
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );
}
4568
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // A single file has excerpts at order 1 and order 3. With a tight budget,
    // only the order-1 excerpts are included while the order-3 excerpt is
    // dropped — even though they belong to the same file. This also preserves
    // the parent invariant: parent outline items have order ≤ their best
    // child, so they're always included when any child is.
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![RelatedFile {
            path: Path::new("mod.rs").into(),
            max_row: 30,
            in_open_source_repo: false,
            excerpts: vec![
                RelatedExcerpt {
                    row_range: 0..5,
                    text: "mod header\n".into(),
                    order: 1,
                },
                RelatedExcerpt {
                    row_range: 5..15,
                    text: "important fn\n".into(),
                    order: 1,
                },
                RelatedExcerpt {
                    row_range: 15..30,
                    text: "less important fn\n".into(),
                    order: 3,
                },
            ],
        }],
    );

    // With large budget, all three excerpts included.
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>mod.rs
 mod header
 ...
 important fn
 ...
 less important fn
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );

    // With tight budget, only order<=1 excerpts included (header + important fn).
    // The expected text still ends the file section with `...` where the
    // dropped order-3 excerpt would have followed.
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)).unwrap(),
        indoc! {r#"
 <|file_sep|>mod.rs
 mod header
 ...
 important fn
 ...
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );
}
4644
#[test]
fn test_truncation_drops_older_events_first() {
    // Two events are supplied oldest first (`old.rs`, then `new.rs`). Under a
    // generous budget both appear in that order; under a budget of 60 (passed
    // directly, not through `budget_with_margin`) only the `new.rs` event is
    // expected to remain.
    let input = make_input(
        "x",
        0..1,
        0,
        vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
        vec![],
    );

    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
 <|file_sep|>edit history
 --- a/old.rs
 +++ b/old.rs
 -1
 --- a/new.rs
 +++ b/new.rs
 -2
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );

    assert_eq!(
        format_with_budget(&input, 60).unwrap(),
        indoc! {r#"
 <|file_sep|>edit history
 --- a/new.rs
 +++ b/new.rs
 -2
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
        .to_string()
    );
}
4692
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    // With a budget of only 30 tokens the formatter is expected to give up
    // and return `None` rather than produce a prompt.
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let prompt = format_with_budget(&input, 30);
    assert!(prompt.is_none());
}
4705
4706 #[track_caller]
4707 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4708 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4709 .expect("seed coder prompt formatting should succeed")
4710 }
4711
4712 #[track_caller]
4713 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4714 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4715 .expect("seed coder prompt formatting should succeed")
4716 }
4717
#[test]
fn test_seed_coder_basic_format() {
    // Expected seed-coder layout for an input with one related file and one
    // event: the suffix section comes first, then the prefix section holding
    // the related file, the edit history and the cursor file, and finally the
    // `<[fim-middle]>` tag with no trailing newline.
    let input = make_input(
        "prefix\neditable\nsuffix",
        7..15,
        10,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "fn helper() {}\n")],
    );

    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
 <[fim-suffix]>
 suffix
 <[fim-prefix]><filename>related.rs
 fn helper() {}

 <filename>edit_history
 --- a/a.rs
 +++ b/a.rs
 -old
 +new

 <filename>test.rs
 prefix
 <<<<<<< CURRENT
 edi<|user_cursor|>table
 =======
 <[fim-middle]>"#}
    );
}
4750
#[test]
fn test_v0317_formats_prompt_with_many_related_files() {
    // 900 small related files against a 4096-token budget: the
    // `V0317SeedMultiRegions` formatter must still return a prompt that
    // mentions the cursor file and contains the cursor marker.
    let mut related_files = Vec::new();
    for index in 0..900 {
        let path = format!("related_{index}.rs");
        related_files.push(make_related_file(
            &path,
            "fn helper() {\n let value = 1;\n}\n",
        ));
    }

    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    let maybe_prompt =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);

    assert!(maybe_prompt.is_some());
    let prompt =
        maybe_prompt.expect("v0317 should produce a prompt under high related-file count");
    assert!(prompt.contains("test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));
}
4778
#[test]
fn test_seed_coder_no_context() {
    // No events and no related files: the prefix section is expected to hold
    // only the cursor file.
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
 <[fim-suffix]>
 after
 <[fim-prefix]><filename>test.rs
 before
 <<<<<<< CURRENT
 mid<|user_cursor|>dle
 =======
 <[fim-middle]>"#}
    );
}
4796
#[test]
fn test_seed_coder_truncation_drops_context() {
    // Exercises three budgets for the same input: the helper's default budget
    // (everything included), 24 (no prompt at all) and 40 (cursor section
    // only, related file and edit history dropped).
    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![make_related_file("r1.rs", "content\n")],
    );

    // With large budget, everything is included
    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>r1.rs
 content

 <filename>edit_history
 --- a/a.rs
 +++ b/a.rs
 -x
 +y

 <filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
    );

    // A budget of 24 is expected to yield no prompt.
    assert_eq!(
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
        None
    );

    // A budget of 40 is expected to keep only the cursor section.
    assert_eq!(
        format_seed_coder_with_budget(&input, 40),
        indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#
        }
    )
}
4845
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // Two related files are supplied: `low_prio.rs` (order 10) first, then
    // `high_prio.rs` (order 1).
    let input = make_input(
        "code",
        0..4,
        2,
        vec![],
        vec![
            RelatedFile {
                path: Path::new("low_prio.rs").into(),
                max_row: 5,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..5,
                    text: "low prio\n".into(),
                    order: 10,
                }],
            },
            RelatedFile {
                path: Path::new("high_prio.rs").into(),
                max_row: 5,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..5,
                    text: "high prio\n".into(),
                    order: 1,
                }],
            },
        ],
    );

    // With large budget, both included; the expected text lists them in the
    // order they were supplied (`low_prio.rs` before `high_prio.rs`), which
    // is not lexicographic order.
    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>low_prio.rs
 low prio
 <filename>high_prio.rs
 high prio

 <filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
    );

    // With tight budget under the generic heuristic, context is dropped but the
    // minimal cursor section still fits.
    assert_eq!(
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
        Some(
            indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
            .to_string()
        )
    );
}
4910
#[test]
fn test_format_zeta1_from_input_basic() {
    // Full zeta1 prompt for an excerpt that starts at row 0: one edit event,
    // the start-of-file marker, read-only context around the editable region
    // and the cursor marker inside it.
    //
    // Byte layout of the excerpt (the body line needs its four-space indent
    // for the offsets below to line up):
    //   0..15   "fn before() {}\n"
    //   15..26  "fn foo() {\n"
    //   26..41  "    let x = 1;\n"   (cursor at 30, just before `let`)
    //   41..    "}\nfn after() {}\n"
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 15..41,
            editable_180: 15..41,
            editable_350: 15..41,
            editable_150_context_350: 0..excerpt.len(),
            editable_180_context_350: 0..excerpt.len(),
            editable_350_context_150: 0..excerpt.len(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4975
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The excerpt starts at row 10, so no start-of-file marker is expected;
    // there are no events, and the editable region covers the whole excerpt.
    //
    // The excerpt is 28 bytes long only with the four-space indent:
    // 11 ("fn foo() {\n") + 15 ("    let x = 1;\n") + 2 ("}\n"); the cursor
    // at 15 sits just before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5032
/// Formats a zeta1 prompt where the editable range is a strict sub-range of
/// the context range, and checks that the text before and after the editable
/// region is rendered outside the `<|editable_region_*|>` markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The body line is indented by four spaces; the offsets below depend on it:
    // byte 10 starts `fn foo`, byte 25 sits right before `let`, and byte 37 is
    // the newline that follows the closing brace.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37_usize;
    let context_range = 0..excerpt.len();
    let cursor_offset: usize = 25;

    // Fail loudly if the fixture text and the offsets ever drift apart.
    assert_eq!(
        &excerpt[editable_range.clone()],
        "fn foo() {\n    let x = 1;\n}"
    );
    assert!(excerpt[cursor_offset..].starts_with("let x = 1;"));

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: cursor_offset,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5095
/// Checks how the last argument of `format_edit_history_within_budget` caps
/// the number of rendered events when the fourth argument is `usize::MAX`:
/// a cap of 5 renders all three events, a cap of 2 renders only the last two
/// (`event-1`, `event-2`), and a cap of 0 yields an empty string.
#[test]
fn test_max_event_count() {
    // Builds an event whose path and diff both embed `index`, so each event
    // is recognizable in the rendered output.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Only the event cap varies between the three scenarios below.
    let render_with_limit = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // Cap above the number of events: everything is rendered, oldest first.
    let edit_history_section = render_with_limit(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap below the number of events: the first event is dropped.
    let edit_history_section = render_with_limit(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of zero: no section at all, not even the header.
    let edit_history_section = render_with_limit(0);
    assert_eq!(&edit_history_section, "");
}
5176
/// Checks that `clean_zeta1_model_output` returns the text between the
/// editable-region markers, without the marker lines and without the newline
/// that precedes the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // Spelled out line by line so the indentation of the `println!` line is
    // exactly what the expected string below asserts, independent of how the
    // surrounding source is indented.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        " println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
}
5190
/// Checks that `clean_zeta1_model_output` strips the editable-region markers
/// and rewrites the `<|user_cursor_is_here|>` marker to `<|user_cursor|>`.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // Spelled out line by line so the whitespace in front of the cursor marker
    // is exactly what the expected string below asserts, independent of how
    // the surrounding source is indented.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        " <|user_cursor_is_here|>println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
    );
}
5207
/// Model output that contains no editable-region markers is expected to come
/// back from `clean_zeta1_model_output` unchanged, trailing newline included.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw_output = "fn main() {}\n";

    let result = zeta1::clean_zeta1_model_output(raw_output);

    assert_eq!(result.unwrap(), "fn main() {}\n");
}
5214
/// An editable region with nothing between its start and end markers is
/// expected to clean down to the empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let markers_only = concat!("<|editable_region_start|>\n", "<|editable_region_end|>\n");

    let result = zeta1::clean_zeta1_model_output(markers_only);

    assert_eq!(result.unwrap(), "");
}
5221
5222 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5223 let mut result = excerpt.to_string();
5224 result.replace_range(
5225 parsed_output.range_in_excerpt.clone(),
5226 &parsed_output.new_editable_region,
5227 );
5228 result
5229 }
5230
/// Parses a `V0131GitMergeMarkersPrefix` completion for an input whose
/// editable range (the `editable old` line) lies strictly inside its context
/// range, and checks that applying the parsed edit rewrites only that line.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    // Every needle below occurs in `excerpt`, so the lookup cannot fail.
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let context_range = offset_of("ctx start")..offset_of("after ctx");

    // The cursor sits at the start of the editable range.
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let model_output = "editable new\n>>>>>>> UPDATED\n";
    let parsed = parse_zeta2_model_output(
        model_output,
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let patched = apply_edit(excerpt, &parsed);
    assert_eq!(
        patched,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
5257
/// When the model echoes the editable region verbatim (followed by the
/// `>>>>>>> UPDATED` marker), applying the parsed edit must leave the excerpt
/// exactly as it was.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";

    // Editable region is the `bbb` and `ccc` lines; context is the whole excerpt.
    let first_editable_byte = excerpt.find("bbb").unwrap();
    let editable_range = first_editable_byte..excerpt.find("ddd").unwrap();
    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        first_editable_byte,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5276
/// Model output with and without a trailing `>>>>>>> UPDATED` line must parse
/// to edits that produce the same patched excerpt, and that excerpt must not
/// contain the marker.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    // Editable range and context range both cover the entire excerpt.
    let input =
        make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    let patched_with_marker = apply_edit(excerpt, &with_marker);
    let patched_without_marker = apply_edit(excerpt, &without_marker);

    assert_eq!(patched_with_marker, patched_without_marker);
    assert_eq!(patched_with_marker, "new content\n");
}
5290}