1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Text that marks the user's cursor position inside prompt text. It is also
/// listed among the special tokens of the prompt formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for `bytes` bytes of text: three bytes are counted
/// as one token, rounding down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Shrinks a token budget to 90% of its value (rounded down), leaving some
/// slack to avoid overflow.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    let with_margin = 0.9 * max_tokens as f64;
    with_margin.floor() as usize
}
32
/// Everything needed to build a zeta prompt for one cursor position: the
/// excerpt around the cursor, edit history, related files and request flags.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file that contains the cursor.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The offsets and ranges in this struct are byte
    /// positions within this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to excerpt-relative rows. When set, the prompt
    /// formatter drops related-file excerpts of the cursor file that lie
    /// entirely inside the cursor context's row range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events. When rendering, the formatter walks this list
    /// from the end, so trailing entries are kept first under a tight budget.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files offered as extra context. `None` is treated
    /// as an empty list by the prompt formatter.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer.
    /// NOTE(review): not read by the code visible in this part of the file.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): presumably whether the cursor file belongs to an open
    /// source repository — not read by the code visible here; confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably a data-collection consent flag — not read by
    /// the code visible here; confirm.
    #[serde(default)]
    pub can_collect_data: bool,
    /// NOTE(review): presumably the URL of the enclosing repository — not
    /// read by the code visible here; confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
63
/// Identifies which prompt/output format version to use.
///
/// The variant names double as the format's string name (via `IntoStaticStr`)
/// and are what `ZetaFormat::parse` matches against.
/// `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, hence the lint exemption.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
99
100impl std::fmt::Display for ZetaFormat {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 write!(f, "{}", <&'static str>::from(self))
103 }
104}
105
106impl ZetaFormat {
107 pub fn parse(format_name: &str) -> Result<Self> {
108 let mut results = ZetaFormat::iter().filter(|version| {
109 <&'static str>::from(version)
110 .to_lowercase()
111 .contains(&format_name.to_lowercase())
112 });
113 let Some(result) = results.next() else {
114 anyhow::bail!(
115 "`{format_name}` did not match any of:\n{}",
116 Self::options_as_string()
117 );
118 };
119 if results.next().is_some() {
120 anyhow::bail!(
121 "`{format_name}` matched more than one of:\n{}",
122 Self::options_as_string()
123 );
124 }
125 Ok(result)
126 }
127
128 pub fn options_as_string() -> String {
129 ZetaFormat::iter()
130 .map(|format| format!("- {}\n", <&'static str>::from(format)))
131 .collect::<Vec<_>>()
132 .concat()
133 }
134}
135
/// An edit-history event. Serialized with an `event` tag field that names
/// the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written after `+++ b` when the event is rendered.
        path: Arc<Path>,
        /// Path written after `--- a` when the event is rendered.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// NOTE(review): presumably whether the change happened in an open
        /// source repository; it is not used when rendering — confirm.
        in_open_source_repo: bool,
    },
}
147
148impl Event {
149 pub fn in_open_source_repo(&self) -> bool {
150 match self {
151 Event::BufferChange {
152 in_open_source_repo,
153 ..
154 } => *in_open_source_repo,
155 }
156 }
157}
158
159pub fn write_event(prompt: &mut String, event: &Event) {
160 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
161 for component in path.components() {
162 prompt.push('/');
163 write!(prompt, "{}", component.as_os_str().display()).ok();
164 }
165 }
166 match event {
167 Event::BufferChange {
168 path,
169 old_path,
170 diff,
171 predicted,
172 in_open_source_repo: _,
173 } => {
174 if *predicted {
175 prompt.push_str("// User accepted prediction:\n");
176 }
177 prompt.push_str("--- a");
178 write_path_as_unix_str(prompt, old_path.as_ref());
179 prompt.push_str("\n+++ b");
180 write_path_as_unix_str(prompt, path.as_ref());
181 prompt.push('\n');
182 prompt.push_str(diff);
183 }
184 }
185}
186
/// A diagnostic attached to the active buffer, together with a text snippet.
///
/// NOTE(review): nothing in the visible part of this file reads these fields;
/// the field notes below are inferred from names and types — confirm.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity (presumably an LSP-style severity code).
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Source text snippet associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet`.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the span of the diagnostic within `snippet`.
    pub diagnostic_range_in_snippet: Range<usize>,
}
195
/// A file offered as additional prompt context, represented by excerpts.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file header line when the file is rendered.
    pub path: Arc<Path>,
    /// Row bound of the file: an excerpt whose row range ends before this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file. Files left without excerpts are dropped by
    /// `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): presumably whether the file belongs to an open source
    /// repository; not read by the code visible here — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
204
/// One excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// Excerpt text, written verbatim when the excerpt is rendered.
    pub text: Arc<str>,
    /// Priority used when the token budget is limited: excerpts with a lower
    /// `order` are admitted first. Defaults to 0 when absent from the input.
    #[serde(default)]
    pub order: usize,
}
212
213pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
214 special_tokens_for_format(format).iter().any(|token| {
215 if let Some(line_token) = token.strip_suffix('\n') {
216 input.cursor_excerpt.lines().any(|line| line == line_token)
217 } else {
218 input.cursor_excerpt.contains(token)
219 }
220 })
221}
222
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
///
/// Returns whatever `format_prompt_with_budget_for_format` returns for that
/// budget, which is `None` when the prompt's estimated size exceeds it.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
226
/// Returns the special token strings associated with `format`.
///
/// `prompt_input_contains_special_tokens` uses this list to detect cursor
/// excerpts that already contain one of these strings.
pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
    match format {
        // These formats take their token list from a format module.
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
        ZetaFormat::v0226Hashline => hashline::special_tokens(),
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
        // The multi-region formats below share the seed-coder FIM and file
        // markers plus the cursor marker; they differ in the end marker and
        // the marker tag prefix.
        ZetaFormat::V0316SeedMultiRegions => {
            static TOKENS: &[&str] = &[
                seed_coder::FIM_SUFFIX,
                seed_coder::FIM_PREFIX,
                seed_coder::FIM_MIDDLE,
                seed_coder::FILE_MARKER,
                multi_region::V0316_END_MARKER,
                CURSOR_MARKER,
                multi_region::MARKER_TAG_PREFIX,
            ];
            TOKENS
        }
        ZetaFormat::V0318SeedMultiRegions => {
            static TOKENS: &[&str] = &[
                seed_coder::FIM_SUFFIX,
                seed_coder::FIM_PREFIX,
                seed_coder::FIM_MIDDLE,
                seed_coder::FILE_MARKER,
                multi_region::V0318_END_MARKER,
                CURSOR_MARKER,
                multi_region::MARKER_TAG_PREFIX,
            ];
            TOKENS
        }
        // V0317 uses the relative marker tag prefix instead of the plain one.
        ZetaFormat::V0317SeedMultiRegions => {
            static TOKENS: &[&str] = &[
                seed_coder::FIM_SUFFIX,
                seed_coder::FIM_PREFIX,
                seed_coder::FIM_MIDDLE,
                seed_coder::FILE_MARKER,
                multi_region::V0317_END_MARKER,
                CURSOR_MARKER,
                multi_region::RELATIVE_MARKER_TAG_PREFIX,
            ];
            TOKENS
        }
        // V0306 keeps the seed-coder start/separator/end markers.
        ZetaFormat::V0306SeedMultiRegions => {
            static TOKENS: &[&str] = &[
                seed_coder::FIM_SUFFIX,
                seed_coder::FIM_PREFIX,
                seed_coder::FIM_MIDDLE,
                seed_coder::FILE_MARKER,
                seed_coder::START_MARKER,
                seed_coder::SEPARATOR,
                seed_coder::END_MARKER,
                CURSOR_MARKER,
                multi_region::MARKER_TAG_PREFIX,
            ];
            TOKENS
        }
    }
}
291
/// Returns the (editable_token_limit, context_token_limit) for a given format.
///
/// `resolve_cursor_region` passes these limits to
/// `compute_editable_and_context_ranges` when the input carries syntax ranges.
pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
    match format {
        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
        ZetaFormat::V0114180EditableRegion => (180, 350),
        ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline
        | ZetaFormat::V0306SeedMultiRegions
        | ZetaFormat::V0316SeedMultiRegions
        | ZetaFormat::V0318SeedMultiRegions
        | ZetaFormat::V0317SeedMultiRegions
        | ZetaFormat::V0304SeedNoEdits => (350, 150),
        // The variable-edit format gets a large editable limit and a
        // context limit of zero.
        ZetaFormat::V0304VariableEdit => (1024, 0),
    }
}
310
/// Returns the stop token strings for `format`; the slice is empty for
/// formats that define none.
///
/// NOTE(review): presumably these are handed to the model request as stop
/// sequences — the consumer is not visible in this part of the file.
pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
    match format {
        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::V0304VariableEdit
        | ZetaFormat::V0306SeedMultiRegions
        | ZetaFormat::V0304SeedNoEdits => &[],
        // These multi-region formats stop at their own end marker.
        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
    }
}
329
330pub fn excerpt_ranges_for_format(
331 format: ZetaFormat,
332 ranges: &ExcerptRanges,
333) -> (Range<usize>, Range<usize>) {
334 match format {
335 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
336 ranges.editable_150.clone(),
337 ranges.editable_150_context_350.clone(),
338 ),
339 ZetaFormat::V0114180EditableRegion => (
340 ranges.editable_180.clone(),
341 ranges.editable_180_context_350.clone(),
342 ),
343 ZetaFormat::V0120GitMergeMarkers
344 | ZetaFormat::V0131GitMergeMarkersPrefix
345 | ZetaFormat::V0211Prefill
346 | ZetaFormat::V0211SeedCoder
347 | ZetaFormat::v0226Hashline
348 | ZetaFormat::V0304SeedNoEdits
349 | ZetaFormat::V0306SeedMultiRegions
350 | ZetaFormat::V0316SeedMultiRegions
351 | ZetaFormat::V0318SeedMultiRegions
352 | ZetaFormat::V0317SeedMultiRegions => (
353 ranges.editable_350.clone(),
354 ranges.editable_350_context_150.clone(),
355 ),
356 ZetaFormat::V0304VariableEdit => {
357 let context = ranges
358 .editable_350_context_1024
359 .clone()
360 .or(ranges.editable_350_context_512.clone())
361 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
362 (context.clone(), context)
363 }
364 }
365}
366
/// Appends the cursor excerpt section for `format` to `prompt`.
///
/// This only dispatches to the writer for the chosen format. `context` is the
/// context text, `editable_range` is the editable region and `cursor_offset`
/// the cursor position, both expressed relative to `context` (the multi-region
/// builders slice `context` with them).
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180 reuses the V0113 writer.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0211Prefill reuses the V0131 writer.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer is not given an editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
        // The multi-region formats build their section as a separate string
        // and append it.
        ZetaFormat::V0306SeedMultiRegions => {
            prompt.push_str(&build_v0306_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
        ZetaFormat::V0316SeedMultiRegions => {
            prompt.push_str(&build_v0316_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
        ZetaFormat::V0318SeedMultiRegions => {
            prompt.push_str(&build_v0318_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
        ZetaFormat::V0317SeedMultiRegions => {
            prompt.push_str(&build_v0317_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
    }
}
461
462fn build_v0306_cursor_prefix(
463 path: &Path,
464 context: &str,
465 editable_range: &Range<usize>,
466 cursor_offset: usize,
467) -> String {
468 let mut section = String::new();
469 let path_str = path.to_string_lossy();
470 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
471
472 section.push_str(&context[..editable_range.start]);
473 section.push_str(seed_coder::START_MARKER);
474
475 let editable_text = &context[editable_range.clone()];
476 let cursor_in_editable = cursor_offset - editable_range.start;
477 multi_region::write_editable_with_markers(
478 &mut section,
479 editable_text,
480 cursor_in_editable,
481 CURSOR_MARKER,
482 );
483
484 if !section.ends_with('\n') {
485 section.push('\n');
486 }
487 section.push_str(seed_coder::SEPARATOR);
488 section
489}
490
491fn build_v0316_cursor_prefix(
492 path: &Path,
493 context: &str,
494 editable_range: &Range<usize>,
495 cursor_offset: usize,
496) -> String {
497 let mut section = String::new();
498 let path_str = path.to_string_lossy();
499 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
500
501 section.push_str(&context[..editable_range.start]);
502
503 let editable_text = &context[editable_range.clone()];
504 let cursor_in_editable = cursor_offset - editable_range.start;
505 multi_region::write_editable_with_markers_v0316(
506 &mut section,
507 editable_text,
508 cursor_in_editable,
509 CURSOR_MARKER,
510 );
511
512 if !section.ends_with('\n') {
513 section.push('\n');
514 }
515 section
516}
517
518fn build_v0318_cursor_prefix(
519 path: &Path,
520 context: &str,
521 editable_range: &Range<usize>,
522 cursor_offset: usize,
523) -> String {
524 let mut section = String::new();
525 let path_str = path.to_string_lossy();
526 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
527
528 section.push_str(&context[..editable_range.start]);
529
530 let editable_text = &context[editable_range.clone()];
531 let cursor_in_editable = cursor_offset - editable_range.start;
532 multi_region::write_editable_with_markers_v0318(
533 &mut section,
534 editable_text,
535 cursor_in_editable,
536 CURSOR_MARKER,
537 );
538
539 if !section.ends_with('\n') {
540 section.push('\n');
541 }
542 section
543}
544
545fn build_v0317_cursor_prefix(
546 path: &Path,
547 context: &str,
548 editable_range: &Range<usize>,
549 cursor_offset: usize,
550) -> String {
551 let mut section = String::new();
552 let path_str = path.to_string_lossy();
553 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
554
555 section.push_str(&context[..editable_range.start]);
556
557 let editable_text = &context[editable_range.clone()];
558 let cursor_in_editable = cursor_offset - editable_range.start;
559 multi_region::write_editable_with_markers_v0317(
560 &mut section,
561 editable_text,
562 cursor_in_editable,
563 CURSOR_MARKER,
564 );
565
566 if !section.ends_with('\n') {
567 section.push('\n');
568 }
569 section
570}
571
/// Converts a byte range within `text` into a range of zero-based rows.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline.
///
/// Panics if `range` is not a valid slice range of `text`.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let rows_before = text[0..range.start].matches('\n').count() as u32;
    let rows_within = text[range.clone()].matches('\n').count() as u32;
    let ends_mid_line = !text[..range.end].ends_with('\n');
    rows_before..rows_before + rows_within + u32::from(ends_mid_line)
}
580
581pub fn format_prompt_with_budget_for_format(
582 input: &ZetaPromptInput,
583 format: ZetaFormat,
584 max_tokens: usize,
585) -> Option<String> {
586 let (context, editable_range, context_range, cursor_offset) =
587 resolve_cursor_region(input, format);
588 let path = &*input.cursor_path;
589
590 let empty_files = Vec::new();
591 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
592 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
593 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
594 let row_range = relative_row_range.start + cursor_excerpt_start_row
595 ..relative_row_range.end + cursor_excerpt_start_row;
596 &filter_redundant_excerpts(
597 input_related_files.to_vec(),
598 input.cursor_path.as_ref(),
599 row_range,
600 )
601 } else {
602 input_related_files
603 };
604
605 let prompt = match format {
606 ZetaFormat::V0211SeedCoder
607 | ZetaFormat::V0304SeedNoEdits
608 | ZetaFormat::V0306SeedMultiRegions
609 | ZetaFormat::V0316SeedMultiRegions
610 | ZetaFormat::V0318SeedMultiRegions
611 | ZetaFormat::V0317SeedMultiRegions => {
612 let mut cursor_section = String::new();
613 write_cursor_excerpt_section_for_format(
614 format,
615 &mut cursor_section,
616 path,
617 context,
618 &editable_range,
619 cursor_offset,
620 );
621
622 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
623 seed_coder::assemble_fim_prompt(
624 context,
625 &editable_range,
626 &cursor_section,
627 &input.events,
628 related_files,
629 budget_with_margin,
630 )
631 }
632 _ => {
633 let mut cursor_section = String::new();
634 write_cursor_excerpt_section_for_format(
635 format,
636 &mut cursor_section,
637 path,
638 context,
639 &editable_range,
640 cursor_offset,
641 );
642
643 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
644 let cursor_tokens = estimate_tokens(cursor_section.len());
645 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
646
647 let edit_history_section = format_edit_history_within_budget(
648 &input.events,
649 "<|file_sep|>",
650 "edit history",
651 remaining_budget,
652 max_edit_event_count_for_format(&format),
653 );
654 let edit_history_tokens = estimate_tokens(edit_history_section.len());
655 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
656
657 let related_files_section = format_related_files_within_budget(
658 &related_files,
659 "<|file_sep|>",
660 "",
661 remaining_budget,
662 );
663
664 let mut prompt = String::new();
665 prompt.push_str(&related_files_section);
666 prompt.push_str(&edit_history_section);
667 prompt.push_str(&cursor_section);
668 prompt
669 }
670 };
671 let prompt_tokens = estimate_tokens(prompt.len());
672 if prompt_tokens > max_tokens {
673 return None;
674 }
675 return Some(prompt);
676}
677
678pub fn filter_redundant_excerpts(
679 mut related_files: Vec<RelatedFile>,
680 cursor_path: &Path,
681 cursor_row_range: Range<u32>,
682) -> Vec<RelatedFile> {
683 for file in &mut related_files {
684 if file.path.as_ref() == cursor_path {
685 file.excerpts.retain(|excerpt| {
686 excerpt.row_range.start < cursor_row_range.start
687 || excerpt.row_range.end > cursor_row_range.end
688 });
689 }
690 }
691 related_files.retain(|file| !file.excerpts.is_empty());
692 related_files
693}
694
/// Returns the maximum number of edit events rendered into the edit-history
/// section for `format`.
///
/// Every format currently uses 6. The match lists each variant explicitly,
/// so adding a variant will not compile until a value is chosen for it.
pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
    match format {
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline
        | ZetaFormat::V0304SeedNoEdits
        | ZetaFormat::V0304VariableEdit
        | ZetaFormat::V0306SeedMultiRegions
        | ZetaFormat::V0316SeedMultiRegions
        | ZetaFormat::V0318SeedMultiRegions
        | ZetaFormat::V0317SeedMultiRegions => 6,
    }
}
713
714pub fn get_prefill_for_format(
715 format: ZetaFormat,
716 context: &str,
717 editable_range: &Range<usize>,
718) -> String {
719 match format {
720 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
721 ZetaFormat::V0112MiddleAtEnd
722 | ZetaFormat::V0113Ordered
723 | ZetaFormat::V0114180EditableRegion
724 | ZetaFormat::V0120GitMergeMarkers
725 | ZetaFormat::V0131GitMergeMarkersPrefix
726 | ZetaFormat::V0211SeedCoder
727 | ZetaFormat::v0226Hashline
728 | ZetaFormat::V0304VariableEdit => String::new(),
729 ZetaFormat::V0304SeedNoEdits
730 | ZetaFormat::V0306SeedMultiRegions
731 | ZetaFormat::V0316SeedMultiRegions
732 | ZetaFormat::V0318SeedMultiRegions
733 | ZetaFormat::V0317SeedMultiRegions => String::new(),
734 }
735}
736
/// Returns the marker that ends the model output in `format`, if the format
/// has one.
///
/// `parse_zeta2_model_output` strips this marker from the end of the output
/// when it is present.
pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
    match format {
        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
        // V0211Prefill shares the V0131 end marker.
        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
        ZetaFormat::V0211SeedCoder
        | ZetaFormat::V0304SeedNoEdits
        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::v0226Hashline
        | ZetaFormat::V0304VariableEdit => None,
    }
}
755
756pub fn encode_patch_as_output_for_format(
757 format: ZetaFormat,
758 old_editable_region: &str,
759 patch: &str,
760 cursor_offset: Option<usize>,
761) -> Result<Option<String>> {
762 match format {
763 ZetaFormat::v0226Hashline => {
764 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
765 }
766 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
767 old_editable_region,
768 patch,
769 cursor_offset,
770 )
771 .map(Some),
772 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
773 Ok(seed_coder::no_edits(patch))
774 }
775 ZetaFormat::V0316SeedMultiRegions => {
776 let empty_patch = patch.lines().count() <= 3;
777 if empty_patch {
778 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
779 let marker_num =
780 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
781 let tag = multi_region::marker_tag(marker_num);
782 Ok(Some(format!(
783 "{tag}{tag}{}",
784 multi_region::V0316_END_MARKER
785 )))
786 } else {
787 Ok(None)
788 }
789 }
790 ZetaFormat::V0318SeedMultiRegions => {
791 let empty_patch = patch.lines().count() <= 3;
792 if empty_patch {
793 let marker_offsets =
794 multi_region::compute_marker_offsets_v0318(old_editable_region);
795 let marker_num =
796 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
797 let tag = multi_region::marker_tag(marker_num);
798 Ok(Some(format!(
799 "{tag}{tag}{}",
800 multi_region::V0318_END_MARKER
801 )))
802 } else {
803 Ok(None)
804 }
805 }
806 ZetaFormat::V0317SeedMultiRegions => {
807 let empty_patch = patch.lines().count() <= 3;
808 if empty_patch {
809 let tag = multi_region::marker_tag_relative(0);
810 Ok(Some(format!(
811 "{tag}{tag}{}",
812 multi_region::V0317_END_MARKER
813 )))
814 } else {
815 Ok(None)
816 }
817 }
818 _ => Ok(None),
819 }
820}
821
/// Result of parsing model output: the replacement text and the range of the
/// cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
828
/// Parse model output for the given zeta format
///
/// The format's end marker, if any, is stripped from the end of `output`.
/// The rest is then interpreted per format against the editable region
/// resolved from `prompt_inputs`. The returned range is expressed relative to
/// `prompt_inputs.cursor_excerpt`.
///
/// # Errors
/// Passes through errors from the variable-edit and multi-region decoders.
pub fn parse_zeta2_model_output(
    output: &str,
    format: ZetaFormat,
    prompt_inputs: &ZetaPromptInput,
) -> Result<ParsedOutput> {
    // Drop the trailing end marker when the model emitted it.
    let output = match output_end_marker_for_format(format) {
        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
        None => output,
    };

    let (context, editable_range_in_context, context_range, cursor_offset) =
        resolve_cursor_region(prompt_inputs, format);
    let context_start = context_range.start;
    let old_editable_region = &context[editable_range_in_context.clone()];
    // Saturating: a cursor before the editable region maps to offset 0.
    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);

    let (range_in_context, output) = match format {
        // Hashline output is either a list of edit commands to apply to the
        // old region, or the new text itself.
        ZetaFormat::v0226Hashline => (
            editable_range_in_context,
            if hashline::output_has_edit_commands(output) {
                hashline::apply_edit_commands(old_editable_region, output)
            } else {
                output.to_string()
            },
        ),
        // The variable-edit decoder determines the replaced range itself.
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
        // An output starting with the NO_EDITS sentinel keeps the old text.
        ZetaFormat::V0304SeedNoEdits => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                output.to_string()
            },
        ),
        ZetaFormat::V0306SeedMultiRegions => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                multi_region::apply_marker_span(old_editable_region, output)?
            },
        ),
        ZetaFormat::V0316SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
        ),
        ZetaFormat::V0318SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
        ),
        // V0317 additionally receives the cursor offset within the editable
        // region.
        ZetaFormat::V0317SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0317(
                old_editable_region,
                output,
                Some(cursor_offset_in_editable),
            )?,
        ),
        // Remaining formats: the output is the new editable region verbatim.
        _ => (editable_range_in_context, output.to_string()),
    };

    // Translate from context-relative to excerpt-relative offsets.
    let range_in_excerpt =
        range_in_context.start + context_start..range_in_context.end + context_start;

    Ok(ParsedOutput {
        new_editable_region: output,
        range_in_excerpt,
    })
}
899
/// Alias of `excerpt_ranges_for_format`: forwards both arguments unchanged
/// and returns its `(editable, context)` result.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
906
907pub fn resolve_cursor_region(
908 input: &ZetaPromptInput,
909 format: ZetaFormat,
910) -> (&str, Range<usize>, Range<usize>, usize) {
911 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
912 let (editable_tokens, context_tokens) = token_limits_for_format(format);
913 compute_editable_and_context_ranges(
914 &input.cursor_excerpt,
915 input.cursor_offset_in_excerpt,
916 syntax_ranges,
917 editable_tokens,
918 context_tokens,
919 )
920 } else {
921 excerpt_range_for_format(format, &input.excerpt_ranges)
922 };
923 let context_start = context_range.start;
924 let context_text = &input.cursor_excerpt[context_range.clone()];
925 let adjusted_editable =
926 (editable_range.start - context_start)..(editable_range.end - context_start);
927 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
928
929 (
930 context_text,
931 adjusted_editable,
932 context_range,
933 adjusted_cursor,
934 )
935}
936
/// Resolves the cursor region of `input` for `format` and returns the prefill
/// text that `get_prefill_for_format` produces for it.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
941
942fn format_edit_history_within_budget(
943 events: &[Arc<Event>],
944 file_marker: &str,
945 edit_history_name: &str,
946 max_tokens: usize,
947 max_edit_event_count: usize,
948) -> String {
949 let header = format!("{}{}\n", file_marker, edit_history_name);
950 let header_tokens = estimate_tokens(header.len());
951 if header_tokens >= max_tokens {
952 return String::new();
953 }
954
955 let mut event_strings: Vec<String> = Vec::new();
956 let mut total_tokens = header_tokens;
957
958 for event in events.iter().rev().take(max_edit_event_count) {
959 let mut event_str = String::new();
960 write_event(&mut event_str, event);
961 let event_tokens = estimate_tokens(event_str.len());
962
963 if total_tokens + event_tokens > max_tokens {
964 break;
965 }
966 total_tokens += event_tokens;
967 event_strings.push(event_str);
968 }
969
970 if event_strings.is_empty() {
971 return String::new();
972 }
973
974 let mut result = header;
975 for event_str in event_strings.iter().rev() {
976 result.push_str(event_str);
977 }
978 result
979}
980
981fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
982 let needs_newline = !excerpt.text.ends_with('\n');
983 let needs_ellipsis = excerpt.row_range.end < file_max_row;
984 let len = excerpt.text.len()
985 + if needs_newline { "\n".len() } else { 0 }
986 + if needs_ellipsis { "...\n".len() } else { 0 };
987 estimate_tokens(len)
988}
989
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index,
/// then excerpt index) until one no longer fits; admission stops there. The
/// admitted excerpts are then rendered grouped by file, in the original file
/// and excerpt order. Each file starts with a header line (`file_prefix`
/// followed by the path), and `file_suffix` is written between consecutive
/// files.
///
/// Returns the rendered text, which is empty when nothing fits.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged only for the first excerpt
        // admitted from a file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; later, smaller
        // excerpts are not considered.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Close the previous file before starting the next one.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
1078
1079pub fn write_related_files(
1080 prompt: &mut String,
1081 related_files: &[RelatedFile],
1082) -> Vec<Range<usize>> {
1083 let mut ranges = Vec::new();
1084 for file in related_files {
1085 let start = prompt.len();
1086 let path_str = file.path.to_string_lossy();
1087 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1088 for excerpt in &file.excerpts {
1089 prompt.push_str(&excerpt.text);
1090 if !prompt.ends_with('\n') {
1091 prompt.push('\n');
1092 }
1093 if excerpt.row_range.end < file.max_row {
1094 prompt.push_str("...\n");
1095 }
1096 }
1097 let end = prompt.len();
1098 ranges.push(start..end);
1099 }
1100 ranges
1101}
1102
1103mod v0112_middle_at_end {
1104 use super::*;
1105
1106 pub fn special_tokens() -> &'static [&'static str] {
1107 &[
1108 "<|fim_prefix|>",
1109 "<|fim_suffix|>",
1110 "<|fim_middle|>",
1111 "<|file_sep|>",
1112 CURSOR_MARKER,
1113 ]
1114 }
1115
1116 pub fn write_cursor_excerpt_section(
1117 prompt: &mut String,
1118 path: &Path,
1119 context: &str,
1120 editable_range: &Range<usize>,
1121 cursor_offset: usize,
1122 ) {
1123 let path_str = path.to_string_lossy();
1124 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1125
1126 prompt.push_str("<|fim_prefix|>\n");
1127 prompt.push_str(&context[..editable_range.start]);
1128
1129 prompt.push_str("<|fim_suffix|>\n");
1130 prompt.push_str(&context[editable_range.end..]);
1131 if !prompt.ends_with('\n') {
1132 prompt.push('\n');
1133 }
1134
1135 prompt.push_str("<|fim_middle|>current\n");
1136 prompt.push_str(&context[editable_range.start..cursor_offset]);
1137 prompt.push_str(CURSOR_MARKER);
1138 prompt.push_str(&context[cursor_offset..editable_range.end]);
1139 if !prompt.ends_with('\n') {
1140 prompt.push('\n');
1141 }
1142
1143 prompt.push_str("<|fim_middle|>updated\n");
1144 }
1145}
1146
1147mod v0113_ordered {
1148 use super::*;
1149
1150 pub fn special_tokens() -> &'static [&'static str] {
1151 &[
1152 "<|fim_prefix|>",
1153 "<|fim_suffix|>",
1154 "<|fim_middle|>",
1155 "<|file_sep|>",
1156 CURSOR_MARKER,
1157 ]
1158 }
1159
1160 pub fn write_cursor_excerpt_section(
1161 prompt: &mut String,
1162 path: &Path,
1163 context: &str,
1164 editable_range: &Range<usize>,
1165 cursor_offset: usize,
1166 ) {
1167 let path_str = path.to_string_lossy();
1168 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1169
1170 prompt.push_str("<|fim_prefix|>\n");
1171 prompt.push_str(&context[..editable_range.start]);
1172 if !prompt.ends_with('\n') {
1173 prompt.push('\n');
1174 }
1175
1176 prompt.push_str("<|fim_middle|>current\n");
1177 prompt.push_str(&context[editable_range.start..cursor_offset]);
1178 prompt.push_str(CURSOR_MARKER);
1179 prompt.push_str(&context[cursor_offset..editable_range.end]);
1180 if !prompt.ends_with('\n') {
1181 prompt.push('\n');
1182 }
1183
1184 prompt.push_str("<|fim_suffix|>\n");
1185 prompt.push_str(&context[editable_range.end..]);
1186 if !prompt.ends_with('\n') {
1187 prompt.push('\n');
1188 }
1189
1190 prompt.push_str("<|fim_middle|>updated\n");
1191 }
1192}
1193
1194mod v0114180_editable_region {
1195 use super::*;
1196
1197 pub fn special_tokens() -> &'static [&'static str] {
1198 v0113_ordered::special_tokens()
1199 }
1200}
1201
1202pub mod v0120_git_merge_markers {
1203 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1204 //!
1205 //! Example prompt:
1206 //!
1207 //! <|file_sep|>path/to/target_file.py
1208 //! <|fim_prefix|>
1209 //! code before editable region
1210 //! <|fim_suffix|>
1211 //! code after editable region
1212 //! <|fim_middle|>
1213 //! <<<<<<< CURRENT
1214 //! code that
1215 //! needs to<|user_cursor|>
1216 //! be rewritten
1217 //! =======
1218 //!
1219 //! Expected output (should be generated by the model):
1220 //!
1221 //! updated
1222 //! code with
1223 //! changes applied
1224 //! >>>>>>> UPDATED
1225
1226 use super::*;
1227
1228 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1229 pub const SEPARATOR: &str = "=======\n";
1230 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1231
1232 pub fn special_tokens() -> &'static [&'static str] {
1233 &[
1234 "<|fim_prefix|>",
1235 "<|fim_suffix|>",
1236 "<|fim_middle|>",
1237 "<|file_sep|>",
1238 START_MARKER,
1239 SEPARATOR,
1240 END_MARKER,
1241 CURSOR_MARKER,
1242 ]
1243 }
1244
1245 pub fn write_cursor_excerpt_section(
1246 prompt: &mut String,
1247 path: &Path,
1248 context: &str,
1249 editable_range: &Range<usize>,
1250 cursor_offset: usize,
1251 ) {
1252 let path_str = path.to_string_lossy();
1253 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1254
1255 prompt.push_str("<|fim_prefix|>");
1256 prompt.push_str(&context[..editable_range.start]);
1257
1258 prompt.push_str("<|fim_suffix|>");
1259 prompt.push_str(&context[editable_range.end..]);
1260 if !prompt.ends_with('\n') {
1261 prompt.push('\n');
1262 }
1263
1264 prompt.push_str("<|fim_middle|>");
1265 prompt.push_str(START_MARKER);
1266 prompt.push_str(&context[editable_range.start..cursor_offset]);
1267 prompt.push_str(CURSOR_MARKER);
1268 prompt.push_str(&context[cursor_offset..editable_range.end]);
1269 if !prompt.ends_with('\n') {
1270 prompt.push('\n');
1271 }
1272 prompt.push_str(SEPARATOR);
1273 }
1274}
1275
1276pub mod v0131_git_merge_markers_prefix {
1277 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1278 //!
1279 //! Example prompt:
1280 //!
1281 //! <|file_sep|>path/to/target_file.py
1282 //! <|fim_prefix|>
1283 //! code before editable region
1284 //! <<<<<<< CURRENT
1285 //! code that
1286 //! needs to<|user_cursor|>
1287 //! be rewritten
1288 //! =======
1289 //! <|fim_suffix|>
1290 //! code after editable region
1291 //! <|fim_middle|>
1292 //!
1293 //! Expected output (should be generated by the model):
1294 //!
1295 //! updated
1296 //! code with
1297 //! changes applied
1298 //! >>>>>>> UPDATED
1299
1300 use super::*;
1301
1302 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1303 pub const SEPARATOR: &str = "=======\n";
1304 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1305
1306 pub fn special_tokens() -> &'static [&'static str] {
1307 &[
1308 "<|fim_prefix|>",
1309 "<|fim_suffix|>",
1310 "<|fim_middle|>",
1311 "<|file_sep|>",
1312 START_MARKER,
1313 SEPARATOR,
1314 END_MARKER,
1315 CURSOR_MARKER,
1316 ]
1317 }
1318
1319 pub fn write_cursor_excerpt_section(
1320 prompt: &mut String,
1321 path: &Path,
1322 context: &str,
1323 editable_range: &Range<usize>,
1324 cursor_offset: usize,
1325 ) {
1326 let path_str = path.to_string_lossy();
1327 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1328
1329 prompt.push_str("<|fim_prefix|>");
1330 prompt.push_str(&context[..editable_range.start]);
1331 prompt.push_str(START_MARKER);
1332 prompt.push_str(&context[editable_range.start..cursor_offset]);
1333 prompt.push_str(CURSOR_MARKER);
1334 prompt.push_str(&context[cursor_offset..editable_range.end]);
1335 if !prompt.ends_with('\n') {
1336 prompt.push('\n');
1337 }
1338 prompt.push_str(SEPARATOR);
1339
1340 prompt.push_str("<|fim_suffix|>");
1341 prompt.push_str(&context[editable_range.end..]);
1342 if !prompt.ends_with('\n') {
1343 prompt.push('\n');
1344 }
1345
1346 prompt.push_str("<|fim_middle|>");
1347 }
1348}
1349
1350pub mod v0211_prefill {
1351 use super::*;
1352
1353 pub fn special_tokens() -> &'static [&'static str] {
1354 v0131_git_merge_markers_prefix::special_tokens()
1355 }
1356
1357 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1358 let editable_region = &context[editable_range.start..editable_range.end];
1359
1360 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1361 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1362
1363 // Find a token boundary to avoid splitting tokens in the prefill.
1364 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1365 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1366 // the \n and consume any consecutive \n characters after it.
1367 let prefill = &editable_region[..prefill_len];
1368 match prefill.rfind('\n') {
1369 Some(pos) => {
1370 let mut end = pos + 1;
1371 while end < editable_region.len()
1372 && editable_region.as_bytes().get(end) == Some(&b'\n')
1373 {
1374 end += 1;
1375 }
1376 editable_region[..end].to_string()
1377 }
1378 // No newline found. Fall back to splitting before the last space
1379 // (word-level boundary)
1380 None => match prefill.rfind(' ') {
1381 Some(pos) => prefill[..pos].to_string(),
1382 None => prefill.to_string(),
1383 },
1384 }
1385 }
1386}
1387
1388pub mod hashline {
1389
1390 use std::fmt::Display;
1391
1392 pub const END_MARKER: &str = "<|fim_middle|>updated";
1393 pub const START_MARKER: &str = "<|fim_middle|>current";
1394
1395 use super::*;
1396
1397 const SET_COMMAND_MARKER: &str = "<|set|>";
1398 const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1399 pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1400
1401 pub fn special_tokens() -> &'static [&'static str] {
1402 return &[
1403 SET_COMMAND_MARKER,
1404 "<|set_range|>",
1405 INSERT_COMMAND_MARKER,
1406 NO_EDITS_COMMAND_MARKER,
1407 CURSOR_MARKER,
1408 "<|file_sep|>",
1409 "<|fim_prefix|>",
1410 "<|fim_suffix|>",
1411 "<|fim_middle|>",
1412 ];
1413 }
1414
1415 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1416 #[derive(Debug, Clone, PartialEq, Eq)]
1417 struct LineRef {
1418 index: usize,
1419 hash: u8,
1420 }
1421
1422 impl Display for LineRef {
1423 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1424 write!(f, "{}:{:02x}", self.index, self.hash)
1425 }
1426 }
1427
1428 pub fn hash_line(line: &[u8]) -> u8 {
1429 let mut h: u8 = 0;
1430 for &byte in line {
1431 h = h.wrapping_add(byte);
1432 }
1433 return h;
1434 }
1435
1436 /// Write the hashline-encoded editable region into `out`. Each line of
1437 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1438 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1439 /// to the start of `editable_text`).
1440 pub fn write_hashline_editable_region(
1441 out: &mut String,
1442 editable_text: &str,
1443 cursor_offset_in_editable: usize,
1444 ) {
1445 let mut offset = 0;
1446 for (i, line) in editable_text.lines().enumerate() {
1447 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1448 && cursor_offset_in_editable < offset + line.len()
1449 {
1450 (
1451 &line[..cursor_offset_in_editable - offset],
1452 CURSOR_MARKER,
1453 &line[cursor_offset_in_editable - offset..],
1454 )
1455 } else {
1456 (line, "", "")
1457 };
1458 write!(
1459 out,
1460 "\n{}|{head}{cursor}{tail}",
1461 LineRef {
1462 index: i,
1463 hash: hash_line(line.as_bytes())
1464 }
1465 )
1466 .unwrap();
1467 offset += line.len() + 1;
1468 }
1469 }
1470
1471 pub fn write_cursor_excerpt_section(
1472 prompt: &mut String,
1473 path: &Path,
1474 context: &str,
1475 editable_range: &Range<usize>,
1476 cursor_offset: usize,
1477 ) {
1478 let path_str = path.to_string_lossy();
1479 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1480
1481 prompt.push_str("<|fim_prefix|>\n");
1482 prompt.push_str(&context[..editable_range.start]);
1483 prompt.push_str(START_MARKER);
1484
1485 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1486 let editable_region = &context[editable_range.clone()];
1487 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1488
1489 if !prompt.ends_with('\n') {
1490 prompt.push('\n');
1491 }
1492
1493 prompt.push_str("<|fim_suffix|>\n");
1494 prompt.push_str(&context[editable_range.end..]);
1495 if !prompt.ends_with('\n') {
1496 prompt.push('\n');
1497 }
1498
1499 prompt.push_str(END_MARKER);
1500 prompt.push('\n');
1501 }
1502
1503 /// A single edit command parsed from the model output.
1504 #[derive(Debug)]
1505 enum EditCommand<'a> {
1506 /// Replace a range of lines (inclusive on both ends). Single-line set is
1507 /// represented by `start == end`.
1508 Set {
1509 start: LineRef,
1510 end: LineRef,
1511 content: &'a str,
1512 },
1513 /// Insert new lines after the given line, or before the first line if
1514 /// `after` is `None`.
1515 Insert {
1516 after: Option<LineRef>,
1517 content: &'a str,
1518 },
1519 }
1520
1521 /// Parse a line reference like `3:c3` into a `LineRef`.
1522 fn parse_line_ref(s: &str) -> Option<LineRef> {
1523 let (idx_str, hash_str) = s.split_once(':')?;
1524 let index = idx_str.parse::<usize>().ok()?;
1525 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1526 Some(LineRef { index, hash })
1527 }
1528
1529 /// Parse the model output into a list of `EditCommand`s.
1530 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1531 let mut commands = Vec::new();
1532 let mut offset = 0usize;
1533
1534 while offset < model_output.len() {
1535 let next_nl = model_output[offset..]
1536 .find('\n')
1537 .map(|i| offset + i)
1538 .unwrap_or(model_output.len());
1539 let line = &model_output[offset..next_nl];
1540 let line_end = if next_nl < model_output.len() {
1541 next_nl + 1
1542 } else {
1543 next_nl
1544 };
1545
1546 let trimmed = line.trim();
1547 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1548 (true, spec)
1549 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1550 (false, spec)
1551 } else {
1552 offset = line_end;
1553 continue;
1554 };
1555
1556 let mut content_end = line_end;
1557 let mut scan = line_end;
1558
1559 while scan < model_output.len() {
1560 let body_nl = model_output[scan..]
1561 .find('\n')
1562 .map(|i| scan + i)
1563 .unwrap_or(model_output.len());
1564 let body_line = &model_output[scan..body_nl];
1565 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1566 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1567 {
1568 break;
1569 }
1570 scan = if body_nl < model_output.len() {
1571 body_nl + 1
1572 } else {
1573 body_nl
1574 };
1575 content_end = scan;
1576 }
1577
1578 let content = &model_output[line_end..content_end];
1579
1580 if is_set {
1581 if let Some((start_str, end_str)) = specifier.split_once('-') {
1582 if let (Some(start), Some(end)) =
1583 (parse_line_ref(start_str), parse_line_ref(end_str))
1584 {
1585 commands.push(EditCommand::Set {
1586 start,
1587 end,
1588 content,
1589 });
1590 }
1591 } else if let Some(target) = parse_line_ref(specifier) {
1592 commands.push(EditCommand::Set {
1593 start: target.clone(),
1594 end: target,
1595 content,
1596 });
1597 }
1598 } else {
1599 let after = parse_line_ref(specifier);
1600 commands.push(EditCommand::Insert { after, content });
1601 }
1602
1603 offset = scan;
1604 }
1605
1606 commands
1607 }
1608
1609 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1610 /// (as opposed to being a plain full-replacement output).
1611 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1612 /// editable region, returning the plain text content.
1613 pub fn strip_hashline_prefixes(region: &str) -> String {
1614 let mut decoded: String = region
1615 .lines()
1616 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1617 .collect::<Vec<_>>()
1618 .join("\n");
1619 if region.ends_with('\n') {
1620 decoded.push('\n');
1621 }
1622 decoded
1623 }
1624
1625 pub fn output_has_edit_commands(model_output: &str) -> bool {
1626 model_output.contains(SET_COMMAND_MARKER)
1627 || model_output.contains(INSERT_COMMAND_MARKER)
1628 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1629 }
1630
1631 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1632 /// original editable region text.
1633 ///
1634 /// `editable_region` is the original text of the editable region (without hash
1635 /// prefixes). `model_output` is the raw model response containing edit commands.
1636 ///
1637 /// Returns the full replacement text for the editable region.
1638 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1639 if model_output
1640 .trim_start()
1641 .starts_with(NO_EDITS_COMMAND_MARKER)
1642 {
1643 return editable_region.to_string();
1644 }
1645
1646 let original_lines: Vec<&str> = editable_region.lines().collect();
1647 let old_hashes: Vec<u8> = original_lines
1648 .iter()
1649 .map(|line| hash_line(line.as_bytes()))
1650 .collect();
1651
1652 let commands = parse_edit_commands(model_output);
1653
1654 // For set operations: indexed by start line → Some((end line index, content))
1655 // For insert operations: indexed by line index → vec of content to insert after
1656 // Insert-before-first is tracked separately.
1657 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1658 let mut insert_before_first: Vec<&str> = Vec::new();
1659 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1660
1661 for command in &commands {
1662 match command {
1663 EditCommand::Set {
1664 start,
1665 end,
1666 content,
1667 } => {
1668 if start.index < old_hashes.len()
1669 && end.index < old_hashes.len()
1670 && start.index <= end.index
1671 && old_hashes[start.index] == start.hash
1672 && old_hashes[end.index] == end.hash
1673 {
1674 set_ops[start.index] = Some((end.index, *content));
1675 }
1676 }
1677 EditCommand::Insert { after, content } => match after {
1678 None => insert_before_first.push(*content),
1679 Some(line_ref) => {
1680 if line_ref.index < old_hashes.len()
1681 && old_hashes[line_ref.index] == line_ref.hash
1682 {
1683 insert_after[line_ref.index].push(*content);
1684 }
1685 }
1686 },
1687 }
1688 }
1689
1690 let mut result = String::new();
1691
1692 // Emit any insertions before the first line
1693 for content in &insert_before_first {
1694 result.push_str(content);
1695 if !content.ends_with('\n') {
1696 result.push('\n');
1697 }
1698 }
1699
1700 let mut i = 0;
1701 while i < original_lines.len() {
1702 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1703 // Replace lines i..=end_index with the replacement content
1704 result.push_str(replacement);
1705 if !replacement.is_empty() && !replacement.ends_with('\n') {
1706 result.push('\n');
1707 }
1708 // Emit any insertions after the end of this set range
1709 if *end_index < insert_after.len() {
1710 for content in &insert_after[*end_index] {
1711 result.push_str(content);
1712 if !content.ends_with('\n') {
1713 result.push('\n');
1714 }
1715 }
1716 }
1717 i = end_index + 1;
1718 } else {
1719 // Keep the original line
1720 result.push_str(original_lines[i]);
1721 result.push('\n');
1722 // Emit any insertions after this line
1723 for content in &insert_after[i] {
1724 result.push_str(content);
1725 if !content.ends_with('\n') {
1726 result.push('\n');
1727 }
1728 }
1729 i += 1;
1730 }
1731 }
1732
1733 // Preserve trailing newline behavior: if the original ended with a
1734 // newline the result already has one; if it didn't, trim the extra one
1735 // we added.
1736 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1737 result.pop();
1738 }
1739
1740 result
1741 }
1742
1743 /// Convert a unified diff patch into hashline edit commands.
1744 ///
1745 /// Parses the unified diff `patch` directly to determine which lines of
1746 /// `old_text` are deleted/replaced and what new lines are added, then emits
1747 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1748 /// `{index}:{hash}` identifiers.
1749 ///
1750 /// `cursor_offset` is an optional byte offset into the first hunk's new
1751 /// text (context + additions) where the cursor marker should be placed.
1752 pub fn patch_to_edit_commands(
1753 old_text: &str,
1754 patch: &str,
1755 cursor_offset: Option<usize>,
1756 ) -> Result<String> {
1757 let old_lines: Vec<&str> = old_text.lines().collect();
1758 let old_hashes: Vec<u8> = old_lines
1759 .iter()
1760 .map(|line| hash_line(line.as_bytes()))
1761 .collect();
1762
1763 let mut result = String::new();
1764 let mut first_hunk = true;
1765
1766 struct Hunk<'a> {
1767 line_range: Range<usize>,
1768 new_text_lines: Vec<&'a str>,
1769 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1770 }
1771
1772 // Parse the patch line by line. We only care about hunk headers,
1773 // context, deletions, and additions.
1774 let mut old_line_index: usize = 0;
1775 let mut current_hunk: Option<Hunk> = None;
1776 // Byte offset tracking within the hunk's new text for cursor placement.
1777 let mut new_text_byte_offset: usize = 0;
1778 // The line index of the last old line seen before/in the current hunk
1779 // (used for insert-after reference).
1780 let mut last_old_line_before_hunk: Option<usize> = None;
1781
1782 fn flush_hunk(
1783 hunk: Hunk,
1784 last_old_line: Option<usize>,
1785 result: &mut String,
1786 old_hashes: &[u8],
1787 ) {
1788 if hunk.line_range.is_empty() {
1789 // Pure insertion — reference the old line to insert after when in bounds.
1790 if let Some(after) = last_old_line
1791 && let Some(&hash) = old_hashes.get(after)
1792 {
1793 write!(
1794 result,
1795 "{INSERT_COMMAND_MARKER}{}\n",
1796 LineRef { index: after, hash }
1797 )
1798 .unwrap();
1799 } else {
1800 result.push_str(INSERT_COMMAND_MARKER);
1801 result.push('\n');
1802 }
1803 } else {
1804 let start = hunk.line_range.start;
1805 let end_exclusive = hunk.line_range.end;
1806 let deleted_line_count = end_exclusive.saturating_sub(start);
1807
1808 if deleted_line_count == 1 {
1809 if let Some(&hash) = old_hashes.get(start) {
1810 write!(
1811 result,
1812 "{SET_COMMAND_MARKER}{}\n",
1813 LineRef { index: start, hash }
1814 )
1815 .unwrap();
1816 } else {
1817 result.push_str(SET_COMMAND_MARKER);
1818 result.push('\n');
1819 }
1820 } else {
1821 let end_inclusive = end_exclusive - 1;
1822 match (
1823 old_hashes.get(start).copied(),
1824 old_hashes.get(end_inclusive).copied(),
1825 ) {
1826 (Some(start_hash), Some(end_hash)) => {
1827 write!(
1828 result,
1829 "{SET_COMMAND_MARKER}{}-{}\n",
1830 LineRef {
1831 index: start,
1832 hash: start_hash
1833 },
1834 LineRef {
1835 index: end_inclusive,
1836 hash: end_hash
1837 }
1838 )
1839 .unwrap();
1840 }
1841 _ => {
1842 result.push_str(SET_COMMAND_MARKER);
1843 result.push('\n');
1844 }
1845 }
1846 }
1847 }
1848 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1849 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1850 && line_offset == cursor_line_offset
1851 {
1852 result.push_str(&line[..char_offset]);
1853 result.push_str(CURSOR_MARKER);
1854 result.push_str(&line[char_offset..]);
1855 continue;
1856 }
1857
1858 result.push_str(line);
1859 }
1860 }
1861
1862 for raw_line in patch.split_inclusive('\n') {
1863 if raw_line.starts_with("@@") {
1864 // Flush any pending change hunk from a previous patch hunk.
1865 if let Some(hunk) = current_hunk.take() {
1866 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1867 }
1868
1869 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1870 // We intentionally do not trust old_start as a direct local index into `old_text`,
1871 // because some patches are produced against a larger file region and carry
1872 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1873 if first_hunk {
1874 new_text_byte_offset = 0;
1875 first_hunk = false;
1876 }
1877 continue;
1878 }
1879
1880 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1881 continue;
1882 }
1883 if raw_line.starts_with("\\ No newline") {
1884 continue;
1885 }
1886
1887 if raw_line.starts_with('-') {
1888 // Extend or start a change hunk with this deleted old line.
1889 match &mut current_hunk {
1890 Some(Hunk {
1891 line_range: range, ..
1892 }) => range.end = old_line_index + 1,
1893 None => {
1894 current_hunk = Some(Hunk {
1895 line_range: old_line_index..old_line_index + 1,
1896 new_text_lines: Vec::new(),
1897 cursor_line_offset_in_new_text: None,
1898 });
1899 }
1900 }
1901 old_line_index += 1;
1902 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1903 // Place cursor marker if cursor_offset falls within this line.
1904 let mut cursor_line_offset = None;
1905 if let Some(cursor_off) = cursor_offset
1906 && (first_hunk
1907 || cursor_off >= new_text_byte_offset
1908 && cursor_off <= new_text_byte_offset + added_content.len())
1909 {
1910 let line_offset = added_content.floor_char_boundary(
1911 cursor_off
1912 .saturating_sub(new_text_byte_offset)
1913 .min(added_content.len()),
1914 );
1915 cursor_line_offset = Some(line_offset);
1916 }
1917
1918 new_text_byte_offset += added_content.len();
1919
1920 let hunk = current_hunk.get_or_insert(Hunk {
1921 line_range: old_line_index..old_line_index,
1922 new_text_lines: vec![],
1923 cursor_line_offset_in_new_text: None,
1924 });
1925 hunk.new_text_lines.push(added_content);
1926 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1927 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1928 } else {
1929 // Context line (starts with ' ' or is empty).
1930 if let Some(hunk) = current_hunk.take() {
1931 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1932 }
1933 last_old_line_before_hunk = Some(old_line_index);
1934 old_line_index += 1;
1935 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1936 new_text_byte_offset += content.len();
1937 }
1938 }
1939
1940 // Flush final group.
1941 if let Some(hunk) = current_hunk.take() {
1942 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1943 }
1944
1945 // Trim a single trailing newline.
1946 if result.ends_with('\n') {
1947 result.pop();
1948 }
1949
1950 if result.is_empty() {
1951 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1952 }
1953
1954 Ok(result)
1955 }
1956
1957 #[cfg(test)]
1958 mod tests {
1959 use super::*;
1960 use indoc::indoc;
1961
1962 #[test]
1963 fn test_format_cursor_region() {
1964 struct Case {
1965 name: &'static str,
1966 context: &'static str,
1967 editable_range: Range<usize>,
1968 cursor_offset: usize,
1969 expected: &'static str,
1970 }
1971
1972 let cases = [
1973 Case {
1974 name: "basic_cursor_placement",
1975 context: "hello world\n",
1976 editable_range: 0..12,
1977 cursor_offset: 5,
1978 expected: indoc! {"
1979 <|file_sep|>test.rs
1980 <|fim_prefix|>
1981 <|fim_middle|>current
1982 0:5c|hello<|user_cursor|> world
1983 <|fim_suffix|>
1984 <|fim_middle|>updated
1985 "},
1986 },
1987 Case {
1988 name: "multiline_cursor_on_second_line",
1989 context: "aaa\nbbb\nccc\n",
1990 editable_range: 0..12,
1991 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1992 expected: indoc! {"
1993 <|file_sep|>test.rs
1994 <|fim_prefix|>
1995 <|fim_middle|>current
1996 0:23|aaa
1997 1:26|b<|user_cursor|>bb
1998 2:29|ccc
1999 <|fim_suffix|>
2000 <|fim_middle|>updated
2001 "},
2002 },
2003 Case {
2004 name: "no_trailing_newline_in_context",
2005 context: "line1\nline2",
2006 editable_range: 0..11,
2007 cursor_offset: 3,
2008 expected: indoc! {"
2009 <|file_sep|>test.rs
2010 <|fim_prefix|>
2011 <|fim_middle|>current
2012 0:d9|lin<|user_cursor|>e1
2013 1:da|line2
2014 <|fim_suffix|>
2015 <|fim_middle|>updated
2016 "},
2017 },
2018 Case {
2019 name: "leading_newline_in_editable_region",
2020 context: "\nabc\n",
2021 editable_range: 0..5,
2022 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2023 expected: indoc! {"
2024 <|file_sep|>test.rs
2025 <|fim_prefix|>
2026 <|fim_middle|>current
2027 0:00|
2028 1:26|a<|user_cursor|>bc
2029 <|fim_suffix|>
2030 <|fim_middle|>updated
2031 "},
2032 },
2033 Case {
2034 name: "with_suffix",
2035 context: "abc\ndef",
2036 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2037 cursor_offset: 2,
2038 expected: indoc! {"
2039 <|file_sep|>test.rs
2040 <|fim_prefix|>
2041 <|fim_middle|>current
2042 0:26|ab<|user_cursor|>c
2043 <|fim_suffix|>
2044 def
2045 <|fim_middle|>updated
2046 "},
2047 },
2048 Case {
2049 name: "unicode_two_byte_chars",
2050 context: "héllo\n",
2051 editable_range: 0..7,
2052 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2053 expected: indoc! {"
2054 <|file_sep|>test.rs
2055 <|fim_prefix|>
2056 <|fim_middle|>current
2057 0:1b|hé<|user_cursor|>llo
2058 <|fim_suffix|>
2059 <|fim_middle|>updated
2060 "},
2061 },
2062 Case {
2063 name: "unicode_three_byte_chars",
2064 context: "日本語\n",
2065 editable_range: 0..10,
2066 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2067 expected: indoc! {"
2068 <|file_sep|>test.rs
2069 <|fim_prefix|>
2070 <|fim_middle|>current
2071 0:80|日本<|user_cursor|>語
2072 <|fim_suffix|>
2073 <|fim_middle|>updated
2074 "},
2075 },
2076 Case {
2077 name: "unicode_four_byte_chars",
2078 context: "a🌍b\n",
2079 editable_range: 0..7,
2080 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2081 expected: indoc! {"
2082 <|file_sep|>test.rs
2083 <|fim_prefix|>
2084 <|fim_middle|>current
2085 0:6b|a🌍<|user_cursor|>b
2086 <|fim_suffix|>
2087 <|fim_middle|>updated
2088 "},
2089 },
2090 Case {
2091 name: "cursor_at_start_of_region_not_placed",
2092 context: "abc\n",
2093 editable_range: 0..4,
2094 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2095 expected: indoc! {"
2096 <|file_sep|>test.rs
2097 <|fim_prefix|>
2098 <|fim_middle|>current
2099 0:26|abc
2100 <|fim_suffix|>
2101 <|fim_middle|>updated
2102 "},
2103 },
2104 Case {
2105 name: "cursor_at_end_of_line_not_placed",
2106 context: "abc\ndef\n",
2107 editable_range: 0..8,
2108 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2109 expected: indoc! {"
2110 <|file_sep|>test.rs
2111 <|fim_prefix|>
2112 <|fim_middle|>current
2113 0:26|abc
2114 1:2f|def
2115 <|fim_suffix|>
2116 <|fim_middle|>updated
2117 "},
2118 },
2119 Case {
2120 name: "cursor_offset_relative_to_context_not_editable_region",
2121 // cursor_offset is relative to `context`, so when editable_range.start > 0,
2122 // write_cursor_excerpt_section must subtract it before comparing against
2123 // per-line offsets within the editable region.
2124 context: "pre\naaa\nbbb\nsuf\n",
2125 editable_range: 4..12, // editable region = "aaa\nbbb\n"
2126 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
2127 expected: indoc! {"
2128 <|file_sep|>test.rs
2129 <|fim_prefix|>
2130 pre
2131 <|fim_middle|>current
2132 0:23|aaa
2133 1:26|b<|user_cursor|>bb
2134 <|fim_suffix|>
2135 suf
2136 <|fim_middle|>updated
2137 "},
2138 },
2139 ];
2140
2141 for case in &cases {
2142 let mut prompt = String::new();
2143 hashline::write_cursor_excerpt_section(
2144 &mut prompt,
2145 Path::new("test.rs"),
2146 case.context,
2147 &case.editable_range,
2148 case.cursor_offset,
2149 );
2150 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2151 }
2152 }
2153
2154 #[test]
2155 fn test_apply_edit_commands() {
2156 struct Case {
2157 name: &'static str,
2158 original: &'static str,
2159 model_output: &'static str,
2160 expected: &'static str,
2161 }
2162
2163 let cases = vec![
2164 Case {
2165 name: "set_single_line",
2166 original: indoc! {"
2167 let mut total = 0;
2168 for product in products {
2169 total += ;
2170 }
2171 total
2172 "},
2173 model_output: indoc! {"
2174 <|set|>2:87
2175 total += product.price;
2176 "},
2177 expected: indoc! {"
2178 let mut total = 0;
2179 for product in products {
2180 total += product.price;
2181 }
2182 total
2183 "},
2184 },
2185 Case {
2186 name: "set_range",
2187 original: indoc! {"
2188 fn foo() {
2189 let x = 1;
2190 let y = 2;
2191 let z = 3;
2192 }
2193 "},
2194 model_output: indoc! {"
2195 <|set|>1:46-3:4a
2196 let sum = 6;
2197 "},
2198 expected: indoc! {"
2199 fn foo() {
2200 let sum = 6;
2201 }
2202 "},
2203 },
2204 Case {
2205 name: "insert_after_line",
2206 original: indoc! {"
2207 fn main() {
2208 let x = 1;
2209 }
2210 "},
2211 model_output: indoc! {"
2212 <|insert|>1:46
2213 let y = 2;
2214 "},
2215 expected: indoc! {"
2216 fn main() {
2217 let x = 1;
2218 let y = 2;
2219 }
2220 "},
2221 },
2222 Case {
2223 name: "insert_before_first",
2224 original: indoc! {"
2225 let x = 1;
2226 let y = 2;
2227 "},
2228 model_output: indoc! {"
2229 <|insert|>
2230 use std::io;
2231 "},
2232 expected: indoc! {"
2233 use std::io;
2234 let x = 1;
2235 let y = 2;
2236 "},
2237 },
2238 Case {
2239 name: "set_with_cursor_marker",
2240 original: indoc! {"
2241 fn main() {
2242 println!();
2243 }
2244 "},
2245 model_output: indoc! {"
2246 <|set|>1:34
2247 eprintln!(\"<|user_cursor|>\");
2248 "},
2249 expected: indoc! {"
2250 fn main() {
2251 eprintln!(\"<|user_cursor|>\");
2252 }
2253 "},
2254 },
2255 Case {
2256 name: "multiple_set_commands",
2257 original: indoc! {"
2258 aaa
2259 bbb
2260 ccc
2261 ddd
2262 "},
2263 model_output: indoc! {"
2264 <|set|>0:23
2265 AAA
2266 <|set|>2:29
2267 CCC
2268 "},
2269 expected: indoc! {"
2270 AAA
2271 bbb
2272 CCC
2273 ddd
2274 "},
2275 },
2276 Case {
2277 name: "set_range_multiline_replacement",
2278 original: indoc! {"
2279 fn handle_submit() {
2280 }
2281
2282 fn handle_keystroke() {
2283 "},
2284 model_output: indoc! {"
2285 <|set|>0:3f-1:7d
2286 fn handle_submit(modal_state: &mut ModalState) {
2287 <|user_cursor|>
2288 }
2289 "},
2290 expected: indoc! {"
2291 fn handle_submit(modal_state: &mut ModalState) {
2292 <|user_cursor|>
2293 }
2294
2295 fn handle_keystroke() {
2296 "},
2297 },
2298 Case {
2299 name: "no_edit_commands_returns_original",
2300 original: indoc! {"
2301 hello
2302 world
2303 "},
2304 model_output: "some random text with no commands",
2305 expected: indoc! {"
2306 hello
2307 world
2308 "},
2309 },
2310 Case {
2311 name: "no_edits_command_returns_original",
2312 original: indoc! {"
2313 hello
2314 world
2315 "},
2316 model_output: "<|no_edits|>",
2317 expected: indoc! {"
2318 hello
2319 world
2320 "},
2321 },
2322 Case {
2323 name: "wrong_hash_set_ignored",
2324 original: indoc! {"
2325 aaa
2326 bbb
2327 "},
2328 model_output: indoc! {"
2329 <|set|>0:ff
2330 ZZZ
2331 "},
2332 expected: indoc! {"
2333 aaa
2334 bbb
2335 "},
2336 },
2337 Case {
2338 name: "insert_and_set_combined",
2339 original: indoc! {"
2340 alpha
2341 beta
2342 gamma
2343 "},
2344 model_output: indoc! {"
2345 <|set|>0:06
2346 ALPHA
2347 <|insert|>1:9c
2348 beta_extra
2349 "},
2350 expected: indoc! {"
2351 ALPHA
2352 beta
2353 beta_extra
2354 gamma
2355 "},
2356 },
2357 Case {
2358 name: "no_trailing_newline_preserved",
2359 original: "hello\nworld",
2360 model_output: indoc! {"
2361 <|set|>0:14
2362 HELLO
2363 "},
2364 expected: "HELLO\nworld",
2365 },
2366 Case {
2367 name: "set_range_hash_mismatch_in_end_bound",
2368 original: indoc! {"
2369 one
2370 two
2371 three
2372 "},
2373 model_output: indoc! {"
2374 <|set|>0:42-2:ff
2375 ONE_TWO_THREE
2376 "},
2377 expected: indoc! {"
2378 one
2379 two
2380 three
2381 "},
2382 },
2383 Case {
2384 name: "set_range_start_greater_than_end_ignored",
2385 original: indoc! {"
2386 a
2387 b
2388 c
2389 "},
2390 model_output: indoc! {"
2391 <|set|>2:63-1:62
2392 X
2393 "},
2394 expected: indoc! {"
2395 a
2396 b
2397 c
2398 "},
2399 },
2400 Case {
2401 name: "insert_out_of_bounds_ignored",
2402 original: indoc! {"
2403 x
2404 y
2405 "},
2406 model_output: indoc! {"
2407 <|insert|>99:aa
2408 z
2409 "},
2410 expected: indoc! {"
2411 x
2412 y
2413 "},
2414 },
2415 Case {
2416 name: "set_out_of_bounds_ignored",
2417 original: indoc! {"
2418 x
2419 y
2420 "},
2421 model_output: indoc! {"
2422 <|set|>99:aa
2423 z
2424 "},
2425 expected: indoc! {"
2426 x
2427 y
2428 "},
2429 },
2430 Case {
2431 name: "malformed_set_command_ignored",
2432 original: indoc! {"
2433 alpha
2434 beta
2435 "},
2436 model_output: indoc! {"
2437 <|set|>not-a-line-ref
2438 UPDATED
2439 "},
2440 expected: indoc! {"
2441 alpha
2442 beta
2443 "},
2444 },
2445 Case {
2446 name: "malformed_insert_hash_treated_as_before_first",
2447 original: indoc! {"
2448 alpha
2449 beta
2450 "},
2451 model_output: indoc! {"
2452 <|insert|>1:nothex
2453 preamble
2454 "},
2455 expected: indoc! {"
2456 preamble
2457 alpha
2458 beta
2459 "},
2460 },
2461 Case {
2462 name: "set_then_insert_same_target_orders_insert_after_replacement",
2463 original: indoc! {"
2464 cat
2465 dog
2466 "},
2467 model_output: indoc! {"
2468 <|set|>0:38
2469 CAT
2470 <|insert|>0:38
2471 TAIL
2472 "},
2473 expected: indoc! {"
2474 CAT
2475 TAIL
2476 dog
2477 "},
2478 },
2479 Case {
2480 name: "overlapping_set_ranges_last_wins",
2481 original: indoc! {"
2482 a
2483 b
2484 c
2485 d
2486 "},
2487 model_output: indoc! {"
2488 <|set|>0:61-2:63
2489 FIRST
2490 <|set|>1:62-3:64
2491 SECOND
2492 "},
2493 expected: indoc! {"
2494 FIRST
2495 d
2496 "},
2497 },
2498 Case {
2499 name: "insert_before_first_and_after_line",
2500 original: indoc! {"
2501 a
2502 b
2503 "},
2504 model_output: indoc! {"
2505 <|insert|>
2506 HEAD
2507 <|insert|>0:61
2508 MID
2509 "},
2510 expected: indoc! {"
2511 HEAD
2512 a
2513 MID
2514 b
2515 "},
2516 },
2517 ];
2518
2519 for case in &cases {
2520 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2521 assert_eq!(result, case.expected, "failed case: {}", case.name);
2522 }
2523 }
2524
2525 #[test]
2526 fn test_output_has_edit_commands() {
2527 assert!(hashline::output_has_edit_commands(&format!(
2528 "{}0:ab\nnew",
2529 SET_COMMAND_MARKER
2530 )));
2531 assert!(hashline::output_has_edit_commands(&format!(
2532 "{}0:ab\nnew",
2533 INSERT_COMMAND_MARKER
2534 )));
2535 assert!(hashline::output_has_edit_commands(&format!(
2536 "some text\n{}1:cd\nstuff",
2537 SET_COMMAND_MARKER
2538 )));
2539 assert!(!hashline::output_has_edit_commands("just plain text"));
2540 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2541 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2542 }
2543
2544 // ---- hashline::patch_to_edit_commands round-trip tests ----
2545
2546 #[test]
2547 fn test_patch_to_edit_commands() {
2548 struct Case {
2549 name: &'static str,
2550 old: &'static str,
2551 patch: &'static str,
2552 expected_new: &'static str,
2553 }
2554
2555 let cases = [
2556 Case {
2557 name: "single_line_replacement",
2558 old: indoc! {"
2559 let mut total = 0;
2560 for product in products {
2561 total += ;
2562 }
2563 total
2564 "},
2565 patch: indoc! {"
2566 @@ -1,5 +1,5 @@
2567 let mut total = 0;
2568 for product in products {
2569 - total += ;
2570 + total += product.price;
2571 }
2572 total
2573 "},
2574 expected_new: indoc! {"
2575 let mut total = 0;
2576 for product in products {
2577 total += product.price;
2578 }
2579 total
2580 "},
2581 },
2582 Case {
2583 name: "multiline_replacement",
2584 old: indoc! {"
2585 fn foo() {
2586 let x = 1;
2587 let y = 2;
2588 let z = 3;
2589 }
2590 "},
2591 patch: indoc! {"
2592 @@ -1,5 +1,3 @@
2593 fn foo() {
2594 - let x = 1;
2595 - let y = 2;
2596 - let z = 3;
2597 + let sum = 1 + 2 + 3;
2598 }
2599 "},
2600 expected_new: indoc! {"
2601 fn foo() {
2602 let sum = 1 + 2 + 3;
2603 }
2604 "},
2605 },
2606 Case {
2607 name: "insertion",
2608 old: indoc! {"
2609 fn main() {
2610 let x = 1;
2611 }
2612 "},
2613 patch: indoc! {"
2614 @@ -1,3 +1,4 @@
2615 fn main() {
2616 let x = 1;
2617 + let y = 2;
2618 }
2619 "},
2620 expected_new: indoc! {"
2621 fn main() {
2622 let x = 1;
2623 let y = 2;
2624 }
2625 "},
2626 },
2627 Case {
2628 name: "insertion_before_first",
2629 old: indoc! {"
2630 let x = 1;
2631 let y = 2;
2632 "},
2633 patch: indoc! {"
2634 @@ -1,2 +1,3 @@
2635 +use std::io;
2636 let x = 1;
2637 let y = 2;
2638 "},
2639 expected_new: indoc! {"
2640 use std::io;
2641 let x = 1;
2642 let y = 2;
2643 "},
2644 },
2645 Case {
2646 name: "deletion",
2647 old: indoc! {"
2648 aaa
2649 bbb
2650 ccc
2651 ddd
2652 "},
2653 patch: indoc! {"
2654 @@ -1,4 +1,2 @@
2655 aaa
2656 -bbb
2657 -ccc
2658 ddd
2659 "},
2660 expected_new: indoc! {"
2661 aaa
2662 ddd
2663 "},
2664 },
2665 Case {
2666 name: "multiple_changes",
2667 old: indoc! {"
2668 alpha
2669 beta
2670 gamma
2671 delta
2672 epsilon
2673 "},
2674 patch: indoc! {"
2675 @@ -1,5 +1,5 @@
2676 -alpha
2677 +ALPHA
2678 beta
2679 gamma
2680 -delta
2681 +DELTA
2682 epsilon
2683 "},
2684 expected_new: indoc! {"
2685 ALPHA
2686 beta
2687 gamma
2688 DELTA
2689 epsilon
2690 "},
2691 },
2692 Case {
2693 name: "replace_with_insertion",
2694 old: indoc! {r#"
2695 fn handle() {
2696 modal_state.close();
2697 modal_state.dismiss();
2698 "#},
2699 patch: indoc! {r#"
2700 @@ -1,3 +1,4 @@
2701 fn handle() {
2702 modal_state.close();
2703 + eprintln!("");
2704 modal_state.dismiss();
2705 "#},
2706 expected_new: indoc! {r#"
2707 fn handle() {
2708 modal_state.close();
2709 eprintln!("");
2710 modal_state.dismiss();
2711 "#},
2712 },
2713 Case {
2714 name: "complete_replacement",
2715 old: indoc! {"
2716 aaa
2717 bbb
2718 ccc
2719 "},
2720 patch: indoc! {"
2721 @@ -1,3 +1,3 @@
2722 -aaa
2723 -bbb
2724 -ccc
2725 +xxx
2726 +yyy
2727 +zzz
2728 "},
2729 expected_new: indoc! {"
2730 xxx
2731 yyy
2732 zzz
2733 "},
2734 },
2735 Case {
2736 name: "add_function_body",
2737 old: indoc! {"
2738 fn foo() {
2739 modal_state.dismiss();
2740 }
2741
2742 fn
2743
2744 fn handle_keystroke() {
2745 "},
2746 patch: indoc! {"
2747 @@ -1,6 +1,8 @@
2748 fn foo() {
2749 modal_state.dismiss();
2750 }
2751
2752 -fn
2753 +fn handle_submit() {
2754 + todo()
2755 +}
2756
2757 fn handle_keystroke() {
2758 "},
2759 expected_new: indoc! {"
2760 fn foo() {
2761 modal_state.dismiss();
2762 }
2763
2764 fn handle_submit() {
2765 todo()
2766 }
2767
2768 fn handle_keystroke() {
2769 "},
2770 },
2771 Case {
2772 name: "with_cursor_offset",
2773 old: indoc! {r#"
2774 fn main() {
2775 println!();
2776 }
2777 "#},
2778 patch: indoc! {r#"
2779 @@ -1,3 +1,3 @@
2780 fn main() {
2781 - println!();
2782 + eprintln!("");
2783 }
2784 "#},
2785 expected_new: indoc! {r#"
2786 fn main() {
2787 eprintln!("<|user_cursor|>");
2788 }
2789 "#},
2790 },
2791 Case {
2792 name: "non_local_hunk_header_pure_insertion_repro",
2793 old: indoc! {"
2794 aaa
2795 bbb
2796 "},
2797 patch: indoc! {"
2798 @@ -20,2 +20,3 @@
2799 aaa
2800 +xxx
2801 bbb
2802 "},
2803 expected_new: indoc! {"
2804 aaa
2805 xxx
2806 bbb
2807 "},
2808 },
2809 Case {
2810 name: "empty_patch_produces_no_edits_marker",
2811 old: indoc! {"
2812 aaa
2813 bbb
2814 "},
2815 patch: "@@ -20,2 +20,3 @@\n",
2816 expected_new: indoc! {"
2817 aaa
2818 bbb
2819 "},
2820 },
2821 ];
2822
2823 for case in &cases {
2824 // The cursor_offset for patch_to_edit_commands is relative to
2825 // the first hunk's new text (context + additions). We compute
2826 // it by finding where the marker sits in the expected output
2827 // (which mirrors the new text of the hunk).
2828 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2829
2830 let commands =
2831 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2832 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2833
2834 assert!(
2835 hashline::output_has_edit_commands(&commands),
2836 "case {}: expected edit commands, got: {commands:?}",
2837 case.name,
2838 );
2839
2840 let applied = hashline::apply_edit_commands(case.old, &commands);
2841 assert_eq!(applied, case.expected_new, "case {}", case.name);
2842 }
2843 }
2844 }
2845}
2846
2847pub mod seed_coder {
2848 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2849 //!
2850 //! Seed-Coder uses different FIM tokens and order than Qwen:
2851 //! - SPM order: suffix comes FIRST, then prefix, then middle
2852 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2853 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2854 //!
2855 //! All context (related files, edit history) goes in the PREFIX section.
2856 //! The suffix contains only code after the editable region.
2857 //!
2858 //! Example prompt:
2859 //!
2860 //! <[fim-suffix]>
2861 //! code after editable region
2862 //! <[fim-prefix]><filename>related/file.py
2863 //! related file content
2864 //!
2865 //! <filename>edit_history
2866 //! --- a/some_file.py
2867 //! +++ b/some_file.py
2868 //! -old
2869 //! +new
2870 //!
2871 //! <filename>path/to/target_file.py
2872 //! code before editable region
2873 //! <<<<<<< CURRENT
2874 //! code that
2875 //! needs to<|user_cursor|>
2876 //! be rewritten
2877 //! =======
2878 //! <[fim-middle]>
2879 //!
2880 //! Expected output (model generates):
2881 //!
2882 //! updated
2883 //! code with
2884 //! changes applied
2885 //! >>>>>>> UPDATED
2886
2887 use super::*;
2888
    /// FIM token written at the very start of the prompt, before the code that
    /// follows the editable region.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// FIM token written after the suffix section, before the prefix content.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// FIM token that ends the prompt.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker placed in front of a file path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Opens the editable region inside the cursor file.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Closes the editable region inside the cursor file.
    pub const SEPARATOR: &str = "=======\n";
    /// Terminates the model's rewritten text (see the module-level example).
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used by `no_edits` to represent a patch without changes.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2899
2900 pub fn special_tokens() -> &'static [&'static str] {
2901 &[
2902 FIM_SUFFIX,
2903 FIM_PREFIX,
2904 FIM_MIDDLE,
2905 FILE_MARKER,
2906 START_MARKER,
2907 SEPARATOR,
2908 END_MARKER,
2909 CURSOR_MARKER,
2910 ]
2911 }
2912
2913 pub fn write_cursor_excerpt_section(
2914 prompt: &mut String,
2915 path: &Path,
2916 context: &str,
2917 editable_range: &Range<usize>,
2918 cursor_offset: usize,
2919 ) {
2920 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2921 prompt.push_str(§ion);
2922 }
2923
2924 pub fn format_prompt_with_budget(
2925 path: &Path,
2926 context: &str,
2927 editable_range: &Range<usize>,
2928 cursor_offset: usize,
2929 events: &[Arc<Event>],
2930 related_files: &[RelatedFile],
2931 max_tokens: usize,
2932 ) -> String {
2933 let cursor_prefix_section =
2934 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2935 assemble_fim_prompt(
2936 context,
2937 editable_range,
2938 &cursor_prefix_section,
2939 events,
2940 related_files,
2941 max_tokens,
2942 )
2943 }
2944
2945 pub fn assemble_fim_prompt(
2946 context: &str,
2947 editable_range: &Range<usize>,
2948 cursor_prefix_section: &str,
2949 events: &[Arc<Event>],
2950 related_files: &[RelatedFile],
2951 max_tokens: usize,
2952 ) -> String {
2953 let suffix_section = build_suffix_section(context, editable_range);
2954
2955 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2956 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2957 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2958
2959 let edit_history_section = super::format_edit_history_within_budget(
2960 events,
2961 FILE_MARKER,
2962 "edit_history",
2963 budget_after_cursor,
2964 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2965 );
2966 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2967 let budget_after_edit_history =
2968 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2969
2970 let related_files_section = super::format_related_files_within_budget(
2971 related_files,
2972 FILE_MARKER,
2973 "",
2974 budget_after_edit_history,
2975 );
2976
2977 let mut prompt = String::new();
2978 prompt.push_str(&suffix_section);
2979 prompt.push_str(FIM_PREFIX);
2980 prompt.push_str(&related_files_section);
2981 if !related_files_section.is_empty() {
2982 prompt.push('\n');
2983 }
2984 prompt.push_str(&edit_history_section);
2985 if !edit_history_section.is_empty() {
2986 prompt.push('\n');
2987 }
2988 prompt.push_str(cursor_prefix_section);
2989 prompt.push_str(FIM_MIDDLE);
2990
2991 prompt
2992 }
2993
2994 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2995 let mut section = String::new();
2996 section.push_str(FIM_SUFFIX);
2997 section.push_str(&context[editable_range.end..]);
2998 if !section.ends_with('\n') {
2999 section.push('\n');
3000 }
3001 section
3002 }
3003
3004 fn build_cursor_prefix_section(
3005 path: &Path,
3006 context: &str,
3007 editable_range: &Range<usize>,
3008 cursor_offset: usize,
3009 ) -> String {
3010 let mut section = String::new();
3011 let path_str = path.to_string_lossy();
3012 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3013
3014 section.push_str(&context[..editable_range.start]);
3015 section.push_str(START_MARKER);
3016 section.push_str(&context[editable_range.start..cursor_offset]);
3017 section.push_str(CURSOR_MARKER);
3018 section.push_str(&context[cursor_offset..editable_range.end]);
3019 if !section.ends_with('\n') {
3020 section.push('\n');
3021 }
3022 section.push_str(SEPARATOR);
3023 section
3024 }
3025
3026 /// Format patch as containing no changes if it's empty; otherwise return None.
3027 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3028 // Count lines in the patch
3029 let empty_patch = patch.lines().count() <= 3;
3030 if empty_patch {
3031 Some(format!("{NO_EDITS}{END_MARKER}"))
3032 } else {
3033 None
3034 }
3035 }
3036}
3037
3038pub mod v0304_variable_edit {
3039 //! A prompt format with no fixed editable region. The entire context is shown
3040 //! to the model, and it chooses which text to replace by outputting surrounding
3041 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3042 //! text.
3043 //!
3044 //! Example prompt:
3045 //!
3046 //! <|file_sep|>path/to/file.py
3047 //! zero
3048 //! one
3049 //! two
3050 //! three<|user_cursor|>
3051 //! four
3052 //! five
3053 //! <|fim_prefix|>
    //!
3055 //! Expected output (model generates):
3056 //!
3057 //! two
3058 //! <|fim_middle|>
3059 //! THREE
3060 //! <|fim_suffix|>
3061 //! four
3062 //!
3063 //! The output means: find "two\n...\nfour" in the context, and replace
3064 //! everything between "two\n" and "four" with "THREE\n".
3065
3066 use super::*;
3067
3068 pub fn special_tokens() -> &'static [&'static str] {
3069 &[
3070 "<|fim_prefix|>",
3071 "<|fim_suffix|>",
3072 "<|fim_middle|>",
3073 "<|file_sep|>",
3074 CURSOR_MARKER,
3075 ]
3076 }
3077
3078 pub fn write_cursor_excerpt_section(
3079 prompt: &mut String,
3080 path: &Path,
3081 context: &str,
3082 cursor_offset: usize,
3083 ) {
3084 let path_str = path.to_string_lossy();
3085 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3086
3087 prompt.push_str(&context[..cursor_offset]);
3088 prompt.push_str(CURSOR_MARKER);
3089 prompt.push_str(&context[cursor_offset..]);
3090 if !prompt.ends_with('\n') {
3091 prompt.push('\n');
3092 }
3093 prompt.push_str("<|fim_prefix|>\n")
3094 }
3095
3096 /// Apply a variable-edit model output to the original context text.
3097 ///
3098 /// The model output has the form:
3099 ///
3100 /// - prefix context lines
3101 /// - `<|fim_middle|>`
3102 /// - new text
3103 /// - `<|fim_suffix|>`
3104 /// - suffix context lines
3105 ///
3106 /// We locate the prefix/suffix context lines in the original text and replace
3107 /// everything between them with the new text.
3108 pub fn apply_variable_edit(
3109 context: &str,
3110 model_output: &str,
3111 ) -> Result<(Range<usize>, String)> {
3112 let (prefix_context, rest) = model_output
3113 .split_once("<|fim_middle|>\n")
3114 .or_else(|| model_output.split_once("<|fim_middle|>"))
3115 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3116
3117 let (new_text, suffix_context) = rest
3118 .split_once("<|fim_suffix|>\n")
3119 .or_else(|| rest.split_once("<|fim_suffix|>"))
3120 .unwrap_or((rest, ""));
3121
3122 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3123 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3124 } else {
3125 suffix_context
3126 };
3127
3128 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3129 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3130 + prefix_context.len();
3131 let suffix_offset = if suffix_context.is_empty() {
3132 context.len()
3133 } else {
3134 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3135 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3136 + prefix_offset
3137 };
3138
3139 let edit_range = prefix_offset..suffix_offset;
3140 return Ok((edit_range, new_text.to_string()));
3141 }
3142
3143 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3144 if needle.is_empty() {
3145 return Some(0);
3146 }
3147
3148 haystack.match_indices(needle).find_map(|(offset, _)| {
3149 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3150 matched_line_start.then_some(offset)
3151 })
3152 }
3153
3154 /// Convert a unified diff patch into the variable-edit output format.
3155 ///
3156 /// Parses `patch` as a unified diff against `old_text` and produces model
3157 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3158 /// delimiters. The diff is resolved by content matching rather than line
3159 /// numbers.
3160 pub fn patch_to_variable_edit_output(
3161 old_text: &str,
3162 patch: &str,
3163 cursor_offset: Option<usize>,
3164 ) -> Result<String> {
3165 // Parse the unified diff into hunks. Each hunk has an `old_context`
3166 // string (context + deleted lines interleaved in order) and a list of
3167 // edits expressed as byte ranges within that context plus replacement
3168 // text.
3169 let hunks = parse_hunks(patch);
3170 if hunks.is_empty() {
3171 return Ok(String::new());
3172 }
3173
3174 // Apply each hunk by finding its old_context in the text and
3175 // performing the edits. We search forward from where the previous
3176 // hunk ended so that hunks are applied in order.
3177 let mut new_text = old_text.to_string();
3178 let mut search_from: usize = 0;
3179 let mut first_hunk_pos: Option<usize> = None;
3180
3181 for hunk in &hunks {
3182 let context_pos = new_text[search_from..]
3183 .find(&hunk.old_context)
3184 .map(|pos| pos + search_from)
3185 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3186
3187 if first_hunk_pos.is_none() {
3188 first_hunk_pos = Some(context_pos);
3189 }
3190
3191 // Apply edits in reverse order so byte offsets remain valid.
3192 for edit in hunk.edits.iter().rev() {
3193 let abs_start = context_pos + edit.range.start;
3194 let abs_end = context_pos + edit.range.end;
3195 new_text.replace_range(abs_start..abs_end, &edit.text);
3196 }
3197
3198 // Advance past this hunk's region in the (now modified) text.
3199 let new_region_len: usize =
3200 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3201 len + edit.text.len() - (edit.range.end - edit.range.start)
3202 });
3203 search_from = context_pos + new_region_len;
3204 }
3205
3206 // Now we have old_text and new_text. Find the changed line range by
3207 // comparing them.
3208 let old_lines: Vec<&str> = old_text.lines().collect();
3209 let new_lines: Vec<&str> = new_text.lines().collect();
3210
3211 // Find first differing line.
3212 let first_changed_row = old_lines
3213 .iter()
3214 .zip(new_lines.iter())
3215 .position(|(a, b)| a != b)
3216 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3217
3218 // Find last differing line (from the end).
3219 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3220 let common_suffix = old_lines
3221 .iter()
3222 .rev()
3223 .zip(new_lines.iter().rev())
3224 .take(max_suffix)
3225 .take_while(|(a, b)| a == b)
3226 .count();
3227
3228 let old_end = old_lines.len() - common_suffix;
3229 let new_end = new_lines.len() - common_suffix;
3230
3231 if first_changed_row == old_end && first_changed_row == new_end {
3232 return Ok(String::new());
3233 }
3234
3235 // Build the replacement text from new_lines[first_diff..new_end].
3236 let mut merged_new_text = String::new();
3237 for line in &new_lines[first_changed_row..new_end] {
3238 merged_new_text.push_str(line);
3239 merged_new_text.push('\n');
3240 }
3241
3242 // cursor_offset is relative to the first hunk's new content in
3243 // new_text. Translate it to an offset within merged_new_text, which
3244 // only contains lines first_diff..new_end of new_text.
3245 if let Some(hunk_offset) = cursor_offset {
3246 let hunk_start = first_hunk_pos.unwrap_or(0);
3247 let absolute_pos = hunk_start + hunk_offset;
3248
3249 // Byte offset where first_diff starts in new_text.
3250 let merged_start: usize = new_lines[..first_changed_row]
3251 .iter()
3252 .map(|line| line.len() + 1)
3253 .sum();
3254
3255 if absolute_pos >= merged_start {
3256 let relative_offset = absolute_pos - merged_start;
3257 if relative_offset <= merged_new_text.len() {
3258 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3259 }
3260 }
3261 }
3262
3263 // Build output with 2 lines of context above and below.
3264 let context_lines_count = 2;
3265 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3266 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3267
3268 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3269 let pattern = &lines[line_range];
3270 let pattern_len = pattern.len();
3271
3272 let mut count = 0;
3273 for offset in 0..=lines.len() - pattern_len {
3274 if &lines[offset..offset + pattern_len] == pattern {
3275 count += 1;
3276 }
3277 }
3278 count
3279 }
3280
3281 // Expand prefix and suffix until they are unique
3282 while prefix_start > 0 {
3283 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3284 prefix_start -= 1;
3285 } else {
3286 break;
3287 }
3288 }
3289 while suffix_end < old_lines.len() {
3290 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3291 suffix_end += 1;
3292 } else {
3293 break;
3294 }
3295 }
3296
3297 let mut output = String::new();
3298 for line in &old_lines[prefix_start..first_changed_row] {
3299 output.push_str(line);
3300 output.push('\n');
3301 }
3302 output.push_str("<|fim_middle|>\n");
3303 output.push_str(&merged_new_text);
3304 output.push_str("<|fim_suffix|>\n");
3305 for line in &old_lines[old_end..suffix_end] {
3306 output.push_str(line);
3307 output.push('\n');
3308 }
3309
3310 Ok(output)
3311 }
3312
    /// One hunk of a unified diff, as produced by `parse_hunks`.
    struct ParsedHunk {
        /// Context and removed lines of the hunk in order, each terminated by
        /// `'\n'`; this is the text searched for in the original.
        old_context: String,
        /// Edits of the hunk, expressed relative to `old_context`.
        edits: Vec<ParsedEdit>,
    }
3317
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is replaced; empty
        /// for a pure insertion.
        range: Range<usize>,
        /// Replacement text; empty for a pure deletion.
        text: String,
    }
3322
3323 /// Parse a unified diff into content-based hunks. Each hunk contains an
3324 /// `old_context` string (context lines + deleted lines, which together
3325 /// form the text that should be found in the original) and a list of edits
3326 /// expressed as byte ranges within that context.
3327 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3328 let mut hunks = Vec::new();
3329 let mut current: Option<ParsedHunk> = None;
3330
3331 for line in patch.lines() {
3332 if line.starts_with("@@") {
3333 if let Some(hunk) = current.take() {
3334 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3335 hunks.push(hunk);
3336 }
3337 }
3338 current = Some(ParsedHunk {
3339 old_context: String::new(),
3340 edits: Vec::new(),
3341 });
3342 } else if line.starts_with("---") || line.starts_with("+++") {
3343 continue;
3344 } else if let Some(hunk) = &mut current {
3345 if let Some(added) = line.strip_prefix('+') {
3346 let pos = hunk.old_context.len();
3347 if let Some(last_edit) = hunk.edits.last_mut() {
3348 if last_edit.range.end == pos {
3349 writeln!(&mut last_edit.text, "{added}").ok();
3350 continue;
3351 }
3352 }
3353 hunk.edits.push(ParsedEdit {
3354 range: pos..pos,
3355 text: format!("{added}\n"),
3356 });
3357 } else if let Some(removed) = line.strip_prefix('-') {
3358 let start = hunk.old_context.len();
3359 writeln!(&mut hunk.old_context, "{removed}").ok();
3360 let end = hunk.old_context.len();
3361 if let Some(last_edit) = hunk.edits.last_mut() {
3362 if last_edit.range.end == start {
3363 last_edit.range.end = end;
3364 continue;
3365 }
3366 }
3367 hunk.edits.push(ParsedEdit {
3368 range: start..end,
3369 text: String::new(),
3370 });
3371 } else {
3372 let ctx = line.strip_prefix(' ').unwrap_or(line);
3373 writeln!(&mut hunk.old_context, "{ctx}").ok();
3374 }
3375 }
3376 }
3377
3378 if let Some(hunk) = current {
3379 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3380 hunks.push(hunk);
3381 }
3382 }
3383
3384 hunks
3385 }
3386
3387 #[cfg(test)]
3388 mod tests {
3389 use super::*;
3390 use indoc::indoc;
3391
3392 #[test]
3393 fn test_apply_variable_edit() {
3394 struct Case {
3395 name: &'static str,
3396 original: &'static str,
3397 model_output: &'static str,
3398 expected: &'static str,
3399 }
3400
3401 let cases = [
3402 Case {
3403 name: "simple_single_line_replacement",
3404 original: indoc! {"
3405 zero
3406 one
3407 two
3408 three
3409 four
3410 five
3411 "},
3412 model_output: indoc! {"
3413 two
3414 <|fim_middle|>
3415 THREE
3416 <|fim_suffix|>
3417 four
3418 "},
3419 expected: indoc! {"
3420 zero
3421 one
3422 two
3423 THREE
3424 four
3425 five
3426 "},
3427 },
3428 Case {
3429 name: "multi_line_replacement",
3430 original: indoc! {"
3431 a
3432 b
3433 c
3434 d
3435 e
3436 "},
3437 model_output: indoc! {"
3438 a
3439 <|fim_middle|>
3440 B
3441 C
3442 D
3443 <|fim_suffix|>
3444 e
3445 "},
3446 expected: indoc! {"
3447 a
3448 B
3449 C
3450 D
3451 e
3452 "},
3453 },
3454 Case {
3455 name: "insertion_between_existing_lines",
3456 original: indoc! {"
3457 a
3458 b
3459 c
3460 "},
3461 model_output: indoc! {"
3462 a
3463 <|fim_middle|>
3464 X
3465 <|fim_suffix|>
3466 b
3467 "},
3468 expected: indoc! {"
3469 a
3470 X
3471 b
3472 c
3473 "},
3474 },
3475 Case {
3476 name: "deletion",
3477 original: indoc! {"
3478 a
3479 b
3480 c
3481 d
3482 "},
3483 model_output: indoc! {"
3484 a
3485 <|fim_middle|>
3486 <|fim_suffix|>
3487 c
3488 "},
3489 expected: indoc! {"
3490 a
3491 c
3492 d
3493 "},
3494 },
3495 Case {
3496 name: "replacement_at_start_no_prefix_context",
3497 original: indoc! {"
3498 a
3499 b
3500 c
3501 "},
3502 model_output: indoc! {"
3503 <|fim_middle|>
3504 X
3505 <|fim_suffix|>
3506 b
3507 "},
3508 expected: indoc! {"
3509 X
3510 b
3511 c
3512 "},
3513 },
3514 Case {
3515 name: "replacement_at_end_no_suffix_context",
3516 original: indoc! {"
3517 a
3518 b
3519 c
3520 "},
3521 model_output: indoc! {"
3522 b
3523 <|fim_middle|>
3524 Z
3525 <|fim_suffix|>
3526 "},
3527 expected: indoc! {"
3528 a
3529 b
3530 Z
3531 "},
3532 },
3533 Case {
3534 name: "context_with_trailing_newline_is_preserved",
3535 original: indoc! {"
3536 a
3537 b
3538 c
3539 "},
3540 model_output: indoc! {"
3541 a
3542 <|fim_middle|>
3543 B
3544 <|fim_suffix|>
3545 c
3546 "},
3547 expected: indoc! {"
3548 a
3549 B
3550 c
3551 "},
3552 },
3553 Case {
3554 name: "cursor_marker_passes_through_untouched",
3555 original: indoc! {"
3556 a
3557 b
3558 c
3559 "},
3560 model_output: indoc! {"
3561 a
3562 <|fim_middle|>
3563 B<|user_cursor|>B
3564 <|fim_suffix|>
3565 c
3566 "},
3567 expected: indoc! {"
3568 a
3569 B<|user_cursor|>B
3570 c
3571 "},
3572 },
3573 Case {
3574 name: "multiple_prefix_context_lines",
3575 original: indoc! {"
3576 a
3577 b
3578 c
3579 d
3580 e
3581 "},
3582 model_output: indoc! {"
3583 b
3584 c
3585 <|fim_middle|>
3586 D
3587 <|fim_suffix|>
3588 e
3589 "},
3590 expected: indoc! {"
3591 a
3592 b
3593 c
3594 D
3595 e
3596 "},
3597 },
3598 ];
3599
3600 for case in cases {
3601 let (edit_range, replacement) =
3602 apply_variable_edit(case.original, case.model_output).unwrap();
3603 let mut edited = case.original.to_string();
3604 edited.replace_range(edit_range, &replacement);
3605 assert_eq!(edited, case.expected, "{}", case.name);
3606 }
3607 }
3608
/// Table-driven round-trip test for `patch_to_variable_edit_output`.
///
/// For every case this checks that:
/// 1. converting `patch` (relative to `old`) yields `expected_variable_edit`;
/// 2. applying that output to `old` with `apply_variable_edit` yields
///    `expected_after_apply`;
/// 3. applying the hand-written `expected_variable_edit` yields the same
///    text, so the fixture itself is known to be self-consistent.
///
/// Case names only label assertion failures, so each one must be unique.
#[test]
fn test_patch_to_variable_edit() {
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        cursor_offset: Option<usize>,
        expected_variable_edit: &'static str,
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                 b
                +X
                 c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                 b
                -c
                 d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                 two
                 a
                 b
                -c
                +C
                 d
                 three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            // Previously shared its name with the case above, which made a
            // failure in either one impossible to attribute.
            name: "expands_context_when_two_lines_not_unique_before_and_after_nested_braces",
            old: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        two();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                     {
                -        two();
                +        TWO();
                     }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                        one();
                    }
                }
                {
                    {
                <|fim_middle|>
                        TWO();
                <|fim_suffix|>
                    }
                }
                {
                    {
                        three();
            "},
            expected_after_apply: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        TWO();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
        },
    ];

    for case in cases {
        // Applies a variable edit to `case.old` and returns the edited text.
        // `what` only labels the panic message on failure.
        let apply = |variable_edit: &str, what: &str| -> String {
            let (edit_range, replacement) = apply_variable_edit(case.old, variable_edit)
                .unwrap_or_else(|error| {
                    panic!("failed applying {what} for {}: {error}", case.name)
                });
            let mut edited = case.old.to_string();
            edited.replace_range(edit_range, &replacement);
            edited
        };

        let output = patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
            .unwrap_or_else(|error| {
                panic!("failed converting patch for {}: {error}", case.name)
            });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // The generated edit must reproduce the expected final text.
        assert_eq!(
            apply(&output, "variable_edit"),
            case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // The fixture's own expected edit must do so as well.
        assert_eq!(
            apply(case.expected_variable_edit, "expected variable_edit"),
            case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
4025
/// The cursor excerpt section is the file header, the excerpt with the cursor
/// marker spliced in at the byte offset, and a trailing `<|fim_prefix|>` line.
#[test]
fn test_write_cursor_excerpt_section() {
    let mut rendered = String::new();
    // Offset 17 sits right after the `h` of `hello` on the second line.
    write_cursor_excerpt_section(
        &mut rendered,
        Path::new("test.rs"),
        "fn main() {\n    hello();\n}\n",
        17,
    );

    let expected =
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n";
    assert_eq!(rendered, expected);
}
4038 }
4039}
4040
4041/// The zeta1 prompt format
4042pub mod zeta1 {
4043 use super::*;
4044 use std::fmt::Write;
4045
4046 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4047 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4048 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4049 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4050
4051 const INSTRUCTION_HEADER: &str = concat!(
4052 "### Instruction:\n",
4053 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4054 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4055 "into account the cursor location.\n\n",
4056 "### User Edits:\n\n"
4057 );
4058 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4059 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4060
4061 /// Formats a complete zeta1 prompt from the input events and excerpt.
4062 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4063 let mut prompt = String::with_capacity(
4064 INSTRUCTION_HEADER.len()
4065 + input_events.len()
4066 + EXCERPT_HEADER.len()
4067 + input_excerpt.len()
4068 + RESPONSE_HEADER.len(),
4069 );
4070 prompt.push_str(INSTRUCTION_HEADER);
4071 prompt.push_str(input_events);
4072 prompt.push_str(EXCERPT_HEADER);
4073 prompt.push_str(input_excerpt);
4074 prompt.push_str(RESPONSE_HEADER);
4075 prompt
4076 }
4077
4078 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4079 /// editable and context byte-offset ranges within `cursor_excerpt`.
4080 pub fn format_zeta1_from_input(
4081 input: &ZetaPromptInput,
4082 editable_range: Range<usize>,
4083 context_range: Range<usize>,
4084 ) -> String {
4085 let events = format_zeta1_events(&input.events);
4086 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4087 format_zeta1_prompt(&events, &excerpt)
4088 }
4089
4090 /// Formats events in zeta1 style (oldest first).
4091 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4092 let mut result = String::new();
4093 for event in
4094 events
4095 .iter()
4096 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4097 &ZetaFormat::V0114180EditableRegion,
4098 )))
4099 {
4100 let event_string = format_zeta1_event(event);
4101 if event_string.is_empty() {
4102 continue;
4103 }
4104 if !result.is_empty() {
4105 result.push_str("\n\n");
4106 }
4107 result.push_str(&event_string);
4108 }
4109 result
4110 }
4111
4112 fn format_zeta1_event(event: &Event) -> String {
4113 match event {
4114 Event::BufferChange {
4115 path,
4116 old_path,
4117 diff,
4118 ..
4119 } => {
4120 let mut prompt = String::new();
4121 if old_path != path {
4122 writeln!(
4123 prompt,
4124 "User renamed {} to {}\n",
4125 old_path.display(),
4126 path.display()
4127 )
4128 .ok();
4129 }
4130 if !diff.is_empty() {
4131 write!(
4132 prompt,
4133 "User edited {}:\n```diff\n{}\n```",
4134 path.display(),
4135 diff
4136 )
4137 .ok();
4138 }
4139 prompt
4140 }
4141 }
4142 }
4143
4144 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4145 /// within `cursor_excerpt`.
4146 fn format_zeta1_excerpt(
4147 input: &ZetaPromptInput,
4148 editable_range: Range<usize>,
4149 context_range: Range<usize>,
4150 ) -> String {
4151 let path_str = input.cursor_path.to_string_lossy();
4152 let excerpt = &*input.cursor_excerpt;
4153 let cursor_offset = input.cursor_offset_in_excerpt;
4154
4155 let mut prompt = String::new();
4156 writeln!(&mut prompt, "```{path_str}").ok();
4157
4158 let starts_at_file_beginning =
4159 input.excerpt_start_row == Some(0) && context_range.start == 0;
4160 if starts_at_file_beginning {
4161 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4162 }
4163
4164 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4165
4166 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4167 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4168 prompt.push_str(CURSOR_MARKER);
4169 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4170 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4171
4172 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4173 write!(prompt, "\n```").ok();
4174
4175 prompt
4176 }
4177
4178 /// Cleans zeta1 model output by extracting content between editable region
4179 /// markers and converting the zeta1 cursor marker to the universal one.
4180 /// Returns `None` if the output doesn't contain the expected markers.
4181 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4182 let content = output.replace(CURSOR_MARKER, "");
4183
4184 let content_start = content
4185 .find(EDITABLE_REGION_START_MARKER)
4186 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4187 .map(|pos| {
4188 if content.as_bytes().get(pos) == Some(&b'\n') {
4189 pos + 1
4190 } else {
4191 pos
4192 }
4193 })
4194 .unwrap_or(0);
4195
4196 let content_end = content
4197 .find(EDITABLE_REGION_END_MARKER)
4198 .map(|pos| {
4199 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4200 pos - 1
4201 } else {
4202 pos
4203 }
4204 })
4205 .unwrap_or(content.len());
4206
4207 if content_start > content_end {
4208 return Some(String::new());
4209 }
4210
4211 let extracted = &content[content_start..content_end];
4212
4213 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4214 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4215 let text_before_cursor = text_before_cursor
4216 .find(EDITABLE_REGION_START_MARKER)
4217 .map(|pos| {
4218 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4219 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4220 after_marker + 1
4221 } else {
4222 after_marker
4223 }
4224 })
4225 .unwrap_or(0);
4226 let offset_in_extracted = zeta1_cursor_pos
4227 .saturating_sub(text_before_cursor)
4228 .min(extracted.len());
4229 offset_in_extracted
4230 });
4231
4232 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4233 if let Some(offset) = cursor_offset {
4234 result.push_str(&extracted[..offset]);
4235 result.push_str(super::CURSOR_MARKER);
4236 result.push_str(&extracted[offset..]);
4237 } else {
4238 result.push_str(extracted);
4239 }
4240
4241 Some(result)
4242 }
4243}
4244
4245#[cfg(test)]
4246mod tests {
4247 use super::*;
4248 use indoc::indoc;
4249
4250 fn make_input(
4251 cursor_excerpt: &str,
4252 editable_range: Range<usize>,
4253 cursor_offset: usize,
4254 events: Vec<Event>,
4255 related_files: Vec<RelatedFile>,
4256 ) -> ZetaPromptInput {
4257 let context_range = 0..cursor_excerpt.len();
4258 ZetaPromptInput {
4259 cursor_path: Path::new("test.rs").into(),
4260 cursor_excerpt: cursor_excerpt.into(),
4261 cursor_offset_in_excerpt: cursor_offset,
4262 excerpt_start_row: None,
4263 events: events.into_iter().map(Arc::new).collect(),
4264 related_files: Some(related_files),
4265 active_buffer_diagnostics: vec![],
4266 excerpt_ranges: ExcerptRanges {
4267 editable_150: editable_range.clone(),
4268 editable_180: editable_range.clone(),
4269 editable_350: editable_range,
4270 editable_150_context_350: context_range.clone(),
4271 editable_180_context_350: context_range.clone(),
4272 editable_350_context_150: context_range,
4273 ..Default::default()
4274 },
4275 syntax_ranges: None,
4276 experiment: None,
4277 in_open_source_repo: false,
4278 can_collect_data: false,
4279 repo_url: None,
4280 }
4281 }
4282
4283 fn make_input_with_context_range(
4284 excerpt: &str,
4285 editable_range: Range<usize>,
4286 context_range: Range<usize>,
4287 cursor_offset: usize,
4288 ) -> ZetaPromptInput {
4289 ZetaPromptInput {
4290 cursor_path: Path::new("test.rs").into(),
4291 cursor_excerpt: excerpt.into(),
4292 cursor_offset_in_excerpt: cursor_offset,
4293 excerpt_start_row: None,
4294 events: vec![],
4295 related_files: Some(vec![]),
4296 active_buffer_diagnostics: vec![],
4297 excerpt_ranges: ExcerptRanges {
4298 editable_150: editable_range.clone(),
4299 editable_180: editable_range.clone(),
4300 editable_350: editable_range,
4301 editable_150_context_350: context_range.clone(),
4302 editable_180_context_350: context_range.clone(),
4303 editable_350_context_150: context_range,
4304 ..Default::default()
4305 },
4306 syntax_ranges: None,
4307 experiment: None,
4308 in_open_source_repo: false,
4309 can_collect_data: false,
4310 repo_url: None,
4311 }
4312 }
4313
4314 fn make_event(path: &str, diff: &str) -> Event {
4315 Event::BufferChange {
4316 path: Path::new(path).into(),
4317 old_path: Path::new(path).into(),
4318 diff: diff.to_string(),
4319 predicted: false,
4320 in_open_source_repo: false,
4321 }
4322 }
4323
4324 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4325 RelatedFile {
4326 path: Path::new(path).into(),
4327 max_row: content.lines().count() as u32,
4328 excerpts: vec![RelatedExcerpt {
4329 row_range: 0..content.lines().count() as u32,
4330 text: content.into(),
4331 order: 0,
4332 }],
4333 in_open_source_repo: false,
4334 }
4335 }
4336
4337 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4338 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4339 }
4340
/// Scales `requested_tokens` up by `1 / 0.9`, rounding up.
///
/// Presumably this offsets the 0.9 factor of `apply_prompt_budget_margin`, so
/// that roughly `requested_tokens` remain usable after the margin is applied.
fn budget_with_margin(requested_tokens: usize) -> usize {
    let padded = requested_tokens as f64 / 0.9;
    padded.ceil() as usize
}
4344
/// With a budget far larger than the prompt, nothing is dropped: the related
/// file, the edit history and the cursor excerpt are all rendered.
#[test]
fn test_no_truncation_when_within_budget() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, history, related);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    let prompt = format_with_budget(&input, 10000).unwrap();
    assert_eq!(prompt, expected);
}
4378
/// Compares a generous budget with a tight one for the same input.
///
/// Note that, despite the test's name, the tight-budget expectation keeps the
/// edit history and the cursor excerpt; it is the two related files that are
/// missing from the output.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let related = vec![
        make_related_file("r1.rs", "aaaaaaa\n"),
        make_related_file("r2.rs", "bbbbbbb\n"),
    ];
    let input = make_input("code", 0..4, 2, vec![make_event("a.rs", "-x\n+y\n")], related);

    let everything = indoc! {r#"
        <|file_sep|>r1.rs
        aaaaaaa
        <|file_sep|>r2.rs
        bbbbbbb
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), everything);

    let without_related_files = indoc! {r#"
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    let tight = format_with_budget(&input, budget_with_margin(55));
    assert_eq!(tight.as_deref(), Some(without_related_files));
}
4434
/// A related file with three equally ordered excerpts can be included
/// partially: with a tight budget only the first excerpt (followed by the
/// `...` separator) is rendered.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpts = [
        (0..10, "first excerpt\n"),
        (10..20, "second excerpt\n"),
        (20..30, "third excerpt\n"),
    ]
    .into_iter()
    .map(|(row_range, text)| RelatedExcerpt {
        row_range,
        text: text.into(),
        order: 0,
    })
    .collect();

    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts,
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), all_excerpts);

    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(50)).unwrap(),
        first_excerpt_only
    );
}
4501
/// Two files: file_a has a high-order excerpt, file_b has a low-order one.
/// With a tight budget, only the lower-order excerpt from file_b should be
/// included.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Builds a ten-row related file holding one excerpt of the given order.
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };

    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![
            single_excerpt_file("file_a.rs", "low priority content\n", 5),
            single_excerpt_file("file_b.rs", "high priority content\n", 1),
        ],
    );

    // With a large budget both files are included, file_a.rs before file_b.rs.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), both_files);

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let only_file_b = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(52)).unwrap(),
        only_file_b
    );
}
4572
/// A single file has excerpts at order 1 and order 3. With a tight budget,
/// only the order-1 excerpts are included while the order-3 excerpt is
/// dropped — even though they belong to the same file. This also preserves
/// the parent invariant: parent outline items have order ≤ their best
/// child, so they're always included when any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let excerpts = [
        (0..5, "mod header\n", 1),
        (5..15, "important fn\n", 1),
        (15..30, "less important fn\n", 3),
    ]
    .into_iter()
    .map(|(row_range, text, order)| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    })
    .collect();

    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts,
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With large budget, all three excerpts included.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), all_excerpts);

    // With tight budget, only order<=1 excerpts included (header + important fn).
    let order_one_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)).unwrap(),
        order_one_only
    );
}
4648
/// When the edit history does not fit, the earlier event (`old.rs`) is the
/// one left out and the later event (`new.rs`) is kept.
#[test]
fn test_truncation_drops_older_events_first() {
    let history = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, history, vec![]);

    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000).unwrap(), both_events);

    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 60).unwrap(), newest_event_only);
}
4696
/// With a budget of 30 tokens no prompt is produced at all: formatting
/// returns `None` rather than a prompt without the cursor excerpt.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, history, related);

    let prompt = format_with_budget(&input, 30);
    assert!(prompt.is_none())
}
4709
4710 #[track_caller]
4711 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4712 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4713 .expect("seed coder prompt formatting should succeed")
4714 }
4715
4716 #[track_caller]
4717 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4718 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4719 .expect("seed coder prompt formatting should succeed")
4720 }
4721
/// The seed-coder layout puts the suffix first, then related files, edit
/// history and the cursor file inside the prefix, and ends on the
/// `<[fim-middle]>` tag without a trailing newline.
#[test]
fn test_seed_coder_basic_format() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, history, related);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4754
/// With 900 related files and a 4096-token budget, the
/// `V0317SeedMultiRegions` format must still produce a prompt that mentions
/// the cursor file and contains the cursor marker.
#[test]
fn test_v0317_formats_prompt_with_many_related_files() {
    let helper_source = "fn helper() {\n    let value = 1;\n}\n";
    let mut related_files = Vec::new();
    for index in 0..900 {
        related_files.push(make_related_file(
            &format!("related_{index}.rs"),
            helper_source,
        ));
    }

    let history = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, history, related_files);

    let formatted =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);

    assert!(formatted.is_some());
    let prompt = formatted.expect("v0317 should produce a prompt under high related-file count");
    assert!(prompt.contains("test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));
}
4782
/// Without events or related files the seed-coder prompt contains only the
/// suffix and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, Vec::new(), Vec::new());

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4800
/// Seed-coder truncation at three budgets: everything fits at the default
/// budget, nothing is produced at 24 tokens, and at 40 tokens only the
/// cursor section survives.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let history = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, history, related);

    // With large budget, everything is included
    let everything = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), everything);

    // Too small even for the cursor section.
    let too_small = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24);
    assert_eq!(too_small, None);

    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 40), cursor_section_only)
}
4849
/// Seed-coder formatting of two related files with different excerpt orders
/// (`low_prio.rs` at order 10, `high_prio.rs` at order 1), at a large and a
/// tight budget.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // Builds a five-row related file holding one excerpt of the given order.
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: text.into(),
            order,
        }],
    };

    let input = make_input(
        "code",
        0..4,
        2,
        vec![],
        vec![
            single_excerpt_file("low_prio.rs", "low prio\n", 10),
            single_excerpt_file("high_prio.rs", "high prio\n", 1),
        ],
    );

    // With large budget, both are included; low_prio.rs is rendered before
    // high_prio.rs, i.e. in the order the files were passed in.
    let both_files = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), both_files);

    // With tight budget under the generic heuristic, context is dropped but the
    // minimal cursor section still fits.
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    let tight = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44);
    assert_eq!(tight.as_deref(), Some(cursor_section_only));
}
4914
/// Full zeta1 prompt for an excerpt that starts at row 0: the start-of-file
/// marker is emitted, the single edit event is rendered as a fenced diff, and
/// the editable region `15..41` carries the cursor marker at offset 30.
#[test]
fn test_format_zeta1_from_input_basic() {
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable_range = 15..41;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        ..make_input_with_context_range(
            excerpt,
            editable_range.clone(),
            context_range.clone(),
            30,
        )
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
    assert_eq!(prompt, expected);
}
4979
/// When the excerpt starts at row 10 the start-of-file marker is omitted, and
/// with no events the "User Edits" section is left empty.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let whole_excerpt = 0..28;

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(10),
        ..make_input_with_context_range(
            excerpt,
            whole_excerpt.clone(),
            whole_excerpt.clone(),
            15,
        )
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let prompt = zeta1::format_zeta1_from_input(&input, whole_excerpt.clone(), whole_excerpt);
    assert_eq!(prompt, expected);
}
5036
/// Verifies the zeta1 prompt when the editable range is a strict sub-range of
/// the context range: text before and after the editable region is expected
/// outside the `<|editable_region_start|>` / `<|editable_region_end|>` markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Byte layout: "// prefix\n" occupies 0..10, "fn foo() {\n" occupies 10..21,
    // the four-space-indented statement line occupies 21..36 and `}` sits at 36.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    // Covers the function text up to and including `}`, excluding its newline.
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        // 21 bytes precede the statement line, plus 4 bytes of indent:
        // the cursor sits directly before `let`.
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5099
/// Checks how the last argument of `format_edit_history_within_budget` limits
/// the rendered events: a cap above the event count keeps all three events, a
/// cap of 2 keeps only the last two, and a cap of 0 yields an empty string.
#[test]
fn test_max_event_count() {
    /// Builds an event whose path and diff both embed `index`.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Every call passes `usize::MAX` as the fourth argument (presumably the
    // budget), so only the event cap varies between the cases below.
    let render_with_event_cap = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // Cap larger than the number of events: all three are rendered.
    let edit_history_section = render_with_event_cap(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of two: event-0 is dropped, the last two events remain.
    let edit_history_section = render_with_event_cap(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of zero: nothing is rendered, not even the section header.
    let edit_history_section = render_with_event_cap(0);
    assert_eq!(&edit_history_section, "");
}
5180
/// Checks that `clean_zeta1_model_output` returns only the text between the
/// editable-region markers, without the markers or the newline before the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // Spelled out line by line so the indentation inside the fixture is
    // explicit and stays in sync with the expected string below.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        "    println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
5194
/// Checks that `clean_zeta1_model_output` strips the editable-region markers
/// and that a `<|user_cursor_is_here|>` token in the model output appears as
/// `<|user_cursor|>` in the cleaned text.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // Spelled out line by line so the indentation inside the fixture is
    // explicit and stays in sync with the expected string below.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        "    <|user_cursor_is_here|>println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
5211
/// Output without any editable-region markers is expected to come back unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw_output = "fn main() {}\n";
    let result = zeta1::clean_zeta1_model_output(raw_output).unwrap();
    assert_eq!(result, "fn main() {}\n");
}
5218
/// A start marker immediately followed by an end marker is expected to clean
/// down to the empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let markers_only = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let result = zeta1::clean_zeta1_model_output(markers_only).unwrap();
    assert_eq!(result, "");
}
5225
5226 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5227 let mut result = excerpt.to_string();
5228 result.replace_range(
5229 parsed_output.range_in_excerpt.clone(),
5230 &parsed_output.new_editable_region,
5231 );
5232 result
5233 }
5234
/// Parses a `V0131GitMergeMarkersPrefix` model output for an input whose
/// context range is narrower than the excerpt, then checks that applying the
/// parsed edit to the full excerpt replaces only the editable line.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // The context begins at "ctx start" and stops just before "after ctx".
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    // The editable region is exactly the "editable old\n" line.
    let editable_start = offset_of("editable old");
    let editable_end = editable_start + "editable old\n".len();

    let input = make_input_with_context_range(
        excerpt,
        editable_start..editable_end,
        context_range,
        editable_start,
    );

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let patched = apply_edit(excerpt, &parsed);
    assert_eq!(
        patched,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
5261
/// When the model output repeats the editable text verbatim, applying the
/// parsed edit is expected to leave the excerpt unchanged.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Editable region: the "bbb\nccc\n" lines; the cursor sits at its start.
    let editable_range = offset_of("bbb")..offset_of("ddd");
    let cursor_offset = editable_range.start;
    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        cursor_offset,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5280
/// Model output with and without the trailing `>>>>>>> UPDATED` line is
/// expected to produce the same edited excerpt.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input = make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);
    let format = ZetaFormat::V0131GitMergeMarkersPrefix;

    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    let applied_with_marker = apply_edit(excerpt, &with_marker);
    let applied_without_marker = apply_edit(excerpt, &without_marker);

    assert_eq!(applied_with_marker, applied_without_marker);
    assert_eq!(applied_with_marker, "new content\n");
}
5294
/// Regression test for https://github.com/zed-industries/zed/issues/52489:
/// a `// =======` comment in the excerpt must not be reported as a special token.
#[test]
fn test_special_tokens_not_triggered_by_comment_separator() {
    let excerpt = "fn main() {\n // =======\n println!(\"hello\");\n}\n";
    let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);

    let flagged =
        prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix);

    assert!(
        !flagged,
        "comment containing ======= should not trigger special token detection"
    );
}
5305}