1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
9pub const CURSOR_MARKER: &str = "<|user_cursor|>";
10pub const MAX_PROMPT_TOKENS: usize = 4096;
11
12/// Use up to this amount of the editable region for prefill.
13/// Larger values may result in more robust generation, but
14/// this region becomes non-editable.
15pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text of `bytes` bytes, assuming three bytes per
/// token. Uses integer division, so the result rounds down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 512 tokens of additional context — confirm with the producer.
    pub editable_350_context_512: Option<Range<usize>>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 1024 tokens of additional context — confirm with the producer.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget — confirm with the producer.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget — confirm with the producer.
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render an edit-prediction prompt for one cursor
/// position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after `<|file_sep|>`
    /// in the cursor section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` are byte ranges
    /// into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// NOTE(review): presumably the row in the file at which `cursor_excerpt`
    /// starts — not read anywhere in this part of the file; confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events; the most recent events are the last elements
    /// (the budgeted formatter walks this list from the back).
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include as additional context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
67
/// Identifies which prompt layout (and matching output post-processing) to
/// use. The variant name is also its display/parse name via `IntoStaticStr`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed for the lower-case `v0226Hashline` variant. Renaming it would change
// its serialized and displayed name, so the lint is silenced instead.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304SeedNoEdits,
}
94
95impl std::fmt::Display for ZetaFormat {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 write!(f, "{}", <&'static str>::from(self))
98 }
99}
100
101impl ZetaFormat {
102 pub fn parse(format_name: &str) -> Result<Self> {
103 let mut results = ZetaFormat::iter().filter(|version| {
104 <&'static str>::from(version)
105 .to_lowercase()
106 .contains(&format_name.to_lowercase())
107 });
108 let Some(result) = results.next() else {
109 anyhow::bail!(
110 "`{format_name}` did not match any of:\n{}",
111 Self::options_as_string()
112 );
113 };
114 if results.next().is_some() {
115 anyhow::bail!(
116 "`{format_name}` matched more than one of:\n{}",
117 Self::options_as_string()
118 );
119 }
120 Ok(result)
121 }
122
123 pub fn options_as_string() -> String {
124 ZetaFormat::iter()
125 .map(|format| format!("- {}\n", <&'static str>::from(format)))
126 .collect::<Vec<_>>()
127 .concat()
128 }
129}
130
/// An entry in the edit history that is rendered into the prompt.
/// Serialized with an `"event"` tag field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` line of the rendered event.
        path: Arc<Path>,
        /// Path written on the `--- a` line of the rendered event.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Not rendered into the prompt; exposed via `Event::in_open_source_repo`.
        in_open_source_repo: bool,
    },
}
142
143impl Event {
144 pub fn in_open_source_repo(&self) -> bool {
145 match self {
146 Event::BufferChange {
147 in_open_source_repo,
148 ..
149 } => *in_open_source_repo,
150 }
151 }
152}
153
154pub fn write_event(prompt: &mut String, event: &Event) {
155 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
156 for component in path.components() {
157 prompt.push('/');
158 write!(prompt, "{}", component.as_os_str().display()).ok();
159 }
160 }
161 match event {
162 Event::BufferChange {
163 path,
164 old_path,
165 diff,
166 predicted,
167 in_open_source_repo: _,
168 } => {
169 if *predicted {
170 prompt.push_str("// User accepted prediction:\n");
171 }
172 prompt.push_str("--- a");
173 write_path_as_unix_str(prompt, old_path.as_ref());
174 prompt.push_str("\n+++ b");
175 write_path_as_unix_str(prompt, path.as_ref());
176 prompt.push('\n');
177 prompt.push_str(diff);
178 }
179 }
180}
181
/// A file other than the cursor file, with excerpts offered as prompt context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt that ends
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order within the file.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
190
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt; only `end` is read when
    /// rendering, to decide whether a trailing `...` line is emitted.
    pub row_range: Range<u32>,
    /// The excerpt's text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority under a token budget: excerpts with a lower `order` are
    /// admitted first. Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
198
199pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
200 special_tokens_for_format(format)
201 .iter()
202 .any(|token| input.cursor_excerpt.contains(token))
203}
204
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
208
209pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
210 match format {
211 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
212 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
213 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
214 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
215 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
216 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
217 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
218 ZetaFormat::v0226Hashline => hashline::special_tokens(),
219 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
220 }
221}
222
223pub fn excerpt_ranges_for_format(
224 format: ZetaFormat,
225 ranges: &ExcerptRanges,
226) -> (Range<usize>, Range<usize>) {
227 match format {
228 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
229 ranges.editable_150.clone(),
230 ranges.editable_150_context_350.clone(),
231 ),
232 ZetaFormat::V0114180EditableRegion => (
233 ranges.editable_180.clone(),
234 ranges.editable_180_context_350.clone(),
235 ),
236 ZetaFormat::V0120GitMergeMarkers
237 | ZetaFormat::V0131GitMergeMarkersPrefix
238 | ZetaFormat::V0211Prefill
239 | ZetaFormat::V0211SeedCoder
240 | ZetaFormat::v0226Hashline
241 | ZetaFormat::V0304SeedNoEdits => (
242 ranges.editable_350.clone(),
243 ranges.editable_350_context_150.clone(),
244 ),
245 }
246}
247
248pub fn write_cursor_excerpt_section_for_format(
249 format: ZetaFormat,
250 prompt: &mut String,
251 path: &Path,
252 context: &str,
253 editable_range: &Range<usize>,
254 cursor_offset: usize,
255) {
256 match format {
257 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
258 prompt,
259 path,
260 context,
261 editable_range,
262 cursor_offset,
263 ),
264 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
265 v0113_ordered::write_cursor_excerpt_section(
266 prompt,
267 path,
268 context,
269 editable_range,
270 cursor_offset,
271 )
272 }
273 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
274 prompt,
275 path,
276 context,
277 editable_range,
278 cursor_offset,
279 ),
280 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
281 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
282 prompt,
283 path,
284 context,
285 editable_range,
286 cursor_offset,
287 )
288 }
289 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
290 seed_coder::write_cursor_excerpt_section(
291 prompt,
292 path,
293 context,
294 editable_range,
295 cursor_offset,
296 )
297 }
298 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
299 prompt,
300 path,
301 context,
302 editable_range,
303 cursor_offset,
304 ),
305 }
306}
307
308pub fn format_prompt_with_budget_for_format(
309 input: &ZetaPromptInput,
310 format: ZetaFormat,
311 max_tokens: usize,
312) -> String {
313 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
314 let path = &*input.cursor_path;
315
316 match format {
317 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
318 seed_coder::format_prompt_with_budget(
319 path,
320 context,
321 &editable_range,
322 cursor_offset,
323 &input.events,
324 &input.related_files,
325 max_tokens,
326 )
327 }
328 _ => {
329 let mut cursor_section = String::new();
330 write_cursor_excerpt_section_for_format(
331 format,
332 &mut cursor_section,
333 path,
334 context,
335 &editable_range,
336 cursor_offset,
337 );
338
339 let cursor_tokens = estimate_tokens(cursor_section.len());
340 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
341
342 let edit_history_section = format_edit_history_within_budget(
343 &input.events,
344 "<|file_sep|>",
345 "edit history",
346 budget_after_cursor,
347 );
348 let edit_history_tokens = estimate_tokens(edit_history_section.len());
349 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
350
351 let related_files_section = format_related_files_within_budget(
352 &input.related_files,
353 "<|file_sep|>",
354 "",
355 budget_after_edit_history,
356 );
357
358 let mut prompt = String::new();
359 prompt.push_str(&related_files_section);
360 prompt.push_str(&edit_history_section);
361 prompt.push_str(&cursor_section);
362 prompt
363 }
364 }
365}
366
367pub fn get_prefill_for_format(
368 format: ZetaFormat,
369 context: &str,
370 editable_range: &Range<usize>,
371) -> String {
372 match format {
373 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
374 ZetaFormat::V0112MiddleAtEnd
375 | ZetaFormat::V0113Ordered
376 | ZetaFormat::V0114180EditableRegion
377 | ZetaFormat::V0120GitMergeMarkers
378 | ZetaFormat::V0131GitMergeMarkersPrefix
379 | ZetaFormat::V0211SeedCoder
380 | ZetaFormat::v0226Hashline
381 | ZetaFormat::V0304SeedNoEdits => String::new(),
382 }
383}
384
385pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
386 match format {
387 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
388 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
389 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
390 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
391 ZetaFormat::V0112MiddleAtEnd
392 | ZetaFormat::V0113Ordered
393 | ZetaFormat::V0114180EditableRegion
394 | ZetaFormat::v0226Hashline => None,
395 }
396}
397
398pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
399 match format {
400 ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
401 ZetaFormat::V0113Ordered
402 | ZetaFormat::V0114180EditableRegion
403 | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
404 ZetaFormat::V0120GitMergeMarkers
405 | ZetaFormat::V0131GitMergeMarkersPrefix
406 | ZetaFormat::V0211Prefill => (
407 v0120_git_merge_markers::START_MARKER,
408 v0120_git_merge_markers::SEPARATOR,
409 ),
410 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
411 (seed_coder::START_MARKER, seed_coder::SEPARATOR)
412 }
413 }
414}
415
416pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
417 match format {
418 ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
419 _ => region.to_string(),
420 }
421}
422
423pub fn encode_patch_as_output_for_format(
424 format: ZetaFormat,
425 old_editable_region: &str,
426 patch: &str,
427 cursor_offset: Option<usize>,
428) -> Result<Option<String>> {
429 match format {
430 ZetaFormat::v0226Hashline => {
431 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
432 }
433 ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
434 _ => Ok(None),
435 }
436}
437
438pub fn output_with_context_for_format(
439 format: ZetaFormat,
440 old_editable_region: &str,
441 output: &str,
442) -> Result<Option<String>> {
443 match format {
444 ZetaFormat::v0226Hashline => {
445 if hashline::output_has_edit_commands(output) {
446 Ok(Some(hashline::apply_edit_commands(
447 old_editable_region,
448 output,
449 )))
450 } else {
451 Ok(None)
452 }
453 }
454 ZetaFormat::V0304SeedNoEdits => {
455 if output.starts_with(seed_coder::NO_EDITS) {
456 Ok(Some(old_editable_region.to_owned()))
457 } else {
458 Ok(None)
459 }
460 }
461 _ => Ok(None),
462 }
463}
464
465/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
466pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
467 match output_end_marker_for_format(format) {
468 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
469 None => output,
470 }
471}
472
/// Returns the `(editable, context)` ranges for `format`.
///
/// This forwards directly to `excerpt_ranges_for_format`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
479
480pub fn resolve_cursor_region(
481 input: &ZetaPromptInput,
482 format: ZetaFormat,
483) -> (&str, Range<usize>, usize) {
484 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
485 let context_start = context_range.start;
486 let context_text = &input.cursor_excerpt[context_range];
487 let adjusted_editable =
488 (editable_range.start - context_start)..(editable_range.end - context_start);
489 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
490
491 (context_text, adjusted_editable, adjusted_cursor)
492}
493
/// Computes the prefill text for `input` in `format`: resolves the context
/// and editable range for the format, then delegates to
/// `get_prefill_for_format`. The cursor offset is not used.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
498
499fn format_edit_history_within_budget(
500 events: &[Arc<Event>],
501 file_marker: &str,
502 edit_history_name: &str,
503 max_tokens: usize,
504) -> String {
505 let header = format!("{}{}\n", file_marker, edit_history_name);
506 let header_tokens = estimate_tokens(header.len());
507 if header_tokens >= max_tokens {
508 return String::new();
509 }
510
511 let mut event_strings: Vec<String> = Vec::new();
512 let mut total_tokens = header_tokens;
513
514 for event in events.iter().rev() {
515 let mut event_str = String::new();
516 write_event(&mut event_str, event);
517 let event_tokens = estimate_tokens(event_str.len());
518
519 if total_tokens + event_tokens > max_tokens {
520 break;
521 }
522 total_tokens += event_tokens;
523 event_strings.push(event_str);
524 }
525
526 if event_strings.is_empty() {
527 return String::new();
528 }
529
530 let mut result = header;
531 for event_str in event_strings.iter().rev() {
532 result.push_str(event_str);
533 }
534 result
535}
536
537fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
538 let needs_newline = !excerpt.text.ends_with('\n');
539 let needs_ellipsis = excerpt.row_range.end < file_max_row;
540 let len = excerpt.text.len()
541 + if needs_newline { "\n".len() } else { 0 }
542 + if needs_ellipsis { "...\n".len() } else { 0 };
543 estimate_tokens(len)
544}
545
/// Renders excerpts from `related_files`, keeping the estimated token count
/// within `max_tokens`.
///
/// Excerpts are admitted in priority order (ascending `order`, ties broken
/// by file index and then excerpt index) until the first one that does not
/// fit. The admitted excerpts are then rendered grouped by file, in the
/// original file and excerpt order.
///
/// Each file starts with a header line `{file_prefix}{path}\n`.
/// `file_suffix` is written between consecutive files; it is not written
/// after the last file.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    // Flatten every excerpt of every file into one candidate list.
    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is only charged for the first excerpt of a file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority
        // excerpts are not considered even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Close the previous file before starting a new one.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
634
635pub fn write_related_files(
636 prompt: &mut String,
637 related_files: &[RelatedFile],
638) -> Vec<Range<usize>> {
639 let mut ranges = Vec::new();
640 for file in related_files {
641 let start = prompt.len();
642 let path_str = file.path.to_string_lossy();
643 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
644 for excerpt in &file.excerpts {
645 prompt.push_str(&excerpt.text);
646 if !prompt.ends_with('\n') {
647 prompt.push('\n');
648 }
649 if excerpt.row_range.end < file.max_row {
650 prompt.push_str("...\n");
651 }
652 }
653 let end = prompt.len();
654 ranges.push(start..end);
655 }
656 ranges
657}
658
659mod v0112_middle_at_end {
660 use super::*;
661
662 pub fn special_tokens() -> &'static [&'static str] {
663 &[
664 "<|fim_prefix|>",
665 "<|fim_suffix|>",
666 "<|fim_middle|>",
667 "<|file_sep|>",
668 CURSOR_MARKER,
669 ]
670 }
671
672 pub fn write_cursor_excerpt_section(
673 prompt: &mut String,
674 path: &Path,
675 context: &str,
676 editable_range: &Range<usize>,
677 cursor_offset: usize,
678 ) {
679 let path_str = path.to_string_lossy();
680 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
681
682 prompt.push_str("<|fim_prefix|>\n");
683 prompt.push_str(&context[..editable_range.start]);
684
685 prompt.push_str("<|fim_suffix|>\n");
686 prompt.push_str(&context[editable_range.end..]);
687 if !prompt.ends_with('\n') {
688 prompt.push('\n');
689 }
690
691 prompt.push_str("<|fim_middle|>current\n");
692 prompt.push_str(&context[editable_range.start..cursor_offset]);
693 prompt.push_str(CURSOR_MARKER);
694 prompt.push_str(&context[cursor_offset..editable_range.end]);
695 if !prompt.ends_with('\n') {
696 prompt.push('\n');
697 }
698
699 prompt.push_str("<|fim_middle|>updated\n");
700 }
701}
702
703mod v0113_ordered {
704 use super::*;
705
706 pub fn special_tokens() -> &'static [&'static str] {
707 &[
708 "<|fim_prefix|>",
709 "<|fim_suffix|>",
710 "<|fim_middle|>",
711 "<|file_sep|>",
712 CURSOR_MARKER,
713 ]
714 }
715
716 pub fn write_cursor_excerpt_section(
717 prompt: &mut String,
718 path: &Path,
719 context: &str,
720 editable_range: &Range<usize>,
721 cursor_offset: usize,
722 ) {
723 let path_str = path.to_string_lossy();
724 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
725
726 prompt.push_str("<|fim_prefix|>\n");
727 prompt.push_str(&context[..editable_range.start]);
728 if !prompt.ends_with('\n') {
729 prompt.push('\n');
730 }
731
732 prompt.push_str("<|fim_middle|>current\n");
733 prompt.push_str(&context[editable_range.start..cursor_offset]);
734 prompt.push_str(CURSOR_MARKER);
735 prompt.push_str(&context[cursor_offset..editable_range.end]);
736 if !prompt.ends_with('\n') {
737 prompt.push('\n');
738 }
739
740 prompt.push_str("<|fim_suffix|>\n");
741 prompt.push_str(&context[editable_range.end..]);
742 if !prompt.ends_with('\n') {
743 prompt.push('\n');
744 }
745
746 prompt.push_str("<|fim_middle|>updated\n");
747 }
748}
749
mod v0114180_editable_region {
    use super::*;

    /// Marker strings reserved by this format; identical to those of
    /// `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
757
758pub mod v0120_git_merge_markers {
759 //! A prompt that uses git-style merge conflict markers to represent the editable region.
760 //!
761 //! Example prompt:
762 //!
763 //! <|file_sep|>path/to/target_file.py
764 //! <|fim_prefix|>
765 //! code before editable region
766 //! <|fim_suffix|>
767 //! code after editable region
768 //! <|fim_middle|>
769 //! <<<<<<< CURRENT
770 //! code that
771 //! needs to<|user_cursor|>
772 //! be rewritten
773 //! =======
774 //!
775 //! Expected output (should be generated by the model):
776 //!
777 //! updated
778 //! code with
779 //! changes applied
780 //! >>>>>>> UPDATED
781
782 use super::*;
783
784 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
785 pub const SEPARATOR: &str = "=======\n";
786 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
787
788 pub fn special_tokens() -> &'static [&'static str] {
789 &[
790 "<|fim_prefix|>",
791 "<|fim_suffix|>",
792 "<|fim_middle|>",
793 "<|file_sep|>",
794 START_MARKER,
795 SEPARATOR,
796 END_MARKER,
797 CURSOR_MARKER,
798 ]
799 }
800
801 pub fn write_cursor_excerpt_section(
802 prompt: &mut String,
803 path: &Path,
804 context: &str,
805 editable_range: &Range<usize>,
806 cursor_offset: usize,
807 ) {
808 let path_str = path.to_string_lossy();
809 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
810
811 prompt.push_str("<|fim_prefix|>");
812 prompt.push_str(&context[..editable_range.start]);
813
814 prompt.push_str("<|fim_suffix|>");
815 prompt.push_str(&context[editable_range.end..]);
816 if !prompt.ends_with('\n') {
817 prompt.push('\n');
818 }
819
820 prompt.push_str("<|fim_middle|>");
821 prompt.push_str(START_MARKER);
822 prompt.push_str(&context[editable_range.start..cursor_offset]);
823 prompt.push_str(CURSOR_MARKER);
824 prompt.push_str(&context[cursor_offset..editable_range.end]);
825 if !prompt.ends_with('\n') {
826 prompt.push('\n');
827 }
828 prompt.push_str(SEPARATOR);
829 }
830}
831
832pub mod v0131_git_merge_markers_prefix {
833 //! A prompt that uses git-style merge conflict markers to represent the editable region.
834 //!
835 //! Example prompt:
836 //!
837 //! <|file_sep|>path/to/target_file.py
838 //! <|fim_prefix|>
839 //! code before editable region
840 //! <<<<<<< CURRENT
841 //! code that
842 //! needs to<|user_cursor|>
843 //! be rewritten
844 //! =======
845 //! <|fim_suffix|>
846 //! code after editable region
847 //! <|fim_middle|>
848 //!
849 //! Expected output (should be generated by the model):
850 //!
851 //! updated
852 //! code with
853 //! changes applied
854 //! >>>>>>> UPDATED
855
856 use super::*;
857
858 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
859 pub const SEPARATOR: &str = "=======\n";
860 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
861
862 pub fn special_tokens() -> &'static [&'static str] {
863 &[
864 "<|fim_prefix|>",
865 "<|fim_suffix|>",
866 "<|fim_middle|>",
867 "<|file_sep|>",
868 START_MARKER,
869 SEPARATOR,
870 END_MARKER,
871 CURSOR_MARKER,
872 ]
873 }
874
875 pub fn write_cursor_excerpt_section(
876 prompt: &mut String,
877 path: &Path,
878 context: &str,
879 editable_range: &Range<usize>,
880 cursor_offset: usize,
881 ) {
882 let path_str = path.to_string_lossy();
883 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
884
885 prompt.push_str("<|fim_prefix|>");
886 prompt.push_str(&context[..editable_range.start]);
887 prompt.push_str(START_MARKER);
888 prompt.push_str(&context[editable_range.start..cursor_offset]);
889 prompt.push_str(CURSOR_MARKER);
890 prompt.push_str(&context[cursor_offset..editable_range.end]);
891 if !prompt.ends_with('\n') {
892 prompt.push('\n');
893 }
894 prompt.push_str(SEPARATOR);
895
896 prompt.push_str("<|fim_suffix|>");
897 prompt.push_str(&context[editable_range.end..]);
898 if !prompt.ends_with('\n') {
899 prompt.push('\n');
900 }
901
902 prompt.push_str("<|fim_middle|>");
903 }
904}
905
906pub mod v0211_prefill {
907 use super::*;
908
909 pub fn special_tokens() -> &'static [&'static str] {
910 v0131_git_merge_markers_prefix::special_tokens()
911 }
912
913 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
914 let editable_region = &context[editable_range.start..editable_range.end];
915
916 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
917 let prefill_len = editable_region.floor_char_boundary(prefill_len);
918
919 // Find a token boundary to avoid splitting tokens in the prefill.
920 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
921 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
922 // the \n and consume any consecutive \n characters after it.
923 let prefill = &editable_region[..prefill_len];
924 match prefill.rfind('\n') {
925 Some(pos) => {
926 let mut end = pos + 1;
927 while end < editable_region.len()
928 && editable_region.as_bytes().get(end) == Some(&b'\n')
929 {
930 end += 1;
931 }
932 editable_region[..end].to_string()
933 }
934 // No newline found. Fall back to splitting before the last space
935 // (word-level boundary)
936 None => match prefill.rfind(' ') {
937 Some(pos) => prefill[..pos].to_string(),
938 None => prefill.to_string(),
939 },
940 }
941 }
942}
943
944pub mod hashline {
945
946 use std::fmt::Display;
947
948 pub const END_MARKER: &str = "<|fim_middle|>updated";
949 pub const START_MARKER: &str = "<|fim_middle|>current";
950
951 use super::*;
952
953 const SET_COMMAND_MARKER: &str = "<|set|>";
954 const INSERT_COMMAND_MARKER: &str = "<|insert|>";
955
956 pub fn special_tokens() -> &'static [&'static str] {
957 return &[
958 SET_COMMAND_MARKER,
959 "<|set_range|>",
960 INSERT_COMMAND_MARKER,
961 CURSOR_MARKER,
962 "<|file_sep|>",
963 "<|fim_prefix|>",
964 "<|fim_suffix|>",
965 "<|fim_middle|>",
966 ];
967 }
968
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Zero-based index of the line within the editable region.
        index: usize,
        /// Hash of the line's bytes, as computed by `hash_line`.
        hash: u8,
    }
975
976 impl Display for LineRef {
977 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
978 write!(f, "{}:{:02x}", self.index, self.hash)
979 }
980 }
981
982 pub fn hash_line(line: &[u8]) -> u8 {
983 let mut h: u8 = 0;
984 for &byte in line {
985 h = h.wrapping_add(byte);
986 }
987 return h;
988 }
989
990 /// Write the hashline-encoded editable region into `out`. Each line of
991 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
992 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
993 /// to the start of `editable_text`).
994 pub fn write_hashline_editable_region(
995 out: &mut String,
996 editable_text: &str,
997 cursor_offset_in_editable: usize,
998 ) {
999 let mut offset = 0;
1000 for (i, line) in editable_text.lines().enumerate() {
1001 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1002 && cursor_offset_in_editable < offset + line.len()
1003 {
1004 (
1005 &line[..cursor_offset_in_editable - offset],
1006 CURSOR_MARKER,
1007 &line[cursor_offset_in_editable - offset..],
1008 )
1009 } else {
1010 (line, "", "")
1011 };
1012 write!(
1013 out,
1014 "\n{}|{head}{cursor}{tail}",
1015 LineRef {
1016 index: i,
1017 hash: hash_line(line.as_bytes())
1018 }
1019 )
1020 .unwrap();
1021 offset += line.len() + 1;
1022 }
1023 }
1024
1025 pub fn write_cursor_excerpt_section(
1026 prompt: &mut String,
1027 path: &Path,
1028 context: &str,
1029 editable_range: &Range<usize>,
1030 cursor_offset: usize,
1031 ) {
1032 let path_str = path.to_string_lossy();
1033 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1034
1035 prompt.push_str("<|fim_prefix|>\n");
1036 prompt.push_str(&context[..editable_range.start]);
1037 prompt.push_str(START_MARKER);
1038
1039 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1040 let editable_region = &context[editable_range.clone()];
1041 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1042
1043 if !prompt.ends_with('\n') {
1044 prompt.push('\n');
1045 }
1046
1047 prompt.push_str("<|fim_suffix|>\n");
1048 prompt.push_str(&context[editable_range.end..]);
1049 if !prompt.ends_with('\n') {
1050 prompt.push('\n');
1051 }
1052
1053 prompt.push_str(END_MARKER);
1054 }
1055
    /// A single edit command parsed from the model output. `content` borrows
    /// from the model output and holds the lines that follow the command
    /// line, up to the next command line or the end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1073
1074 /// Parse a line reference like `3:c3` into a `LineRef`.
1075 fn parse_line_ref(s: &str) -> Option<LineRef> {
1076 let (idx_str, hash_str) = s.split_once(':')?;
1077 let index = idx_str.parse::<usize>().ok()?;
1078 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1079 Some(LineRef { index, hash })
1080 }
1081
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with the set or insert marker begins a command; the rest of that line
    /// is its specifier, and the following lines up to the next command line
    /// (or the end of the output) are its content. Lines before the first
    /// command are ignored.
    ///
    /// A set command whose specifier cannot be parsed is dropped. An insert
    /// command whose specifier cannot be parsed is kept with `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Isolate the current line and the offset just past its newline.
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command line: skip it.
                offset = line_end;
                continue;
            };

            // Extend the content over the following lines until the next
            // command line or the end of the output.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                // `a-b` is a range; a specifier without `-` is a single line.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                // An unparsable specifier yields `after: None`.
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command line (or at the end of the output).
            offset = scan;
        }

        commands
    }
1161
/// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
/// editable region, returning the plain text content.
///
/// Everything up to and including the first `|` of each line is removed; a
/// line without any `|` is kept unchanged. Lines are re-joined with `\n`, and
/// a trailing newline is emitted only if `region` ended with one.
pub fn strip_hashline_prefixes(region: &str) -> String {
    let mut decoded = String::with_capacity(region.len());

    for (line_number, line) in region.lines().enumerate() {
        if line_number > 0 {
            decoded.push('\n');
        }
        let content = match line.split_once('|') {
            Some((_prefix, rest)) => rest,
            None => line,
        };
        decoded.push_str(content);
    }

    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
1177
1178 pub fn output_has_edit_commands(model_output: &str) -> bool {
1179 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1180 }
1181
1182 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1183 /// original editable region text.
1184 ///
1185 /// `editable_region` is the original text of the editable region (without hash
1186 /// prefixes). `model_output` is the raw model response containing edit commands.
1187 ///
1188 /// Returns the full replacement text for the editable region.
1189 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1190 let original_lines: Vec<&str> = editable_region.lines().collect();
1191 let old_hashes: Vec<u8> = original_lines
1192 .iter()
1193 .map(|line| hash_line(line.as_bytes()))
1194 .collect();
1195
1196 let commands = parse_edit_commands(model_output);
1197
1198 // For set operations: indexed by start line → Some((end line index, content))
1199 // For insert operations: indexed by line index → vec of content to insert after
1200 // Insert-before-first is tracked separately.
1201 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1202 let mut insert_before_first: Vec<&str> = Vec::new();
1203 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1204
1205 for command in &commands {
1206 match command {
1207 EditCommand::Set {
1208 start,
1209 end,
1210 content,
1211 } => {
1212 if start.index < old_hashes.len()
1213 && end.index < old_hashes.len()
1214 && start.index <= end.index
1215 && old_hashes[start.index] == start.hash
1216 && old_hashes[end.index] == end.hash
1217 {
1218 set_ops[start.index] = Some((end.index, *content));
1219 }
1220 }
1221 EditCommand::Insert { after, content } => match after {
1222 None => insert_before_first.push(*content),
1223 Some(line_ref) => {
1224 if line_ref.index < old_hashes.len()
1225 && old_hashes[line_ref.index] == line_ref.hash
1226 {
1227 insert_after[line_ref.index].push(*content);
1228 }
1229 }
1230 },
1231 }
1232 }
1233
1234 let mut result = String::new();
1235
1236 // Emit any insertions before the first line
1237 for content in &insert_before_first {
1238 result.push_str(content);
1239 if !content.ends_with('\n') {
1240 result.push('\n');
1241 }
1242 }
1243
1244 let mut i = 0;
1245 while i < original_lines.len() {
1246 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1247 // Replace lines i..=end_index with the replacement content
1248 result.push_str(replacement);
1249 if !replacement.is_empty() && !replacement.ends_with('\n') {
1250 result.push('\n');
1251 }
1252 // Emit any insertions after the end of this set range
1253 if *end_index < insert_after.len() {
1254 for content in &insert_after[*end_index] {
1255 result.push_str(content);
1256 if !content.ends_with('\n') {
1257 result.push('\n');
1258 }
1259 }
1260 }
1261 i = end_index + 1;
1262 } else {
1263 // Keep the original line
1264 result.push_str(original_lines[i]);
1265 result.push('\n');
1266 // Emit any insertions after this line
1267 for content in &insert_after[i] {
1268 result.push_str(content);
1269 if !content.ends_with('\n') {
1270 result.push('\n');
1271 }
1272 }
1273 i += 1;
1274 }
1275 }
1276
1277 // Preserve trailing newline behavior: if the original ended with a
1278 // newline the result already has one; if it didn't, trim the extra one
1279 // we added.
1280 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1281 result.pop();
1282 }
1283
1284 result
1285 }
1286
1287 /// Convert a unified diff patch into hashline edit commands.
1288 ///
1289 /// Parses the unified diff `patch` directly to determine which lines of
1290 /// `old_text` are deleted/replaced and what new lines are added, then emits
1291 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1292 /// `{index}:{hash}` identifiers.
1293 ///
1294 /// `cursor_offset` is an optional byte offset into the first hunk's new
1295 /// text (context + additions) where the cursor marker should be placed.
1296 pub fn patch_to_edit_commands(
1297 old_text: &str,
1298 patch: &str,
1299 cursor_offset: Option<usize>,
1300 ) -> Result<String> {
1301 let old_lines: Vec<&str> = old_text.lines().collect();
1302 let old_hashes: Vec<u8> = old_lines
1303 .iter()
1304 .map(|line| hash_line(line.as_bytes()))
1305 .collect();
1306
1307 let mut result = String::new();
1308 let mut first_hunk = true;
1309
1310 struct Hunk<'a> {
1311 line_range: Range<usize>,
1312 new_text_lines: Vec<&'a str>,
1313 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1314 }
1315
1316 // Parse the patch line by line. We only care about hunk headers,
1317 // context, deletions, and additions.
1318 let mut old_line_index: usize = 0;
1319 let mut current_hunk: Option<Hunk> = None;
1320 // Byte offset tracking within the hunk's new text for cursor placement.
1321 let mut new_text_byte_offset: usize = 0;
1322 // The line index of the last old line seen before/in the current hunk
1323 // (used for insert-after reference).
1324 let mut last_old_line_before_hunk: Option<usize> = None;
1325
1326 fn flush_hunk(
1327 hunk: Hunk,
1328 last_old_line: Option<usize>,
1329 result: &mut String,
1330 old_hashes: &[u8],
1331 ) {
1332 if hunk.line_range.is_empty() {
1333 // Pure insertion — reference the old line to insert after when in bounds.
1334 if let Some(after) = last_old_line
1335 && let Some(&hash) = old_hashes.get(after)
1336 {
1337 write!(
1338 result,
1339 "{INSERT_COMMAND_MARKER}{}\n",
1340 LineRef { index: after, hash }
1341 )
1342 .unwrap();
1343 } else {
1344 result.push_str(INSERT_COMMAND_MARKER);
1345 result.push('\n');
1346 }
1347 } else {
1348 let start = hunk.line_range.start;
1349 let end_exclusive = hunk.line_range.end;
1350 let deleted_line_count = end_exclusive.saturating_sub(start);
1351
1352 if deleted_line_count == 1 {
1353 if let Some(&hash) = old_hashes.get(start) {
1354 write!(
1355 result,
1356 "{SET_COMMAND_MARKER}{}\n",
1357 LineRef { index: start, hash }
1358 )
1359 .unwrap();
1360 } else {
1361 result.push_str(SET_COMMAND_MARKER);
1362 result.push('\n');
1363 }
1364 } else {
1365 let end_inclusive = end_exclusive - 1;
1366 match (
1367 old_hashes.get(start).copied(),
1368 old_hashes.get(end_inclusive).copied(),
1369 ) {
1370 (Some(start_hash), Some(end_hash)) => {
1371 write!(
1372 result,
1373 "{SET_COMMAND_MARKER}{}-{}\n",
1374 LineRef {
1375 index: start,
1376 hash: start_hash
1377 },
1378 LineRef {
1379 index: end_inclusive,
1380 hash: end_hash
1381 }
1382 )
1383 .unwrap();
1384 }
1385 _ => {
1386 result.push_str(SET_COMMAND_MARKER);
1387 result.push('\n');
1388 }
1389 }
1390 }
1391 }
1392 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1393 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1394 && line_offset == cursor_line_offset
1395 {
1396 result.push_str(&line[..char_offset]);
1397 result.push_str(CURSOR_MARKER);
1398 result.push_str(&line[char_offset..]);
1399 continue;
1400 }
1401
1402 result.push_str(line);
1403 }
1404 }
1405
1406 for raw_line in patch.split_inclusive('\n') {
1407 if raw_line.starts_with("@@") {
1408 // Flush any pending change hunk from a previous patch hunk.
1409 if let Some(hunk) = current_hunk.take() {
1410 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1411 }
1412
1413 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1414 // We intentionally do not trust old_start as a direct local index into `old_text`,
1415 // because some patches are produced against a larger file region and carry
1416 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1417 if first_hunk {
1418 new_text_byte_offset = 0;
1419 first_hunk = false;
1420 }
1421 continue;
1422 }
1423
1424 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1425 continue;
1426 }
1427 if raw_line.starts_with("\\ No newline") {
1428 continue;
1429 }
1430
1431 if raw_line.starts_with('-') {
1432 // Extend or start a change hunk with this deleted old line.
1433 match &mut current_hunk {
1434 Some(Hunk {
1435 line_range: range, ..
1436 }) => range.end = old_line_index + 1,
1437 None => {
1438 current_hunk = Some(Hunk {
1439 line_range: old_line_index..old_line_index + 1,
1440 new_text_lines: Vec::new(),
1441 cursor_line_offset_in_new_text: None,
1442 });
1443 }
1444 }
1445 old_line_index += 1;
1446 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1447 // Place cursor marker if cursor_offset falls within this line.
1448 let mut cursor_line_offset = None;
1449 if let Some(cursor_off) = cursor_offset
1450 && (first_hunk
1451 || cursor_off >= new_text_byte_offset
1452 && cursor_off <= new_text_byte_offset + added_content.len())
1453 {
1454 let line_offset = added_content.floor_char_boundary(
1455 cursor_off
1456 .saturating_sub(new_text_byte_offset)
1457 .min(added_content.len()),
1458 );
1459 cursor_line_offset = Some(line_offset);
1460 }
1461
1462 new_text_byte_offset += added_content.len();
1463
1464 let hunk = current_hunk.get_or_insert(Hunk {
1465 line_range: old_line_index..old_line_index,
1466 new_text_lines: vec![],
1467 cursor_line_offset_in_new_text: None,
1468 });
1469 hunk.new_text_lines.push(added_content);
1470 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1471 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1472 } else {
1473 // Context line (starts with ' ' or is empty).
1474 if let Some(hunk) = current_hunk.take() {
1475 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1476 }
1477 last_old_line_before_hunk = Some(old_line_index);
1478 old_line_index += 1;
1479 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1480 new_text_byte_offset += content.len();
1481 }
1482 }
1483
1484 // Flush final group.
1485 if let Some(hunk) = current_hunk.take() {
1486 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1487 }
1488
1489 // Trim a single trailing newline.
1490 if result.ends_with('\n') {
1491 result.pop();
1492 }
1493
1494 Ok(result)
1495 }
1496
1497 #[cfg(test)]
1498 mod tests {
1499 use super::*;
1500 use indoc::indoc;
1501
// Table-driven check of `hashline::write_cursor_excerpt_section`: each case
// renders a prompt section for `test.rs` and compares it with the exact
// expected text, including the `{index}:{hash}|` line prefixes and the
// position of the cursor marker.
#[test]
fn test_format_cursor_region() {
    struct Case {
        name: &'static str,
        // Full excerpt text; the prefix/suffix around the editable range come from here.
        context: &'static str,
        // Byte range of `context` that is rendered with hashline prefixes.
        editable_range: Range<usize>,
        // Cursor position as a byte offset into `context`.
        cursor_offset: usize,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "basic_cursor_placement",
            context: "hello world\n",
            editable_range: 0..12,
            cursor_offset: 5,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:5c|hello<|user_cursor|> world
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "multiline_cursor_on_second_line",
            context: "aaa\nbbb\nccc\n",
            editable_range: 0..12,
            cursor_offset: 5, // byte 5 → 1 byte into "bbb"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                2:29|ccc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "no_trailing_newline_in_context",
            context: "line1\nline2",
            editable_range: 0..11,
            cursor_offset: 3,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:d9|lin<|user_cursor|>e1
                1:da|line2
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "leading_newline_in_editable_region",
            context: "\nabc\n",
            editable_range: 0..5,
            cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:00|
                1:26|a<|user_cursor|>bc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "with_suffix",
            context: "abc\ndef",
            editable_range: 0..4, // editable region = "abc\n", suffix = "def"
            cursor_offset: 2,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|ab<|user_cursor|>c
                <|fim_suffix|>
                def
                <|fim_middle|>updated"},
        },
        Case {
            name: "unicode_two_byte_chars",
            context: "héllo\n",
            editable_range: 0..7,
            cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:1b|hé<|user_cursor|>llo
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "unicode_three_byte_chars",
            context: "日本語\n",
            editable_range: 0..10,
            cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:80|日本<|user_cursor|>語
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "unicode_four_byte_chars",
            context: "a🌍b\n",
            editable_range: 0..7,
            cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:6b|a🌍<|user_cursor|>b
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "cursor_at_start_of_region_not_placed",
            context: "abc\n",
            editable_range: 0..4,
            cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "cursor_at_end_of_line_not_placed",
            context: "abc\ndef\n",
            editable_range: 0..8,
            cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                1:2f|def
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Case {
            name: "cursor_offset_relative_to_context_not_editable_region",
            // cursor_offset is relative to `context`, so when editable_range.start > 0,
            // write_cursor_excerpt_section must subtract it before comparing against
            // per-line offsets within the editable region.
            context: "pre\naaa\nbbb\nsuf\n",
            editable_range: 4..12, // editable region = "aaa\nbbb\n"
            cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                pre
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                <|fim_suffix|>
                suf
                <|fim_middle|>updated"},
        },
    ];

    for case in &cases {
        // Each case starts from an empty prompt so the comparison covers the
        // whole rendered section.
        let mut prompt = String::new();
        hashline::write_cursor_excerpt_section(
            &mut prompt,
            Path::new("test.rs"),
            case.context,
            &case.editable_range,
            case.cursor_offset,
        );
        assert_eq!(prompt, case.expected, "failed case: {}", case.name);
    }
}
1682
// Table-driven check of `hashline::apply_edit_commands`.
//
// Line references in `model_output` are `{index}:{hash}` pairs. The hashes in
// these fixtures cover the complete original line, leading indentation
// included, so the indentation inside the `indoc!` blocks is significant.
#[test]
fn test_apply_edit_commands() {
    struct Case {
        name: &'static str,
        // Editable region before the commands are applied.
        original: &'static str,
        // Raw model response containing the edit commands.
        model_output: &'static str,
        // Editable region after the commands are applied.
        expected: &'static str,
    }

    let cases = vec![
        Case {
            name: "set_single_line",
            original: indoc! {"
                let mut total = 0;
                for product in products {
                    total += ;
                }
                total
            "},
            model_output: indoc! {"
                <|set|>2:87
                    total += product.price;
            "},
            expected: indoc! {"
                let mut total = 0;
                for product in products {
                    total += product.price;
                }
                total
            "},
        },
        Case {
            name: "set_range",
            original: indoc! {"
                fn foo() {
                    let x = 1;
                    let y = 2;
                    let z = 3;
                }
            "},
            model_output: indoc! {"
                <|set|>1:46-3:4a
                    let sum = 6;
            "},
            expected: indoc! {"
                fn foo() {
                    let sum = 6;
                }
            "},
        },
        Case {
            name: "insert_after_line",
            original: indoc! {"
                fn main() {
                    let x = 1;
                }
            "},
            model_output: indoc! {"
                <|insert|>1:46
                    let y = 2;
            "},
            expected: indoc! {"
                fn main() {
                    let x = 1;
                    let y = 2;
                }
            "},
        },
        Case {
            name: "insert_before_first",
            original: indoc! {"
                let x = 1;
                let y = 2;
            "},
            model_output: indoc! {"
                <|insert|>
                use std::io;
            "},
            expected: indoc! {"
                use std::io;
                let x = 1;
                let y = 2;
            "},
        },
        Case {
            name: "set_with_cursor_marker",
            original: indoc! {"
                fn main() {
                    println!();
                }
            "},
            model_output: indoc! {"
                <|set|>1:34
                    eprintln!(\"<|user_cursor|>\");
            "},
            expected: indoc! {"
                fn main() {
                    eprintln!(\"<|user_cursor|>\");
                }
            "},
        },
        Case {
            name: "multiple_set_commands",
            original: indoc! {"
                aaa
                bbb
                ccc
                ddd
            "},
            model_output: indoc! {"
                <|set|>0:23
                AAA
                <|set|>2:29
                CCC
            "},
            expected: indoc! {"
                AAA
                bbb
                CCC
                ddd
            "},
        },
        Case {
            name: "set_range_multiline_replacement",
            original: indoc! {"
                fn handle_submit() {
                }

                fn handle_keystroke() {
            "},
            model_output: indoc! {"
                <|set|>0:3f-1:7d
                fn handle_submit(modal_state: &mut ModalState) {
                    <|user_cursor|>
                }
            "},
            expected: indoc! {"
                fn handle_submit(modal_state: &mut ModalState) {
                    <|user_cursor|>
                }

                fn handle_keystroke() {
            "},
        },
        Case {
            name: "no_edit_commands_returns_original",
            original: indoc! {"
                hello
                world
            "},
            model_output: "some random text with no commands",
            expected: indoc! {"
                hello
                world
            "},
        },
        Case {
            name: "wrong_hash_set_ignored",
            original: indoc! {"
                aaa
                bbb
            "},
            model_output: indoc! {"
                <|set|>0:ff
                ZZZ
            "},
            expected: indoc! {"
                aaa
                bbb
            "},
        },
        Case {
            name: "insert_and_set_combined",
            original: indoc! {"
                alpha
                beta
                gamma
            "},
            model_output: indoc! {"
                <|set|>0:06
                ALPHA
                <|insert|>1:9c
                beta_extra
            "},
            expected: indoc! {"
                ALPHA
                beta
                beta_extra
                gamma
            "},
        },
        Case {
            name: "no_trailing_newline_preserved",
            original: "hello\nworld",
            model_output: indoc! {"
                <|set|>0:14
                HELLO
            "},
            expected: "HELLO\nworld",
        },
        Case {
            name: "set_range_hash_mismatch_in_end_bound",
            original: indoc! {"
                one
                two
                three
            "},
            model_output: indoc! {"
                <|set|>0:42-2:ff
                ONE_TWO_THREE
            "},
            expected: indoc! {"
                one
                two
                three
            "},
        },
        Case {
            name: "set_range_start_greater_than_end_ignored",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|set|>2:63-1:62
                X
            "},
            expected: indoc! {"
                a
                b
                c
            "},
        },
        Case {
            name: "insert_out_of_bounds_ignored",
            original: indoc! {"
                x
                y
            "},
            model_output: indoc! {"
                <|insert|>99:aa
                z
            "},
            expected: indoc! {"
                x
                y
            "},
        },
        Case {
            name: "set_out_of_bounds_ignored",
            original: indoc! {"
                x
                y
            "},
            model_output: indoc! {"
                <|set|>99:aa
                z
            "},
            expected: indoc! {"
                x
                y
            "},
        },
        Case {
            name: "malformed_set_command_ignored",
            original: indoc! {"
                alpha
                beta
            "},
            model_output: indoc! {"
                <|set|>not-a-line-ref
                UPDATED
            "},
            expected: indoc! {"
                alpha
                beta
            "},
        },
        Case {
            name: "malformed_insert_hash_treated_as_before_first",
            original: indoc! {"
                alpha
                beta
            "},
            model_output: indoc! {"
                <|insert|>1:nothex
                preamble
            "},
            expected: indoc! {"
                preamble
                alpha
                beta
            "},
        },
        Case {
            name: "set_then_insert_same_target_orders_insert_after_replacement",
            original: indoc! {"
                cat
                dog
            "},
            model_output: indoc! {"
                <|set|>0:38
                CAT
                <|insert|>0:38
                TAIL
            "},
            expected: indoc! {"
                CAT
                TAIL
                dog
            "},
        },
        Case {
            // Both ranges are recorded, but the second one starts inside the
            // first and is skipped when the output is assembled.
            name: "overlapping_set_ranges_last_wins",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                <|set|>0:61-2:63
                FIRST
                <|set|>1:62-3:64
                SECOND
            "},
            expected: indoc! {"
                FIRST
                d
            "},
        },
        Case {
            name: "insert_before_first_and_after_line",
            original: indoc! {"
                a
                b
            "},
            model_output: indoc! {"
                <|insert|>
                HEAD
                <|insert|>0:61
                MID
            "},
            expected: indoc! {"
                HEAD
                a
                MID
                b
            "},
        },
    ];

    for case in &cases {
        let result = hashline::apply_edit_commands(case.original, case.model_output);
        assert_eq!(result, case.expected, "failed case: {}", case.name);
    }
}
2041
// `output_has_edit_commands` must report a command marker wherever it occurs
// in the output, and must not fire on plain text.
#[test]
fn test_output_has_edit_commands() {
    let outputs_with_commands = [
        format!("{}0:ab\nnew", SET_COMMAND_MARKER),
        format!("{}0:ab\nnew", INSERT_COMMAND_MARKER),
        format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER),
    ];
    for output in &outputs_with_commands {
        assert!(hashline::output_has_edit_commands(output));
    }

    let outputs_without_commands = ["just plain text", "NO_EDITS"];
    for output in &outputs_without_commands {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
2059
2060 // ---- hashline::patch_to_edit_commands round-trip tests ----
2061
// Round-trip check: convert each unified diff into edit commands with
// `hashline::patch_to_edit_commands`, apply them to `old` with
// `hashline::apply_edit_commands`, and expect `expected_new`.
//
// Whitespace in the fixtures is significant: context lines in a patch start
// with a single space, and the text after `+` must match `expected_new`
// byte-for-byte (indentation included).
#[test]
fn test_patch_to_edit_commands() {
    struct Case {
        name: &'static str,
        // Text the patch applies to.
        old: &'static str,
        // Unified diff describing the change.
        patch: &'static str,
        // Expected text after the round trip. May contain the cursor marker,
        // in which case its position is used as the cursor offset.
        expected_new: &'static str,
    }

    let cases = [
        Case {
            name: "single_line_replacement",
            old: indoc! {"
                let mut total = 0;
                for product in products {
                    total += ;
                }
                total
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                 let mut total = 0;
                 for product in products {
                -    total += ;
                +    total += product.price;
                 }
                 total
            "},
            expected_new: indoc! {"
                let mut total = 0;
                for product in products {
                    total += product.price;
                }
                total
            "},
        },
        Case {
            name: "multiline_replacement",
            old: indoc! {"
                fn foo() {
                    let x = 1;
                    let y = 2;
                    let z = 3;
                }
            "},
            patch: indoc! {"
                @@ -1,5 +1,3 @@
                 fn foo() {
                -    let x = 1;
                -    let y = 2;
                -    let z = 3;
                +    let sum = 1 + 2 + 3;
                 }
            "},
            expected_new: indoc! {"
                fn foo() {
                    let sum = 1 + 2 + 3;
                }
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                fn main() {
                    let x = 1;
                }
            "},
            patch: indoc! {"
                @@ -1,3 +1,4 @@
                 fn main() {
                     let x = 1;
                +    let y = 2;
                 }
            "},
            expected_new: indoc! {"
                fn main() {
                    let x = 1;
                    let y = 2;
                }
            "},
        },
        Case {
            name: "insertion_before_first",
            old: indoc! {"
                let x = 1;
                let y = 2;
            "},
            patch: indoc! {"
                @@ -1,2 +1,3 @@
                +use std::io;
                 let x = 1;
                 let y = 2;
            "},
            expected_new: indoc! {"
                use std::io;
                let x = 1;
                let y = 2;
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                aaa
                bbb
                ccc
                ddd
            "},
            patch: indoc! {"
                @@ -1,4 +1,2 @@
                 aaa
                -bbb
                -ccc
                 ddd
            "},
            expected_new: indoc! {"
                aaa
                ddd
            "},
        },
        Case {
            name: "multiple_changes",
            old: indoc! {"
                alpha
                beta
                gamma
                delta
                epsilon
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                -alpha
                +ALPHA
                 beta
                 gamma
                -delta
                +DELTA
                 epsilon
            "},
            expected_new: indoc! {"
                ALPHA
                beta
                gamma
                DELTA
                epsilon
            "},
        },
        Case {
            name: "replace_with_insertion",
            old: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    modal_state.dismiss();
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,4 @@
                 fn handle() {
                     modal_state.close();
                +    eprintln!("");
                     modal_state.dismiss();
            "#},
            expected_new: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    eprintln!("");
                    modal_state.dismiss();
            "#},
        },
        Case {
            name: "complete_replacement",
            old: indoc! {"
                aaa
                bbb
                ccc
            "},
            patch: indoc! {"
                @@ -1,3 +1,3 @@
                -aaa
                -bbb
                -ccc
                +xxx
                +yyy
                +zzz
            "},
            expected_new: indoc! {"
                xxx
                yyy
                zzz
            "},
        },
        Case {
            // The blank context lines in this patch are written as empty
            // lines; the parser treats an empty line as context.
            name: "add_function_body",
            old: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn

                fn handle_keystroke() {
            "},
            patch: indoc! {"
                @@ -1,6 +1,8 @@
                 fn foo() {
                     modal_state.dismiss();
                 }

                -fn
                +fn handle_submit() {
                +    todo()
                +}

                 fn handle_keystroke() {
            "},
            expected_new: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn handle_submit() {
                    todo()
                }

                fn handle_keystroke() {
            "},
        },
        Case {
            name: "with_cursor_offset",
            old: indoc! {r#"
                fn main() {
                    println!();
                }
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,3 @@
                 fn main() {
                -    println!();
                +    eprintln!("");
                 }
            "#},
            expected_new: indoc! {r#"
                fn main() {
                    eprintln!("<|user_cursor|>");
                }
            "#},
        },
        Case {
            // The hunk header claims line 20, but `old` only has two lines:
            // indices must come from the patch body, not the header.
            name: "non_local_hunk_header_pure_insertion_repro",
            old: indoc! {"
                aaa
                bbb
            "},
            patch: indoc! {"
                @@ -20,2 +20,3 @@
                 aaa
                +xxx
                 bbb
            "},
            expected_new: indoc! {"
                aaa
                xxx
                bbb
            "},
        },
    ];

    for case in &cases {
        // The cursor_offset for patch_to_edit_commands is relative to
        // the first hunk's new text (context + additions). We compute
        // it by finding where the marker sits in the expected output
        // (which mirrors the new text of the hunk).
        let cursor_offset = case.expected_new.find(CURSOR_MARKER);

        let commands = hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
            .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

        assert!(
            hashline::output_has_edit_commands(&commands),
            "case {}: expected edit commands, got: {commands:?}",
            case.name,
        );

        let applied = hashline::apply_edit_commands(case.old, &commands);
        assert_eq!(applied, case.expected_new, "case {}", case.name);
    }
}
2348 }
2349}
2350
2351pub mod seed_coder {
2352 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2353 //!
2354 //! Seed-Coder uses different FIM tokens and order than Qwen:
2355 //! - SPM order: suffix comes FIRST, then prefix, then middle
2356 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2357 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2358 //!
2359 //! All context (related files, edit history) goes in the PREFIX section.
2360 //! The suffix contains only code after the editable region.
2361 //!
2362 //! Example prompt:
2363 //!
2364 //! <[fim-suffix]>
2365 //! code after editable region
2366 //! <[fim-prefix]><filename>related/file.py
2367 //! related file content
2368 //!
2369 //! <filename>edit_history
2370 //! --- a/some_file.py
2371 //! +++ b/some_file.py
2372 //! -old
2373 //! +new
2374 //!
2375 //! <filename>path/to/target_file.py
2376 //! code before editable region
2377 //! <<<<<<< CURRENT
2378 //! code that
2379 //! needs to<|user_cursor|>
2380 //! be rewritten
2381 //! =======
2382 //! <[fim-middle]>
2383 //!
2384 //! Expected output (model generates):
2385 //!
2386 //! updated
2387 //! code with
2388 //! changes applied
2389 //! >>>>>>> UPDATED
2390
2391 use super::*;
2392
2393 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2394 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2395 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2396 pub const FILE_MARKER: &str = "<filename>";
2397
2398 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2399 pub const SEPARATOR: &str = "=======\n";
2400 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2401
2402 pub const NO_EDITS: &str = "NO_EDITS\n";
2403
2404 pub fn special_tokens() -> &'static [&'static str] {
2405 &[
2406 FIM_SUFFIX,
2407 FIM_PREFIX,
2408 FIM_MIDDLE,
2409 FILE_MARKER,
2410 START_MARKER,
2411 SEPARATOR,
2412 END_MARKER,
2413 CURSOR_MARKER,
2414 ]
2415 }
2416
2417 pub fn write_cursor_excerpt_section(
2418 prompt: &mut String,
2419 path: &Path,
2420 context: &str,
2421 editable_range: &Range<usize>,
2422 cursor_offset: usize,
2423 ) {
2424 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2425 prompt.push_str(§ion);
2426 }
2427
2428 pub fn format_prompt_with_budget(
2429 path: &Path,
2430 context: &str,
2431 editable_range: &Range<usize>,
2432 cursor_offset: usize,
2433 events: &[Arc<Event>],
2434 related_files: &[RelatedFile],
2435 max_tokens: usize,
2436 ) -> String {
2437 let suffix_section = build_suffix_section(context, editable_range);
2438 let cursor_prefix_section =
2439 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2440
2441 let suffix_tokens = estimate_tokens(suffix_section.len());
2442 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2443 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2444
2445 let edit_history_section = super::format_edit_history_within_budget(
2446 events,
2447 FILE_MARKER,
2448 "edit_history",
2449 budget_after_cursor,
2450 );
2451 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2452 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2453
2454 let related_files_section = super::format_related_files_within_budget(
2455 related_files,
2456 FILE_MARKER,
2457 "",
2458 budget_after_edit_history,
2459 );
2460
2461 let mut prompt = String::new();
2462 prompt.push_str(&suffix_section);
2463 prompt.push_str(FIM_PREFIX);
2464 prompt.push_str(&related_files_section);
2465 if !related_files_section.is_empty() {
2466 prompt.push('\n');
2467 }
2468 prompt.push_str(&edit_history_section);
2469 if !edit_history_section.is_empty() {
2470 prompt.push('\n');
2471 }
2472 prompt.push_str(&cursor_prefix_section);
2473 prompt.push_str(FIM_MIDDLE);
2474 prompt
2475 }
2476
2477 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2478 let mut section = String::new();
2479 section.push_str(FIM_SUFFIX);
2480 section.push_str(&context[editable_range.end..]);
2481 if !section.ends_with('\n') {
2482 section.push('\n');
2483 }
2484 section
2485 }
2486
2487 fn build_cursor_prefix_section(
2488 path: &Path,
2489 context: &str,
2490 editable_range: &Range<usize>,
2491 cursor_offset: usize,
2492 ) -> String {
2493 let mut section = String::new();
2494 let path_str = path.to_string_lossy();
2495 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2496
2497 section.push_str(&context[..editable_range.start]);
2498 section.push_str(START_MARKER);
2499 section.push_str(&context[editable_range.start..cursor_offset]);
2500 section.push_str(CURSOR_MARKER);
2501 section.push_str(&context[cursor_offset..editable_range.end]);
2502 if !section.ends_with('\n') {
2503 section.push('\n');
2504 }
2505 section.push_str(SEPARATOR);
2506 section
2507 }
2508
2509 /// Format patch as containing no changes if it's empty; otherwise return None.
2510 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2511 // Count lines in the patch
2512 let empty_patch = patch.lines().count() <= 3;
2513 if empty_patch {
2514 Some(format!("{NO_EDITS}{END_MARKER}"))
2515 } else {
2516 None
2517 }
2518 }
2519}
2520
2521/// The zeta1 prompt format
2522pub mod zeta1 {
2523 use super::*;
2524 use std::fmt::Write;
2525
2526 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
2527 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
2528 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
2529 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
2530
2531 const INSTRUCTION_HEADER: &str = concat!(
2532 "### Instruction:\n",
2533 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
2534 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
2535 "into account the cursor location.\n\n",
2536 "### User Edits:\n\n"
2537 );
2538 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
2539 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2540
2541 /// Formats a complete zeta1 prompt from the input events and excerpt.
2542 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2543 let mut prompt = String::with_capacity(
2544 INSTRUCTION_HEADER.len()
2545 + input_events.len()
2546 + EXCERPT_HEADER.len()
2547 + input_excerpt.len()
2548 + RESPONSE_HEADER.len(),
2549 );
2550 prompt.push_str(INSTRUCTION_HEADER);
2551 prompt.push_str(input_events);
2552 prompt.push_str(EXCERPT_HEADER);
2553 prompt.push_str(input_excerpt);
2554 prompt.push_str(RESPONSE_HEADER);
2555 prompt
2556 }
2557
2558 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2559 /// editable and context byte-offset ranges within `cursor_excerpt`.
2560 pub fn format_zeta1_from_input(
2561 input: &ZetaPromptInput,
2562 editable_range: Range<usize>,
2563 context_range: Range<usize>,
2564 ) -> String {
2565 let events = format_zeta1_events(&input.events);
2566 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2567 format_zeta1_prompt(&events, &excerpt)
2568 }
2569
2570 /// Formats events in zeta1 style (oldest first).
2571 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2572 let mut result = String::new();
2573 for event in events {
2574 let event_string = format_zeta1_event(event);
2575 if event_string.is_empty() {
2576 continue;
2577 }
2578 if !result.is_empty() {
2579 result.push_str("\n\n");
2580 }
2581 result.push_str(&event_string);
2582 }
2583 result
2584 }
2585
2586 fn format_zeta1_event(event: &Event) -> String {
2587 match event {
2588 Event::BufferChange {
2589 path,
2590 old_path,
2591 diff,
2592 ..
2593 } => {
2594 let mut prompt = String::new();
2595 if old_path != path {
2596 writeln!(
2597 prompt,
2598 "User renamed {} to {}\n",
2599 old_path.display(),
2600 path.display()
2601 )
2602 .ok();
2603 }
2604 if !diff.is_empty() {
2605 write!(
2606 prompt,
2607 "User edited {}:\n```diff\n{}\n```",
2608 path.display(),
2609 diff
2610 )
2611 .ok();
2612 }
2613 prompt
2614 }
2615 }
2616 }
2617
2618 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2619 /// within `cursor_excerpt`.
2620 fn format_zeta1_excerpt(
2621 input: &ZetaPromptInput,
2622 editable_range: Range<usize>,
2623 context_range: Range<usize>,
2624 ) -> String {
2625 let path_str = input.cursor_path.to_string_lossy();
2626 let excerpt = &*input.cursor_excerpt;
2627 let cursor_offset = input.cursor_offset_in_excerpt;
2628
2629 let mut prompt = String::new();
2630 writeln!(&mut prompt, "```{path_str}").ok();
2631
2632 let starts_at_file_beginning =
2633 input.excerpt_start_row == Some(0) && context_range.start == 0;
2634 if starts_at_file_beginning {
2635 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2636 }
2637
2638 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2639
2640 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2641 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2642 prompt.push_str(CURSOR_MARKER);
2643 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2644 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2645
2646 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2647 write!(prompt, "\n```").ok();
2648
2649 prompt
2650 }
2651
2652 /// Cleans zeta1 model output by extracting content between editable region
2653 /// markers and converting the zeta1 cursor marker to the universal one.
2654 /// Returns `None` if the output doesn't contain the expected markers.
2655 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2656 let content = output.replace(CURSOR_MARKER, "");
2657
2658 let content_start = content
2659 .find(EDITABLE_REGION_START_MARKER)
2660 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2661 .map(|pos| {
2662 if content.as_bytes().get(pos) == Some(&b'\n') {
2663 pos + 1
2664 } else {
2665 pos
2666 }
2667 })
2668 .unwrap_or(0);
2669
2670 let content_end = content
2671 .find(EDITABLE_REGION_END_MARKER)
2672 .map(|pos| {
2673 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2674 pos - 1
2675 } else {
2676 pos
2677 }
2678 })
2679 .unwrap_or(content.len());
2680
2681 if content_start > content_end {
2682 return Some(String::new());
2683 }
2684
2685 let extracted = &content[content_start..content_end];
2686
2687 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2688 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2689 let text_before_cursor = text_before_cursor
2690 .find(EDITABLE_REGION_START_MARKER)
2691 .map(|pos| {
2692 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2693 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2694 after_marker + 1
2695 } else {
2696 after_marker
2697 }
2698 })
2699 .unwrap_or(0);
2700 let offset_in_extracted = zeta1_cursor_pos
2701 .saturating_sub(text_before_cursor)
2702 .min(extracted.len());
2703 offset_in_extracted
2704 });
2705
2706 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2707 if let Some(offset) = cursor_offset {
2708 result.push_str(&extracted[..offset]);
2709 result.push_str(super::CURSOR_MARKER);
2710 result.push_str(&extracted[offset..]);
2711 } else {
2712 result.push_str(extracted);
2713 }
2714
2715 Some(result)
2716 }
2717}
2718
2719#[cfg(test)]
2720mod tests {
2721 use super::*;
2722 use indoc::indoc;
2723
2724 fn make_input(
2725 cursor_excerpt: &str,
2726 editable_range: Range<usize>,
2727 cursor_offset: usize,
2728 events: Vec<Event>,
2729 related_files: Vec<RelatedFile>,
2730 ) -> ZetaPromptInput {
2731 let context_range = 0..cursor_excerpt.len();
2732 ZetaPromptInput {
2733 cursor_path: Path::new("test.rs").into(),
2734 cursor_excerpt: cursor_excerpt.into(),
2735 cursor_offset_in_excerpt: cursor_offset,
2736 excerpt_start_row: None,
2737 events: events.into_iter().map(Arc::new).collect(),
2738 related_files,
2739 excerpt_ranges: ExcerptRanges {
2740 editable_150: editable_range.clone(),
2741 editable_180: editable_range.clone(),
2742 editable_350: editable_range,
2743 editable_150_context_350: context_range.clone(),
2744 editable_180_context_350: context_range.clone(),
2745 editable_350_context_150: context_range,
2746 ..Default::default()
2747 },
2748 experiment: None,
2749 in_open_source_repo: false,
2750 can_collect_data: false,
2751 repo_url: None,
2752 }
2753 }
2754
2755 fn make_event(path: &str, diff: &str) -> Event {
2756 Event::BufferChange {
2757 path: Path::new(path).into(),
2758 old_path: Path::new(path).into(),
2759 diff: diff.to_string(),
2760 predicted: false,
2761 in_open_source_repo: false,
2762 }
2763 }
2764
2765 fn make_related_file(path: &str, content: &str) -> RelatedFile {
2766 RelatedFile {
2767 path: Path::new(path).into(),
2768 max_row: content.lines().count() as u32,
2769 excerpts: vec![RelatedExcerpt {
2770 row_range: 0..content.lines().count() as u32,
2771 text: content.into(),
2772 order: 0,
2773 }],
2774 in_open_source_repo: false,
2775 }
2776 }
2777
2778 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2779 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2780 }
2781
    /// With a budget far larger than the prompt, the related file, the edit
    /// history and the cursor excerpt are all rendered in full.
    #[test]
    fn test_no_truncation_when_within_budget() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
        );
    }
2814
    /// With a tight budget of 50 tokens the edit history section is dropped
    /// while both related files and the cursor excerpt are kept.
    #[test]
    fn test_truncation_drops_edit_history_when_budget_tight() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![
                make_related_file("r1.rs", "a\n"),
                make_related_file("r2.rs", "b\n"),
            ],
        );

        // Baseline: a generous budget renders every section.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Tight budget: the edit history no longer appears.
        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
2865
    /// A related file with several excerpts may be included partially: with a
    /// budget of 50 only the first excerpt of `big.rs` is rendered.
    #[test]
    fn test_truncation_includes_partial_excerpts() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("big.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..10,
                        text: "first excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 10..20,
                        text: "second excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 20..30,
                        text: "third excerpt\n".into(),
                        order: 0,
                    },
                ],
            }],
        );

        // Baseline: all three excerpts fit, separated by `...` lines.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                second excerpt
                ...
                third excerpt
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Tight budget: only the first excerpt remains, followed by `...`.
        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
2930
    /// When the budget only allows one related file, the excerpt with the
    /// lower `order` value is the one that is kept.
    #[test]
    fn test_truncation_prioritizes_lower_order_excerpts() {
        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
        // With tight budget, only the lower-order excerpt from file_b should be included.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("file_a.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "low priority content\n".into(),
                        order: 5,
                    }],
                },
                RelatedFile {
                    path: Path::new("file_b.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "high priority content\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both files included; rendered in stable lexicographic order.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>file_a.rs
                low priority content
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // With tight budget, only file_b (lower order) fits.
        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
        // file_a would need another 14 tokens, which doesn't fit.
        assert_eq!(
            format_with_budget(&input, 52),
            indoc! {r#"
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
2999
    /// Truncation works per excerpt, not per file: with a budget of 55 the
    /// order-3 excerpt of `mod.rs` is dropped while its two order-1 excerpts
    /// stay.
    #[test]
    fn test_truncation_drops_high_order_excerpts_within_file() {
        // A single file has excerpts at order 1 and order 3. With a tight budget,
        // only the order-1 excerpts are included while the order-3 excerpt is
        // dropped — even though they belong to the same file. This also preserves
        // the parent invariant: parent outline items have order ≤ their best
        // child, so they're always included when any child is.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("mod.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..5,
                        text: "mod header\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 5..15,
                        text: "important fn\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 15..30,
                        text: "less important fn\n".into(),
                        order: 3,
                    },
                ],
            }],
        );

        // With large budget, all three excerpts included.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                less important fn
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // With tight budget, only order<=1 excerpts included (header + important fn).
        assert_eq!(
            format_with_budget(&input, 55),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
3073
    /// When the edit history does not fit, the earlier event in the list
    /// (`old.rs`) is dropped and the later one (`new.rs`) is kept.
    #[test]
    fn test_truncation_drops_older_events_first() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
            vec![],
        );

        // Baseline: both events are rendered, in list order.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/old.rs
                +++ b/old.rs
                -1
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Tight budget: only the last event survives.
        assert_eq!(
            format_with_budget(&input, 55),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
3119
    /// With a budget of 30 both the edit history and the related file are
    /// dropped, yet the cursor excerpt is still rendered in full.
    #[test]
    fn test_cursor_excerpt_always_included_with_minimal_budget() {
        let input = make_input(
            "fn main() {}",
            0..12,
            3,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "helper\n")],
        );

        assert_eq!(
            format_with_budget(&input, 30),
            indoc! {r#"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                fn <|user_cursor|>main() {}
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
3142
3143 fn format_seed_coder(input: &ZetaPromptInput) -> String {
3144 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3145 }
3146
3147 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3148 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3149 }
3150
    /// Pins the full seed-coder layout: suffix section, `<[fim-prefix]>`,
    /// related file, edit history, cursor excerpt with conflict-style
    /// markers, and a trailing `<[fim-middle]>` without a final newline.
    #[test]
    fn test_seed_coder_basic_format() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                suffix
                <[fim-prefix]><filename>related.rs
                fn helper() {}

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new

                <filename>test.rs
                prefix
                <<<<<<< CURRENT
                edi<|user_cursor|>table
                =======
                <[fim-middle]>"#}
        );
    }
3183
    /// Without events or related files the cursor excerpt follows
    /// `<[fim-prefix]>` directly.
    #[test]
    fn test_seed_coder_no_context() {
        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                after
                <[fim-prefix]><filename>test.rs
                before
                <<<<<<< CURRENT
                mid<|user_cursor|>dle
                =======
                <[fim-middle]>"#}
        );
    }
3201
    /// With a budget of 30 the seed-coder prompt loses both the related file
    /// and the edit history but keeps the suffix and cursor sections.
    #[test]
    fn test_seed_coder_truncation_drops_context() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![make_related_file("r1.rs", "content\n")],
        );

        // With large budget, everything is included
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>r1.rs
                content

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget, context is dropped but cursor section remains
        assert_eq!(
            format_seed_coder_with_budget(&input, 30),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );
    }
3245
    /// With a budget of 44 only the related file whose excerpt has the lower
    /// `order` value (`high_prio.rs`) is kept in the seed-coder prompt.
    #[test]
    fn test_seed_coder_truncation_prioritizes_lower_order() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("low_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "low prio\n".into(),
                        order: 10,
                    }],
                },
                RelatedFile {
                    path: Path::new("high_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "high prio\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both included; rendered in the order the files were
        // passed in (low_prio.rs before high_prio.rs), not sorted by name.
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>low_prio.rs
                low prio
                <filename>high_prio.rs
                high prio

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget, only high_prio included.
        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
        assert_eq!(
            format_seed_coder_with_budget(&input, 44),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>high_prio.rs
                high prio

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );
    }
3311
    /// Cleaning seed-coder output strips a trailing `>>>>>>> UPDATED` line and
    /// leaves output without that marker unchanged.
    #[test]
    fn test_seed_coder_clean_output() {
        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
        let output_without_marker = "new code\n";

        assert_eq!(
            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
        assert_eq!(
            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
3326
3327 #[test]
3328 fn test_format_zeta1_from_input_basic() {
3329 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
3330 let input = ZetaPromptInput {
3331 cursor_path: Path::new("src/main.rs").into(),
3332 cursor_excerpt: excerpt.into(),
3333 cursor_offset_in_excerpt: 30,
3334 excerpt_start_row: Some(0),
3335 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3336 related_files: vec![],
3337 excerpt_ranges: ExcerptRanges {
3338 editable_150: 15..41,
3339 editable_180: 15..41,
3340 editable_350: 15..41,
3341 editable_150_context_350: 0..excerpt.len(),
3342 editable_180_context_350: 0..excerpt.len(),
3343 editable_350_context_150: 0..excerpt.len(),
3344 ..Default::default()
3345 },
3346 experiment: None,
3347 in_open_source_repo: false,
3348 can_collect_data: false,
3349 repo_url: None,
3350 };
3351
3352 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3353
3354 assert_eq!(
3355 prompt,
3356 concat!(
3357 "### Instruction:\n",
3358 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3359 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3360 "into account the cursor location.\n",
3361 "\n",
3362 "### User Edits:\n",
3363 "\n",
3364 "User edited other.rs:\n",
3365 "```diff\n",
3366 "-old\n",
3367 "+new\n",
3368 "\n",
3369 "```\n",
3370 "\n",
3371 "### User Excerpt:\n",
3372 "\n",
3373 "```src/main.rs\n",
3374 "<|start_of_file|>\n",
3375 "fn before() {}\n",
3376 "<|editable_region_start|>\n",
3377 "fn foo() {\n",
3378 " <|user_cursor_is_here|>let x = 1;\n",
3379 "\n",
3380 "<|editable_region_end|>}\n",
3381 "fn after() {}\n",
3382 "\n",
3383 "```\n",
3384 "\n",
3385 "### Response:\n",
3386 ),
3387 );
3388 }
3389
3390 #[test]
3391 fn test_format_zeta1_from_input_no_start_of_file() {
3392 let excerpt = "fn foo() {\n let x = 1;\n}\n";
3393 let input = ZetaPromptInput {
3394 cursor_path: Path::new("src/main.rs").into(),
3395 cursor_excerpt: excerpt.into(),
3396 cursor_offset_in_excerpt: 15,
3397 excerpt_start_row: Some(10),
3398 events: vec![],
3399 related_files: vec![],
3400 excerpt_ranges: ExcerptRanges {
3401 editable_150: 0..28,
3402 editable_180: 0..28,
3403 editable_350: 0..28,
3404 editable_150_context_350: 0..28,
3405 editable_180_context_350: 0..28,
3406 editable_350_context_150: 0..28,
3407 ..Default::default()
3408 },
3409 experiment: None,
3410 in_open_source_repo: false,
3411 can_collect_data: false,
3412 repo_url: None,
3413 };
3414
3415 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3416
3417 assert_eq!(
3418 prompt,
3419 concat!(
3420 "### Instruction:\n",
3421 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3422 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3423 "into account the cursor location.\n",
3424 "\n",
3425 "### User Edits:\n",
3426 "\n",
3427 "\n",
3428 "\n",
3429 "### User Excerpt:\n",
3430 "\n",
3431 "```src/main.rs\n",
3432 "<|editable_region_start|>\n",
3433 "fn foo() {\n",
3434 " <|user_cursor_is_here|>let x = 1;\n",
3435 "}\n",
3436 "\n",
3437 "<|editable_region_end|>\n",
3438 "```\n",
3439 "\n",
3440 "### Response:\n",
3441 ),
3442 );
3443 }
3444
3445 #[test]
3446 fn test_format_zeta1_from_input_with_sub_ranges() {
3447 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
3448 let editable_range = 10..37;
3449 let context_range = 0..excerpt.len();
3450
3451 let input = ZetaPromptInput {
3452 cursor_path: Path::new("test.rs").into(),
3453 cursor_excerpt: excerpt.into(),
3454 cursor_offset_in_excerpt: 25,
3455 excerpt_start_row: Some(0),
3456 events: vec![],
3457 related_files: vec![],
3458 excerpt_ranges: ExcerptRanges {
3459 editable_150: editable_range.clone(),
3460 editable_180: editable_range.clone(),
3461 editable_350: editable_range.clone(),
3462 editable_150_context_350: context_range.clone(),
3463 editable_180_context_350: context_range.clone(),
3464 editable_350_context_150: context_range.clone(),
3465 ..Default::default()
3466 },
3467 experiment: None,
3468 in_open_source_repo: false,
3469 can_collect_data: false,
3470 repo_url: None,
3471 };
3472
3473 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3474
3475 assert_eq!(
3476 prompt,
3477 concat!(
3478 "### Instruction:\n",
3479 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3480 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3481 "into account the cursor location.\n",
3482 "\n",
3483 "### User Edits:\n",
3484 "\n",
3485 "\n",
3486 "\n",
3487 "### User Excerpt:\n",
3488 "\n",
3489 "```test.rs\n",
3490 "<|start_of_file|>\n",
3491 "// prefix\n",
3492 "<|editable_region_start|>\n",
3493 "fn foo() {\n",
3494 " <|user_cursor_is_here|>let x = 1;\n",
3495 "}\n",
3496 "<|editable_region_end|>\n",
3497 "// suffix\n",
3498 "\n",
3499 "```\n",
3500 "\n",
3501 "### Response:\n",
3502 ),
3503 );
3504 }
3505
3506 #[test]
3507 fn test_clean_zeta1_model_output_basic() {
3508 let output = indoc! {"
3509 <|editable_region_start|>
3510 fn main() {
3511 println!(\"hello\");
3512 }
3513 <|editable_region_end|>
3514 "};
3515
3516 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3517 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
3518 }
3519
3520 #[test]
3521 fn test_clean_zeta1_model_output_with_cursor() {
3522 let output = indoc! {"
3523 <|editable_region_start|>
3524 fn main() {
3525 <|user_cursor_is_here|>println!(\"hello\");
3526 }
3527 <|editable_region_end|>
3528 "};
3529
3530 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3531 assert_eq!(
3532 cleaned,
3533 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
3534 );
3535 }
3536
    /// Output without any region markers is returned unchanged, still wrapped
    /// in `Some`.
    #[test]
    fn test_clean_zeta1_model_output_no_markers() {
        let output = "fn main() {}\n";
        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "fn main() {}\n");
    }
3543
    /// A start marker followed directly by the end marker yields an empty
    /// string.
    #[test]
    fn test_clean_zeta1_model_output_empty_region() {
        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "");
    }
3550}