1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the prompt at the user's cursor position inside the
/// editable region.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Default token budget used by `format_zeta_prompt` when assembling a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text of `bytes` bytes, using a fixed
/// bytes-per-token ratio (integer division, rounding down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens
    /// of additional context (inferred from the field name; confirm with the producer).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens
    /// of additional context (inferred from the field name; confirm with the producer).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget
    /// (inferred from the field name; confirm with the producer).
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget
    /// (inferred from the field name; confirm with the producer).
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render an edit-prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after `<|file_sep|>` in
    /// the cursor section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` are byte ranges
    /// into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Presumably the buffer row at which `cursor_excerpt` starts — TODO confirm
    /// with the producer; it is not read in this part of the file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the "edit history" section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files rendered into the related-files section.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): looks like a flag for whether the cursor file lives in an
    /// open-source repository — not read in this part of the file; confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): looks like a data-collection consent flag — not read in
    /// this part of the file; confirm.
    #[serde(default)]
    pub can_collect_data: bool,
    /// NOTE(review): presumably the URL of the repository containing the cursor
    /// file — not read in this part of the file; confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
67
/// Identifies which prompt layout (and matching output conventions) to use.
///
/// The variant names double as the user-facing format names: they are exposed
/// through `IntoStaticStr` and matched against in `ZetaFormat::parse`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, hence the lint override.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
}
93
94impl std::fmt::Display for ZetaFormat {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 write!(f, "{}", <&'static str>::from(self))
97 }
98}
99
100impl ZetaFormat {
101 pub fn parse(format_name: &str) -> Result<Self> {
102 let mut results = ZetaFormat::iter().filter(|version| {
103 <&'static str>::from(version)
104 .to_lowercase()
105 .contains(&format_name.to_lowercase())
106 });
107 let Some(result) = results.next() else {
108 anyhow::bail!(
109 "`{format_name}` did not match any of:\n{}",
110 Self::options_as_string()
111 );
112 };
113 if results.next().is_some() {
114 anyhow::bail!(
115 "`{format_name}` matched more than one of:\n{}",
116 Self::options_as_string()
117 );
118 }
119 Ok(result)
120 }
121
122 pub fn options_as_string() -> String {
123 ZetaFormat::iter()
124 .map(|format| format!("- {}\n", <&'static str>::from(format)))
125 .collect::<Vec<_>>()
126 .concat()
127 }
128}
129
/// A user-activity event that can be rendered into the prompt's edit history.
///
/// Serialized with an internal `"event"` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path after the change; rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path before the change; rendered on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed via `Event::in_open_source_repo`; not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
141
142impl Event {
143 pub fn in_open_source_repo(&self) -> bool {
144 match self {
145 Event::BufferChange {
146 in_open_source_repo,
147 ..
148 } => *in_open_source_repo,
149 }
150 }
151}
152
153pub fn write_event(prompt: &mut String, event: &Event) {
154 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
155 for component in path.components() {
156 prompt.push('/');
157 write!(prompt, "{}", component.as_os_str().display()).ok();
158 }
159 }
160 match event {
161 Event::BufferChange {
162 path,
163 old_path,
164 diff,
165 predicted,
166 in_open_source_repo: _,
167 } => {
168 if *predicted {
169 prompt.push_str("// User accepted prediction:\n");
170 }
171 prompt.push_str("--- a");
172 write_path_as_unix_str(prompt, old_path.as_ref());
173 prompt.push_str("\n+++ b");
174 write_path_as_unix_str(prompt, path.as_ref());
175 prompt.push('\n');
176 prompt.push_str(diff);
177 }
178 }
179}
180
/// A file other than the cursor file, with the excerpts of it that may be
/// included in the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker when the file is rendered.
    pub path: Arc<Path>,
    /// Row bound of the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): looks like a flag for whether this file lives in an
    /// open-source repository — not read in this part of the file; confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
189
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt; `end` is compared against the
    /// file's `max_row` to decide whether a trailing `...` line is rendered.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: lower values
    /// are considered first by `format_related_files_within_budget`.
    #[serde(default)]
    pub order: usize,
}
197
198pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
199 special_tokens_for_format(format)
200 .iter()
201 .any(|token| input.cursor_excerpt.contains(token))
202}
203
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
207
/// Returns the special tokens reserved by `format`, delegating to the
/// format's own module.
///
/// The match is intentionally exhaustive so that adding a format forces a
/// decision here.
pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
        ZetaFormat::v0226Hashline => hashline::special_tokens(),
    }
}
220
221pub fn excerpt_ranges_for_format(
222 format: ZetaFormat,
223 ranges: &ExcerptRanges,
224) -> (Range<usize>, Range<usize>) {
225 match format {
226 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
227 ranges.editable_150.clone(),
228 ranges.editable_150_context_350.clone(),
229 ),
230 ZetaFormat::V0114180EditableRegion => (
231 ranges.editable_180.clone(),
232 ranges.editable_180_context_350.clone(),
233 ),
234 ZetaFormat::V0120GitMergeMarkers
235 | ZetaFormat::V0131GitMergeMarkersPrefix
236 | ZetaFormat::V0211Prefill
237 | ZetaFormat::V0211SeedCoder
238 | ZetaFormat::v0226Hashline => (
239 ranges.editable_350.clone(),
240 ranges.editable_350_context_150.clone(),
241 ),
242 }
243}
244
/// Appends the cursor-excerpt section of the prompt to `prompt`, using the
/// layout that belongs to `format`.
///
/// `context` is the context text, `editable_range` the editable byte range
/// within `context`, and `cursor_offset` the cursor's byte offset within
/// `context`. All arguments are forwarded unchanged to the format's writer.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The 180-token editable-region format shares the ordered layout.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The prefill format shares the merge-markers-in-prefix layout.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
    }
}
302
303pub fn format_prompt_with_budget_for_format(
304 input: &ZetaPromptInput,
305 format: ZetaFormat,
306 max_tokens: usize,
307) -> String {
308 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
309 let path = &*input.cursor_path;
310
311 match format {
312 ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
313 path,
314 context,
315 &editable_range,
316 cursor_offset,
317 &input.events,
318 &input.related_files,
319 max_tokens,
320 ),
321 _ => {
322 let mut cursor_section = String::new();
323 write_cursor_excerpt_section_for_format(
324 format,
325 &mut cursor_section,
326 path,
327 context,
328 &editable_range,
329 cursor_offset,
330 );
331
332 let cursor_tokens = estimate_tokens(cursor_section.len());
333 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
334
335 let edit_history_section = format_edit_history_within_budget(
336 &input.events,
337 "<|file_sep|>",
338 "edit history",
339 budget_after_cursor,
340 );
341 let edit_history_tokens = estimate_tokens(edit_history_section.len());
342 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
343
344 let related_files_section = format_related_files_within_budget(
345 &input.related_files,
346 "<|file_sep|>",
347 "",
348 budget_after_edit_history,
349 );
350
351 let mut prompt = String::new();
352 prompt.push_str(&related_files_section);
353 prompt.push_str(&edit_history_section);
354 prompt.push_str(&cursor_section);
355 prompt
356 }
357 }
358}
359
/// Returns the prefill text for `format`, given the context text and the
/// editable byte range within it.
///
/// Only `V0211Prefill` produces a prefill; every other format returns an
/// empty string. The match is exhaustive so new formats must opt in or out
/// explicitly.
pub fn get_prefill_for_format(
    format: ZetaFormat,
    context: &str,
    editable_range: &Range<usize>,
) -> String {
    match format {
        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline => String::new(),
    }
}
376
/// Returns the marker that terminates model output for `format`, or `None`
/// for formats that have no such marker.
pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
    match format {
        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
        // The prefill format reuses the merge-markers-prefix end marker.
        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
        ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::v0226Hashline => None,
    }
}
389
/// Returns the `(start, end)` marker pair that delimits the current
/// (pre-edit) editable region in a prompt rendered with `format`.
pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
        ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
        // All git-merge-marker layouts share the same start marker and separator.
        ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill => (
            v0120_git_merge_markers::START_MARKER,
            v0120_git_merge_markers::SEPARATOR,
        ),
        ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
    }
}
405
406pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
407 match format {
408 ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
409 _ => region.to_string(),
410 }
411}
412
/// Encodes `patch` as the model output expected by `format`, if the format
/// has a dedicated output encoding.
///
/// For the hashline format this delegates to
/// `hashline::patch_to_edit_commands` and wraps its result in `Some`; errors
/// from that call are propagated. Every other format returns `Ok(None)`.
pub fn encode_patch_as_output_for_format(
    format: ZetaFormat,
    old_editable_region: &str,
    patch: &str,
    cursor_offset: Option<usize>,
) -> Result<Option<String>> {
    match format {
        ZetaFormat::v0226Hashline => {
            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
        }
        _ => Ok(None),
    }
}
426
427pub fn output_with_context_for_format(
428 format: ZetaFormat,
429 old_editable_region: &str,
430 output: &str,
431) -> Result<Option<String>> {
432 match format {
433 ZetaFormat::v0226Hashline => {
434 if hashline::output_has_edit_commands(output) {
435 Ok(Some(hashline::apply_edit_commands(
436 old_editable_region,
437 output,
438 )))
439 } else {
440 Ok(None)
441 }
442 }
443 _ => Ok(None),
444 }
445}
446
447/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
448pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
449 match output_end_marker_for_format(format) {
450 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
451 None => output,
452 }
453}
454
/// Alias for `excerpt_ranges_for_format`: returns the
/// `(editable_range, context_range)` pair that `format` uses.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
461
462pub fn resolve_cursor_region(
463 input: &ZetaPromptInput,
464 format: ZetaFormat,
465) -> (&str, Range<usize>, usize) {
466 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
467 let context_start = context_range.start;
468 let context_text = &input.cursor_excerpt[context_range];
469 let adjusted_editable =
470 (editable_range.start - context_start)..(editable_range.end - context_start);
471 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
472
473 (context_text, adjusted_editable, adjusted_cursor)
474}
475
/// Computes the prefill text for `input` in `format` by resolving the cursor
/// region and delegating to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
480
481fn format_edit_history_within_budget(
482 events: &[Arc<Event>],
483 file_marker: &str,
484 edit_history_name: &str,
485 max_tokens: usize,
486) -> String {
487 let header = format!("{}{}\n", file_marker, edit_history_name);
488 let header_tokens = estimate_tokens(header.len());
489 if header_tokens >= max_tokens {
490 return String::new();
491 }
492
493 let mut event_strings: Vec<String> = Vec::new();
494 let mut total_tokens = header_tokens;
495
496 for event in events.iter().rev() {
497 let mut event_str = String::new();
498 write_event(&mut event_str, event);
499 let event_tokens = estimate_tokens(event_str.len());
500
501 if total_tokens + event_tokens > max_tokens {
502 break;
503 }
504 total_tokens += event_tokens;
505 event_strings.push(event_str);
506 }
507
508 if event_strings.is_empty() {
509 return String::new();
510 }
511
512 let mut result = header;
513 for event_str in event_strings.iter().rev() {
514 result.push_str(event_str);
515 }
516 result
517}
518
519fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
520 let needs_newline = !excerpt.text.ends_with('\n');
521 let needs_ellipsis = excerpt.row_range.end < file_max_row;
522 let len = excerpt.text.len()
523 + if needs_newline { "\n".len() } else { 0 }
524 + if needs_ellipsis { "...\n".len() } else { 0 };
525 estimate_tokens(len)
526}
527
/// Renders as many related-file excerpts as fit within `max_tokens`
/// (estimated), returning the rendered section.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index,
/// then excerpt index) until the first one that would exceed the budget; a
/// file's header cost is charged once, when its first excerpt is admitted.
/// The admitted excerpts are then rendered grouped by file, in the original
/// file and excerpt order. Each file starts with `{file_prefix}{path}` on its
/// own line, and `file_suffix` is written between consecutive files.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is only paid for by a file's first admitted excerpt.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority excerpts
        // are not considered even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Close the previous file before starting a new one.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Signal that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
616
617pub fn write_related_files(
618 prompt: &mut String,
619 related_files: &[RelatedFile],
620) -> Vec<Range<usize>> {
621 let mut ranges = Vec::new();
622 for file in related_files {
623 let start = prompt.len();
624 let path_str = file.path.to_string_lossy();
625 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
626 for excerpt in &file.excerpts {
627 prompt.push_str(&excerpt.text);
628 if !prompt.ends_with('\n') {
629 prompt.push('\n');
630 }
631 if excerpt.row_range.end < file.max_row {
632 prompt.push_str("...\n");
633 }
634 }
635 let end = prompt.len();
636 ranges.push(start..end);
637 }
638 ranges
639}
640
641mod v0112_middle_at_end {
642 use super::*;
643
644 pub fn special_tokens() -> &'static [&'static str] {
645 &[
646 "<|fim_prefix|>",
647 "<|fim_suffix|>",
648 "<|fim_middle|>",
649 "<|file_sep|>",
650 CURSOR_MARKER,
651 ]
652 }
653
654 pub fn write_cursor_excerpt_section(
655 prompt: &mut String,
656 path: &Path,
657 context: &str,
658 editable_range: &Range<usize>,
659 cursor_offset: usize,
660 ) {
661 let path_str = path.to_string_lossy();
662 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
663
664 prompt.push_str("<|fim_prefix|>\n");
665 prompt.push_str(&context[..editable_range.start]);
666
667 prompt.push_str("<|fim_suffix|>\n");
668 prompt.push_str(&context[editable_range.end..]);
669 if !prompt.ends_with('\n') {
670 prompt.push('\n');
671 }
672
673 prompt.push_str("<|fim_middle|>current\n");
674 prompt.push_str(&context[editable_range.start..cursor_offset]);
675 prompt.push_str(CURSOR_MARKER);
676 prompt.push_str(&context[cursor_offset..editable_range.end]);
677 if !prompt.ends_with('\n') {
678 prompt.push('\n');
679 }
680
681 prompt.push_str("<|fim_middle|>updated\n");
682 }
683}
684
685mod v0113_ordered {
686 use super::*;
687
688 pub fn special_tokens() -> &'static [&'static str] {
689 &[
690 "<|fim_prefix|>",
691 "<|fim_suffix|>",
692 "<|fim_middle|>",
693 "<|file_sep|>",
694 CURSOR_MARKER,
695 ]
696 }
697
698 pub fn write_cursor_excerpt_section(
699 prompt: &mut String,
700 path: &Path,
701 context: &str,
702 editable_range: &Range<usize>,
703 cursor_offset: usize,
704 ) {
705 let path_str = path.to_string_lossy();
706 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
707
708 prompt.push_str("<|fim_prefix|>\n");
709 prompt.push_str(&context[..editable_range.start]);
710 if !prompt.ends_with('\n') {
711 prompt.push('\n');
712 }
713
714 prompt.push_str("<|fim_middle|>current\n");
715 prompt.push_str(&context[editable_range.start..cursor_offset]);
716 prompt.push_str(CURSOR_MARKER);
717 prompt.push_str(&context[cursor_offset..editable_range.end]);
718 if !prompt.ends_with('\n') {
719 prompt.push('\n');
720 }
721
722 prompt.push_str("<|fim_suffix|>\n");
723 prompt.push_str(&context[editable_range.end..]);
724 if !prompt.ends_with('\n') {
725 prompt.push('\n');
726 }
727
728 prompt.push_str("<|fim_middle|>updated\n");
729 }
730}
731
mod v0114180_editable_region {
    //! Variant of the ordered layout; it only differs in which excerpt ranges
    //! are selected, so it reuses `v0113_ordered` for everything else.

    use super::*;

    /// Same special tokens as the `v0113_ordered` layout.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
739
740pub mod v0120_git_merge_markers {
741 //! A prompt that uses git-style merge conflict markers to represent the editable region.
742 //!
743 //! Example prompt:
744 //!
745 //! <|file_sep|>path/to/target_file.py
746 //! <|fim_prefix|>
747 //! code before editable region
748 //! <|fim_suffix|>
749 //! code after editable region
750 //! <|fim_middle|>
751 //! <<<<<<< CURRENT
752 //! code that
753 //! needs to<|user_cursor|>
754 //! be rewritten
755 //! =======
756 //!
757 //! Expected output (should be generated by the model):
758 //!
759 //! updated
760 //! code with
761 //! changes applied
762 //! >>>>>>> UPDATED
763
764 use super::*;
765
766 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
767 pub const SEPARATOR: &str = "=======\n";
768 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
769
770 pub fn special_tokens() -> &'static [&'static str] {
771 &[
772 "<|fim_prefix|>",
773 "<|fim_suffix|>",
774 "<|fim_middle|>",
775 "<|file_sep|>",
776 START_MARKER,
777 SEPARATOR,
778 END_MARKER,
779 CURSOR_MARKER,
780 ]
781 }
782
783 pub fn write_cursor_excerpt_section(
784 prompt: &mut String,
785 path: &Path,
786 context: &str,
787 editable_range: &Range<usize>,
788 cursor_offset: usize,
789 ) {
790 let path_str = path.to_string_lossy();
791 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
792
793 prompt.push_str("<|fim_prefix|>");
794 prompt.push_str(&context[..editable_range.start]);
795
796 prompt.push_str("<|fim_suffix|>");
797 prompt.push_str(&context[editable_range.end..]);
798 if !prompt.ends_with('\n') {
799 prompt.push('\n');
800 }
801
802 prompt.push_str("<|fim_middle|>");
803 prompt.push_str(START_MARKER);
804 prompt.push_str(&context[editable_range.start..cursor_offset]);
805 prompt.push_str(CURSOR_MARKER);
806 prompt.push_str(&context[cursor_offset..editable_range.end]);
807 if !prompt.ends_with('\n') {
808 prompt.push('\n');
809 }
810 prompt.push_str(SEPARATOR);
811 }
812}
813
814pub mod v0131_git_merge_markers_prefix {
815 //! A prompt that uses git-style merge conflict markers to represent the editable region.
816 //!
817 //! Example prompt:
818 //!
819 //! <|file_sep|>path/to/target_file.py
820 //! <|fim_prefix|>
821 //! code before editable region
822 //! <<<<<<< CURRENT
823 //! code that
824 //! needs to<|user_cursor|>
825 //! be rewritten
826 //! =======
827 //! <|fim_suffix|>
828 //! code after editable region
829 //! <|fim_middle|>
830 //!
831 //! Expected output (should be generated by the model):
832 //!
833 //! updated
834 //! code with
835 //! changes applied
836 //! >>>>>>> UPDATED
837
838 use super::*;
839
840 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
841 pub const SEPARATOR: &str = "=======\n";
842 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
843
844 pub fn special_tokens() -> &'static [&'static str] {
845 &[
846 "<|fim_prefix|>",
847 "<|fim_suffix|>",
848 "<|fim_middle|>",
849 "<|file_sep|>",
850 START_MARKER,
851 SEPARATOR,
852 END_MARKER,
853 CURSOR_MARKER,
854 ]
855 }
856
857 pub fn write_cursor_excerpt_section(
858 prompt: &mut String,
859 path: &Path,
860 context: &str,
861 editable_range: &Range<usize>,
862 cursor_offset: usize,
863 ) {
864 let path_str = path.to_string_lossy();
865 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
866
867 prompt.push_str("<|fim_prefix|>");
868 prompt.push_str(&context[..editable_range.start]);
869 prompt.push_str(START_MARKER);
870 prompt.push_str(&context[editable_range.start..cursor_offset]);
871 prompt.push_str(CURSOR_MARKER);
872 prompt.push_str(&context[cursor_offset..editable_range.end]);
873 if !prompt.ends_with('\n') {
874 prompt.push('\n');
875 }
876 prompt.push_str(SEPARATOR);
877
878 prompt.push_str("<|fim_suffix|>");
879 prompt.push_str(&context[editable_range.end..]);
880 if !prompt.ends_with('\n') {
881 prompt.push('\n');
882 }
883
884 prompt.push_str("<|fim_middle|>");
885 }
886}
887
888pub mod v0211_prefill {
889 use super::*;
890
891 pub fn special_tokens() -> &'static [&'static str] {
892 v0131_git_merge_markers_prefix::special_tokens()
893 }
894
895 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
896 let editable_region = &context[editable_range.start..editable_range.end];
897
898 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
899 let prefill_len = editable_region.floor_char_boundary(prefill_len);
900
901 // Find a token boundary to avoid splitting tokens in the prefill.
902 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
903 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
904 // the \n and consume any consecutive \n characters after it.
905 let prefill = &editable_region[..prefill_len];
906 match prefill.rfind('\n') {
907 Some(pos) => {
908 let mut end = pos + 1;
909 while end < editable_region.len()
910 && editable_region.as_bytes().get(end) == Some(&b'\n')
911 {
912 end += 1;
913 }
914 editable_region[..end].to_string()
915 }
916 // No newline found. Fall back to splitting before the last space
917 // (word-level boundary)
918 None => match prefill.rfind(' ') {
919 Some(pos) => prefill[..pos].to_string(),
920 None => prefill.to_string(),
921 },
922 }
923 }
924}
925
926pub mod hashline {
927
928 use std::fmt::Display;
929
    /// Written at the very end of the hashline cursor-excerpt section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written right before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
932
933 use super::*;
934
    /// Prefix of a model-output line that starts a "set" (replace lines) command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
937
938 pub fn special_tokens() -> &'static [&'static str] {
939 return &[
940 SET_COMMAND_MARKER,
941 "<|set_range|>",
942 INSERT_COMMAND_MARKER,
943 CURSOR_MARKER,
944 "<|file_sep|>",
945 "<|fim_prefix|>",
946 "<|fim_suffix|>",
947 "<|fim_middle|>",
948 ];
949 }
950
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Zero-based index of the line within the editable region.
        index: usize,
        /// Hash of the line's bytes, as computed by `hash_line`.
        hash: u8,
    }
957
958 impl Display for LineRef {
959 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
960 write!(f, "{}:{:02x}", self.index, self.hash)
961 }
962 }
963
964 pub fn hash_line(line: &[u8]) -> u8 {
965 let mut h: u8 = 0;
966 for &byte in line {
967 h = h.wrapping_add(byte);
968 }
969 return h;
970 }
971
972 /// Write the hashline-encoded editable region into `out`. Each line of
973 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
974 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
975 /// to the start of `editable_text`).
976 pub fn write_hashline_editable_region(
977 out: &mut String,
978 editable_text: &str,
979 cursor_offset_in_editable: usize,
980 ) {
981 let mut offset = 0;
982 for (i, line) in editable_text.lines().enumerate() {
983 let (head, cursor, tail) = if cursor_offset_in_editable > offset
984 && cursor_offset_in_editable < offset + line.len()
985 {
986 (
987 &line[..cursor_offset_in_editable - offset],
988 CURSOR_MARKER,
989 &line[cursor_offset_in_editable - offset..],
990 )
991 } else {
992 (line, "", "")
993 };
994 write!(
995 out,
996 "\n{}|{head}{cursor}{tail}",
997 LineRef {
998 index: i,
999 hash: hash_line(line.as_bytes())
1000 }
1001 )
1002 .unwrap();
1003 offset += line.len() + 1;
1004 }
1005 }
1006
1007 pub fn write_cursor_excerpt_section(
1008 prompt: &mut String,
1009 path: &Path,
1010 context: &str,
1011 editable_range: &Range<usize>,
1012 cursor_offset: usize,
1013 ) {
1014 let path_str = path.to_string_lossy();
1015 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1016
1017 prompt.push_str("<|fim_prefix|>\n");
1018 prompt.push_str(&context[..editable_range.start]);
1019 prompt.push_str(START_MARKER);
1020
1021 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1022 let editable_region = &context[editable_range.clone()];
1023 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1024
1025 if !prompt.ends_with('\n') {
1026 prompt.push('\n');
1027 }
1028
1029 prompt.push_str("<|fim_suffix|>\n");
1030 prompt.push_str(&context[editable_range.end..]);
1031 if !prompt.ends_with('\n') {
1032 prompt.push('\n');
1033 }
1034
1035 prompt.push_str(END_MARKER);
1036 }
1037
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output: it is the text between this
    /// command's header line and the next command header (or the end of the
    /// output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1055
1056 /// Parse a line reference like `3:c3` into a `LineRef`.
1057 fn parse_line_ref(s: &str) -> Option<LineRef> {
1058 let (idx_str, hash_str) = s.split_once(':')?;
1059 let index = idx_str.parse::<usize>().ok()?;
1060 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1061 Some(LineRef { index, hash })
1062 }
1063
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with the set or insert marker opens a command; the rest of that line is
    /// the command's line specifier, and all following lines up to the next
    /// command header (or the end of the output) form its content. Lines
    /// before the first command header are ignored.
    ///
    /// A set command whose specifier cannot be parsed is dropped. An insert
    /// command with an unparsable specifier is kept with `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Isolate the current line and the offset just past its newline.
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command header: skip the line.
                offset = line_end;
                continue;
            };

            // Extend the content over every following line that is not itself
            // a command header.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                // `a-b` is an inclusive range; a bare reference targets one line.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command header (or the end of the output).
            offset = scan;
        }

        commands
    }
1143
/// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
/// editable region, returning the plain text content.
///
/// A prefix is only removed when the text before the first `|` has the
/// expected shape: decimal digits, a `:`, then one or two hexadecimal digits.
/// Lines without such a prefix are kept unchanged, so a `|` that belongs to
/// the line's own content is never mistaken for the prefix separator.
///
/// Lines are joined with `\n`, and a trailing newline is emitted exactly when
/// `region` ends with one.
pub fn strip_hashline_prefixes(region: &str) -> String {
    /// Returns `line` without its hashline prefix, or `line` itself when it
    /// does not start with a well-formed prefix.
    fn without_prefix(line: &str) -> &str {
        let Some((prefix, rest)) = line.split_once('|') else {
            return line;
        };
        let Some((index, hash)) = prefix.split_once(':') else {
            return line;
        };
        let index_ok = !index.is_empty() && index.bytes().all(|b| b.is_ascii_digit());
        let hash_ok = matches!(hash.len(), 1..=2) && hash.bytes().all(|b| b.is_ascii_hexdigit());
        if index_ok && hash_ok { rest } else { line }
    }

    // The decoded text is never longer than the encoded region.
    let mut decoded = String::with_capacity(region.len());
    for (line_number, line) in region.lines().enumerate() {
        if line_number > 0 {
            decoded.push('\n');
        }
        decoded.push_str(without_prefix(line));
    }
    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
1159
1160 pub fn output_has_edit_commands(model_output: &str) -> bool {
1161 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1162 }
1163
1164 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1165 /// original editable region text.
1166 ///
1167 /// `editable_region` is the original text of the editable region (without hash
1168 /// prefixes). `model_output` is the raw model response containing edit commands.
1169 ///
1170 /// Returns the full replacement text for the editable region.
1171 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1172 let original_lines: Vec<&str> = editable_region.lines().collect();
1173 let old_hashes: Vec<u8> = original_lines
1174 .iter()
1175 .map(|line| hash_line(line.as_bytes()))
1176 .collect();
1177
1178 let commands = parse_edit_commands(model_output);
1179
1180 // For set operations: indexed by start line → Some((end line index, content))
1181 // For insert operations: indexed by line index → vec of content to insert after
1182 // Insert-before-first is tracked separately.
1183 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1184 let mut insert_before_first: Vec<&str> = Vec::new();
1185 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1186
1187 for command in &commands {
1188 match command {
1189 EditCommand::Set {
1190 start,
1191 end,
1192 content,
1193 } => {
1194 if start.index < old_hashes.len()
1195 && end.index < old_hashes.len()
1196 && start.index <= end.index
1197 && old_hashes[start.index] == start.hash
1198 && old_hashes[end.index] == end.hash
1199 {
1200 set_ops[start.index] = Some((end.index, *content));
1201 }
1202 }
1203 EditCommand::Insert { after, content } => match after {
1204 None => insert_before_first.push(*content),
1205 Some(line_ref) => {
1206 if line_ref.index < old_hashes.len()
1207 && old_hashes[line_ref.index] == line_ref.hash
1208 {
1209 insert_after[line_ref.index].push(*content);
1210 }
1211 }
1212 },
1213 }
1214 }
1215
1216 let mut result = String::new();
1217
1218 // Emit any insertions before the first line
1219 for content in &insert_before_first {
1220 result.push_str(content);
1221 if !content.ends_with('\n') {
1222 result.push('\n');
1223 }
1224 }
1225
1226 let mut i = 0;
1227 while i < original_lines.len() {
1228 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1229 // Replace lines i..=end_index with the replacement content
1230 result.push_str(replacement);
1231 if !replacement.is_empty() && !replacement.ends_with('\n') {
1232 result.push('\n');
1233 }
1234 // Emit any insertions after the end of this set range
1235 if *end_index < insert_after.len() {
1236 for content in &insert_after[*end_index] {
1237 result.push_str(content);
1238 if !content.ends_with('\n') {
1239 result.push('\n');
1240 }
1241 }
1242 }
1243 i = end_index + 1;
1244 } else {
1245 // Keep the original line
1246 result.push_str(original_lines[i]);
1247 result.push('\n');
1248 // Emit any insertions after this line
1249 for content in &insert_after[i] {
1250 result.push_str(content);
1251 if !content.ends_with('\n') {
1252 result.push('\n');
1253 }
1254 }
1255 i += 1;
1256 }
1257 }
1258
1259 // Preserve trailing newline behavior: if the original ended with a
1260 // newline the result already has one; if it didn't, trim the extra one
1261 // we added.
1262 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1263 result.pop();
1264 }
1265
1266 result
1267 }
1268
1269 /// Convert a unified diff patch into hashline edit commands.
1270 ///
1271 /// Parses the unified diff `patch` directly to determine which lines of
1272 /// `old_text` are deleted/replaced and what new lines are added, then emits
1273 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1274 /// `{index}:{hash}` identifiers.
1275 ///
1276 /// `cursor_offset` is an optional byte offset into the first hunk's new
1277 /// text (context + additions) where the cursor marker should be placed.
1278 pub fn patch_to_edit_commands(
1279 old_text: &str,
1280 patch: &str,
1281 cursor_offset: Option<usize>,
1282 ) -> Result<String> {
1283 let old_lines: Vec<&str> = old_text.lines().collect();
1284 let old_hashes: Vec<u8> = old_lines
1285 .iter()
1286 .map(|line| hash_line(line.as_bytes()))
1287 .collect();
1288
1289 let mut result = String::new();
1290 let mut first_hunk = true;
1291
1292 struct Hunk<'a> {
1293 line_range: Range<usize>,
1294 new_text_lines: Vec<&'a str>,
1295 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1296 }
1297
1298 // Parse the patch line by line. We only care about hunk headers,
1299 // context, deletions, and additions.
1300 let mut old_line_index: usize = 0;
1301 let mut current_hunk: Option<Hunk> = None;
1302 // Byte offset tracking within the hunk's new text for cursor placement.
1303 let mut new_text_byte_offset: usize = 0;
1304 // The line index of the last old line seen before/in the current hunk
1305 // (used for insert-after reference).
1306 let mut last_old_line_before_hunk: Option<usize> = None;
1307
1308 fn flush_hunk(
1309 hunk: Hunk,
1310 last_old_line: Option<usize>,
1311 result: &mut String,
1312 old_hashes: &[u8],
1313 ) {
1314 if hunk.line_range.is_empty() {
1315 // Pure insertion — reference the old line to insert after when in bounds.
1316 if let Some(after) = last_old_line
1317 && let Some(&hash) = old_hashes.get(after)
1318 {
1319 write!(
1320 result,
1321 "{INSERT_COMMAND_MARKER}{}\n",
1322 LineRef { index: after, hash }
1323 )
1324 .unwrap();
1325 } else {
1326 result.push_str(INSERT_COMMAND_MARKER);
1327 result.push('\n');
1328 }
1329 } else {
1330 let start = hunk.line_range.start;
1331 let end_exclusive = hunk.line_range.end;
1332 let deleted_line_count = end_exclusive.saturating_sub(start);
1333
1334 if deleted_line_count == 1 {
1335 if let Some(&hash) = old_hashes.get(start) {
1336 write!(
1337 result,
1338 "{SET_COMMAND_MARKER}{}\n",
1339 LineRef { index: start, hash }
1340 )
1341 .unwrap();
1342 } else {
1343 result.push_str(SET_COMMAND_MARKER);
1344 result.push('\n');
1345 }
1346 } else {
1347 let end_inclusive = end_exclusive - 1;
1348 match (
1349 old_hashes.get(start).copied(),
1350 old_hashes.get(end_inclusive).copied(),
1351 ) {
1352 (Some(start_hash), Some(end_hash)) => {
1353 write!(
1354 result,
1355 "{SET_COMMAND_MARKER}{}-{}\n",
1356 LineRef {
1357 index: start,
1358 hash: start_hash
1359 },
1360 LineRef {
1361 index: end_inclusive,
1362 hash: end_hash
1363 }
1364 )
1365 .unwrap();
1366 }
1367 _ => {
1368 result.push_str(SET_COMMAND_MARKER);
1369 result.push('\n');
1370 }
1371 }
1372 }
1373 }
1374 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1375 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1376 && line_offset == cursor_line_offset
1377 {
1378 result.push_str(&line[..char_offset]);
1379 result.push_str(CURSOR_MARKER);
1380 result.push_str(&line[char_offset..]);
1381 continue;
1382 }
1383
1384 result.push_str(line);
1385 }
1386 }
1387
1388 for raw_line in patch.split_inclusive('\n') {
1389 if raw_line.starts_with("@@") {
1390 // Flush any pending change hunk from a previous patch hunk.
1391 if let Some(hunk) = current_hunk.take() {
1392 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1393 }
1394
1395 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1396 // We intentionally do not trust old_start as a direct local index into `old_text`,
1397 // because some patches are produced against a larger file region and carry
1398 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1399 if first_hunk {
1400 new_text_byte_offset = 0;
1401 first_hunk = false;
1402 }
1403 continue;
1404 }
1405
1406 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1407 continue;
1408 }
1409 if raw_line.starts_with("\\ No newline") {
1410 continue;
1411 }
1412
1413 if raw_line.starts_with('-') {
1414 // Extend or start a change hunk with this deleted old line.
1415 match &mut current_hunk {
1416 Some(Hunk {
1417 line_range: range, ..
1418 }) => range.end = old_line_index + 1,
1419 None => {
1420 current_hunk = Some(Hunk {
1421 line_range: old_line_index..old_line_index + 1,
1422 new_text_lines: Vec::new(),
1423 cursor_line_offset_in_new_text: None,
1424 });
1425 }
1426 }
1427 old_line_index += 1;
1428 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1429 // Place cursor marker if cursor_offset falls within this line.
1430 let mut cursor_line_offset = None;
1431 if let Some(cursor_off) = cursor_offset
1432 && (first_hunk
1433 || cursor_off >= new_text_byte_offset
1434 && cursor_off <= new_text_byte_offset + added_content.len())
1435 {
1436 let line_offset = added_content.floor_char_boundary(
1437 cursor_off
1438 .saturating_sub(new_text_byte_offset)
1439 .min(added_content.len()),
1440 );
1441 cursor_line_offset = Some(line_offset);
1442 }
1443
1444 new_text_byte_offset += added_content.len();
1445
1446 let hunk = current_hunk.get_or_insert(Hunk {
1447 line_range: old_line_index..old_line_index,
1448 new_text_lines: vec![],
1449 cursor_line_offset_in_new_text: None,
1450 });
1451 hunk.new_text_lines.push(added_content);
1452 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1453 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1454 } else {
1455 // Context line (starts with ' ' or is empty).
1456 if let Some(hunk) = current_hunk.take() {
1457 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1458 }
1459 last_old_line_before_hunk = Some(old_line_index);
1460 old_line_index += 1;
1461 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1462 new_text_byte_offset += content.len();
1463 }
1464 }
1465
1466 // Flush final group.
1467 if let Some(hunk) = current_hunk.take() {
1468 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1469 }
1470
1471 // Trim a single trailing newline.
1472 if result.ends_with('\n') {
1473 result.pop();
1474 }
1475
1476 Ok(result)
1477 }
1478
1479 #[cfg(test)]
1480 mod tests {
1481 use super::*;
1482 use indoc::indoc;
1483
#[test]
fn test_format_cursor_region() {
    // One rendering scenario: the excerpt text, the editable byte range within
    // it, the cursor byte offset (relative to the excerpt), and the exact
    // prompt section expected from `hashline::write_cursor_excerpt_section`.
    struct Scenario {
        label: &'static str,
        excerpt: &'static str,
        editable: Range<usize>,
        cursor: usize,
        want: &'static str,
    }

    let scenarios = [
        Scenario {
            label: "basic_cursor_placement",
            excerpt: "hello world\n",
            editable: 0..12,
            cursor: 5,
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:5c|hello<|user_cursor|> world
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "multiline_cursor_on_second_line",
            excerpt: "aaa\nbbb\nccc\n",
            editable: 0..12,
            cursor: 5, // byte 5 → 1 byte into "bbb"
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                2:29|ccc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "no_trailing_newline_in_context",
            excerpt: "line1\nline2",
            editable: 0..11,
            cursor: 3,
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:d9|lin<|user_cursor|>e1
                1:da|line2
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "leading_newline_in_editable_region",
            excerpt: "\nabc\n",
            editable: 0..5,
            cursor: 2, // byte 2 = 'a' in "abc" (after leading \n)
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:00|
                1:26|a<|user_cursor|>bc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "with_suffix",
            excerpt: "abc\ndef",
            editable: 0..4, // editable region = "abc\n", suffix = "def"
            cursor: 2,
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|ab<|user_cursor|>c
                <|fim_suffix|>
                def
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "unicode_two_byte_chars",
            excerpt: "héllo\n",
            editable: 0..7,
            cursor: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:1b|hé<|user_cursor|>llo
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "unicode_three_byte_chars",
            excerpt: "日本語\n",
            editable: 0..10,
            cursor: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:80|日本<|user_cursor|>語
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "unicode_four_byte_chars",
            excerpt: "a🌍b\n",
            editable: 0..7,
            cursor: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:6b|a🌍<|user_cursor|>b
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "cursor_at_start_of_region_not_placed",
            excerpt: "abc\n",
            editable: 0..4,
            cursor: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "cursor_at_end_of_line_not_placed",
            excerpt: "abc\ndef\n",
            editable: 0..8,
            cursor: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                1:2f|def
                <|fim_suffix|>
                <|fim_middle|>updated"},
        },
        Scenario {
            label: "cursor_offset_relative_to_context_not_editable_region",
            // The cursor offset is relative to the whole excerpt, so when the
            // editable range starts past byte 0, write_cursor_excerpt_section
            // must subtract that start before comparing against per-line
            // offsets within the editable region.
            excerpt: "pre\naaa\nbbb\nsuf\n",
            editable: 4..12, // editable region = "aaa\nbbb\n"
            cursor: 9,       // byte 9 in the excerpt = second 'b' in "bbb"
            want: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                pre
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                <|fim_suffix|>
                suf
                <|fim_middle|>updated"},
        },
    ];

    for Scenario {
        label,
        excerpt,
        editable,
        cursor,
        want,
    } in &scenarios
    {
        let mut rendered = String::new();
        hashline::write_cursor_excerpt_section(
            &mut rendered,
            Path::new("test.rs"),
            excerpt,
            editable,
            *cursor,
        );
        assert_eq!(rendered, *want, "failed case: {}", label);
    }
}
1664
1665 #[test]
1666 fn test_apply_edit_commands() {
1667 struct Case {
1668 name: &'static str,
1669 original: &'static str,
1670 model_output: &'static str,
1671 expected: &'static str,
1672 }
1673
1674 let cases = vec![
1675 Case {
1676 name: "set_single_line",
1677 original: indoc! {"
1678 let mut total = 0;
1679 for product in products {
1680 total += ;
1681 }
1682 total
1683 "},
1684 model_output: indoc! {"
1685 <|set|>2:87
1686 total += product.price;
1687 "},
1688 expected: indoc! {"
1689 let mut total = 0;
1690 for product in products {
1691 total += product.price;
1692 }
1693 total
1694 "},
1695 },
1696 Case {
1697 name: "set_range",
1698 original: indoc! {"
1699 fn foo() {
1700 let x = 1;
1701 let y = 2;
1702 let z = 3;
1703 }
1704 "},
1705 model_output: indoc! {"
1706 <|set|>1:46-3:4a
1707 let sum = 6;
1708 "},
1709 expected: indoc! {"
1710 fn foo() {
1711 let sum = 6;
1712 }
1713 "},
1714 },
1715 Case {
1716 name: "insert_after_line",
1717 original: indoc! {"
1718 fn main() {
1719 let x = 1;
1720 }
1721 "},
1722 model_output: indoc! {"
1723 <|insert|>1:46
1724 let y = 2;
1725 "},
1726 expected: indoc! {"
1727 fn main() {
1728 let x = 1;
1729 let y = 2;
1730 }
1731 "},
1732 },
1733 Case {
1734 name: "insert_before_first",
1735 original: indoc! {"
1736 let x = 1;
1737 let y = 2;
1738 "},
1739 model_output: indoc! {"
1740 <|insert|>
1741 use std::io;
1742 "},
1743 expected: indoc! {"
1744 use std::io;
1745 let x = 1;
1746 let y = 2;
1747 "},
1748 },
1749 Case {
1750 name: "set_with_cursor_marker",
1751 original: indoc! {"
1752 fn main() {
1753 println!();
1754 }
1755 "},
1756 model_output: indoc! {"
1757 <|set|>1:34
1758 eprintln!(\"<|user_cursor|>\");
1759 "},
1760 expected: indoc! {"
1761 fn main() {
1762 eprintln!(\"<|user_cursor|>\");
1763 }
1764 "},
1765 },
1766 Case {
1767 name: "multiple_set_commands",
1768 original: indoc! {"
1769 aaa
1770 bbb
1771 ccc
1772 ddd
1773 "},
1774 model_output: indoc! {"
1775 <|set|>0:23
1776 AAA
1777 <|set|>2:29
1778 CCC
1779 "},
1780 expected: indoc! {"
1781 AAA
1782 bbb
1783 CCC
1784 ddd
1785 "},
1786 },
1787 Case {
1788 name: "set_range_multiline_replacement",
1789 original: indoc! {"
1790 fn handle_submit() {
1791 }
1792
1793 fn handle_keystroke() {
1794 "},
1795 model_output: indoc! {"
1796 <|set|>0:3f-1:7d
1797 fn handle_submit(modal_state: &mut ModalState) {
1798 <|user_cursor|>
1799 }
1800 "},
1801 expected: indoc! {"
1802 fn handle_submit(modal_state: &mut ModalState) {
1803 <|user_cursor|>
1804 }
1805
1806 fn handle_keystroke() {
1807 "},
1808 },
1809 Case {
1810 name: "no_edit_commands_returns_original",
1811 original: indoc! {"
1812 hello
1813 world
1814 "},
1815 model_output: "some random text with no commands",
1816 expected: indoc! {"
1817 hello
1818 world
1819 "},
1820 },
1821 Case {
1822 name: "wrong_hash_set_ignored",
1823 original: indoc! {"
1824 aaa
1825 bbb
1826 "},
1827 model_output: indoc! {"
1828 <|set|>0:ff
1829 ZZZ
1830 "},
1831 expected: indoc! {"
1832 aaa
1833 bbb
1834 "},
1835 },
1836 Case {
1837 name: "insert_and_set_combined",
1838 original: indoc! {"
1839 alpha
1840 beta
1841 gamma
1842 "},
1843 model_output: indoc! {"
1844 <|set|>0:06
1845 ALPHA
1846 <|insert|>1:9c
1847 beta_extra
1848 "},
1849 expected: indoc! {"
1850 ALPHA
1851 beta
1852 beta_extra
1853 gamma
1854 "},
1855 },
1856 Case {
1857 name: "no_trailing_newline_preserved",
1858 original: "hello\nworld",
1859 model_output: indoc! {"
1860 <|set|>0:14
1861 HELLO
1862 "},
1863 expected: "HELLO\nworld",
1864 },
1865 Case {
1866 name: "set_range_hash_mismatch_in_end_bound",
1867 original: indoc! {"
1868 one
1869 two
1870 three
1871 "},
1872 model_output: indoc! {"
1873 <|set|>0:42-2:ff
1874 ONE_TWO_THREE
1875 "},
1876 expected: indoc! {"
1877 one
1878 two
1879 three
1880 "},
1881 },
1882 Case {
1883 name: "set_range_start_greater_than_end_ignored",
1884 original: indoc! {"
1885 a
1886 b
1887 c
1888 "},
1889 model_output: indoc! {"
1890 <|set|>2:63-1:62
1891 X
1892 "},
1893 expected: indoc! {"
1894 a
1895 b
1896 c
1897 "},
1898 },
1899 Case {
1900 name: "insert_out_of_bounds_ignored",
1901 original: indoc! {"
1902 x
1903 y
1904 "},
1905 model_output: indoc! {"
1906 <|insert|>99:aa
1907 z
1908 "},
1909 expected: indoc! {"
1910 x
1911 y
1912 "},
1913 },
1914 Case {
1915 name: "set_out_of_bounds_ignored",
1916 original: indoc! {"
1917 x
1918 y
1919 "},
1920 model_output: indoc! {"
1921 <|set|>99:aa
1922 z
1923 "},
1924 expected: indoc! {"
1925 x
1926 y
1927 "},
1928 },
1929 Case {
1930 name: "malformed_set_command_ignored",
1931 original: indoc! {"
1932 alpha
1933 beta
1934 "},
1935 model_output: indoc! {"
1936 <|set|>not-a-line-ref
1937 UPDATED
1938 "},
1939 expected: indoc! {"
1940 alpha
1941 beta
1942 "},
1943 },
1944 Case {
1945 name: "malformed_insert_hash_treated_as_before_first",
1946 original: indoc! {"
1947 alpha
1948 beta
1949 "},
1950 model_output: indoc! {"
1951 <|insert|>1:nothex
1952 preamble
1953 "},
1954 expected: indoc! {"
1955 preamble
1956 alpha
1957 beta
1958 "},
1959 },
1960 Case {
1961 name: "set_then_insert_same_target_orders_insert_after_replacement",
1962 original: indoc! {"
1963 cat
1964 dog
1965 "},
1966 model_output: indoc! {"
1967 <|set|>0:38
1968 CAT
1969 <|insert|>0:38
1970 TAIL
1971 "},
1972 expected: indoc! {"
1973 CAT
1974 TAIL
1975 dog
1976 "},
1977 },
1978 Case {
1979 name: "overlapping_set_ranges_last_wins",
1980 original: indoc! {"
1981 a
1982 b
1983 c
1984 d
1985 "},
1986 model_output: indoc! {"
1987 <|set|>0:61-2:63
1988 FIRST
1989 <|set|>1:62-3:64
1990 SECOND
1991 "},
1992 expected: indoc! {"
1993 FIRST
1994 d
1995 "},
1996 },
1997 Case {
1998 name: "insert_before_first_and_after_line",
1999 original: indoc! {"
2000 a
2001 b
2002 "},
2003 model_output: indoc! {"
2004 <|insert|>
2005 HEAD
2006 <|insert|>0:61
2007 MID
2008 "},
2009 expected: indoc! {"
2010 HEAD
2011 a
2012 MID
2013 b
2014 "},
2015 },
2016 ];
2017
2018 for case in &cases {
2019 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2020 assert_eq!(result, case.expected, "failed case: {}", case.name);
2021 }
2022 }
2023
#[test]
fn test_output_has_edit_commands() {
    // Outputs that carry a command marker, at the start or after other text.
    let with_commands = [
        format!("{}0:ab\nnew", SET_COMMAND_MARKER),
        format!("{}0:ab\nnew", INSERT_COMMAND_MARKER),
        format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER),
    ];
    for output in &with_commands {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Outputs without any marker.
    for output in &["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
2041
2042 // ---- hashline::patch_to_edit_commands round-trip tests ----
2043
2044 #[test]
2045 fn test_patch_to_edit_commands() {
2046 struct Case {
2047 name: &'static str,
2048 old: &'static str,
2049 patch: &'static str,
2050 expected_new: &'static str,
2051 }
2052
2053 let cases = [
2054 Case {
2055 name: "single_line_replacement",
2056 old: indoc! {"
2057 let mut total = 0;
2058 for product in products {
2059 total += ;
2060 }
2061 total
2062 "},
2063 patch: indoc! {"
2064 @@ -1,5 +1,5 @@
2065 let mut total = 0;
2066 for product in products {
2067 - total += ;
2068 + total += product.price;
2069 }
2070 total
2071 "},
2072 expected_new: indoc! {"
2073 let mut total = 0;
2074 for product in products {
2075 total += product.price;
2076 }
2077 total
2078 "},
2079 },
2080 Case {
2081 name: "multiline_replacement",
2082 old: indoc! {"
2083 fn foo() {
2084 let x = 1;
2085 let y = 2;
2086 let z = 3;
2087 }
2088 "},
2089 patch: indoc! {"
2090 @@ -1,5 +1,3 @@
2091 fn foo() {
2092 - let x = 1;
2093 - let y = 2;
2094 - let z = 3;
2095 + let sum = 1 + 2 + 3;
2096 }
2097 "},
2098 expected_new: indoc! {"
2099 fn foo() {
2100 let sum = 1 + 2 + 3;
2101 }
2102 "},
2103 },
2104 Case {
2105 name: "insertion",
2106 old: indoc! {"
2107 fn main() {
2108 let x = 1;
2109 }
2110 "},
2111 patch: indoc! {"
2112 @@ -1,3 +1,4 @@
2113 fn main() {
2114 let x = 1;
2115 + let y = 2;
2116 }
2117 "},
2118 expected_new: indoc! {"
2119 fn main() {
2120 let x = 1;
2121 let y = 2;
2122 }
2123 "},
2124 },
2125 Case {
2126 name: "insertion_before_first",
2127 old: indoc! {"
2128 let x = 1;
2129 let y = 2;
2130 "},
2131 patch: indoc! {"
2132 @@ -1,2 +1,3 @@
2133 +use std::io;
2134 let x = 1;
2135 let y = 2;
2136 "},
2137 expected_new: indoc! {"
2138 use std::io;
2139 let x = 1;
2140 let y = 2;
2141 "},
2142 },
2143 Case {
2144 name: "deletion",
2145 old: indoc! {"
2146 aaa
2147 bbb
2148 ccc
2149 ddd
2150 "},
2151 patch: indoc! {"
2152 @@ -1,4 +1,2 @@
2153 aaa
2154 -bbb
2155 -ccc
2156 ddd
2157 "},
2158 expected_new: indoc! {"
2159 aaa
2160 ddd
2161 "},
2162 },
2163 Case {
2164 name: "multiple_changes",
2165 old: indoc! {"
2166 alpha
2167 beta
2168 gamma
2169 delta
2170 epsilon
2171 "},
2172 patch: indoc! {"
2173 @@ -1,5 +1,5 @@
2174 -alpha
2175 +ALPHA
2176 beta
2177 gamma
2178 -delta
2179 +DELTA
2180 epsilon
2181 "},
2182 expected_new: indoc! {"
2183 ALPHA
2184 beta
2185 gamma
2186 DELTA
2187 epsilon
2188 "},
2189 },
2190 Case {
2191 name: "replace_with_insertion",
2192 old: indoc! {r#"
2193 fn handle() {
2194 modal_state.close();
2195 modal_state.dismiss();
2196 "#},
2197 patch: indoc! {r#"
2198 @@ -1,3 +1,4 @@
2199 fn handle() {
2200 modal_state.close();
2201 + eprintln!("");
2202 modal_state.dismiss();
2203 "#},
2204 expected_new: indoc! {r#"
2205 fn handle() {
2206 modal_state.close();
2207 eprintln!("");
2208 modal_state.dismiss();
2209 "#},
2210 },
2211 Case {
2212 name: "complete_replacement",
2213 old: indoc! {"
2214 aaa
2215 bbb
2216 ccc
2217 "},
2218 patch: indoc! {"
2219 @@ -1,3 +1,3 @@
2220 -aaa
2221 -bbb
2222 -ccc
2223 +xxx
2224 +yyy
2225 +zzz
2226 "},
2227 expected_new: indoc! {"
2228 xxx
2229 yyy
2230 zzz
2231 "},
2232 },
2233 Case {
2234 name: "add_function_body",
2235 old: indoc! {"
2236 fn foo() {
2237 modal_state.dismiss();
2238 }
2239
2240 fn
2241
2242 fn handle_keystroke() {
2243 "},
2244 patch: indoc! {"
2245 @@ -1,6 +1,8 @@
2246 fn foo() {
2247 modal_state.dismiss();
2248 }
2249
2250 -fn
2251 +fn handle_submit() {
2252 + todo()
2253 +}
2254
2255 fn handle_keystroke() {
2256 "},
2257 expected_new: indoc! {"
2258 fn foo() {
2259 modal_state.dismiss();
2260 }
2261
2262 fn handle_submit() {
2263 todo()
2264 }
2265
2266 fn handle_keystroke() {
2267 "},
2268 },
2269 Case {
2270 name: "with_cursor_offset",
2271 old: indoc! {r#"
2272 fn main() {
2273 println!();
2274 }
2275 "#},
2276 patch: indoc! {r#"
2277 @@ -1,3 +1,3 @@
2278 fn main() {
2279 - println!();
2280 + eprintln!("");
2281 }
2282 "#},
2283 expected_new: indoc! {r#"
2284 fn main() {
2285 eprintln!("<|user_cursor|>");
2286 }
2287 "#},
2288 },
2289 Case {
2290 name: "non_local_hunk_header_pure_insertion_repro",
2291 old: indoc! {"
2292 aaa
2293 bbb
2294 "},
2295 patch: indoc! {"
2296 @@ -20,2 +20,3 @@
2297 aaa
2298 +xxx
2299 bbb
2300 "},
2301 expected_new: indoc! {"
2302 aaa
2303 xxx
2304 bbb
2305 "},
2306 },
2307 ];
2308
2309 for case in &cases {
2310 // The cursor_offset for patch_to_edit_commands is relative to
2311 // the first hunk's new text (context + additions). We compute
2312 // it by finding where the marker sits in the expected output
2313 // (which mirrors the new text of the hunk).
2314 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2315
2316 let commands =
2317 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2318 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2319
2320 assert!(
2321 hashline::output_has_edit_commands(&commands),
2322 "case {}: expected edit commands, got: {commands:?}",
2323 case.name,
2324 );
2325
2326 let applied = hashline::apply_edit_commands(case.old, &commands);
2327 assert_eq!(applied, case.expected_new, "case {}", case.name);
2328 }
2329 }
2330 }
2331}
2332
2333pub mod seed_coder {
2334 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2335 //!
2336 //! Seed-Coder uses different FIM tokens and order than Qwen:
2337 //! - SPM order: suffix comes FIRST, then prefix, then middle
2338 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2339 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2340 //!
2341 //! All context (related files, edit history) goes in the PREFIX section.
2342 //! The suffix contains only code after the editable region.
2343 //!
2344 //! Example prompt:
2345 //!
2346 //! <[fim-suffix]>
2347 //! code after editable region
2348 //! <[fim-prefix]><filename>related/file.py
2349 //! related file content
2350 //!
2351 //! <filename>edit_history
2352 //! --- a/some_file.py
2353 //! +++ b/some_file.py
2354 //! -old
2355 //! +new
2356 //!
2357 //! <filename>path/to/target_file.py
2358 //! code before editable region
2359 //! <<<<<<< CURRENT
2360 //! code that
2361 //! needs to<|user_cursor|>
2362 //! be rewritten
2363 //! =======
2364 //! <[fim-middle]>
2365 //!
2366 //! Expected output (model generates):
2367 //!
2368 //! updated
2369 //! code with
2370 //! changes applied
2371 //! >>>>>>> UPDATED
2372
2373 use super::*;
2374
2375 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2376 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2377 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2378 pub const FILE_MARKER: &str = "<filename>";
2379
2380 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2381 pub const SEPARATOR: &str = "=======\n";
2382 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2383
2384 pub fn special_tokens() -> &'static [&'static str] {
2385 &[
2386 FIM_SUFFIX,
2387 FIM_PREFIX,
2388 FIM_MIDDLE,
2389 FILE_MARKER,
2390 START_MARKER,
2391 SEPARATOR,
2392 END_MARKER,
2393 CURSOR_MARKER,
2394 ]
2395 }
2396
2397 pub fn write_cursor_excerpt_section(
2398 prompt: &mut String,
2399 path: &Path,
2400 context: &str,
2401 editable_range: &Range<usize>,
2402 cursor_offset: usize,
2403 ) {
2404 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2405 prompt.push_str(§ion);
2406 }
2407
2408 pub fn format_prompt_with_budget(
2409 path: &Path,
2410 context: &str,
2411 editable_range: &Range<usize>,
2412 cursor_offset: usize,
2413 events: &[Arc<Event>],
2414 related_files: &[RelatedFile],
2415 max_tokens: usize,
2416 ) -> String {
2417 let suffix_section = build_suffix_section(context, editable_range);
2418 let cursor_prefix_section =
2419 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2420
2421 let suffix_tokens = estimate_tokens(suffix_section.len());
2422 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2423 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2424
2425 let edit_history_section = super::format_edit_history_within_budget(
2426 events,
2427 FILE_MARKER,
2428 "edit_history",
2429 budget_after_cursor,
2430 );
2431 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2432 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2433
2434 let related_files_section = super::format_related_files_within_budget(
2435 related_files,
2436 FILE_MARKER,
2437 "",
2438 budget_after_edit_history,
2439 );
2440
2441 let mut prompt = String::new();
2442 prompt.push_str(&suffix_section);
2443 prompt.push_str(FIM_PREFIX);
2444 prompt.push_str(&related_files_section);
2445 if !related_files_section.is_empty() {
2446 prompt.push('\n');
2447 }
2448 prompt.push_str(&edit_history_section);
2449 if !edit_history_section.is_empty() {
2450 prompt.push('\n');
2451 }
2452 prompt.push_str(&cursor_prefix_section);
2453 prompt.push_str(FIM_MIDDLE);
2454 prompt
2455 }
2456
2457 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2458 let mut section = String::new();
2459 section.push_str(FIM_SUFFIX);
2460 section.push_str(&context[editable_range.end..]);
2461 if !section.ends_with('\n') {
2462 section.push('\n');
2463 }
2464 section
2465 }
2466
2467 fn build_cursor_prefix_section(
2468 path: &Path,
2469 context: &str,
2470 editable_range: &Range<usize>,
2471 cursor_offset: usize,
2472 ) -> String {
2473 let mut section = String::new();
2474 let path_str = path.to_string_lossy();
2475 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2476
2477 section.push_str(&context[..editable_range.start]);
2478 section.push_str(START_MARKER);
2479 section.push_str(&context[editable_range.start..cursor_offset]);
2480 section.push_str(CURSOR_MARKER);
2481 section.push_str(&context[cursor_offset..editable_range.end]);
2482 if !section.ends_with('\n') {
2483 section.push('\n');
2484 }
2485 section.push_str(SEPARATOR);
2486 section
2487 }
2488}
2489
2490/// The zeta1 prompt format
2491pub mod zeta1 {
2492 use super::*;
2493 use std::fmt::Write;
2494
    /// Marker written at the cursor position in a zeta1 excerpt. When cleaning
    /// model output it is replaced by the universal `super::CURSOR_MARKER`.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Marker written at the top of the excerpt when it begins at the start of
    /// the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Marker that opens the editable region in prompts and model output.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker that closes the editable region in prompts and model output.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed preamble of every zeta1 prompt; the formatted user edits follow
    /// directly after it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the user edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt; the model's response follows it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2509
2510 /// Formats a complete zeta1 prompt from the input events and excerpt.
2511 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2512 let mut prompt = String::with_capacity(
2513 INSTRUCTION_HEADER.len()
2514 + input_events.len()
2515 + EXCERPT_HEADER.len()
2516 + input_excerpt.len()
2517 + RESPONSE_HEADER.len(),
2518 );
2519 prompt.push_str(INSTRUCTION_HEADER);
2520 prompt.push_str(input_events);
2521 prompt.push_str(EXCERPT_HEADER);
2522 prompt.push_str(input_excerpt);
2523 prompt.push_str(RESPONSE_HEADER);
2524 prompt
2525 }
2526
2527 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2528 /// editable and context byte-offset ranges within `cursor_excerpt`.
2529 pub fn format_zeta1_from_input(
2530 input: &ZetaPromptInput,
2531 editable_range: Range<usize>,
2532 context_range: Range<usize>,
2533 ) -> String {
2534 let events = format_zeta1_events(&input.events);
2535 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2536 format_zeta1_prompt(&events, &excerpt)
2537 }
2538
2539 /// Formats events in zeta1 style (oldest first).
2540 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2541 let mut result = String::new();
2542 for event in events {
2543 let event_string = format_zeta1_event(event);
2544 if event_string.is_empty() {
2545 continue;
2546 }
2547 if !result.is_empty() {
2548 result.push_str("\n\n");
2549 }
2550 result.push_str(&event_string);
2551 }
2552 result
2553 }
2554
2555 fn format_zeta1_event(event: &Event) -> String {
2556 match event {
2557 Event::BufferChange {
2558 path,
2559 old_path,
2560 diff,
2561 ..
2562 } => {
2563 let mut prompt = String::new();
2564 if old_path != path {
2565 writeln!(
2566 prompt,
2567 "User renamed {} to {}\n",
2568 old_path.display(),
2569 path.display()
2570 )
2571 .ok();
2572 }
2573 if !diff.is_empty() {
2574 write!(
2575 prompt,
2576 "User edited {}:\n```diff\n{}\n```",
2577 path.display(),
2578 diff
2579 )
2580 .ok();
2581 }
2582 prompt
2583 }
2584 }
2585 }
2586
2587 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2588 /// within `cursor_excerpt`.
2589 fn format_zeta1_excerpt(
2590 input: &ZetaPromptInput,
2591 editable_range: Range<usize>,
2592 context_range: Range<usize>,
2593 ) -> String {
2594 let path_str = input.cursor_path.to_string_lossy();
2595 let excerpt = &*input.cursor_excerpt;
2596 let cursor_offset = input.cursor_offset_in_excerpt;
2597
2598 let mut prompt = String::new();
2599 writeln!(&mut prompt, "```{path_str}").ok();
2600
2601 let starts_at_file_beginning =
2602 input.excerpt_start_row == Some(0) && context_range.start == 0;
2603 if starts_at_file_beginning {
2604 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2605 }
2606
2607 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2608
2609 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2610 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2611 prompt.push_str(CURSOR_MARKER);
2612 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2613 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2614
2615 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2616 write!(prompt, "\n```").ok();
2617
2618 prompt
2619 }
2620
2621 /// Cleans zeta1 model output by extracting content between editable region
2622 /// markers and converting the zeta1 cursor marker to the universal one.
2623 /// Returns `None` if the output doesn't contain the expected markers.
2624 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2625 let content = output.replace(CURSOR_MARKER, "");
2626
2627 let content_start = content
2628 .find(EDITABLE_REGION_START_MARKER)
2629 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2630 .map(|pos| {
2631 if content.as_bytes().get(pos) == Some(&b'\n') {
2632 pos + 1
2633 } else {
2634 pos
2635 }
2636 })
2637 .unwrap_or(0);
2638
2639 let content_end = content
2640 .find(EDITABLE_REGION_END_MARKER)
2641 .map(|pos| {
2642 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2643 pos - 1
2644 } else {
2645 pos
2646 }
2647 })
2648 .unwrap_or(content.len());
2649
2650 if content_start > content_end {
2651 return Some(String::new());
2652 }
2653
2654 let extracted = &content[content_start..content_end];
2655
2656 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2657 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2658 let text_before_cursor = text_before_cursor
2659 .find(EDITABLE_REGION_START_MARKER)
2660 .map(|pos| {
2661 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2662 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2663 after_marker + 1
2664 } else {
2665 after_marker
2666 }
2667 })
2668 .unwrap_or(0);
2669 let offset_in_extracted = zeta1_cursor_pos
2670 .saturating_sub(text_before_cursor)
2671 .min(extracted.len());
2672 offset_in_extracted
2673 });
2674
2675 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2676 if let Some(offset) = cursor_offset {
2677 result.push_str(&extracted[..offset]);
2678 result.push_str(super::CURSOR_MARKER);
2679 result.push_str(&extracted[offset..]);
2680 } else {
2681 result.push_str(extracted);
2682 }
2683
2684 Some(result)
2685 }
2686}
2687
2688#[cfg(test)]
2689mod tests {
2690 use super::*;
2691 use indoc::indoc;
2692
2693 fn make_input(
2694 cursor_excerpt: &str,
2695 editable_range: Range<usize>,
2696 cursor_offset: usize,
2697 events: Vec<Event>,
2698 related_files: Vec<RelatedFile>,
2699 ) -> ZetaPromptInput {
2700 let context_range = 0..cursor_excerpt.len();
2701 ZetaPromptInput {
2702 cursor_path: Path::new("test.rs").into(),
2703 cursor_excerpt: cursor_excerpt.into(),
2704 cursor_offset_in_excerpt: cursor_offset,
2705 excerpt_start_row: None,
2706 events: events.into_iter().map(Arc::new).collect(),
2707 related_files,
2708 excerpt_ranges: ExcerptRanges {
2709 editable_150: editable_range.clone(),
2710 editable_180: editable_range.clone(),
2711 editable_350: editable_range,
2712 editable_150_context_350: context_range.clone(),
2713 editable_180_context_350: context_range.clone(),
2714 editable_350_context_150: context_range,
2715 ..Default::default()
2716 },
2717 experiment: None,
2718 in_open_source_repo: false,
2719 can_collect_data: false,
2720 repo_url: None,
2721 }
2722 }
2723
2724 fn make_event(path: &str, diff: &str) -> Event {
2725 Event::BufferChange {
2726 path: Path::new(path).into(),
2727 old_path: Path::new(path).into(),
2728 diff: diff.to_string(),
2729 predicted: false,
2730 in_open_source_repo: false,
2731 }
2732 }
2733
2734 fn make_related_file(path: &str, content: &str) -> RelatedFile {
2735 RelatedFile {
2736 path: Path::new(path).into(),
2737 max_row: content.lines().count() as u32,
2738 excerpts: vec![RelatedExcerpt {
2739 row_range: 0..content.lines().count() as u32,
2740 text: content.into(),
2741 order: 0,
2742 }],
2743 in_open_source_repo: false,
2744 }
2745 }
2746
2747 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2748 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2749 }
2750
2751 #[test]
2752 fn test_no_truncation_when_within_budget() {
2753 let input = make_input(
2754 "prefix\neditable\nsuffix",
2755 7..15,
2756 10,
2757 vec![make_event("a.rs", "-old\n+new\n")],
2758 vec![make_related_file("related.rs", "fn helper() {}\n")],
2759 );
2760
2761 assert_eq!(
2762 format_with_budget(&input, 10000),
2763 indoc! {r#"
2764 <|file_sep|>related.rs
2765 fn helper() {}
2766 <|file_sep|>edit history
2767 --- a/a.rs
2768 +++ b/a.rs
2769 -old
2770 +new
2771 <|file_sep|>test.rs
2772 <|fim_prefix|>
2773 prefix
2774 <|fim_middle|>current
2775 edi<|user_cursor|>table
2776 <|fim_suffix|>
2777
2778 suffix
2779 <|fim_middle|>updated
2780 "#}
2781 );
2782 }
2783
2784 #[test]
2785 fn test_truncation_drops_edit_history_when_budget_tight() {
2786 let input = make_input(
2787 "code",
2788 0..4,
2789 2,
2790 vec![make_event("a.rs", "-x\n+y\n")],
2791 vec![
2792 make_related_file("r1.rs", "a\n"),
2793 make_related_file("r2.rs", "b\n"),
2794 ],
2795 );
2796
2797 assert_eq!(
2798 format_with_budget(&input, 10000),
2799 indoc! {r#"
2800 <|file_sep|>r1.rs
2801 a
2802 <|file_sep|>r2.rs
2803 b
2804 <|file_sep|>edit history
2805 --- a/a.rs
2806 +++ b/a.rs
2807 -x
2808 +y
2809 <|file_sep|>test.rs
2810 <|fim_prefix|>
2811 <|fim_middle|>current
2812 co<|user_cursor|>de
2813 <|fim_suffix|>
2814 <|fim_middle|>updated
2815 "#}
2816 );
2817
2818 assert_eq!(
2819 format_with_budget(&input, 50),
2820 indoc! {r#"
2821 <|file_sep|>r1.rs
2822 a
2823 <|file_sep|>r2.rs
2824 b
2825 <|file_sep|>test.rs
2826 <|fim_prefix|>
2827 <|fim_middle|>current
2828 co<|user_cursor|>de
2829 <|fim_suffix|>
2830 <|fim_middle|>updated
2831 "#}
2832 );
2833 }
2834
2835 #[test]
2836 fn test_truncation_includes_partial_excerpts() {
2837 let input = make_input(
2838 "x",
2839 0..1,
2840 0,
2841 vec![],
2842 vec![RelatedFile {
2843 path: Path::new("big.rs").into(),
2844 max_row: 30,
2845 in_open_source_repo: false,
2846 excerpts: vec![
2847 RelatedExcerpt {
2848 row_range: 0..10,
2849 text: "first excerpt\n".into(),
2850 order: 0,
2851 },
2852 RelatedExcerpt {
2853 row_range: 10..20,
2854 text: "second excerpt\n".into(),
2855 order: 0,
2856 },
2857 RelatedExcerpt {
2858 row_range: 20..30,
2859 text: "third excerpt\n".into(),
2860 order: 0,
2861 },
2862 ],
2863 }],
2864 );
2865
2866 assert_eq!(
2867 format_with_budget(&input, 10000),
2868 indoc! {r#"
2869 <|file_sep|>big.rs
2870 first excerpt
2871 ...
2872 second excerpt
2873 ...
2874 third excerpt
2875 <|file_sep|>test.rs
2876 <|fim_prefix|>
2877 <|fim_middle|>current
2878 <|user_cursor|>x
2879 <|fim_suffix|>
2880 <|fim_middle|>updated
2881 "#}
2882 );
2883
2884 assert_eq!(
2885 format_with_budget(&input, 50),
2886 indoc! {r#"
2887 <|file_sep|>big.rs
2888 first excerpt
2889 ...
2890 <|file_sep|>test.rs
2891 <|fim_prefix|>
2892 <|fim_middle|>current
2893 <|user_cursor|>x
2894 <|fim_suffix|>
2895 <|fim_middle|>updated
2896 "#}
2897 );
2898 }
2899
2900 #[test]
2901 fn test_truncation_prioritizes_lower_order_excerpts() {
2902 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
2903 // With tight budget, only the lower-order excerpt from file_b should be included.
2904 let input = make_input(
2905 "x",
2906 0..1,
2907 0,
2908 vec![],
2909 vec![
2910 RelatedFile {
2911 path: Path::new("file_a.rs").into(),
2912 max_row: 10,
2913 in_open_source_repo: false,
2914 excerpts: vec![RelatedExcerpt {
2915 row_range: 0..10,
2916 text: "low priority content\n".into(),
2917 order: 5,
2918 }],
2919 },
2920 RelatedFile {
2921 path: Path::new("file_b.rs").into(),
2922 max_row: 10,
2923 in_open_source_repo: false,
2924 excerpts: vec![RelatedExcerpt {
2925 row_range: 0..10,
2926 text: "high priority content\n".into(),
2927 order: 1,
2928 }],
2929 },
2930 ],
2931 );
2932
2933 // With large budget, both files included; rendered in stable lexicographic order.
2934 assert_eq!(
2935 format_with_budget(&input, 10000),
2936 indoc! {r#"
2937 <|file_sep|>file_a.rs
2938 low priority content
2939 <|file_sep|>file_b.rs
2940 high priority content
2941 <|file_sep|>test.rs
2942 <|fim_prefix|>
2943 <|fim_middle|>current
2944 <|user_cursor|>x
2945 <|fim_suffix|>
2946 <|fim_middle|>updated
2947 "#}
2948 );
2949
2950 // With tight budget, only file_b (lower order) fits.
2951 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
2952 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
2953 // file_a would need another 14 tokens, which doesn't fit.
2954 assert_eq!(
2955 format_with_budget(&input, 52),
2956 indoc! {r#"
2957 <|file_sep|>file_b.rs
2958 high priority content
2959 <|file_sep|>test.rs
2960 <|fim_prefix|>
2961 <|fim_middle|>current
2962 <|user_cursor|>x
2963 <|fim_suffix|>
2964 <|fim_middle|>updated
2965 "#}
2966 );
2967 }
2968
2969 #[test]
2970 fn test_truncation_drops_high_order_excerpts_within_file() {
2971 // A single file has excerpts at order 1 and order 3. With a tight budget,
2972 // only the order-1 excerpts are included while the order-3 excerpt is
2973 // dropped — even though they belong to the same file. This also preserves
2974 // the parent invariant: parent outline items have order ≤ their best
2975 // child, so they're always included when any child is.
2976 let input = make_input(
2977 "x",
2978 0..1,
2979 0,
2980 vec![],
2981 vec![RelatedFile {
2982 path: Path::new("mod.rs").into(),
2983 max_row: 30,
2984 in_open_source_repo: false,
2985 excerpts: vec![
2986 RelatedExcerpt {
2987 row_range: 0..5,
2988 text: "mod header\n".into(),
2989 order: 1,
2990 },
2991 RelatedExcerpt {
2992 row_range: 5..15,
2993 text: "important fn\n".into(),
2994 order: 1,
2995 },
2996 RelatedExcerpt {
2997 row_range: 15..30,
2998 text: "less important fn\n".into(),
2999 order: 3,
3000 },
3001 ],
3002 }],
3003 );
3004
3005 // With large budget, all three excerpts included.
3006 assert_eq!(
3007 format_with_budget(&input, 10000),
3008 indoc! {r#"
3009 <|file_sep|>mod.rs
3010 mod header
3011 ...
3012 important fn
3013 ...
3014 less important fn
3015 <|file_sep|>test.rs
3016 <|fim_prefix|>
3017 <|fim_middle|>current
3018 <|user_cursor|>x
3019 <|fim_suffix|>
3020 <|fim_middle|>updated
3021 "#}
3022 );
3023
3024 // With tight budget, only order<=1 excerpts included (header + important fn).
3025 assert_eq!(
3026 format_with_budget(&input, 55),
3027 indoc! {r#"
3028 <|file_sep|>mod.rs
3029 mod header
3030 ...
3031 important fn
3032 ...
3033 <|file_sep|>test.rs
3034 <|fim_prefix|>
3035 <|fim_middle|>current
3036 <|user_cursor|>x
3037 <|fim_suffix|>
3038 <|fim_middle|>updated
3039 "#}
3040 );
3041 }
3042
3043 #[test]
3044 fn test_truncation_drops_older_events_first() {
3045 let input = make_input(
3046 "x",
3047 0..1,
3048 0,
3049 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
3050 vec![],
3051 );
3052
3053 assert_eq!(
3054 format_with_budget(&input, 10000),
3055 indoc! {r#"
3056 <|file_sep|>edit history
3057 --- a/old.rs
3058 +++ b/old.rs
3059 -1
3060 --- a/new.rs
3061 +++ b/new.rs
3062 -2
3063 <|file_sep|>test.rs
3064 <|fim_prefix|>
3065 <|fim_middle|>current
3066 <|user_cursor|>x
3067 <|fim_suffix|>
3068 <|fim_middle|>updated
3069 "#}
3070 );
3071
3072 assert_eq!(
3073 format_with_budget(&input, 55),
3074 indoc! {r#"
3075 <|file_sep|>edit history
3076 --- a/new.rs
3077 +++ b/new.rs
3078 -2
3079 <|file_sep|>test.rs
3080 <|fim_prefix|>
3081 <|fim_middle|>current
3082 <|user_cursor|>x
3083 <|fim_suffix|>
3084 <|fim_middle|>updated
3085 "#}
3086 );
3087 }
3088
3089 #[test]
3090 fn test_cursor_excerpt_always_included_with_minimal_budget() {
3091 let input = make_input(
3092 "fn main() {}",
3093 0..12,
3094 3,
3095 vec![make_event("a.rs", "-old\n+new\n")],
3096 vec![make_related_file("related.rs", "helper\n")],
3097 );
3098
3099 assert_eq!(
3100 format_with_budget(&input, 30),
3101 indoc! {r#"
3102 <|file_sep|>test.rs
3103 <|fim_prefix|>
3104 <|fim_middle|>current
3105 fn <|user_cursor|>main() {}
3106 <|fim_suffix|>
3107 <|fim_middle|>updated
3108 "#}
3109 );
3110 }
3111
3112 fn format_seed_coder(input: &ZetaPromptInput) -> String {
3113 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3114 }
3115
3116 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3117 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3118 }
3119
3120 #[test]
3121 fn test_seed_coder_basic_format() {
3122 let input = make_input(
3123 "prefix\neditable\nsuffix",
3124 7..15,
3125 10,
3126 vec![make_event("a.rs", "-old\n+new\n")],
3127 vec![make_related_file("related.rs", "fn helper() {}\n")],
3128 );
3129
3130 assert_eq!(
3131 format_seed_coder(&input),
3132 indoc! {r#"
3133 <[fim-suffix]>
3134 suffix
3135 <[fim-prefix]><filename>related.rs
3136 fn helper() {}
3137
3138 <filename>edit_history
3139 --- a/a.rs
3140 +++ b/a.rs
3141 -old
3142 +new
3143
3144 <filename>test.rs
3145 prefix
3146 <<<<<<< CURRENT
3147 edi<|user_cursor|>table
3148 =======
3149 <[fim-middle]>"#}
3150 );
3151 }
3152
3153 #[test]
3154 fn test_seed_coder_no_context() {
3155 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
3156
3157 assert_eq!(
3158 format_seed_coder(&input),
3159 indoc! {r#"
3160 <[fim-suffix]>
3161 after
3162 <[fim-prefix]><filename>test.rs
3163 before
3164 <<<<<<< CURRENT
3165 mid<|user_cursor|>dle
3166 =======
3167 <[fim-middle]>"#}
3168 );
3169 }
3170
3171 #[test]
3172 fn test_seed_coder_truncation_drops_context() {
3173 let input = make_input(
3174 "code",
3175 0..4,
3176 2,
3177 vec![make_event("a.rs", "-x\n+y\n")],
3178 vec![make_related_file("r1.rs", "content\n")],
3179 );
3180
3181 // With large budget, everything is included
3182 assert_eq!(
3183 format_seed_coder(&input),
3184 indoc! {r#"
3185 <[fim-suffix]>
3186 <[fim-prefix]><filename>r1.rs
3187 content
3188
3189 <filename>edit_history
3190 --- a/a.rs
3191 +++ b/a.rs
3192 -x
3193 +y
3194
3195 <filename>test.rs
3196 <<<<<<< CURRENT
3197 co<|user_cursor|>de
3198 =======
3199 <[fim-middle]>"#}
3200 );
3201
3202 // With tight budget, context is dropped but cursor section remains
3203 assert_eq!(
3204 format_seed_coder_with_budget(&input, 30),
3205 indoc! {r#"
3206 <[fim-suffix]>
3207 <[fim-prefix]><filename>test.rs
3208 <<<<<<< CURRENT
3209 co<|user_cursor|>de
3210 =======
3211 <[fim-middle]>"#}
3212 );
3213 }
3214
3215 #[test]
3216 fn test_seed_coder_truncation_prioritizes_lower_order() {
3217 let input = make_input(
3218 "code",
3219 0..4,
3220 2,
3221 vec![],
3222 vec![
3223 RelatedFile {
3224 path: Path::new("low_prio.rs").into(),
3225 max_row: 5,
3226 in_open_source_repo: false,
3227 excerpts: vec![RelatedExcerpt {
3228 row_range: 0..5,
3229 text: "low prio\n".into(),
3230 order: 10,
3231 }],
3232 },
3233 RelatedFile {
3234 path: Path::new("high_prio.rs").into(),
3235 max_row: 5,
3236 in_open_source_repo: false,
3237 excerpts: vec![RelatedExcerpt {
3238 row_range: 0..5,
3239 text: "high prio\n".into(),
3240 order: 1,
3241 }],
3242 },
3243 ],
3244 );
3245
3246 // With large budget, both included; rendered in stable lexicographic order.
3247 assert_eq!(
3248 format_seed_coder(&input),
3249 indoc! {r#"
3250 <[fim-suffix]>
3251 <[fim-prefix]><filename>low_prio.rs
3252 low prio
3253 <filename>high_prio.rs
3254 high prio
3255
3256 <filename>test.rs
3257 <<<<<<< CURRENT
3258 co<|user_cursor|>de
3259 =======
3260 <[fim-middle]>"#}
3261 );
3262
3263 // With tight budget, only high_prio included.
3264 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
3265 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
3266 assert_eq!(
3267 format_seed_coder_with_budget(&input, 44),
3268 indoc! {r#"
3269 <[fim-suffix]>
3270 <[fim-prefix]><filename>high_prio.rs
3271 high prio
3272
3273 <filename>test.rs
3274 <<<<<<< CURRENT
3275 co<|user_cursor|>de
3276 =======
3277 <[fim-middle]>"#}
3278 );
3279 }
3280
3281 #[test]
3282 fn test_seed_coder_clean_output() {
3283 let output_with_marker = "new code\n>>>>>>> UPDATED\n";
3284 let output_without_marker = "new code\n";
3285
3286 assert_eq!(
3287 clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
3288 "new code\n"
3289 );
3290 assert_eq!(
3291 clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
3292 "new code\n"
3293 );
3294 }
3295
3296 #[test]
3297 fn test_format_zeta1_from_input_basic() {
3298 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
3299 let input = ZetaPromptInput {
3300 cursor_path: Path::new("src/main.rs").into(),
3301 cursor_excerpt: excerpt.into(),
3302 cursor_offset_in_excerpt: 30,
3303 excerpt_start_row: Some(0),
3304 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3305 related_files: vec![],
3306 excerpt_ranges: ExcerptRanges {
3307 editable_150: 15..41,
3308 editable_180: 15..41,
3309 editable_350: 15..41,
3310 editable_150_context_350: 0..excerpt.len(),
3311 editable_180_context_350: 0..excerpt.len(),
3312 editable_350_context_150: 0..excerpt.len(),
3313 ..Default::default()
3314 },
3315 experiment: None,
3316 in_open_source_repo: false,
3317 can_collect_data: false,
3318 repo_url: None,
3319 };
3320
3321 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3322
3323 assert_eq!(
3324 prompt,
3325 concat!(
3326 "### Instruction:\n",
3327 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3328 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3329 "into account the cursor location.\n",
3330 "\n",
3331 "### User Edits:\n",
3332 "\n",
3333 "User edited other.rs:\n",
3334 "```diff\n",
3335 "-old\n",
3336 "+new\n",
3337 "\n",
3338 "```\n",
3339 "\n",
3340 "### User Excerpt:\n",
3341 "\n",
3342 "```src/main.rs\n",
3343 "<|start_of_file|>\n",
3344 "fn before() {}\n",
3345 "<|editable_region_start|>\n",
3346 "fn foo() {\n",
3347 " <|user_cursor_is_here|>let x = 1;\n",
3348 "\n",
3349 "<|editable_region_end|>}\n",
3350 "fn after() {}\n",
3351 "\n",
3352 "```\n",
3353 "\n",
3354 "### Response:\n",
3355 ),
3356 );
3357 }
3358
3359 #[test]
3360 fn test_format_zeta1_from_input_no_start_of_file() {
3361 let excerpt = "fn foo() {\n let x = 1;\n}\n";
3362 let input = ZetaPromptInput {
3363 cursor_path: Path::new("src/main.rs").into(),
3364 cursor_excerpt: excerpt.into(),
3365 cursor_offset_in_excerpt: 15,
3366 excerpt_start_row: Some(10),
3367 events: vec![],
3368 related_files: vec![],
3369 excerpt_ranges: ExcerptRanges {
3370 editable_150: 0..28,
3371 editable_180: 0..28,
3372 editable_350: 0..28,
3373 editable_150_context_350: 0..28,
3374 editable_180_context_350: 0..28,
3375 editable_350_context_150: 0..28,
3376 ..Default::default()
3377 },
3378 experiment: None,
3379 in_open_source_repo: false,
3380 can_collect_data: false,
3381 repo_url: None,
3382 };
3383
3384 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3385
3386 assert_eq!(
3387 prompt,
3388 concat!(
3389 "### Instruction:\n",
3390 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3391 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3392 "into account the cursor location.\n",
3393 "\n",
3394 "### User Edits:\n",
3395 "\n",
3396 "\n",
3397 "\n",
3398 "### User Excerpt:\n",
3399 "\n",
3400 "```src/main.rs\n",
3401 "<|editable_region_start|>\n",
3402 "fn foo() {\n",
3403 " <|user_cursor_is_here|>let x = 1;\n",
3404 "}\n",
3405 "\n",
3406 "<|editable_region_end|>\n",
3407 "```\n",
3408 "\n",
3409 "### Response:\n",
3410 ),
3411 );
3412 }
3413
3414 #[test]
3415 fn test_format_zeta1_from_input_with_sub_ranges() {
3416 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
3417 let editable_range = 10..37;
3418 let context_range = 0..excerpt.len();
3419
3420 let input = ZetaPromptInput {
3421 cursor_path: Path::new("test.rs").into(),
3422 cursor_excerpt: excerpt.into(),
3423 cursor_offset_in_excerpt: 25,
3424 excerpt_start_row: Some(0),
3425 events: vec![],
3426 related_files: vec![],
3427 excerpt_ranges: ExcerptRanges {
3428 editable_150: editable_range.clone(),
3429 editable_180: editable_range.clone(),
3430 editable_350: editable_range.clone(),
3431 editable_150_context_350: context_range.clone(),
3432 editable_180_context_350: context_range.clone(),
3433 editable_350_context_150: context_range.clone(),
3434 ..Default::default()
3435 },
3436 experiment: None,
3437 in_open_source_repo: false,
3438 can_collect_data: false,
3439 repo_url: None,
3440 };
3441
3442 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3443
3444 assert_eq!(
3445 prompt,
3446 concat!(
3447 "### Instruction:\n",
3448 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3449 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3450 "into account the cursor location.\n",
3451 "\n",
3452 "### User Edits:\n",
3453 "\n",
3454 "\n",
3455 "\n",
3456 "### User Excerpt:\n",
3457 "\n",
3458 "```test.rs\n",
3459 "<|start_of_file|>\n",
3460 "// prefix\n",
3461 "<|editable_region_start|>\n",
3462 "fn foo() {\n",
3463 " <|user_cursor_is_here|>let x = 1;\n",
3464 "}\n",
3465 "<|editable_region_end|>\n",
3466 "// suffix\n",
3467 "\n",
3468 "```\n",
3469 "\n",
3470 "### Response:\n",
3471 ),
3472 );
3473 }
3474
3475 #[test]
3476 fn test_clean_zeta1_model_output_basic() {
3477 let output = indoc! {"
3478 <|editable_region_start|>
3479 fn main() {
3480 println!(\"hello\");
3481 }
3482 <|editable_region_end|>
3483 "};
3484
3485 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3486 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
3487 }
3488
3489 #[test]
3490 fn test_clean_zeta1_model_output_with_cursor() {
3491 let output = indoc! {"
3492 <|editable_region_start|>
3493 fn main() {
3494 <|user_cursor_is_here|>println!(\"hello\");
3495 }
3496 <|editable_region_end|>
3497 "};
3498
3499 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3500 assert_eq!(
3501 cleaned,
3502 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
3503 );
3504 }
3505
3506 #[test]
3507 fn test_clean_zeta1_model_output_no_markers() {
3508 let output = "fn main() {}\n";
3509 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3510 assert_eq!(cleaned, "fn main() {}\n");
3511 }
3512
3513 #[test]
3514 fn test_clean_zeta1_model_output_empty_region() {
3515 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3516 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3517 assert_eq!(cleaned, "");
3518 }
3519}