1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker text the prompt writers insert at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for `bytes` bytes of text: three bytes per token,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget (optional).
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// NOTE(review): by analogy with the fields above, presumably the context
    /// boundary for editable_350 with 512 tokens of additional context — confirm.
    pub editable_350_context_512: Option<Range<usize>>,
    /// NOTE(review): presumably the context boundary for editable_350 with
    /// 1024 tokens of additional context — confirm.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget — confirm against the producer of these ranges.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget — confirm against the producer of these ranges.
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render an edit-prediction prompt for one cursor
/// position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after the file marker
    /// in the cursor section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The ranges in `excerpt_ranges` are byte
    /// offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// NOTE(review): presumably the row in the file at which `cursor_excerpt`
    /// starts; it is not read anywhere in this part of the file — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the "edit history" section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files rendered ahead of the edit history.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when absent from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from the serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
}
65
/// Identifies which prompt layout to render. Each variant is dispatched to a
/// format-specific module by the `*_for_format` functions in this file.
///
/// Variant order matters: `EnumIter` walks the variants in declaration order,
/// which is the order `options_as_string` lists them in.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, hence the lint exemption.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    /// Rendered by the `v0113_ordered` writer, but with the 180-token editable range.
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same cursor section as `V0131GitMergeMarkersPrefix`, plus a non-empty prefill.
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
}
91
92impl std::fmt::Display for ZetaFormat {
93 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94 write!(f, "{}", <&'static str>::from(self))
95 }
96}
97
98impl ZetaFormat {
99 pub fn parse(format_name: &str) -> Result<Self> {
100 let mut results = ZetaFormat::iter().filter(|version| {
101 <&'static str>::from(version)
102 .to_lowercase()
103 .contains(&format_name.to_lowercase())
104 });
105 let Some(result) = results.next() else {
106 anyhow::bail!(
107 "`{format_name}` did not match any of:\n{}",
108 Self::options_as_string()
109 );
110 };
111 if results.next().is_some() {
112 anyhow::bail!(
113 "`{format_name}` matched more than one of:\n{}",
114 Self::options_as_string()
115 );
116 }
117 Ok(result)
118 }
119
120 pub fn options_as_string() -> String {
121 ZetaFormat::iter()
122 .map(|format| format!("- {}\n", <&'static str>::from(format)))
123 .collect::<Vec<_>>()
124 .concat()
125 }
126}
127
/// An entry in the user's edit history. Serialized with an `"event"` tag
/// field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a/<old_path>` and `+++ b/<path>` header lines.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Not rendered into the prompt; exposed via `Event::in_open_source_repo`.
        in_open_source_repo: bool,
    },
}
139
140impl Event {
141 pub fn in_open_source_repo(&self) -> bool {
142 match self {
143 Event::BufferChange {
144 in_open_source_repo,
145 ..
146 } => *in_open_source_repo,
147 }
148 }
149}
150
151pub fn write_event(prompt: &mut String, event: &Event) {
152 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
153 for component in path.components() {
154 prompt.push('/');
155 write!(prompt, "{}", component.as_os_str().display()).ok();
156 }
157 }
158 match event {
159 Event::BufferChange {
160 path,
161 old_path,
162 diff,
163 predicted,
164 in_open_source_repo: _,
165 } => {
166 if *predicted {
167 prompt.push_str("// User accepted prediction:\n");
168 }
169 prompt.push_str("--- a");
170 write_path_as_unix_str(prompt, old_path.as_ref());
171 prompt.push_str("\n+++ b");
172 write_path_as_unix_str(prompt, path.as_ref());
173 prompt.push('\n');
174 prompt.push_str(diff);
175 }
176 }
177}
178
/// A file other than the cursor's file, with excerpts that can be included in
/// the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker when the file is rendered.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line.
    pub max_row: u32,
    /// Excerpts of this file, in the order they are rendered.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
187
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; `end` is compared against the file's
    /// `max_row` to decide whether a `...` line follows.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim.
    pub text: Arc<str>,
    /// Priority used by `format_related_files_within_budget`: excerpts with a
    /// lower value are admitted into the budget first. Defaults to 0.
    #[serde(default)]
    pub order: usize,
}
195
196pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
197 special_tokens_for_format(format)
198 .iter()
199 .any(|token| input.cursor_excerpt.contains(token))
200}
201
202pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
203 format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
204}
205
206pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
207 match format {
208 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
209 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
210 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
211 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
212 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
213 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
214 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
215 ZetaFormat::v0226Hashline => hashline::special_tokens(),
216 }
217}
218
219pub fn excerpt_ranges_for_format(
220 format: ZetaFormat,
221 ranges: &ExcerptRanges,
222) -> (Range<usize>, Range<usize>) {
223 match format {
224 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
225 ranges.editable_150.clone(),
226 ranges.editable_150_context_350.clone(),
227 ),
228 ZetaFormat::V0114180EditableRegion => (
229 ranges.editable_180.clone(),
230 ranges.editable_180_context_350.clone(),
231 ),
232 ZetaFormat::V0120GitMergeMarkers
233 | ZetaFormat::V0131GitMergeMarkersPrefix
234 | ZetaFormat::V0211Prefill
235 | ZetaFormat::V0211SeedCoder
236 | ZetaFormat::v0226Hashline => (
237 ranges.editable_350.clone(),
238 ranges.editable_350_context_150.clone(),
239 ),
240 }
241}
242
243pub fn write_cursor_excerpt_section_for_format(
244 format: ZetaFormat,
245 prompt: &mut String,
246 path: &Path,
247 context: &str,
248 editable_range: &Range<usize>,
249 cursor_offset: usize,
250) {
251 match format {
252 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
253 prompt,
254 path,
255 context,
256 editable_range,
257 cursor_offset,
258 ),
259 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
260 v0113_ordered::write_cursor_excerpt_section(
261 prompt,
262 path,
263 context,
264 editable_range,
265 cursor_offset,
266 )
267 }
268 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
269 prompt,
270 path,
271 context,
272 editable_range,
273 cursor_offset,
274 ),
275 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
276 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
277 prompt,
278 path,
279 context,
280 editable_range,
281 cursor_offset,
282 )
283 }
284 ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
285 prompt,
286 path,
287 context,
288 editable_range,
289 cursor_offset,
290 ),
291 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
292 prompt,
293 path,
294 context,
295 editable_range,
296 cursor_offset,
297 ),
298 }
299}
300
301pub fn format_prompt_with_budget_for_format(
302 input: &ZetaPromptInput,
303 format: ZetaFormat,
304 max_tokens: usize,
305) -> String {
306 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
307 let path = &*input.cursor_path;
308
309 match format {
310 ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
311 path,
312 context,
313 &editable_range,
314 cursor_offset,
315 &input.events,
316 &input.related_files,
317 max_tokens,
318 ),
319 _ => {
320 let mut cursor_section = String::new();
321 write_cursor_excerpt_section_for_format(
322 format,
323 &mut cursor_section,
324 path,
325 context,
326 &editable_range,
327 cursor_offset,
328 );
329
330 let cursor_tokens = estimate_tokens(cursor_section.len());
331 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
332
333 let edit_history_section = format_edit_history_within_budget(
334 &input.events,
335 "<|file_sep|>",
336 "edit history",
337 budget_after_cursor,
338 );
339 let edit_history_tokens = estimate_tokens(edit_history_section.len());
340 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
341
342 let related_files_section = format_related_files_within_budget(
343 &input.related_files,
344 "<|file_sep|>",
345 "",
346 budget_after_edit_history,
347 );
348
349 let mut prompt = String::new();
350 prompt.push_str(&related_files_section);
351 prompt.push_str(&edit_history_section);
352 prompt.push_str(&cursor_section);
353 prompt
354 }
355 }
356}
357
358pub fn get_prefill_for_format(
359 format: ZetaFormat,
360 context: &str,
361 editable_range: &Range<usize>,
362) -> String {
363 match format {
364 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
365 ZetaFormat::V0112MiddleAtEnd
366 | ZetaFormat::V0113Ordered
367 | ZetaFormat::V0114180EditableRegion
368 | ZetaFormat::V0120GitMergeMarkers
369 | ZetaFormat::V0131GitMergeMarkersPrefix
370 | ZetaFormat::V0211SeedCoder
371 | ZetaFormat::v0226Hashline => String::new(),
372 }
373}
374
375pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
376 match format {
377 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
378 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
379 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
380 ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
381 ZetaFormat::V0112MiddleAtEnd
382 | ZetaFormat::V0113Ordered
383 | ZetaFormat::V0114180EditableRegion
384 | ZetaFormat::v0226Hashline => None,
385 }
386}
387
388pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
389 match format {
390 ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
391 ZetaFormat::V0113Ordered
392 | ZetaFormat::V0114180EditableRegion
393 | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
394 ZetaFormat::V0120GitMergeMarkers
395 | ZetaFormat::V0131GitMergeMarkersPrefix
396 | ZetaFormat::V0211Prefill => (
397 v0120_git_merge_markers::START_MARKER,
398 v0120_git_merge_markers::SEPARATOR,
399 ),
400 ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
401 }
402}
403
404pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
405 match format {
406 ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
407 _ => region.to_string(),
408 }
409}
410
411pub fn encode_patch_as_output_for_format(
412 format: ZetaFormat,
413 old_editable_region: &str,
414 patch: &str,
415 cursor_offset: Option<usize>,
416) -> Result<Option<String>> {
417 match format {
418 ZetaFormat::v0226Hashline => {
419 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
420 }
421 _ => Ok(None),
422 }
423}
424
425pub fn output_with_context_for_format(
426 format: ZetaFormat,
427 old_editable_region: &str,
428 output: &str,
429) -> Result<Option<String>> {
430 match format {
431 ZetaFormat::v0226Hashline => {
432 if hashline::output_has_edit_commands(output) {
433 Ok(Some(hashline::apply_edit_commands(
434 old_editable_region,
435 output,
436 )))
437 } else {
438 Ok(None)
439 }
440 }
441 _ => Ok(None),
442 }
443}
444
445/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
446pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
447 match output_end_marker_for_format(format) {
448 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
449 None => output,
450 }
451}
452
453pub fn excerpt_range_for_format(
454 format: ZetaFormat,
455 ranges: &ExcerptRanges,
456) -> (Range<usize>, Range<usize>) {
457 excerpt_ranges_for_format(format, ranges)
458}
459
460pub fn resolve_cursor_region(
461 input: &ZetaPromptInput,
462 format: ZetaFormat,
463) -> (&str, Range<usize>, usize) {
464 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
465 let context_start = context_range.start;
466 let context_text = &input.cursor_excerpt[context_range];
467 let adjusted_editable =
468 (editable_range.start - context_start)..(editable_range.end - context_start);
469 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
470
471 (context_text, adjusted_editable, adjusted_cursor)
472}
473
474pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
475 let (context, editable_range, _) = resolve_cursor_region(input, format);
476 get_prefill_for_format(format, context, &editable_range)
477}
478
479fn format_edit_history_within_budget(
480 events: &[Arc<Event>],
481 file_marker: &str,
482 edit_history_name: &str,
483 max_tokens: usize,
484) -> String {
485 let header = format!("{}{}\n", file_marker, edit_history_name);
486 let header_tokens = estimate_tokens(header.len());
487 if header_tokens >= max_tokens {
488 return String::new();
489 }
490
491 let mut event_strings: Vec<String> = Vec::new();
492 let mut total_tokens = header_tokens;
493
494 for event in events.iter().rev() {
495 let mut event_str = String::new();
496 write_event(&mut event_str, event);
497 let event_tokens = estimate_tokens(event_str.len());
498
499 if total_tokens + event_tokens > max_tokens {
500 break;
501 }
502 total_tokens += event_tokens;
503 event_strings.push(event_str);
504 }
505
506 if event_strings.is_empty() {
507 return String::new();
508 }
509
510 let mut result = header;
511 for event_str in event_strings.iter().rev() {
512 result.push_str(event_str);
513 }
514 result
515}
516
517fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
518 let needs_newline = !excerpt.text.ends_with('\n');
519 let needs_ellipsis = excerpt.row_range.end < file_max_row;
520 let len = excerpt.text.len()
521 + if needs_newline { "\n".len() } else { 0 }
522 + if needs_ellipsis { "...\n".len() } else { 0 };
523 estimate_tokens(len)
524}
525
526pub fn format_related_files_within_budget(
527 related_files: &[RelatedFile],
528 file_prefix: &str,
529 file_suffix: &str,
530 max_tokens: usize,
531) -> String {
532 struct ExcerptCandidate {
533 file_ix: usize,
534 excerpt_ix: usize,
535 order: usize,
536 }
537
538 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
539 .iter()
540 .enumerate()
541 .flat_map(|(file_ix, file)| {
542 file.excerpts
543 .iter()
544 .enumerate()
545 .map(move |(excerpt_ix, e)| ExcerptCandidate {
546 file_ix,
547 excerpt_ix,
548 order: e.order,
549 })
550 })
551 .collect();
552
553 // Pre-compute file header strings and their token costs.
554 let file_headers: Vec<String> = related_files
555 .iter()
556 .map(|file| {
557 let path_str = file.path.to_string_lossy();
558 format!("{}{}\n", file_prefix, path_str)
559 })
560 .collect();
561
562 // Sort the excerpts by their order and determine how many fit within the budget.
563 let mut total_tokens = 0;
564 let mut included_excerpt_count = 0_usize;
565 let mut included_file_indices = vec![false; related_files.len()];
566 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
567 for candidate in &excerpt_candidates {
568 let file = &related_files[candidate.file_ix];
569 let excerpt = &file.excerpts[candidate.excerpt_ix];
570 let file_already_included = included_file_indices[candidate.file_ix];
571 let header_cost = if file_already_included {
572 0
573 } else {
574 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
575 };
576 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
577 if total_tokens + header_cost + excerpt_cost > max_tokens {
578 break;
579 }
580 total_tokens += header_cost + excerpt_cost;
581 if !file_already_included {
582 included_file_indices[candidate.file_ix] = true;
583 }
584 included_excerpt_count += 1;
585 }
586
587 excerpt_candidates.truncate(included_excerpt_count);
588 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
589
590 // Render all of the files that fit within the token budget, in the original order.
591 let mut result = String::new();
592 let mut last_file_ix = None;
593 for candidate in &excerpt_candidates {
594 if last_file_ix != Some(candidate.file_ix) {
595 if last_file_ix.is_some() {
596 result.push_str(file_suffix);
597 }
598 result.push_str(&file_headers[candidate.file_ix]);
599 last_file_ix = Some(candidate.file_ix);
600 }
601 let file = &related_files[candidate.file_ix];
602 let excerpt = &file.excerpts[candidate.excerpt_ix];
603 result.push_str(&excerpt.text);
604 if !result.ends_with('\n') {
605 result.push('\n');
606 }
607 if excerpt.row_range.end < file.max_row {
608 result.push_str("...\n");
609 }
610 }
611
612 result
613}
614
615pub fn write_related_files(
616 prompt: &mut String,
617 related_files: &[RelatedFile],
618) -> Vec<Range<usize>> {
619 let mut ranges = Vec::new();
620 for file in related_files {
621 let start = prompt.len();
622 let path_str = file.path.to_string_lossy();
623 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
624 for excerpt in &file.excerpts {
625 prompt.push_str(&excerpt.text);
626 if !prompt.ends_with('\n') {
627 prompt.push('\n');
628 }
629 if excerpt.row_range.end < file.max_row {
630 prompt.push_str("...\n");
631 }
632 }
633 let end = prompt.len();
634 ranges.push(start..end);
635 }
636 ranges
637}
638
639mod v0112_middle_at_end {
640 use super::*;
641
642 pub fn special_tokens() -> &'static [&'static str] {
643 &[
644 "<|fim_prefix|>",
645 "<|fim_suffix|>",
646 "<|fim_middle|>",
647 "<|file_sep|>",
648 CURSOR_MARKER,
649 ]
650 }
651
652 pub fn write_cursor_excerpt_section(
653 prompt: &mut String,
654 path: &Path,
655 context: &str,
656 editable_range: &Range<usize>,
657 cursor_offset: usize,
658 ) {
659 let path_str = path.to_string_lossy();
660 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
661
662 prompt.push_str("<|fim_prefix|>\n");
663 prompt.push_str(&context[..editable_range.start]);
664
665 prompt.push_str("<|fim_suffix|>\n");
666 prompt.push_str(&context[editable_range.end..]);
667 if !prompt.ends_with('\n') {
668 prompt.push('\n');
669 }
670
671 prompt.push_str("<|fim_middle|>current\n");
672 prompt.push_str(&context[editable_range.start..cursor_offset]);
673 prompt.push_str(CURSOR_MARKER);
674 prompt.push_str(&context[cursor_offset..editable_range.end]);
675 if !prompt.ends_with('\n') {
676 prompt.push('\n');
677 }
678
679 prompt.push_str("<|fim_middle|>updated\n");
680 }
681}
682
683mod v0113_ordered {
684 use super::*;
685
686 pub fn special_tokens() -> &'static [&'static str] {
687 &[
688 "<|fim_prefix|>",
689 "<|fim_suffix|>",
690 "<|fim_middle|>",
691 "<|file_sep|>",
692 CURSOR_MARKER,
693 ]
694 }
695
696 pub fn write_cursor_excerpt_section(
697 prompt: &mut String,
698 path: &Path,
699 context: &str,
700 editable_range: &Range<usize>,
701 cursor_offset: usize,
702 ) {
703 let path_str = path.to_string_lossy();
704 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
705
706 prompt.push_str("<|fim_prefix|>\n");
707 prompt.push_str(&context[..editable_range.start]);
708 if !prompt.ends_with('\n') {
709 prompt.push('\n');
710 }
711
712 prompt.push_str("<|fim_middle|>current\n");
713 prompt.push_str(&context[editable_range.start..cursor_offset]);
714 prompt.push_str(CURSOR_MARKER);
715 prompt.push_str(&context[cursor_offset..editable_range.end]);
716 if !prompt.ends_with('\n') {
717 prompt.push('\n');
718 }
719
720 prompt.push_str("<|fim_suffix|>\n");
721 prompt.push_str(&context[editable_range.end..]);
722 if !prompt.ends_with('\n') {
723 prompt.push('\n');
724 }
725
726 prompt.push_str("<|fim_middle|>updated\n");
727 }
728}
729
730mod v0114180_editable_region {
731 use super::*;
732
733 pub fn special_tokens() -> &'static [&'static str] {
734 v0113_ordered::special_tokens()
735 }
736}
737
738pub mod v0120_git_merge_markers {
739 //! A prompt that uses git-style merge conflict markers to represent the editable region.
740 //!
741 //! Example prompt:
742 //!
743 //! <|file_sep|>path/to/target_file.py
744 //! <|fim_prefix|>
745 //! code before editable region
746 //! <|fim_suffix|>
747 //! code after editable region
748 //! <|fim_middle|>
749 //! <<<<<<< CURRENT
750 //! code that
751 //! needs to<|user_cursor|>
752 //! be rewritten
753 //! =======
754 //!
755 //! Expected output (should be generated by the model):
756 //!
757 //! updated
758 //! code with
759 //! changes applied
760 //! >>>>>>> UPDATED
761
762 use super::*;
763
764 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
765 pub const SEPARATOR: &str = "=======\n";
766 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
767
768 pub fn special_tokens() -> &'static [&'static str] {
769 &[
770 "<|fim_prefix|>",
771 "<|fim_suffix|>",
772 "<|fim_middle|>",
773 "<|file_sep|>",
774 START_MARKER,
775 SEPARATOR,
776 END_MARKER,
777 CURSOR_MARKER,
778 ]
779 }
780
781 pub fn write_cursor_excerpt_section(
782 prompt: &mut String,
783 path: &Path,
784 context: &str,
785 editable_range: &Range<usize>,
786 cursor_offset: usize,
787 ) {
788 let path_str = path.to_string_lossy();
789 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
790
791 prompt.push_str("<|fim_prefix|>");
792 prompt.push_str(&context[..editable_range.start]);
793
794 prompt.push_str("<|fim_suffix|>");
795 prompt.push_str(&context[editable_range.end..]);
796 if !prompt.ends_with('\n') {
797 prompt.push('\n');
798 }
799
800 prompt.push_str("<|fim_middle|>");
801 prompt.push_str(START_MARKER);
802 prompt.push_str(&context[editable_range.start..cursor_offset]);
803 prompt.push_str(CURSOR_MARKER);
804 prompt.push_str(&context[cursor_offset..editable_range.end]);
805 if !prompt.ends_with('\n') {
806 prompt.push('\n');
807 }
808 prompt.push_str(SEPARATOR);
809 }
810}
811
812pub mod v0131_git_merge_markers_prefix {
813 //! A prompt that uses git-style merge conflict markers to represent the editable region.
814 //!
815 //! Example prompt:
816 //!
817 //! <|file_sep|>path/to/target_file.py
818 //! <|fim_prefix|>
819 //! code before editable region
820 //! <<<<<<< CURRENT
821 //! code that
822 //! needs to<|user_cursor|>
823 //! be rewritten
824 //! =======
825 //! <|fim_suffix|>
826 //! code after editable region
827 //! <|fim_middle|>
828 //!
829 //! Expected output (should be generated by the model):
830 //!
831 //! updated
832 //! code with
833 //! changes applied
834 //! >>>>>>> UPDATED
835
836 use super::*;
837
838 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
839 pub const SEPARATOR: &str = "=======\n";
840 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
841
842 pub fn special_tokens() -> &'static [&'static str] {
843 &[
844 "<|fim_prefix|>",
845 "<|fim_suffix|>",
846 "<|fim_middle|>",
847 "<|file_sep|>",
848 START_MARKER,
849 SEPARATOR,
850 END_MARKER,
851 CURSOR_MARKER,
852 ]
853 }
854
855 pub fn write_cursor_excerpt_section(
856 prompt: &mut String,
857 path: &Path,
858 context: &str,
859 editable_range: &Range<usize>,
860 cursor_offset: usize,
861 ) {
862 let path_str = path.to_string_lossy();
863 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
864
865 prompt.push_str("<|fim_prefix|>");
866 prompt.push_str(&context[..editable_range.start]);
867 prompt.push_str(START_MARKER);
868 prompt.push_str(&context[editable_range.start..cursor_offset]);
869 prompt.push_str(CURSOR_MARKER);
870 prompt.push_str(&context[cursor_offset..editable_range.end]);
871 if !prompt.ends_with('\n') {
872 prompt.push('\n');
873 }
874 prompt.push_str(SEPARATOR);
875
876 prompt.push_str("<|fim_suffix|>");
877 prompt.push_str(&context[editable_range.end..]);
878 if !prompt.ends_with('\n') {
879 prompt.push('\n');
880 }
881
882 prompt.push_str("<|fim_middle|>");
883 }
884}
885
886pub mod v0211_prefill {
887 use super::*;
888
889 pub fn special_tokens() -> &'static [&'static str] {
890 v0131_git_merge_markers_prefix::special_tokens()
891 }
892
893 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
894 let editable_region = &context[editable_range.start..editable_range.end];
895
896 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
897 let prefill_len = editable_region.floor_char_boundary(prefill_len);
898
899 // Find a token boundary to avoid splitting tokens in the prefill.
900 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
901 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
902 // the \n and consume any consecutive \n characters after it.
903 let prefill = &editable_region[..prefill_len];
904 match prefill.rfind('\n') {
905 Some(pos) => {
906 let mut end = pos + 1;
907 while end < editable_region.len()
908 && editable_region.as_bytes().get(end) == Some(&b'\n')
909 {
910 end += 1;
911 }
912 editable_region[..end].to_string()
913 }
914 // No newline found. Fall back to splitting before the last space
915 // (word-level boundary)
916 None => match prefill.rfind(' ') {
917 Some(pos) => prefill[..pos].to_string(),
918 None => prefill.to_string(),
919 },
920 }
921 }
922}
923
924pub mod hashline {
925
    use std::fmt::Display;

    /// Marker written at the very end of the hashline cursor section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that begins a "set" (replace lines) command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that begins an "insert" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
935
936 pub fn special_tokens() -> &'static [&'static str] {
937 return &[
938 SET_COMMAND_MARKER,
939 "<|set_range|>",
940 INSERT_COMMAND_MARKER,
941 CURSOR_MARKER,
942 "<|file_sep|>",
943 "<|fim_prefix|>",
944 "<|fim_suffix|>",
945 "<|fim_middle|>",
946 ];
947 }
948
949 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
950 #[derive(Debug, Clone, PartialEq, Eq)]
951 struct LineRef {
952 index: usize,
953 hash: u8,
954 }
955
956 impl Display for LineRef {
957 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
958 write!(f, "{}:{:02x}", self.index, self.hash)
959 }
960 }
961
962 pub fn hash_line(line: &[u8]) -> u8 {
963 let mut h: u8 = 0;
964 for &byte in line {
965 h = h.wrapping_add(byte);
966 }
967 return h;
968 }
969
970 /// Write the hashline-encoded editable region into `out`. Each line of
971 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
972 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
973 /// to the start of `editable_text`).
974 pub fn write_hashline_editable_region(
975 out: &mut String,
976 editable_text: &str,
977 cursor_offset_in_editable: usize,
978 ) {
979 let mut offset = 0;
980 for (i, line) in editable_text.lines().enumerate() {
981 let (head, cursor, tail) = if cursor_offset_in_editable > offset
982 && cursor_offset_in_editable < offset + line.len()
983 {
984 (
985 &line[..cursor_offset_in_editable - offset],
986 CURSOR_MARKER,
987 &line[cursor_offset_in_editable - offset..],
988 )
989 } else {
990 (line, "", "")
991 };
992 write!(
993 out,
994 "\n{}|{head}{cursor}{tail}",
995 LineRef {
996 index: i,
997 hash: hash_line(line.as_bytes())
998 }
999 )
1000 .unwrap();
1001 offset += line.len() + 1;
1002 }
1003 }
1004
1005 pub fn write_cursor_excerpt_section(
1006 prompt: &mut String,
1007 path: &Path,
1008 context: &str,
1009 editable_range: &Range<usize>,
1010 cursor_offset: usize,
1011 ) {
1012 let path_str = path.to_string_lossy();
1013 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1014
1015 prompt.push_str("<|fim_prefix|>\n");
1016 prompt.push_str(&context[..editable_range.start]);
1017 prompt.push_str(START_MARKER);
1018
1019 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1020 let editable_region = &context[editable_range.clone()];
1021 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1022
1023 if !prompt.ends_with('\n') {
1024 prompt.push('\n');
1025 }
1026
1027 prompt.push_str("<|fim_suffix|>\n");
1028 prompt.push_str(&context[editable_range.end..]);
1029 if !prompt.ends_with('\n') {
1030 prompt.push('\n');
1031 }
1032
1033 prompt.push_str(END_MARKER);
1034 }
1035
    /// A single edit command parsed from the model output. `content` borrows
    /// from the model output and holds the lines that follow the command
    /// line, up to the next command line or the end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1053
1054 /// Parse a line reference like `3:c3` into a `LineRef`.
1055 fn parse_line_ref(s: &str) -> Option<LineRef> {
1056 let (idx_str, hash_str) = s.split_once(':')?;
1057 let index = idx_str.parse::<usize>().ok()?;
1058 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1059 Some(LineRef { index, hash })
1060 }
1061
1062 /// Parse the model output into a list of `EditCommand`s.
1063 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1064 let mut commands = Vec::new();
1065 let mut offset = 0usize;
1066
1067 while offset < model_output.len() {
1068 let next_nl = model_output[offset..]
1069 .find('\n')
1070 .map(|i| offset + i)
1071 .unwrap_or(model_output.len());
1072 let line = &model_output[offset..next_nl];
1073 let line_end = if next_nl < model_output.len() {
1074 next_nl + 1
1075 } else {
1076 next_nl
1077 };
1078
1079 let trimmed = line.trim();
1080 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1081 (true, spec)
1082 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1083 (false, spec)
1084 } else {
1085 offset = line_end;
1086 continue;
1087 };
1088
1089 let mut content_end = line_end;
1090 let mut scan = line_end;
1091
1092 while scan < model_output.len() {
1093 let body_nl = model_output[scan..]
1094 .find('\n')
1095 .map(|i| scan + i)
1096 .unwrap_or(model_output.len());
1097 let body_line = &model_output[scan..body_nl];
1098 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1099 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1100 {
1101 break;
1102 }
1103 scan = if body_nl < model_output.len() {
1104 body_nl + 1
1105 } else {
1106 body_nl
1107 };
1108 content_end = scan;
1109 }
1110
1111 let content = &model_output[line_end..content_end];
1112
1113 if is_set {
1114 if let Some((start_str, end_str)) = specifier.split_once('-') {
1115 if let (Some(start), Some(end)) =
1116 (parse_line_ref(start_str), parse_line_ref(end_str))
1117 {
1118 commands.push(EditCommand::Set {
1119 start,
1120 end,
1121 content,
1122 });
1123 }
1124 } else if let Some(target) = parse_line_ref(specifier) {
1125 commands.push(EditCommand::Set {
1126 start: target.clone(),
1127 end: target,
1128 content,
1129 });
1130 }
1131 } else {
1132 let after = parse_line_ref(specifier);
1133 commands.push(EditCommand::Insert { after, content });
1134 }
1135
1136 offset = scan;
1137 }
1138
1139 commands
1140 }
1141
1142 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1143 /// (as opposed to being a plain full-replacement output).
1144 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1145 /// editable region, returning the plain text content.
1146 pub fn strip_hashline_prefixes(region: &str) -> String {
1147 let mut decoded: String = region
1148 .lines()
1149 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1150 .collect::<Vec<_>>()
1151 .join("\n");
1152 if region.ends_with('\n') {
1153 decoded.push('\n');
1154 }
1155 decoded
1156 }
1157
1158 pub fn output_has_edit_commands(model_output: &str) -> bool {
1159 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1160 }
1161
1162 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1163 /// original editable region text.
1164 ///
1165 /// `editable_region` is the original text of the editable region (without hash
1166 /// prefixes). `model_output` is the raw model response containing edit commands.
1167 ///
1168 /// Returns the full replacement text for the editable region.
1169 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1170 let original_lines: Vec<&str> = editable_region.lines().collect();
1171 let old_hashes: Vec<u8> = original_lines
1172 .iter()
1173 .map(|line| hash_line(line.as_bytes()))
1174 .collect();
1175
1176 let commands = parse_edit_commands(model_output);
1177
1178 // For set operations: indexed by start line → Some((end line index, content))
1179 // For insert operations: indexed by line index → vec of content to insert after
1180 // Insert-before-first is tracked separately.
1181 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1182 let mut insert_before_first: Vec<&str> = Vec::new();
1183 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1184
1185 for command in &commands {
1186 match command {
1187 EditCommand::Set {
1188 start,
1189 end,
1190 content,
1191 } => {
1192 if start.index < old_hashes.len()
1193 && end.index < old_hashes.len()
1194 && start.index <= end.index
1195 && old_hashes[start.index] == start.hash
1196 && old_hashes[end.index] == end.hash
1197 {
1198 set_ops[start.index] = Some((end.index, *content));
1199 }
1200 }
1201 EditCommand::Insert { after, content } => match after {
1202 None => insert_before_first.push(*content),
1203 Some(line_ref) => {
1204 if line_ref.index < old_hashes.len()
1205 && old_hashes[line_ref.index] == line_ref.hash
1206 {
1207 insert_after[line_ref.index].push(*content);
1208 }
1209 }
1210 },
1211 }
1212 }
1213
1214 let mut result = String::new();
1215
1216 // Emit any insertions before the first line
1217 for content in &insert_before_first {
1218 result.push_str(content);
1219 if !content.ends_with('\n') {
1220 result.push('\n');
1221 }
1222 }
1223
1224 let mut i = 0;
1225 while i < original_lines.len() {
1226 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1227 // Replace lines i..=end_index with the replacement content
1228 result.push_str(replacement);
1229 if !replacement.is_empty() && !replacement.ends_with('\n') {
1230 result.push('\n');
1231 }
1232 // Emit any insertions after the end of this set range
1233 if *end_index < insert_after.len() {
1234 for content in &insert_after[*end_index] {
1235 result.push_str(content);
1236 if !content.ends_with('\n') {
1237 result.push('\n');
1238 }
1239 }
1240 }
1241 i = end_index + 1;
1242 } else {
1243 // Keep the original line
1244 result.push_str(original_lines[i]);
1245 result.push('\n');
1246 // Emit any insertions after this line
1247 for content in &insert_after[i] {
1248 result.push_str(content);
1249 if !content.ends_with('\n') {
1250 result.push('\n');
1251 }
1252 }
1253 i += 1;
1254 }
1255 }
1256
1257 // Preserve trailing newline behavior: if the original ended with a
1258 // newline the result already has one; if it didn't, trim the extra one
1259 // we added.
1260 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1261 result.pop();
1262 }
1263
1264 result
1265 }
1266
1267 /// Convert a unified diff patch into hashline edit commands.
1268 ///
1269 /// Parses the unified diff `patch` directly to determine which lines of
1270 /// `old_text` are deleted/replaced and what new lines are added, then emits
1271 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1272 /// `{index}:{hash}` identifiers.
1273 ///
1274 /// `cursor_offset` is an optional byte offset into the first hunk's new
1275 /// text (context + additions) where the cursor marker should be placed.
1276 pub fn patch_to_edit_commands(
1277 old_text: &str,
1278 patch: &str,
1279 cursor_offset: Option<usize>,
1280 ) -> Result<String> {
1281 let old_lines: Vec<&str> = old_text.lines().collect();
1282 let old_hashes: Vec<u8> = old_lines
1283 .iter()
1284 .map(|line| hash_line(line.as_bytes()))
1285 .collect();
1286
1287 let mut result = String::new();
1288 let mut first_hunk = true;
1289
1290 struct Hunk<'a> {
1291 line_range: Range<usize>,
1292 new_text_lines: Vec<&'a str>,
1293 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1294 }
1295
1296 // Parse the patch line by line. We only care about hunk headers,
1297 // context, deletions, and additions.
1298 let mut old_line_index: usize = 0;
1299 let mut current_hunk: Option<Hunk> = None;
1300 // Byte offset tracking within the hunk's new text for cursor placement.
1301 let mut new_text_byte_offset: usize = 0;
1302 // The line index of the last old line seen before/in the current hunk
1303 // (used for insert-after reference).
1304 let mut last_old_line_before_hunk: Option<usize> = None;
1305
1306 fn flush_hunk(
1307 hunk: Hunk,
1308 last_old_line: Option<usize>,
1309 result: &mut String,
1310 old_hashes: &[u8],
1311 ) {
1312 if hunk.line_range.is_empty() {
1313 // Pure insertion — reference the old line to insert after when in bounds.
1314 if let Some(after) = last_old_line
1315 && let Some(&hash) = old_hashes.get(after)
1316 {
1317 write!(
1318 result,
1319 "{INSERT_COMMAND_MARKER}{}\n",
1320 LineRef { index: after, hash }
1321 )
1322 .unwrap();
1323 } else {
1324 result.push_str(INSERT_COMMAND_MARKER);
1325 result.push('\n');
1326 }
1327 } else {
1328 let start = hunk.line_range.start;
1329 let end_exclusive = hunk.line_range.end;
1330 let deleted_line_count = end_exclusive.saturating_sub(start);
1331
1332 if deleted_line_count == 1 {
1333 if let Some(&hash) = old_hashes.get(start) {
1334 write!(
1335 result,
1336 "{SET_COMMAND_MARKER}{}\n",
1337 LineRef { index: start, hash }
1338 )
1339 .unwrap();
1340 } else {
1341 result.push_str(SET_COMMAND_MARKER);
1342 result.push('\n');
1343 }
1344 } else {
1345 let end_inclusive = end_exclusive - 1;
1346 match (
1347 old_hashes.get(start).copied(),
1348 old_hashes.get(end_inclusive).copied(),
1349 ) {
1350 (Some(start_hash), Some(end_hash)) => {
1351 write!(
1352 result,
1353 "{SET_COMMAND_MARKER}{}-{}\n",
1354 LineRef {
1355 index: start,
1356 hash: start_hash
1357 },
1358 LineRef {
1359 index: end_inclusive,
1360 hash: end_hash
1361 }
1362 )
1363 .unwrap();
1364 }
1365 _ => {
1366 result.push_str(SET_COMMAND_MARKER);
1367 result.push('\n');
1368 }
1369 }
1370 }
1371 }
1372 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1373 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1374 && line_offset == cursor_line_offset
1375 {
1376 result.push_str(&line[..char_offset]);
1377 result.push_str(CURSOR_MARKER);
1378 result.push_str(&line[char_offset..]);
1379 continue;
1380 }
1381
1382 result.push_str(line);
1383 }
1384 }
1385
1386 for raw_line in patch.split_inclusive('\n') {
1387 if raw_line.starts_with("@@") {
1388 // Flush any pending change hunk from a previous patch hunk.
1389 if let Some(hunk) = current_hunk.take() {
1390 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1391 }
1392
1393 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1394 // We intentionally do not trust old_start as a direct local index into `old_text`,
1395 // because some patches are produced against a larger file region and carry
1396 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1397 if first_hunk {
1398 new_text_byte_offset = 0;
1399 first_hunk = false;
1400 }
1401 continue;
1402 }
1403
1404 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1405 continue;
1406 }
1407 if raw_line.starts_with("\\ No newline") {
1408 continue;
1409 }
1410
1411 if raw_line.starts_with('-') {
1412 // Extend or start a change hunk with this deleted old line.
1413 match &mut current_hunk {
1414 Some(Hunk {
1415 line_range: range, ..
1416 }) => range.end = old_line_index + 1,
1417 None => {
1418 current_hunk = Some(Hunk {
1419 line_range: old_line_index..old_line_index + 1,
1420 new_text_lines: Vec::new(),
1421 cursor_line_offset_in_new_text: None,
1422 });
1423 }
1424 }
1425 old_line_index += 1;
1426 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1427 // Place cursor marker if cursor_offset falls within this line.
1428 let mut cursor_line_offset = None;
1429 if let Some(cursor_off) = cursor_offset
1430 && (first_hunk
1431 || cursor_off >= new_text_byte_offset
1432 && cursor_off <= new_text_byte_offset + added_content.len())
1433 {
1434 let line_offset = added_content.floor_char_boundary(
1435 cursor_off
1436 .saturating_sub(new_text_byte_offset)
1437 .min(added_content.len()),
1438 );
1439 cursor_line_offset = Some(line_offset);
1440 }
1441
1442 new_text_byte_offset += added_content.len();
1443
1444 let hunk = current_hunk.get_or_insert(Hunk {
1445 line_range: old_line_index..old_line_index,
1446 new_text_lines: vec![],
1447 cursor_line_offset_in_new_text: None,
1448 });
1449 hunk.new_text_lines.push(added_content);
1450 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1451 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1452 } else {
1453 // Context line (starts with ' ' or is empty).
1454 if let Some(hunk) = current_hunk.take() {
1455 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1456 }
1457 last_old_line_before_hunk = Some(old_line_index);
1458 old_line_index += 1;
1459 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1460 new_text_byte_offset += content.len();
1461 }
1462 }
1463
1464 // Flush final group.
1465 if let Some(hunk) = current_hunk.take() {
1466 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1467 }
1468
1469 // Trim a single trailing newline.
1470 if result.ends_with('\n') {
1471 result.pop();
1472 }
1473
1474 Ok(result)
1475 }
1476
1477 #[cfg(test)]
1478 mod tests {
1479 use super::*;
1480 use indoc::indoc;
1481
        /// Table-driven check of `hashline::write_cursor_excerpt_section`.
        ///
        /// Each case renders `context` for the path `test.rs` and compares the
        /// whole prompt section against `expected`, covering cursor placement,
        /// multi-byte characters, prefix/suffix text outside the editable range,
        /// and the positions where no cursor marker is emitted.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label printed when the assertion fails.
                name: &'static str,
                // Full excerpt text passed to the formatter.
                context: &'static str,
                // Byte range of `context` that is editable.
                editable_range: Range<usize>,
                // Cursor position as a byte offset into `context`.
                cursor_offset: usize,
                // Exact prompt section the formatter must produce.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:5c|hello<|user_cursor|> world
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        2:29|ccc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:d9|lin<|user_cursor|>e1
                        1:da|line2
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:00|
                        1:26|a<|user_cursor|>bc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|ab<|user_cursor|>c
                        <|fim_suffix|>
                        def
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:1b|hé<|user_cursor|>llo
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:80|日本<|user_cursor|>語
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:6b|a🌍<|user_cursor|>b
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        1:2f|def
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        pre
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        <|fim_suffix|>
                        suf
                        <|fim_middle|>updated"},
                },
            ];

            // Render each case into a fresh prompt buffer and compare verbatim.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
1662
        /// Table-driven check of `hashline::apply_edit_commands`.
        ///
        /// Each case applies `model_output` to `original` and compares the
        /// result with `expected`. The table covers single-line and ranged set
        /// commands, insertions, combinations of both, and the inputs that must
        /// leave the original untouched (hash mismatches, out-of-bounds or
        /// reversed ranges, malformed specifiers).
        #[test]
        fn test_apply_edit_commands() {
            struct Case {
                // Label printed when the assertion fails.
                name: &'static str,
                // Editable region text before any edits.
                original: &'static str,
                // Raw model response containing the edit commands.
                model_output: &'static str,
                // Region text expected after applying the commands.
                expected: &'static str,
            }

            let cases = vec![
                Case {
                    name: "set_single_line",
                    original: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    model_output: indoc! {"
                        <|set|>2:87
                            total += product.price;
                    "},
                    expected: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "set_range",
                    original: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:46-3:4a
                            let sum = 6;
                    "},
                    expected: indoc! {"
                        fn foo() {
                            let sum = 6;
                        }
                    "},
                },
                Case {
                    name: "insert_after_line",
                    original: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    model_output: indoc! {"
                        <|insert|>1:46
                            let y = 2;
                    "},
                    expected: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insert_before_first",
                    original: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        use std::io;
                    "},
                    expected: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "set_with_cursor_marker",
                    original: indoc! {"
                        fn main() {
                            println!();
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:34
                            eprintln!(\"<|user_cursor|>\");
                    "},
                    expected: indoc! {"
                        fn main() {
                            eprintln!(\"<|user_cursor|>\");
                        }
                    "},
                },
                Case {
                    name: "multiple_set_commands",
                    original: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    model_output: indoc! {"
                        <|set|>0:23
                        AAA
                        <|set|>2:29
                        CCC
                    "},
                    expected: indoc! {"
                        AAA
                        bbb
                        CCC
                        ddd
                    "},
                },
                Case {
                    name: "set_range_multiline_replacement",
                    original: indoc! {"
                        fn handle_submit() {
                        }

                        fn handle_keystroke() {
                    "},
                    model_output: indoc! {"
                        <|set|>0:3f-1:7d
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }
                    "},
                    expected: indoc! {"
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "no_edit_commands_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "some random text with no commands",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "wrong_hash_set_ignored",
                    original: indoc! {"
                        aaa
                        bbb
                    "},
                    model_output: indoc! {"
                        <|set|>0:ff
                        ZZZ
                    "},
                    expected: indoc! {"
                        aaa
                        bbb
                    "},
                },
                Case {
                    name: "insert_and_set_combined",
                    original: indoc! {"
                        alpha
                        beta
                        gamma
                    "},
                    model_output: indoc! {"
                        <|set|>0:06
                        ALPHA
                        <|insert|>1:9c
                        beta_extra
                    "},
                    expected: indoc! {"
                        ALPHA
                        beta
                        beta_extra
                        gamma
                    "},
                },
                Case {
                    name: "no_trailing_newline_preserved",
                    original: "hello\nworld",
                    model_output: indoc! {"
                        <|set|>0:14
                        HELLO
                    "},
                    expected: "HELLO\nworld",
                },
                Case {
                    name: "set_range_hash_mismatch_in_end_bound",
                    original: indoc! {"
                        one
                        two
                        three
                    "},
                    model_output: indoc! {"
                        <|set|>0:42-2:ff
                        ONE_TWO_THREE
                    "},
                    expected: indoc! {"
                        one
                        two
                        three
                    "},
                },
                Case {
                    name: "set_range_start_greater_than_end_ignored",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|set|>2:63-1:62
                        X
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                    "},
                },
                Case {
                    name: "insert_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|insert|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "set_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|set|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "malformed_set_command_ignored",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|set|>not-a-line-ref
                        UPDATED
                    "},
                    expected: indoc! {"
                        alpha
                        beta
                    "},
                },
                Case {
                    // An insert whose specifier fails to parse has no anchor,
                    // so its content lands before the first line.
                    name: "malformed_insert_hash_treated_as_before_first",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|insert|>1:nothex
                        preamble
                    "},
                    expected: indoc! {"
                        preamble
                        alpha
                        beta
                    "},
                },
                Case {
                    name: "set_then_insert_same_target_orders_insert_after_replacement",
                    original: indoc! {"
                        cat
                        dog
                    "},
                    model_output: indoc! {"
                        <|set|>0:38
                        CAT
                        <|insert|>0:38
                        TAIL
                    "},
                    expected: indoc! {"
                        CAT
                        TAIL
                        dog
                    "},
                },
                Case {
                    // NOTE(review): despite the case name, the expected output
                    // keeps FIRST and drops SECOND — the second range starts
                    // inside the first, which has already been consumed.
                    name: "overlapping_set_ranges_last_wins",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        <|set|>0:61-2:63
                        FIRST
                        <|set|>1:62-3:64
                        SECOND
                    "},
                    expected: indoc! {"
                        FIRST
                        d
                    "},
                },
                Case {
                    name: "insert_before_first_and_after_line",
                    original: indoc! {"
                        a
                        b
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        HEAD
                        <|insert|>0:61
                        MID
                    "},
                    expected: indoc! {"
                        HEAD
                        a
                        MID
                        b
                    "},
                },
            ];

            // Apply each case's commands and compare the resulting region verbatim.
            for case in &cases {
                let result = hashline::apply_edit_commands(case.original, &case.model_output);
                assert_eq!(result, case.expected, "failed case: {}", case.name);
            }
        }
2021
2022 #[test]
2023 fn test_output_has_edit_commands() {
2024 assert!(hashline::output_has_edit_commands(&format!(
2025 "{}0:ab\nnew",
2026 SET_COMMAND_MARKER
2027 )));
2028 assert!(hashline::output_has_edit_commands(&format!(
2029 "{}0:ab\nnew",
2030 INSERT_COMMAND_MARKER
2031 )));
2032 assert!(hashline::output_has_edit_commands(&format!(
2033 "some text\n{}1:cd\nstuff",
2034 SET_COMMAND_MARKER
2035 )));
2036 assert!(!hashline::output_has_edit_commands("just plain text"));
2037 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2038 }
2039
2040 // ---- hashline::patch_to_edit_commands round-trip tests ----
2041
        /// Round-trip check for `hashline::patch_to_edit_commands`.
        ///
        /// Each case converts `patch` into edit commands against `old`, asserts
        /// that the output actually contains commands, then feeds those
        /// commands to `hashline::apply_edit_commands` and compares the result
        /// with `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label printed when an assertion fails.
                name: &'static str,
                // Text the patch applies to.
                old: &'static str,
                // Unified diff describing the change.
                patch: &'static str,
                // Text expected after the generated commands are applied;
                // a cursor marker here also determines the cursor offset used.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    // The only case whose expected text carries a cursor marker,
                    // so it is the only one run with a cursor offset.
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    // The header's line numbers (20) do not match positions in
                    // `old`; the conversion must still anchor to local lines.
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Round trip: applying the generated commands must reproduce the new text.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2328 }
2329}
2330
2331pub mod seed_coder {
2332 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2333 //!
2334 //! Seed-Coder uses different FIM tokens and order than Qwen:
2335 //! - SPM order: suffix comes FIRST, then prefix, then middle
2336 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2337 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2338 //!
2339 //! All context (related files, edit history) goes in the PREFIX section.
2340 //! The suffix contains only code after the editable region.
2341 //!
2342 //! Example prompt:
2343 //!
2344 //! <[fim-suffix]>
2345 //! code after editable region
2346 //! <[fim-prefix]><filename>related/file.py
2347 //! related file content
2348 //!
2349 //! <filename>edit_history
2350 //! --- a/some_file.py
2351 //! +++ b/some_file.py
2352 //! -old
2353 //! +new
2354 //!
2355 //! <filename>path/to/target_file.py
2356 //! code before editable region
2357 //! <<<<<<< CURRENT
2358 //! code that
2359 //! needs to<|user_cursor|>
2360 //! be rewritten
2361 //! =======
2362 //! <[fim-middle]>
2363 //!
2364 //! Expected output (model generates):
2365 //!
2366 //! updated
2367 //! code with
2368 //! changes applied
2369 //! >>>>>>> UPDATED
2370
2371 use super::*;
2372
2373 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2374 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2375 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2376 pub const FILE_MARKER: &str = "<filename>";
2377
2378 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2379 pub const SEPARATOR: &str = "=======\n";
2380 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2381
2382 pub fn special_tokens() -> &'static [&'static str] {
2383 &[
2384 FIM_SUFFIX,
2385 FIM_PREFIX,
2386 FIM_MIDDLE,
2387 FILE_MARKER,
2388 START_MARKER,
2389 SEPARATOR,
2390 END_MARKER,
2391 CURSOR_MARKER,
2392 ]
2393 }
2394
2395 pub fn write_cursor_excerpt_section(
2396 prompt: &mut String,
2397 path: &Path,
2398 context: &str,
2399 editable_range: &Range<usize>,
2400 cursor_offset: usize,
2401 ) {
2402 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2403 prompt.push_str(§ion);
2404 }
2405
2406 pub fn format_prompt_with_budget(
2407 path: &Path,
2408 context: &str,
2409 editable_range: &Range<usize>,
2410 cursor_offset: usize,
2411 events: &[Arc<Event>],
2412 related_files: &[RelatedFile],
2413 max_tokens: usize,
2414 ) -> String {
2415 let suffix_section = build_suffix_section(context, editable_range);
2416 let cursor_prefix_section =
2417 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2418
2419 let suffix_tokens = estimate_tokens(suffix_section.len());
2420 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2421 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2422
2423 let edit_history_section = super::format_edit_history_within_budget(
2424 events,
2425 FILE_MARKER,
2426 "edit_history",
2427 budget_after_cursor,
2428 );
2429 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2430 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2431
2432 let related_files_section = super::format_related_files_within_budget(
2433 related_files,
2434 FILE_MARKER,
2435 "",
2436 budget_after_edit_history,
2437 );
2438
2439 let mut prompt = String::new();
2440 prompt.push_str(&suffix_section);
2441 prompt.push_str(FIM_PREFIX);
2442 prompt.push_str(&related_files_section);
2443 if !related_files_section.is_empty() {
2444 prompt.push('\n');
2445 }
2446 prompt.push_str(&edit_history_section);
2447 if !edit_history_section.is_empty() {
2448 prompt.push('\n');
2449 }
2450 prompt.push_str(&cursor_prefix_section);
2451 prompt.push_str(FIM_MIDDLE);
2452 prompt
2453 }
2454
2455 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2456 let mut section = String::new();
2457 section.push_str(FIM_SUFFIX);
2458 section.push_str(&context[editable_range.end..]);
2459 if !section.ends_with('\n') {
2460 section.push('\n');
2461 }
2462 section
2463 }
2464
2465 fn build_cursor_prefix_section(
2466 path: &Path,
2467 context: &str,
2468 editable_range: &Range<usize>,
2469 cursor_offset: usize,
2470 ) -> String {
2471 let mut section = String::new();
2472 let path_str = path.to_string_lossy();
2473 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2474
2475 section.push_str(&context[..editable_range.start]);
2476 section.push_str(START_MARKER);
2477 section.push_str(&context[editable_range.start..cursor_offset]);
2478 section.push_str(CURSOR_MARKER);
2479 section.push_str(&context[cursor_offset..editable_range.end]);
2480 if !section.ends_with('\n') {
2481 section.push('\n');
2482 }
2483 section.push_str(SEPARATOR);
2484 section
2485 }
2486}
2487
2488/// The zeta1 prompt format
2489pub mod zeta1 {
2490 use super::*;
2491 use std::fmt::Write;
2492
/// Cursor marker of the zeta1 format. `format_zeta1_excerpt` inserts it at the
/// cursor offset and `clean_zeta1_model_output` replaces it with the
/// crate-level `CURSOR_MARKER`.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Written on its own line at the top of the excerpt when the excerpt starts
/// at row 0 and the context range starts at offset 0.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Written on its own line immediately before the editable region.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Written on its own line immediately after the editable region.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

/// Opening of every zeta1 prompt: the instruction text followed by the
/// "User Edits" heading. The formatted events are appended directly after it.
const INSTRUCTION_HEADER: &str = concat!(
    "### Instruction:\n",
    "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
    "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
    "into account the cursor location.\n\n",
    "### User Edits:\n\n"
);
/// Heading placed between the formatted events and the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
/// Heading that closes the prompt, placed after the excerpt.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2507
2508 /// Formats a complete zeta1 prompt from the input events and excerpt.
2509 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2510 let mut prompt = String::with_capacity(
2511 INSTRUCTION_HEADER.len()
2512 + input_events.len()
2513 + EXCERPT_HEADER.len()
2514 + input_excerpt.len()
2515 + RESPONSE_HEADER.len(),
2516 );
2517 prompt.push_str(INSTRUCTION_HEADER);
2518 prompt.push_str(input_events);
2519 prompt.push_str(EXCERPT_HEADER);
2520 prompt.push_str(input_excerpt);
2521 prompt.push_str(RESPONSE_HEADER);
2522 prompt
2523 }
2524
2525 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2526 /// editable and context byte-offset ranges within `cursor_excerpt`.
2527 pub fn format_zeta1_from_input(
2528 input: &ZetaPromptInput,
2529 editable_range: Range<usize>,
2530 context_range: Range<usize>,
2531 ) -> String {
2532 let events = format_zeta1_events(&input.events);
2533 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2534 format_zeta1_prompt(&events, &excerpt)
2535 }
2536
2537 /// Formats events in zeta1 style (oldest first).
2538 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2539 let mut result = String::new();
2540 for event in events {
2541 let event_string = format_zeta1_event(event);
2542 if event_string.is_empty() {
2543 continue;
2544 }
2545 if !result.is_empty() {
2546 result.push_str("\n\n");
2547 }
2548 result.push_str(&event_string);
2549 }
2550 result
2551 }
2552
2553 fn format_zeta1_event(event: &Event) -> String {
2554 match event {
2555 Event::BufferChange {
2556 path,
2557 old_path,
2558 diff,
2559 ..
2560 } => {
2561 let mut prompt = String::new();
2562 if old_path != path {
2563 writeln!(
2564 prompt,
2565 "User renamed {} to {}\n",
2566 old_path.display(),
2567 path.display()
2568 )
2569 .ok();
2570 }
2571 if !diff.is_empty() {
2572 write!(
2573 prompt,
2574 "User edited {}:\n```diff\n{}\n```",
2575 path.display(),
2576 diff
2577 )
2578 .ok();
2579 }
2580 prompt
2581 }
2582 }
2583 }
2584
2585 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2586 /// within `cursor_excerpt`.
2587 fn format_zeta1_excerpt(
2588 input: &ZetaPromptInput,
2589 editable_range: Range<usize>,
2590 context_range: Range<usize>,
2591 ) -> String {
2592 let path_str = input.cursor_path.to_string_lossy();
2593 let excerpt = &*input.cursor_excerpt;
2594 let cursor_offset = input.cursor_offset_in_excerpt;
2595
2596 let mut prompt = String::new();
2597 writeln!(&mut prompt, "```{path_str}").ok();
2598
2599 let starts_at_file_beginning =
2600 input.excerpt_start_row == Some(0) && context_range.start == 0;
2601 if starts_at_file_beginning {
2602 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2603 }
2604
2605 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2606
2607 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2608 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2609 prompt.push_str(CURSOR_MARKER);
2610 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2611 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2612
2613 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2614 write!(prompt, "\n```").ok();
2615
2616 prompt
2617 }
2618
2619 /// Cleans zeta1 model output by extracting content between editable region
2620 /// markers and converting the zeta1 cursor marker to the universal one.
2621 /// Returns `None` if the output doesn't contain the expected markers.
2622 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2623 let content = output.replace(CURSOR_MARKER, "");
2624
2625 let content_start = content
2626 .find(EDITABLE_REGION_START_MARKER)
2627 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2628 .map(|pos| {
2629 if content.as_bytes().get(pos) == Some(&b'\n') {
2630 pos + 1
2631 } else {
2632 pos
2633 }
2634 })
2635 .unwrap_or(0);
2636
2637 let content_end = content
2638 .find(EDITABLE_REGION_END_MARKER)
2639 .map(|pos| {
2640 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2641 pos - 1
2642 } else {
2643 pos
2644 }
2645 })
2646 .unwrap_or(content.len());
2647
2648 if content_start > content_end {
2649 return Some(String::new());
2650 }
2651
2652 let extracted = &content[content_start..content_end];
2653
2654 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2655 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2656 let text_before_cursor = text_before_cursor
2657 .find(EDITABLE_REGION_START_MARKER)
2658 .map(|pos| {
2659 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2660 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2661 after_marker + 1
2662 } else {
2663 after_marker
2664 }
2665 })
2666 .unwrap_or(0);
2667 let offset_in_extracted = zeta1_cursor_pos
2668 .saturating_sub(text_before_cursor)
2669 .min(extracted.len());
2670 offset_in_extracted
2671 });
2672
2673 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2674 if let Some(offset) = cursor_offset {
2675 result.push_str(&extracted[..offset]);
2676 result.push_str(super::CURSOR_MARKER);
2677 result.push_str(&extracted[offset..]);
2678 } else {
2679 result.push_str(extracted);
2680 }
2681
2682 Some(result)
2683 }
2684}
2685
2686#[cfg(test)]
2687mod tests {
2688 use super::*;
2689 use indoc::indoc;
2690
2691 fn make_input(
2692 cursor_excerpt: &str,
2693 editable_range: Range<usize>,
2694 cursor_offset: usize,
2695 events: Vec<Event>,
2696 related_files: Vec<RelatedFile>,
2697 ) -> ZetaPromptInput {
2698 let context_range = 0..cursor_excerpt.len();
2699 ZetaPromptInput {
2700 cursor_path: Path::new("test.rs").into(),
2701 cursor_excerpt: cursor_excerpt.into(),
2702 cursor_offset_in_excerpt: cursor_offset,
2703 excerpt_start_row: None,
2704 events: events.into_iter().map(Arc::new).collect(),
2705 related_files,
2706 excerpt_ranges: ExcerptRanges {
2707 editable_150: editable_range.clone(),
2708 editable_180: editable_range.clone(),
2709 editable_350: editable_range,
2710 editable_150_context_350: context_range.clone(),
2711 editable_180_context_350: context_range.clone(),
2712 editable_350_context_150: context_range,
2713 ..Default::default()
2714 },
2715 experiment: None,
2716 in_open_source_repo: false,
2717 can_collect_data: false,
2718 }
2719 }
2720
2721 fn make_event(path: &str, diff: &str) -> Event {
2722 Event::BufferChange {
2723 path: Path::new(path).into(),
2724 old_path: Path::new(path).into(),
2725 diff: diff.to_string(),
2726 predicted: false,
2727 in_open_source_repo: false,
2728 }
2729 }
2730
2731 fn make_related_file(path: &str, content: &str) -> RelatedFile {
2732 RelatedFile {
2733 path: Path::new(path).into(),
2734 max_row: content.lines().count() as u32,
2735 excerpts: vec![RelatedExcerpt {
2736 row_range: 0..content.lines().count() as u32,
2737 text: content.into(),
2738 order: 0,
2739 }],
2740 in_open_source_repo: false,
2741 }
2742 }
2743
2744 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2745 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2746 }
2747
/// With a generous budget the related file, the edit history and the cursor
/// excerpt all appear in the prompt.
#[test]
fn test_no_truncation_when_within_budget() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, history, related);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
2780
/// A tight budget removes the edit history while both related files and the
/// cursor excerpt stay in the prompt.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let history = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, history, related);

    // Generous budget: everything is present.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Budget of 50: the edit history section is gone.
    let truncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), truncated);
}
2831
/// When a related file does not fit entirely, the excerpts that do fit are
/// still emitted.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let first = RelatedExcerpt {
        row_range: 0..10,
        text: "first excerpt\n".into(),
        order: 0,
    };
    let second = RelatedExcerpt {
        row_range: 10..20,
        text: "second excerpt\n".into(),
        order: 0,
    };
    let third = RelatedExcerpt {
        row_range: 20..30,
        text: "third excerpt\n".into(),
        order: 0,
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![first, second, third],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Generous budget: all three excerpts, separated by `...`.
    let untruncated = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Budget of 50: only the first excerpt survives, followed by `...`.
    let truncated = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), truncated);
}
2896
/// Across files, excerpts with a lower `order` win when the budget is tight.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // file_a carries an order-5 excerpt and file_b an order-1 excerpt, so under
    // pressure only file_b's excerpt should remain.
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // Generous budget: both files are rendered, file_a before file_b.
    let untruncated = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Tight budget: the cursor section costs about 37 tokens, so 52 leaves
    // about 15 for related files. file_b needs 7 (header) + 7 (excerpt) = 14
    // and fits; file_a would need another 14 and does not.
    let truncated = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 52), truncated);
}
2965
/// Within one file, higher-order excerpts are dropped before lower-order ones.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // One file holds two order-1 excerpts and one order-3 excerpt. Under a
    // tight budget the order-3 excerpt is dropped even though it shares the
    // file with the others. This also keeps the parent invariant intact:
    // parent outline items have order <= their best child, so they are always
    // included whenever any child is.
    let header = RelatedExcerpt {
        row_range: 0..5,
        text: "mod header\n".into(),
        order: 1,
    };
    let important = RelatedExcerpt {
        row_range: 5..15,
        text: "important fn\n".into(),
        order: 1,
    };
    let less_important = RelatedExcerpt {
        row_range: 15..30,
        text: "less important fn\n".into(),
        order: 3,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![header, important, less_important],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Generous budget: all three excerpts are present.
    let untruncated = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Budget of 55: only the order-1 excerpts (header and important fn) remain.
    let truncated = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), truncated);
}
3039
/// When the edit history does not fit, the earlier event in the list is the
/// one that gets dropped.
#[test]
fn test_truncation_drops_older_events_first() {
    let history = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, history, vec![]);

    // Generous budget: both events, in list order.
    let untruncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Budget of 55: only the last event (new.rs) is kept.
    let truncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), truncated);
}
3085
/// Even with a budget of 30 the cursor excerpt is emitted in full; the edit
/// history and the related file are dropped.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, history, related);

    let expected = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 30), expected);
}
3108
3109 fn format_seed_coder(input: &ZetaPromptInput) -> String {
3110 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3111 }
3112
3113 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3114 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3115 }
3116
/// Full seed-coder layout: suffix first, then related files, edit history
/// and the cursor file, ending with the fim-middle token.
#[test]
fn test_seed_coder_basic_format() {
    let history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, history, related);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
3149
/// Without events or related files, the prompt contains only the suffix and
/// the cursor file.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
3167
/// A tight budget removes related files and edit history from the seed-coder
/// prompt but keeps the cursor section.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let history = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, history, related);

    // Generous budget: everything is included.
    let untruncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), untruncated);

    // Budget of 30: the optional context is gone, the cursor section remains.
    let truncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 30), truncated);
}
3211
/// In the seed-coder format, too, the lower-order excerpt wins under a tight
/// budget.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // Generous budget: both files are rendered, in the order they were passed in.
    let untruncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), untruncated);

    // Tight budget: the cursor sections cost 25 tokens, so 44 leaves 19 for
    // related files. high_prio needs 7 (header) + 3 (excerpt) = 10 and fits;
    // adding low_prio would bring the total to 20, which exceeds 19.
    let truncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 44), truncated);
}
3277
/// Cleaning seed-coder output yields the same text whether or not the model
/// emitted the trailing `>>>>>>> UPDATED` line.
#[test]
fn test_seed_coder_clean_output() {
    let with_marker = "new code\n>>>>>>> UPDATED\n";
    let without_marker = "new code\n";

    for raw_output in [with_marker, without_marker] {
        assert_eq!(
            clean_zeta2_model_output(raw_output, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
3292
/// A zeta1 prompt built from an excerpt that starts at the top of the file:
/// the start-of-file marker is present and the edit event is rendered as a
/// diff block.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The byte offsets below (cursor at 30, editable range 15..41) rely on the
    // `let` line being indented by exactly four spaces.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 15..41,
            editable_180: 15..41,
            editable_350: 15..41,
            editable_150_context_350: 0..excerpt.len(),
            editable_180_context_350: 0..excerpt.len(),
            editable_350_context_150: 0..excerpt.len(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
3354
/// When the excerpt does not start at row 0, the start-of-file marker is
/// omitted; with no events the "User Edits" section is empty.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The excerpt must be exactly 28 bytes long (four-space indentation) for
    // the `0..28` ranges and the cursor offset of 15 to be valid.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
3408
/// The editable range is a strict sub-range of the context range, so context
/// lines appear both before the start marker and after the end marker.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Offsets assume four-space indentation: the editable range 10..37 ends
    // right after `}` and the cursor offset 25 sits just before `let`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
3468
/// Cleaning keeps only the text between the region markers and drops the
/// newline next to each marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is indented four spaces relative to the other lines,
    // and the expected string below must carry the same indentation.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
3482
/// The zeta1 cursor marker inside the region is replaced by the universal
/// cursor marker at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is indented four spaces relative to the other lines, and
    // the expected string below must carry the same indentation.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
3499
/// Output without any region markers is returned unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let cleaned = zeta1::clean_zeta1_model_output("fn main() {}\n");
    assert_eq!(cleaned.as_deref(), Some("fn main() {}\n"));
}
3506
/// A start marker immediately followed by the end marker yields an empty
/// string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let cleaned =
        zeta1::clean_zeta1_model_output("<|editable_region_start|>\n<|editable_region_end|>\n");
    assert_eq!(cleaned.as_deref(), Some(""));
}
3513}