1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the prompt text at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for `bytes` bytes of text: three bytes per token,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    // NOTE(review): the optional ranges below carry no documentation in the
    // original. By analogy with the fields above they presumably are context
    // boundaries for the named token budgets — confirm with the producer.
    pub editable_350_context_512: Option<Range<usize>>,
    pub editable_350_context_1024: Option<Range<usize>>,
    pub context_4096: Option<Range<usize>>,
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render a prompt for one prediction request.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor; `excerpt_ranges` index into it by byte offset.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to excerpt-relative rows so related excerpts from
    /// the cursor file can be filtered against the cursor context.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events rendered into the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations rendered into the prompt.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the two flags below are not read anywhere in the visible
    // part of this file; their consumers are presumably elsewhere.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
}
65
/// Identifies a prompt/output format version. The `*_for_format` functions in
/// this file dispatch on it to pick tokens, excerpt ranges and writers.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because the `v0226Hashline` variant starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    // The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
}
91
92impl std::fmt::Display for ZetaFormat {
93 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94 write!(f, "{}", <&'static str>::from(self))
95 }
96}
97
98impl ZetaFormat {
99 pub fn parse(format_name: &str) -> Result<Self> {
100 let mut results = ZetaFormat::iter().filter(|version| {
101 <&'static str>::from(version)
102 .to_lowercase()
103 .contains(&format_name.to_lowercase())
104 });
105 let Some(result) = results.next() else {
106 anyhow::bail!(
107 "`{format_name}` did not match any of:\n{}",
108 Self::options_as_string()
109 );
110 };
111 if results.next().is_some() {
112 anyhow::bail!(
113 "`{format_name}` matched more than one of:\n{}",
114 Self::options_as_string()
115 );
116 }
117 Ok(result)
118 }
119
120 pub fn options_as_string() -> String {
121 ZetaFormat::iter()
122 .map(|format| format!("- {}\n", <&'static str>::from(format)))
123 .collect::<Vec<_>>()
124 .concat()
125 }
126}
127
/// An edit-history entry that `write_event` renders into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed via `Event::in_open_source_repo`; not rendered by `write_event`.
        in_open_source_repo: bool,
    },
}
139
140impl Event {
141 pub fn in_open_source_repo(&self) -> bool {
142 match self {
143 Event::BufferChange {
144 in_open_source_repo,
145 ..
146 } => *in_open_source_repo,
147 }
148 }
149}
150
151pub fn write_event(prompt: &mut String, event: &Event) {
152 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
153 for component in path.components() {
154 prompt.push('/');
155 write!(prompt, "{}", component.as_os_str().display()).ok();
156 }
157 }
158 match event {
159 Event::BufferChange {
160 path,
161 old_path,
162 diff,
163 predicted,
164 in_open_source_repo: _,
165 } => {
166 if *predicted {
167 prompt.push_str("// User accepted prediction:\n");
168 }
169 prompt.push_str("--- a");
170 write_path_as_unix_str(prompt, old_path.as_ref());
171 prompt.push_str("\n+++ b");
172 write_path_as_unix_str(prompt, path.as_ref());
173 prompt.push('\n');
174 prompt.push_str(diff);
175 }
176 }
177}
178
/// A file with one or more excerpts to include in the prompt as context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's end row: when the excerpt ends before
    /// this row, a `...` line is rendered after it.
    pub max_row: u32,
    /// Excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read in the visible part of this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
187
/// A span of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget; lower values
    /// are considered first.
    #[serde(default)]
    pub order: usize,
}
195
196pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
197 special_tokens_for_format(format)
198 .iter()
199 .any(|token| input.cursor_excerpt.contains(token))
200}
201
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
205
206pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
207 match format {
208 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
209 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
210 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
211 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
212 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
213 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
214 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
215 ZetaFormat::v0226Hashline => hashline::special_tokens(),
216 }
217}
218
219pub fn excerpt_ranges_for_format(
220 format: ZetaFormat,
221 ranges: &ExcerptRanges,
222) -> (Range<usize>, Range<usize>) {
223 match format {
224 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
225 ranges.editable_150.clone(),
226 ranges.editable_150_context_350.clone(),
227 ),
228 ZetaFormat::V0114180EditableRegion => (
229 ranges.editable_180.clone(),
230 ranges.editable_180_context_350.clone(),
231 ),
232 ZetaFormat::V0120GitMergeMarkers
233 | ZetaFormat::V0131GitMergeMarkersPrefix
234 | ZetaFormat::V0211Prefill
235 | ZetaFormat::V0211SeedCoder
236 | ZetaFormat::v0226Hashline => (
237 ranges.editable_350.clone(),
238 ranges.editable_350_context_150.clone(),
239 ),
240 }
241}
242
243pub fn write_cursor_excerpt_section_for_format(
244 format: ZetaFormat,
245 prompt: &mut String,
246 path: &Path,
247 context: &str,
248 editable_range: &Range<usize>,
249 cursor_offset: usize,
250) {
251 match format {
252 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
253 prompt,
254 path,
255 context,
256 editable_range,
257 cursor_offset,
258 ),
259 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
260 v0113_ordered::write_cursor_excerpt_section(
261 prompt,
262 path,
263 context,
264 editable_range,
265 cursor_offset,
266 )
267 }
268 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
269 prompt,
270 path,
271 context,
272 editable_range,
273 cursor_offset,
274 ),
275 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
276 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
277 prompt,
278 path,
279 context,
280 editable_range,
281 cursor_offset,
282 )
283 }
284 ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
285 prompt,
286 path,
287 context,
288 editable_range,
289 cursor_offset,
290 ),
291 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
292 prompt,
293 path,
294 context,
295 editable_range,
296 cursor_offset,
297 ),
298 }
299}
300
/// Converts a byte range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one when the text up to
/// `range.end` does not end with a newline.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let count_newlines = |s: &str| s.bytes().filter(|&b| b == b'\n').count() as u32;

    let first_row = count_newlines(&text[..range.start]);
    let rows_spanned = count_newlines(&text[range.clone()]);
    let ends_mid_line = !text[..range.end].ends_with('\n');

    first_row..(first_row + rows_spanned + u32::from(ends_mid_line))
}
309
310pub fn format_prompt_with_budget_for_format(
311 input: &ZetaPromptInput,
312 format: ZetaFormat,
313 max_tokens: usize,
314) -> String {
315 let (context, editable_range, context_range, cursor_offset) =
316 resolve_cursor_region(input, format);
317 let path = &*input.cursor_path;
318
319 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
320 let relative_row_range = offset_range_to_row_range(context, context_range);
321 let row_range = relative_row_range.start + cursor_excerpt_start_row
322 ..relative_row_range.end + cursor_excerpt_start_row;
323 &filter_redundant_excerpts(
324 input.related_files.clone(),
325 input.cursor_path.as_ref(),
326 row_range,
327 )
328 } else {
329 &input.related_files
330 };
331
332 match format {
333 ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
334 path,
335 context,
336 &editable_range,
337 cursor_offset,
338 &input.events,
339 &related_files,
340 max_tokens,
341 ),
342 _ => {
343 let mut cursor_section = String::new();
344 write_cursor_excerpt_section_for_format(
345 format,
346 &mut cursor_section,
347 path,
348 context,
349 &editable_range,
350 cursor_offset,
351 );
352
353 let cursor_tokens = estimate_tokens(cursor_section.len());
354 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
355
356 let edit_history_section = format_edit_history_within_budget(
357 &input.events,
358 "<|file_sep|>",
359 "edit history",
360 budget_after_cursor,
361 );
362 let edit_history_tokens = estimate_tokens(edit_history_section.len());
363 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
364
365 let related_files_section = format_related_files_within_budget(
366 &related_files,
367 "<|file_sep|>",
368 "",
369 budget_after_edit_history,
370 );
371
372 let mut prompt = String::new();
373 prompt.push_str(&related_files_section);
374 prompt.push_str(&edit_history_section);
375 prompt.push_str(&cursor_section);
376 prompt
377 }
378 }
379}
380
381pub fn filter_redundant_excerpts(
382 mut related_files: Vec<RelatedFile>,
383 cursor_path: &Path,
384 cursor_row_range: Range<u32>,
385) -> Vec<RelatedFile> {
386 for file in &mut related_files {
387 if file.path.as_ref() == cursor_path {
388 file.excerpts.retain(|excerpt| {
389 excerpt.row_range.start < cursor_row_range.start
390 || excerpt.row_range.end > cursor_row_range.end
391 });
392 }
393 }
394 related_files.retain(|file| !file.excerpts.is_empty());
395 related_files
396}
397
398pub fn get_prefill_for_format(
399 format: ZetaFormat,
400 context: &str,
401 editable_range: &Range<usize>,
402) -> String {
403 match format {
404 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
405 ZetaFormat::V0112MiddleAtEnd
406 | ZetaFormat::V0113Ordered
407 | ZetaFormat::V0114180EditableRegion
408 | ZetaFormat::V0120GitMergeMarkers
409 | ZetaFormat::V0131GitMergeMarkersPrefix
410 | ZetaFormat::V0211SeedCoder
411 | ZetaFormat::v0226Hashline => String::new(),
412 }
413}
414
415pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
416 match format {
417 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
418 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
419 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
420 ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
421 ZetaFormat::V0112MiddleAtEnd
422 | ZetaFormat::V0113Ordered
423 | ZetaFormat::V0114180EditableRegion
424 | ZetaFormat::v0226Hashline => None,
425 }
426}
427
428pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
429 match format {
430 ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
431 ZetaFormat::V0113Ordered
432 | ZetaFormat::V0114180EditableRegion
433 | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
434 ZetaFormat::V0120GitMergeMarkers
435 | ZetaFormat::V0131GitMergeMarkersPrefix
436 | ZetaFormat::V0211Prefill => (
437 v0120_git_merge_markers::START_MARKER,
438 v0120_git_merge_markers::SEPARATOR,
439 ),
440 ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
441 }
442}
443
444pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
445 match format {
446 ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
447 _ => region.to_string(),
448 }
449}
450
451pub fn encode_patch_as_output_for_format(
452 format: ZetaFormat,
453 old_editable_region: &str,
454 patch: &str,
455 cursor_offset: Option<usize>,
456) -> Result<Option<String>> {
457 match format {
458 ZetaFormat::v0226Hashline => {
459 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
460 }
461 _ => Ok(None),
462 }
463}
464
465pub fn output_with_context_for_format(
466 format: ZetaFormat,
467 old_editable_region: &str,
468 output: &str,
469) -> Result<Option<String>> {
470 match format {
471 ZetaFormat::v0226Hashline => {
472 if hashline::output_has_edit_commands(output) {
473 Ok(Some(hashline::apply_edit_commands(
474 old_editable_region,
475 output,
476 )))
477 } else {
478 Ok(None)
479 }
480 }
481 _ => Ok(None),
482 }
483}
484
485/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
486pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
487 match output_end_marker_for_format(format) {
488 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
489 None => output,
490 }
491}
492
/// Singular-named alias of `excerpt_ranges_for_format`: returns the
/// `(editable, context)` ranges for `format`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
499
500pub fn resolve_cursor_region(
501 input: &ZetaPromptInput,
502 format: ZetaFormat,
503) -> (&str, Range<usize>, Range<usize>, usize) {
504 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
505 let context_start = context_range.start;
506 let context_text = &input.cursor_excerpt[context_range.clone()];
507 let adjusted_editable =
508 (editable_range.start - context_start)..(editable_range.end - context_start);
509 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
510 let adjusted_context =
511 (context_range.start - context_start)..(context_range.end - context_start);
512
513 (
514 context_text,
515 adjusted_editable,
516 adjusted_context,
517 adjusted_cursor,
518 )
519}
520
521pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
522 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
523 get_prefill_for_format(format, context, &editable_range)
524}
525
526fn format_edit_history_within_budget(
527 events: &[Arc<Event>],
528 file_marker: &str,
529 edit_history_name: &str,
530 max_tokens: usize,
531) -> String {
532 let header = format!("{}{}\n", file_marker, edit_history_name);
533 let header_tokens = estimate_tokens(header.len());
534 if header_tokens >= max_tokens {
535 return String::new();
536 }
537
538 let mut event_strings: Vec<String> = Vec::new();
539 let mut total_tokens = header_tokens;
540
541 for event in events.iter().rev() {
542 let mut event_str = String::new();
543 write_event(&mut event_str, event);
544 let event_tokens = estimate_tokens(event_str.len());
545
546 if total_tokens + event_tokens > max_tokens {
547 break;
548 }
549 total_tokens += event_tokens;
550 event_strings.push(event_str);
551 }
552
553 if event_strings.is_empty() {
554 return String::new();
555 }
556
557 let mut result = header;
558 for event_str in event_strings.iter().rev() {
559 result.push_str(event_str);
560 }
561 result
562}
563
564fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
565 let needs_newline = !excerpt.text.ends_with('\n');
566 let needs_ellipsis = excerpt.row_range.end < file_max_row;
567 let len = excerpt.text.len()
568 + if needs_newline { "\n".len() } else { 0 }
569 + if needs_ellipsis { "...\n".len() } else { 0 };
570 estimate_tokens(len)
571}
572
573pub fn format_related_files_within_budget(
574 related_files: &[RelatedFile],
575 file_prefix: &str,
576 file_suffix: &str,
577 max_tokens: usize,
578) -> String {
579 struct ExcerptCandidate {
580 file_ix: usize,
581 excerpt_ix: usize,
582 order: usize,
583 }
584
585 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
586 .iter()
587 .enumerate()
588 .flat_map(|(file_ix, file)| {
589 file.excerpts
590 .iter()
591 .enumerate()
592 .map(move |(excerpt_ix, e)| ExcerptCandidate {
593 file_ix,
594 excerpt_ix,
595 order: e.order,
596 })
597 })
598 .collect();
599
600 // Pre-compute file header strings and their token costs.
601 let file_headers: Vec<String> = related_files
602 .iter()
603 .map(|file| {
604 let path_str = file.path.to_string_lossy();
605 format!("{}{}\n", file_prefix, path_str)
606 })
607 .collect();
608
609 // Sort the excerpts by their order and determine how many fit within the budget.
610 let mut total_tokens = 0;
611 let mut included_excerpt_count = 0_usize;
612 let mut included_file_indices = vec![false; related_files.len()];
613 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
614 for candidate in &excerpt_candidates {
615 let file = &related_files[candidate.file_ix];
616 let excerpt = &file.excerpts[candidate.excerpt_ix];
617 let file_already_included = included_file_indices[candidate.file_ix];
618 let header_cost = if file_already_included {
619 0
620 } else {
621 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
622 };
623 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
624 if total_tokens + header_cost + excerpt_cost > max_tokens {
625 break;
626 }
627 total_tokens += header_cost + excerpt_cost;
628 if !file_already_included {
629 included_file_indices[candidate.file_ix] = true;
630 }
631 included_excerpt_count += 1;
632 }
633
634 excerpt_candidates.truncate(included_excerpt_count);
635 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
636
637 // Render all of the files that fit within the token budget, in the original order.
638 let mut result = String::new();
639 let mut last_file_ix = None;
640 for candidate in &excerpt_candidates {
641 if last_file_ix != Some(candidate.file_ix) {
642 if last_file_ix.is_some() {
643 result.push_str(file_suffix);
644 }
645 result.push_str(&file_headers[candidate.file_ix]);
646 last_file_ix = Some(candidate.file_ix);
647 }
648 let file = &related_files[candidate.file_ix];
649 let excerpt = &file.excerpts[candidate.excerpt_ix];
650 result.push_str(&excerpt.text);
651 if !result.ends_with('\n') {
652 result.push('\n');
653 }
654 if excerpt.row_range.end < file.max_row {
655 result.push_str("...\n");
656 }
657 }
658
659 result
660}
661
662pub fn write_related_files(
663 prompt: &mut String,
664 related_files: &[RelatedFile],
665) -> Vec<Range<usize>> {
666 let mut ranges = Vec::new();
667 for file in related_files {
668 let start = prompt.len();
669 let path_str = file.path.to_string_lossy();
670 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
671 for excerpt in &file.excerpts {
672 prompt.push_str(&excerpt.text);
673 if !prompt.ends_with('\n') {
674 prompt.push('\n');
675 }
676 if excerpt.row_range.end < file.max_row {
677 prompt.push_str("...\n");
678 }
679 }
680 let end = prompt.len();
681 ranges.push(start..end);
682 }
683 ranges
684}
685
686mod v0112_middle_at_end {
687 use super::*;
688
689 pub fn special_tokens() -> &'static [&'static str] {
690 &[
691 "<|fim_prefix|>",
692 "<|fim_suffix|>",
693 "<|fim_middle|>",
694 "<|file_sep|>",
695 CURSOR_MARKER,
696 ]
697 }
698
699 pub fn write_cursor_excerpt_section(
700 prompt: &mut String,
701 path: &Path,
702 context: &str,
703 editable_range: &Range<usize>,
704 cursor_offset: usize,
705 ) {
706 let path_str = path.to_string_lossy();
707 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
708
709 prompt.push_str("<|fim_prefix|>\n");
710 prompt.push_str(&context[..editable_range.start]);
711
712 prompt.push_str("<|fim_suffix|>\n");
713 prompt.push_str(&context[editable_range.end..]);
714 if !prompt.ends_with('\n') {
715 prompt.push('\n');
716 }
717
718 prompt.push_str("<|fim_middle|>current\n");
719 prompt.push_str(&context[editable_range.start..cursor_offset]);
720 prompt.push_str(CURSOR_MARKER);
721 prompt.push_str(&context[cursor_offset..editable_range.end]);
722 if !prompt.ends_with('\n') {
723 prompt.push('\n');
724 }
725
726 prompt.push_str("<|fim_middle|>updated\n");
727 }
728}
729
730mod v0113_ordered {
731 use super::*;
732
733 pub fn special_tokens() -> &'static [&'static str] {
734 &[
735 "<|fim_prefix|>",
736 "<|fim_suffix|>",
737 "<|fim_middle|>",
738 "<|file_sep|>",
739 CURSOR_MARKER,
740 ]
741 }
742
743 pub fn write_cursor_excerpt_section(
744 prompt: &mut String,
745 path: &Path,
746 context: &str,
747 editable_range: &Range<usize>,
748 cursor_offset: usize,
749 ) {
750 let path_str = path.to_string_lossy();
751 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
752
753 prompt.push_str("<|fim_prefix|>\n");
754 prompt.push_str(&context[..editable_range.start]);
755 if !prompt.ends_with('\n') {
756 prompt.push('\n');
757 }
758
759 prompt.push_str("<|fim_middle|>current\n");
760 prompt.push_str(&context[editable_range.start..cursor_offset]);
761 prompt.push_str(CURSOR_MARKER);
762 prompt.push_str(&context[cursor_offset..editable_range.end]);
763 if !prompt.ends_with('\n') {
764 prompt.push('\n');
765 }
766
767 prompt.push_str("<|fim_suffix|>\n");
768 prompt.push_str(&context[editable_range.end..]);
769 if !prompt.ends_with('\n') {
770 prompt.push('\n');
771 }
772
773 prompt.push_str("<|fim_middle|>updated\n");
774 }
775}
776
mod v0114180_editable_region {
    //! This format only defines its special tokens here, and they are the
    //! ones from `v0113_ordered`.

    use super::*;

    /// Returns the same special tokens as `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
784
785pub mod v0120_git_merge_markers {
786 //! A prompt that uses git-style merge conflict markers to represent the editable region.
787 //!
788 //! Example prompt:
789 //!
790 //! <|file_sep|>path/to/target_file.py
791 //! <|fim_prefix|>
792 //! code before editable region
793 //! <|fim_suffix|>
794 //! code after editable region
795 //! <|fim_middle|>
796 //! <<<<<<< CURRENT
797 //! code that
798 //! needs to<|user_cursor|>
799 //! be rewritten
800 //! =======
801 //!
802 //! Expected output (should be generated by the model):
803 //!
804 //! updated
805 //! code with
806 //! changes applied
807 //! >>>>>>> UPDATED
808
809 use super::*;
810
811 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
812 pub const SEPARATOR: &str = "=======\n";
813 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
814
815 pub fn special_tokens() -> &'static [&'static str] {
816 &[
817 "<|fim_prefix|>",
818 "<|fim_suffix|>",
819 "<|fim_middle|>",
820 "<|file_sep|>",
821 START_MARKER,
822 SEPARATOR,
823 END_MARKER,
824 CURSOR_MARKER,
825 ]
826 }
827
828 pub fn write_cursor_excerpt_section(
829 prompt: &mut String,
830 path: &Path,
831 context: &str,
832 editable_range: &Range<usize>,
833 cursor_offset: usize,
834 ) {
835 let path_str = path.to_string_lossy();
836 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
837
838 prompt.push_str("<|fim_prefix|>");
839 prompt.push_str(&context[..editable_range.start]);
840
841 prompt.push_str("<|fim_suffix|>");
842 prompt.push_str(&context[editable_range.end..]);
843 if !prompt.ends_with('\n') {
844 prompt.push('\n');
845 }
846
847 prompt.push_str("<|fim_middle|>");
848 prompt.push_str(START_MARKER);
849 prompt.push_str(&context[editable_range.start..cursor_offset]);
850 prompt.push_str(CURSOR_MARKER);
851 prompt.push_str(&context[cursor_offset..editable_range.end]);
852 if !prompt.ends_with('\n') {
853 prompt.push('\n');
854 }
855 prompt.push_str(SEPARATOR);
856 }
857}
858
859pub mod v0131_git_merge_markers_prefix {
860 //! A prompt that uses git-style merge conflict markers to represent the editable region.
861 //!
862 //! Example prompt:
863 //!
864 //! <|file_sep|>path/to/target_file.py
865 //! <|fim_prefix|>
866 //! code before editable region
867 //! <<<<<<< CURRENT
868 //! code that
869 //! needs to<|user_cursor|>
870 //! be rewritten
871 //! =======
872 //! <|fim_suffix|>
873 //! code after editable region
874 //! <|fim_middle|>
875 //!
876 //! Expected output (should be generated by the model):
877 //!
878 //! updated
879 //! code with
880 //! changes applied
881 //! >>>>>>> UPDATED
882
883 use super::*;
884
885 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
886 pub const SEPARATOR: &str = "=======\n";
887 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
888
889 pub fn special_tokens() -> &'static [&'static str] {
890 &[
891 "<|fim_prefix|>",
892 "<|fim_suffix|>",
893 "<|fim_middle|>",
894 "<|file_sep|>",
895 START_MARKER,
896 SEPARATOR,
897 END_MARKER,
898 CURSOR_MARKER,
899 ]
900 }
901
902 pub fn write_cursor_excerpt_section(
903 prompt: &mut String,
904 path: &Path,
905 context: &str,
906 editable_range: &Range<usize>,
907 cursor_offset: usize,
908 ) {
909 let path_str = path.to_string_lossy();
910 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
911
912 prompt.push_str("<|fim_prefix|>");
913 prompt.push_str(&context[..editable_range.start]);
914 prompt.push_str(START_MARKER);
915 prompt.push_str(&context[editable_range.start..cursor_offset]);
916 prompt.push_str(CURSOR_MARKER);
917 prompt.push_str(&context[cursor_offset..editable_range.end]);
918 if !prompt.ends_with('\n') {
919 prompt.push('\n');
920 }
921 prompt.push_str(SEPARATOR);
922
923 prompt.push_str("<|fim_suffix|>");
924 prompt.push_str(&context[editable_range.end..]);
925 if !prompt.ends_with('\n') {
926 prompt.push('\n');
927 }
928
929 prompt.push_str("<|fim_middle|>");
930 }
931}
932
933pub mod v0211_prefill {
934 use super::*;
935
936 pub fn special_tokens() -> &'static [&'static str] {
937 v0131_git_merge_markers_prefix::special_tokens()
938 }
939
940 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
941 let editable_region = &context[editable_range.start..editable_range.end];
942
943 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
944 let prefill_len = editable_region.floor_char_boundary(prefill_len);
945
946 // Find a token boundary to avoid splitting tokens in the prefill.
947 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
948 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
949 // the \n and consume any consecutive \n characters after it.
950 let prefill = &editable_region[..prefill_len];
951 match prefill.rfind('\n') {
952 Some(pos) => {
953 let mut end = pos + 1;
954 while end < editable_region.len()
955 && editable_region.as_bytes().get(end) == Some(&b'\n')
956 {
957 end += 1;
958 }
959 editable_region[..end].to_string()
960 }
961 // No newline found. Fall back to splitting before the last space
962 // (word-level boundary)
963 None => match prefill.rfind(' ') {
964 Some(pos) => prefill[..pos].to_string(),
965 None => prefill.to_string(),
966 },
967 }
968 }
969}
970
971pub mod hashline {
972
973 use std::fmt::Display;
974
    /// Written as the last text of the hashline cursor-excerpt section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that starts a "set" edit command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" edit command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
982
983 pub fn special_tokens() -> &'static [&'static str] {
984 return &[
985 SET_COMMAND_MARKER,
986 "<|set_range|>",
987 INSERT_COMMAND_MARKER,
988 CURSOR_MARKER,
989 "<|file_sep|>",
990 "<|fim_prefix|>",
991 "<|fim_suffix|>",
992 "<|fim_middle|>",
993 ];
994 }
995
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Line index; `write_hashline_editable_region` numbers lines from 0.
        index: usize,
        /// One-byte hash of the line's content, as computed by `hash_line`.
        hash: u8,
    }
1002
1003 impl Display for LineRef {
1004 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1005 write!(f, "{}:{:02x}", self.index, self.hash)
1006 }
1007 }
1008
1009 pub fn hash_line(line: &[u8]) -> u8 {
1010 let mut h: u8 = 0;
1011 for &byte in line {
1012 h = h.wrapping_add(byte);
1013 }
1014 return h;
1015 }
1016
1017 /// Write the hashline-encoded editable region into `out`. Each line of
1018 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1019 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1020 /// to the start of `editable_text`).
1021 pub fn write_hashline_editable_region(
1022 out: &mut String,
1023 editable_text: &str,
1024 cursor_offset_in_editable: usize,
1025 ) {
1026 let mut offset = 0;
1027 for (i, line) in editable_text.lines().enumerate() {
1028 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1029 && cursor_offset_in_editable < offset + line.len()
1030 {
1031 (
1032 &line[..cursor_offset_in_editable - offset],
1033 CURSOR_MARKER,
1034 &line[cursor_offset_in_editable - offset..],
1035 )
1036 } else {
1037 (line, "", "")
1038 };
1039 write!(
1040 out,
1041 "\n{}|{head}{cursor}{tail}",
1042 LineRef {
1043 index: i,
1044 hash: hash_line(line.as_bytes())
1045 }
1046 )
1047 .unwrap();
1048 offset += line.len() + 1;
1049 }
1050 }
1051
1052 pub fn write_cursor_excerpt_section(
1053 prompt: &mut String,
1054 path: &Path,
1055 context: &str,
1056 editable_range: &Range<usize>,
1057 cursor_offset: usize,
1058 ) {
1059 let path_str = path.to_string_lossy();
1060 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1061
1062 prompt.push_str("<|fim_prefix|>\n");
1063 prompt.push_str(&context[..editable_range.start]);
1064 prompt.push_str(START_MARKER);
1065
1066 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1067 let editable_region = &context[editable_range.clone()];
1068 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1069
1070 if !prompt.ends_with('\n') {
1071 prompt.push('\n');
1072 }
1073
1074 prompt.push_str("<|fim_suffix|>\n");
1075 prompt.push_str(&context[editable_range.end..]);
1076 if !prompt.ends_with('\n') {
1077 prompt.push('\n');
1078 }
1079
1080 prompt.push_str(END_MARKER);
1081 }
1082
    /// A single edit command parsed from the model output. `content` borrows
    /// from the model output string.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the replaced range.
            start: LineRef,
            /// Last line of the replaced range (inclusive).
            end: LineRef,
            /// Text following the command line in the model output.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` means insert before the first line.
            after: Option<LineRef>,
            /// Text following the command line in the model output.
            content: &'a str,
        },
    }
1100
1101 /// Parse a line reference like `3:c3` into a `LineRef`.
1102 fn parse_line_ref(s: &str) -> Option<LineRef> {
1103 let (idx_str, hash_str) = s.split_once(':')?;
1104 let index = idx_str.parse::<usize>().ok()?;
1105 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1106 Some(LineRef { index, hash })
1107 }
1108
1109 /// Parse the model output into a list of `EditCommand`s.
1110 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1111 let mut commands = Vec::new();
1112 let mut offset = 0usize;
1113
1114 while offset < model_output.len() {
1115 let next_nl = model_output[offset..]
1116 .find('\n')
1117 .map(|i| offset + i)
1118 .unwrap_or(model_output.len());
1119 let line = &model_output[offset..next_nl];
1120 let line_end = if next_nl < model_output.len() {
1121 next_nl + 1
1122 } else {
1123 next_nl
1124 };
1125
1126 let trimmed = line.trim();
1127 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1128 (true, spec)
1129 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1130 (false, spec)
1131 } else {
1132 offset = line_end;
1133 continue;
1134 };
1135
1136 let mut content_end = line_end;
1137 let mut scan = line_end;
1138
1139 while scan < model_output.len() {
1140 let body_nl = model_output[scan..]
1141 .find('\n')
1142 .map(|i| scan + i)
1143 .unwrap_or(model_output.len());
1144 let body_line = &model_output[scan..body_nl];
1145 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1146 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1147 {
1148 break;
1149 }
1150 scan = if body_nl < model_output.len() {
1151 body_nl + 1
1152 } else {
1153 body_nl
1154 };
1155 content_end = scan;
1156 }
1157
1158 let content = &model_output[line_end..content_end];
1159
1160 if is_set {
1161 if let Some((start_str, end_str)) = specifier.split_once('-') {
1162 if let (Some(start), Some(end)) =
1163 (parse_line_ref(start_str), parse_line_ref(end_str))
1164 {
1165 commands.push(EditCommand::Set {
1166 start,
1167 end,
1168 content,
1169 });
1170 }
1171 } else if let Some(target) = parse_line_ref(specifier) {
1172 commands.push(EditCommand::Set {
1173 start: target.clone(),
1174 end: target,
1175 content,
1176 });
1177 }
1178 } else {
1179 let after = parse_line_ref(specifier);
1180 commands.push(EditCommand::Insert { after, content });
1181 }
1182
1183 offset = scan;
1184 }
1185
1186 commands
1187 }
1188
1189 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1190 /// (as opposed to being a plain full-replacement output).
1191 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1192 /// editable region, returning the plain text content.
1193 pub fn strip_hashline_prefixes(region: &str) -> String {
1194 let mut decoded: String = region
1195 .lines()
1196 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1197 .collect::<Vec<_>>()
1198 .join("\n");
1199 if region.ends_with('\n') {
1200 decoded.push('\n');
1201 }
1202 decoded
1203 }
1204
1205 pub fn output_has_edit_commands(model_output: &str) -> bool {
1206 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1207 }
1208
1209 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1210 /// original editable region text.
1211 ///
1212 /// `editable_region` is the original text of the editable region (without hash
1213 /// prefixes). `model_output` is the raw model response containing edit commands.
1214 ///
1215 /// Returns the full replacement text for the editable region.
1216 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1217 let original_lines: Vec<&str> = editable_region.lines().collect();
1218 let old_hashes: Vec<u8> = original_lines
1219 .iter()
1220 .map(|line| hash_line(line.as_bytes()))
1221 .collect();
1222
1223 let commands = parse_edit_commands(model_output);
1224
1225 // For set operations: indexed by start line → Some((end line index, content))
1226 // For insert operations: indexed by line index → vec of content to insert after
1227 // Insert-before-first is tracked separately.
1228 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1229 let mut insert_before_first: Vec<&str> = Vec::new();
1230 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1231
1232 for command in &commands {
1233 match command {
1234 EditCommand::Set {
1235 start,
1236 end,
1237 content,
1238 } => {
1239 if start.index < old_hashes.len()
1240 && end.index < old_hashes.len()
1241 && start.index <= end.index
1242 && old_hashes[start.index] == start.hash
1243 && old_hashes[end.index] == end.hash
1244 {
1245 set_ops[start.index] = Some((end.index, *content));
1246 }
1247 }
1248 EditCommand::Insert { after, content } => match after {
1249 None => insert_before_first.push(*content),
1250 Some(line_ref) => {
1251 if line_ref.index < old_hashes.len()
1252 && old_hashes[line_ref.index] == line_ref.hash
1253 {
1254 insert_after[line_ref.index].push(*content);
1255 }
1256 }
1257 },
1258 }
1259 }
1260
1261 let mut result = String::new();
1262
1263 // Emit any insertions before the first line
1264 for content in &insert_before_first {
1265 result.push_str(content);
1266 if !content.ends_with('\n') {
1267 result.push('\n');
1268 }
1269 }
1270
1271 let mut i = 0;
1272 while i < original_lines.len() {
1273 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1274 // Replace lines i..=end_index with the replacement content
1275 result.push_str(replacement);
1276 if !replacement.is_empty() && !replacement.ends_with('\n') {
1277 result.push('\n');
1278 }
1279 // Emit any insertions after the end of this set range
1280 if *end_index < insert_after.len() {
1281 for content in &insert_after[*end_index] {
1282 result.push_str(content);
1283 if !content.ends_with('\n') {
1284 result.push('\n');
1285 }
1286 }
1287 }
1288 i = end_index + 1;
1289 } else {
1290 // Keep the original line
1291 result.push_str(original_lines[i]);
1292 result.push('\n');
1293 // Emit any insertions after this line
1294 for content in &insert_after[i] {
1295 result.push_str(content);
1296 if !content.ends_with('\n') {
1297 result.push('\n');
1298 }
1299 }
1300 i += 1;
1301 }
1302 }
1303
1304 // Preserve trailing newline behavior: if the original ended with a
1305 // newline the result already has one; if it didn't, trim the extra one
1306 // we added.
1307 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1308 result.pop();
1309 }
1310
1311 result
1312 }
1313
1314 /// Convert a unified diff patch into hashline edit commands.
1315 ///
1316 /// Parses the unified diff `patch` directly to determine which lines of
1317 /// `old_text` are deleted/replaced and what new lines are added, then emits
1318 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1319 /// `{index}:{hash}` identifiers.
1320 ///
1321 /// `cursor_offset` is an optional byte offset into the first hunk's new
1322 /// text (context + additions) where the cursor marker should be placed.
1323 pub fn patch_to_edit_commands(
1324 old_text: &str,
1325 patch: &str,
1326 cursor_offset: Option<usize>,
1327 ) -> Result<String> {
1328 let old_lines: Vec<&str> = old_text.lines().collect();
1329 let old_hashes: Vec<u8> = old_lines
1330 .iter()
1331 .map(|line| hash_line(line.as_bytes()))
1332 .collect();
1333
1334 let mut result = String::new();
1335 let mut first_hunk = true;
1336
1337 struct Hunk<'a> {
1338 line_range: Range<usize>,
1339 new_text_lines: Vec<&'a str>,
1340 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1341 }
1342
1343 // Parse the patch line by line. We only care about hunk headers,
1344 // context, deletions, and additions.
1345 let mut old_line_index: usize = 0;
1346 let mut current_hunk: Option<Hunk> = None;
1347 // Byte offset tracking within the hunk's new text for cursor placement.
1348 let mut new_text_byte_offset: usize = 0;
1349 // The line index of the last old line seen before/in the current hunk
1350 // (used for insert-after reference).
1351 let mut last_old_line_before_hunk: Option<usize> = None;
1352
1353 fn flush_hunk(
1354 hunk: Hunk,
1355 last_old_line: Option<usize>,
1356 result: &mut String,
1357 old_hashes: &[u8],
1358 ) {
1359 if hunk.line_range.is_empty() {
1360 // Pure insertion — reference the old line to insert after when in bounds.
1361 if let Some(after) = last_old_line
1362 && let Some(&hash) = old_hashes.get(after)
1363 {
1364 write!(
1365 result,
1366 "{INSERT_COMMAND_MARKER}{}\n",
1367 LineRef { index: after, hash }
1368 )
1369 .unwrap();
1370 } else {
1371 result.push_str(INSERT_COMMAND_MARKER);
1372 result.push('\n');
1373 }
1374 } else {
1375 let start = hunk.line_range.start;
1376 let end_exclusive = hunk.line_range.end;
1377 let deleted_line_count = end_exclusive.saturating_sub(start);
1378
1379 if deleted_line_count == 1 {
1380 if let Some(&hash) = old_hashes.get(start) {
1381 write!(
1382 result,
1383 "{SET_COMMAND_MARKER}{}\n",
1384 LineRef { index: start, hash }
1385 )
1386 .unwrap();
1387 } else {
1388 result.push_str(SET_COMMAND_MARKER);
1389 result.push('\n');
1390 }
1391 } else {
1392 let end_inclusive = end_exclusive - 1;
1393 match (
1394 old_hashes.get(start).copied(),
1395 old_hashes.get(end_inclusive).copied(),
1396 ) {
1397 (Some(start_hash), Some(end_hash)) => {
1398 write!(
1399 result,
1400 "{SET_COMMAND_MARKER}{}-{}\n",
1401 LineRef {
1402 index: start,
1403 hash: start_hash
1404 },
1405 LineRef {
1406 index: end_inclusive,
1407 hash: end_hash
1408 }
1409 )
1410 .unwrap();
1411 }
1412 _ => {
1413 result.push_str(SET_COMMAND_MARKER);
1414 result.push('\n');
1415 }
1416 }
1417 }
1418 }
1419 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1420 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1421 && line_offset == cursor_line_offset
1422 {
1423 result.push_str(&line[..char_offset]);
1424 result.push_str(CURSOR_MARKER);
1425 result.push_str(&line[char_offset..]);
1426 continue;
1427 }
1428
1429 result.push_str(line);
1430 }
1431 }
1432
1433 for raw_line in patch.split_inclusive('\n') {
1434 if raw_line.starts_with("@@") {
1435 // Flush any pending change hunk from a previous patch hunk.
1436 if let Some(hunk) = current_hunk.take() {
1437 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1438 }
1439
1440 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1441 // We intentionally do not trust old_start as a direct local index into `old_text`,
1442 // because some patches are produced against a larger file region and carry
1443 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1444 if first_hunk {
1445 new_text_byte_offset = 0;
1446 first_hunk = false;
1447 }
1448 continue;
1449 }
1450
1451 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1452 continue;
1453 }
1454 if raw_line.starts_with("\\ No newline") {
1455 continue;
1456 }
1457
1458 if raw_line.starts_with('-') {
1459 // Extend or start a change hunk with this deleted old line.
1460 match &mut current_hunk {
1461 Some(Hunk {
1462 line_range: range, ..
1463 }) => range.end = old_line_index + 1,
1464 None => {
1465 current_hunk = Some(Hunk {
1466 line_range: old_line_index..old_line_index + 1,
1467 new_text_lines: Vec::new(),
1468 cursor_line_offset_in_new_text: None,
1469 });
1470 }
1471 }
1472 old_line_index += 1;
1473 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1474 // Place cursor marker if cursor_offset falls within this line.
1475 let mut cursor_line_offset = None;
1476 if let Some(cursor_off) = cursor_offset
1477 && (first_hunk
1478 || cursor_off >= new_text_byte_offset
1479 && cursor_off <= new_text_byte_offset + added_content.len())
1480 {
1481 let line_offset = added_content.floor_char_boundary(
1482 cursor_off
1483 .saturating_sub(new_text_byte_offset)
1484 .min(added_content.len()),
1485 );
1486 cursor_line_offset = Some(line_offset);
1487 }
1488
1489 new_text_byte_offset += added_content.len();
1490
1491 let hunk = current_hunk.get_or_insert(Hunk {
1492 line_range: old_line_index..old_line_index,
1493 new_text_lines: vec![],
1494 cursor_line_offset_in_new_text: None,
1495 });
1496 hunk.new_text_lines.push(added_content);
1497 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1498 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1499 } else {
1500 // Context line (starts with ' ' or is empty).
1501 if let Some(hunk) = current_hunk.take() {
1502 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1503 }
1504 last_old_line_before_hunk = Some(old_line_index);
1505 old_line_index += 1;
1506 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1507 new_text_byte_offset += content.len();
1508 }
1509 }
1510
1511 // Flush final group.
1512 if let Some(hunk) = current_hunk.take() {
1513 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1514 }
1515
1516 // Trim a single trailing newline.
1517 if result.ends_with('\n') {
1518 result.pop();
1519 }
1520
1521 Ok(result)
1522 }
1523
1524 #[cfg(test)]
1525 mod tests {
1526 use super::*;
1527 use indoc::indoc;
1528
        // Table-driven check of the exact prompt text written by
        // `hashline::write_cursor_excerpt_section`. Each case supplies a context,
        // an editable byte range within it and a cursor byte offset (relative to
        // the context), and the full expected section including line hashes and
        // the position of the cursor marker.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                name: &'static str,
                context: &'static str,
                editable_range: Range<usize>,
                cursor_offset: usize,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:5c|hello<|user_cursor|> world
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        2:29|ccc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:d9|lin<|user_cursor|>e1
                        1:da|line2
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = just after 'a' in "abc" (byte 0 is the leading \n)
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:00|
                        1:26|a<|user_cursor|>bc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|ab<|user_cursor|>c
                        <|fim_suffix|>
                        def
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:1b|hé<|user_cursor|>llo
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:80|日本<|user_cursor|>語
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:6b|a🌍<|user_cursor|>b
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        1:2f|def
                        <|fim_suffix|>
                        <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        pre
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        <|fim_suffix|>
                        suf
                        <|fim_middle|>updated"},
                },
            ];

            // Render each case into a fresh prompt and compare against the expected text.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
1709
        // Table-driven check of `hashline::apply_edit_commands`. Each case applies
        // a model output containing set/insert commands to an original editable
        // region and compares the result with the expected text. The cases cover
        // valid commands as well as commands that must be ignored (hash mismatch,
        // out-of-bounds index, reversed range, malformed specifier).
        #[test]
        fn test_apply_edit_commands() {
            struct Case {
                name: &'static str,
                original: &'static str,
                model_output: &'static str,
                expected: &'static str,
            }

            let cases = vec![
                Case {
                    name: "set_single_line",
                    original: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    model_output: indoc! {"
                        <|set|>2:87
                            total += product.price;
                    "},
                    expected: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "set_range",
                    original: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:46-3:4a
                            let sum = 6;
                    "},
                    expected: indoc! {"
                        fn foo() {
                            let sum = 6;
                        }
                    "},
                },
                Case {
                    name: "insert_after_line",
                    original: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    model_output: indoc! {"
                        <|insert|>1:46
                            let y = 2;
                    "},
                    expected: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insert_before_first",
                    original: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        use std::io;
                    "},
                    expected: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "set_with_cursor_marker",
                    original: indoc! {"
                        fn main() {
                            println!();
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:34
                            eprintln!(\"<|user_cursor|>\");
                    "},
                    expected: indoc! {"
                        fn main() {
                            eprintln!(\"<|user_cursor|>\");
                        }
                    "},
                },
                Case {
                    name: "multiple_set_commands",
                    original: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    model_output: indoc! {"
                        <|set|>0:23
                        AAA
                        <|set|>2:29
                        CCC
                    "},
                    expected: indoc! {"
                        AAA
                        bbb
                        CCC
                        ddd
                    "},
                },
                Case {
                    name: "set_range_multiline_replacement",
                    original: indoc! {"
                        fn handle_submit() {
                        }

                        fn handle_keystroke() {
                    "},
                    model_output: indoc! {"
                        <|set|>0:3f-1:7d
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }
                    "},
                    expected: indoc! {"
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "no_edit_commands_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "some random text with no commands",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "wrong_hash_set_ignored",
                    original: indoc! {"
                        aaa
                        bbb
                    "},
                    model_output: indoc! {"
                        <|set|>0:ff
                        ZZZ
                    "},
                    expected: indoc! {"
                        aaa
                        bbb
                    "},
                },
                Case {
                    name: "insert_and_set_combined",
                    original: indoc! {"
                        alpha
                        beta
                        gamma
                    "},
                    model_output: indoc! {"
                        <|set|>0:06
                        ALPHA
                        <|insert|>1:9c
                        beta_extra
                    "},
                    expected: indoc! {"
                        ALPHA
                        beta
                        beta_extra
                        gamma
                    "},
                },
                Case {
                    name: "no_trailing_newline_preserved",
                    original: "hello\nworld",
                    model_output: indoc! {"
                        <|set|>0:14
                        HELLO
                    "},
                    expected: "HELLO\nworld",
                },
                Case {
                    name: "set_range_hash_mismatch_in_end_bound",
                    original: indoc! {"
                        one
                        two
                        three
                    "},
                    model_output: indoc! {"
                        <|set|>0:42-2:ff
                        ONE_TWO_THREE
                    "},
                    expected: indoc! {"
                        one
                        two
                        three
                    "},
                },
                Case {
                    name: "set_range_start_greater_than_end_ignored",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|set|>2:63-1:62
                        X
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                    "},
                },
                Case {
                    name: "insert_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|insert|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "set_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|set|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "malformed_set_command_ignored",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|set|>not-a-line-ref
                        UPDATED
                    "},
                    expected: indoc! {"
                        alpha
                        beta
                    "},
                },
                Case {
                    name: "malformed_insert_hash_treated_as_before_first",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|insert|>1:nothex
                        preamble
                    "},
                    expected: indoc! {"
                        preamble
                        alpha
                        beta
                    "},
                },
                Case {
                    name: "set_then_insert_same_target_orders_insert_after_replacement",
                    original: indoc! {"
                        cat
                        dog
                    "},
                    model_output: indoc! {"
                        <|set|>0:38
                        CAT
                        <|insert|>0:38
                        TAIL
                    "},
                    expected: indoc! {"
                        CAT
                        TAIL
                        dog
                    "},
                },
                Case {
                    // Despite the name, the expected output shows that the set
                    // starting first is applied and the overlapping second set,
                    // whose start lies inside the replaced range, is skipped.
                    name: "overlapping_set_ranges_last_wins",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        <|set|>0:61-2:63
                        FIRST
                        <|set|>1:62-3:64
                        SECOND
                    "},
                    expected: indoc! {"
                        FIRST
                        d
                    "},
                },
                Case {
                    name: "insert_before_first_and_after_line",
                    original: indoc! {"
                        a
                        b
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        HEAD
                        <|insert|>0:61
                        MID
                    "},
                    expected: indoc! {"
                        HEAD
                        a
                        MID
                        b
                    "},
                },
            ];

            // Apply each model output to its original text and compare with the expected text.
            for case in &cases {
                let result = hashline::apply_edit_commands(case.original, &case.model_output);
                assert_eq!(result, case.expected, "failed case: {}", case.name);
            }
        }
2068
2069 #[test]
2070 fn test_output_has_edit_commands() {
2071 assert!(hashline::output_has_edit_commands(&format!(
2072 "{}0:ab\nnew",
2073 SET_COMMAND_MARKER
2074 )));
2075 assert!(hashline::output_has_edit_commands(&format!(
2076 "{}0:ab\nnew",
2077 INSERT_COMMAND_MARKER
2078 )));
2079 assert!(hashline::output_has_edit_commands(&format!(
2080 "some text\n{}1:cd\nstuff",
2081 SET_COMMAND_MARKER
2082 )));
2083 assert!(!hashline::output_has_edit_commands("just plain text"));
2084 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2085 }
2086
2087 // ---- hashline::patch_to_edit_commands round-trip tests ----
2088
        // Round-trip check: each case converts a unified diff into edit commands
        // with `hashline::patch_to_edit_commands`, applies those commands to the
        // old text with `hashline::apply_edit_commands`, and expects the result
        // to equal `expected_new`. When `expected_new` contains the cursor
        // marker, its position is passed as the cursor offset.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                name: &'static str,
                old: &'static str,
                patch: &'static str,
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    // The hunk header carries line numbers (20) that do not exist
                    // in `old`; indices must come from counting patch lines.
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every case changes something, so at least one command must be emitted.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Applying the generated commands must reproduce the expected new text.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2375 }
2376}
2377
2378pub mod seed_coder {
2379 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2380 //!
2381 //! Seed-Coder uses different FIM tokens and order than Qwen:
2382 //! - SPM order: suffix comes FIRST, then prefix, then middle
2383 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2384 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2385 //!
2386 //! All context (related files, edit history) goes in the PREFIX section.
2387 //! The suffix contains only code after the editable region.
2388 //!
2389 //! Example prompt:
2390 //!
2391 //! <[fim-suffix]>
2392 //! code after editable region
2393 //! <[fim-prefix]><filename>related/file.py
2394 //! related file content
2395 //!
2396 //! <filename>edit_history
2397 //! --- a/some_file.py
2398 //! +++ b/some_file.py
2399 //! -old
2400 //! +new
2401 //!
2402 //! <filename>path/to/target_file.py
2403 //! code before editable region
2404 //! <<<<<<< CURRENT
2405 //! code that
2406 //! needs to<|user_cursor|>
2407 //! be rewritten
2408 //! =======
2409 //! <[fim-middle]>
2410 //!
2411 //! Expected output (model generates):
2412 //!
2413 //! updated
2414 //! code with
2415 //! changes applied
2416 //! >>>>>>> UPDATED
2417
2418 use super::*;
2419
    /// Token that opens the suffix section (written first in SPM order).
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written in front of a file path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the current text of the editable region.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that separates the current text from the updated text.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that closes the updated text.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2428
2429 pub fn special_tokens() -> &'static [&'static str] {
2430 &[
2431 FIM_SUFFIX,
2432 FIM_PREFIX,
2433 FIM_MIDDLE,
2434 FILE_MARKER,
2435 START_MARKER,
2436 SEPARATOR,
2437 END_MARKER,
2438 CURSOR_MARKER,
2439 ]
2440 }
2441
2442 pub fn write_cursor_excerpt_section(
2443 prompt: &mut String,
2444 path: &Path,
2445 context: &str,
2446 editable_range: &Range<usize>,
2447 cursor_offset: usize,
2448 ) {
2449 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2450 prompt.push_str(§ion);
2451 }
2452
2453 pub fn format_prompt_with_budget(
2454 path: &Path,
2455 context: &str,
2456 editable_range: &Range<usize>,
2457 cursor_offset: usize,
2458 events: &[Arc<Event>],
2459 related_files: &[RelatedFile],
2460 max_tokens: usize,
2461 ) -> String {
2462 let suffix_section = build_suffix_section(context, editable_range);
2463 let cursor_prefix_section =
2464 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2465
2466 let suffix_tokens = estimate_tokens(suffix_section.len());
2467 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2468 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2469
2470 let edit_history_section = super::format_edit_history_within_budget(
2471 events,
2472 FILE_MARKER,
2473 "edit_history",
2474 budget_after_cursor,
2475 );
2476 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2477 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2478
2479 let related_files_section = super::format_related_files_within_budget(
2480 related_files,
2481 FILE_MARKER,
2482 "",
2483 budget_after_edit_history,
2484 );
2485
2486 let mut prompt = String::new();
2487 prompt.push_str(&suffix_section);
2488 prompt.push_str(FIM_PREFIX);
2489 prompt.push_str(&related_files_section);
2490 if !related_files_section.is_empty() {
2491 prompt.push('\n');
2492 }
2493 prompt.push_str(&edit_history_section);
2494 if !edit_history_section.is_empty() {
2495 prompt.push('\n');
2496 }
2497 prompt.push_str(&cursor_prefix_section);
2498 prompt.push_str(FIM_MIDDLE);
2499 prompt
2500 }
2501
2502 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2503 let mut section = String::new();
2504 section.push_str(FIM_SUFFIX);
2505 section.push_str(&context[editable_range.end..]);
2506 if !section.ends_with('\n') {
2507 section.push('\n');
2508 }
2509 section
2510 }
2511
2512 fn build_cursor_prefix_section(
2513 path: &Path,
2514 context: &str,
2515 editable_range: &Range<usize>,
2516 cursor_offset: usize,
2517 ) -> String {
2518 let mut section = String::new();
2519 let path_str = path.to_string_lossy();
2520 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2521
2522 section.push_str(&context[..editable_range.start]);
2523 section.push_str(START_MARKER);
2524 section.push_str(&context[editable_range.start..cursor_offset]);
2525 section.push_str(CURSOR_MARKER);
2526 section.push_str(&context[cursor_offset..editable_range.end]);
2527 if !section.ends_with('\n') {
2528 section.push('\n');
2529 }
2530 section.push_str(SEPARATOR);
2531 section
2532 }
2533}
2534
2535/// The zeta1 prompt format
2536pub mod zeta1 {
2537 use super::*;
2538 use std::fmt::Write;
2539
2540 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
2541 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
2542 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
2543 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
2544
2545 const INSTRUCTION_HEADER: &str = concat!(
2546 "### Instruction:\n",
2547 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
2548 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
2549 "into account the cursor location.\n\n",
2550 "### User Edits:\n\n"
2551 );
2552 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
2553 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2554
2555 /// Formats a complete zeta1 prompt from the input events and excerpt.
2556 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2557 let mut prompt = String::with_capacity(
2558 INSTRUCTION_HEADER.len()
2559 + input_events.len()
2560 + EXCERPT_HEADER.len()
2561 + input_excerpt.len()
2562 + RESPONSE_HEADER.len(),
2563 );
2564 prompt.push_str(INSTRUCTION_HEADER);
2565 prompt.push_str(input_events);
2566 prompt.push_str(EXCERPT_HEADER);
2567 prompt.push_str(input_excerpt);
2568 prompt.push_str(RESPONSE_HEADER);
2569 prompt
2570 }
2571
2572 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2573 /// editable and context byte-offset ranges within `cursor_excerpt`.
2574 pub fn format_zeta1_from_input(
2575 input: &ZetaPromptInput,
2576 editable_range: Range<usize>,
2577 context_range: Range<usize>,
2578 ) -> String {
2579 let events = format_zeta1_events(&input.events);
2580 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2581 format_zeta1_prompt(&events, &excerpt)
2582 }
2583
2584 /// Formats events in zeta1 style (oldest first).
2585 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2586 let mut result = String::new();
2587 for event in events {
2588 let event_string = format_zeta1_event(event);
2589 if event_string.is_empty() {
2590 continue;
2591 }
2592 if !result.is_empty() {
2593 result.push_str("\n\n");
2594 }
2595 result.push_str(&event_string);
2596 }
2597 result
2598 }
2599
2600 fn format_zeta1_event(event: &Event) -> String {
2601 match event {
2602 Event::BufferChange {
2603 path,
2604 old_path,
2605 diff,
2606 ..
2607 } => {
2608 let mut prompt = String::new();
2609 if old_path != path {
2610 writeln!(
2611 prompt,
2612 "User renamed {} to {}\n",
2613 old_path.display(),
2614 path.display()
2615 )
2616 .ok();
2617 }
2618 if !diff.is_empty() {
2619 write!(
2620 prompt,
2621 "User edited {}:\n```diff\n{}\n```",
2622 path.display(),
2623 diff
2624 )
2625 .ok();
2626 }
2627 prompt
2628 }
2629 }
2630 }
2631
2632 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2633 /// within `cursor_excerpt`.
2634 fn format_zeta1_excerpt(
2635 input: &ZetaPromptInput,
2636 editable_range: Range<usize>,
2637 context_range: Range<usize>,
2638 ) -> String {
2639 let path_str = input.cursor_path.to_string_lossy();
2640 let excerpt = &*input.cursor_excerpt;
2641 let cursor_offset = input.cursor_offset_in_excerpt;
2642
2643 let mut prompt = String::new();
2644 writeln!(&mut prompt, "```{path_str}").ok();
2645
2646 let starts_at_file_beginning =
2647 input.excerpt_start_row == Some(0) && context_range.start == 0;
2648 if starts_at_file_beginning {
2649 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2650 }
2651
2652 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2653
2654 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2655 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2656 prompt.push_str(CURSOR_MARKER);
2657 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2658 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2659
2660 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2661 write!(prompt, "\n```").ok();
2662
2663 prompt
2664 }
2665
2666 /// Cleans zeta1 model output by extracting content between editable region
2667 /// markers and converting the zeta1 cursor marker to the universal one.
2668 /// Returns `None` if the output doesn't contain the expected markers.
2669 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2670 let content = output.replace(CURSOR_MARKER, "");
2671
2672 let content_start = content
2673 .find(EDITABLE_REGION_START_MARKER)
2674 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2675 .map(|pos| {
2676 if content.as_bytes().get(pos) == Some(&b'\n') {
2677 pos + 1
2678 } else {
2679 pos
2680 }
2681 })
2682 .unwrap_or(0);
2683
2684 let content_end = content
2685 .find(EDITABLE_REGION_END_MARKER)
2686 .map(|pos| {
2687 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2688 pos - 1
2689 } else {
2690 pos
2691 }
2692 })
2693 .unwrap_or(content.len());
2694
2695 if content_start > content_end {
2696 return Some(String::new());
2697 }
2698
2699 let extracted = &content[content_start..content_end];
2700
2701 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2702 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2703 let text_before_cursor = text_before_cursor
2704 .find(EDITABLE_REGION_START_MARKER)
2705 .map(|pos| {
2706 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2707 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2708 after_marker + 1
2709 } else {
2710 after_marker
2711 }
2712 })
2713 .unwrap_or(0);
2714 let offset_in_extracted = zeta1_cursor_pos
2715 .saturating_sub(text_before_cursor)
2716 .min(extracted.len());
2717 offset_in_extracted
2718 });
2719
2720 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2721 if let Some(offset) = cursor_offset {
2722 result.push_str(&extracted[..offset]);
2723 result.push_str(super::CURSOR_MARKER);
2724 result.push_str(&extracted[offset..]);
2725 } else {
2726 result.push_str(extracted);
2727 }
2728
2729 Some(result)
2730 }
2731}
2732
2733#[cfg(test)]
2734mod tests {
2735 use super::*;
2736 use indoc::indoc;
2737
2738 fn make_input(
2739 cursor_excerpt: &str,
2740 editable_range: Range<usize>,
2741 cursor_offset: usize,
2742 events: Vec<Event>,
2743 related_files: Vec<RelatedFile>,
2744 ) -> ZetaPromptInput {
2745 let context_range = 0..cursor_excerpt.len();
2746 ZetaPromptInput {
2747 cursor_path: Path::new("test.rs").into(),
2748 cursor_excerpt: cursor_excerpt.into(),
2749 cursor_offset_in_excerpt: cursor_offset,
2750 excerpt_start_row: None,
2751 events: events.into_iter().map(Arc::new).collect(),
2752 related_files,
2753 excerpt_ranges: ExcerptRanges {
2754 editable_150: editable_range.clone(),
2755 editable_180: editable_range.clone(),
2756 editable_350: editable_range,
2757 editable_150_context_350: context_range.clone(),
2758 editable_180_context_350: context_range.clone(),
2759 editable_350_context_150: context_range,
2760 ..Default::default()
2761 },
2762 experiment: None,
2763 in_open_source_repo: false,
2764 can_collect_data: false,
2765 }
2766 }
2767
2768 fn make_event(path: &str, diff: &str) -> Event {
2769 Event::BufferChange {
2770 path: Path::new(path).into(),
2771 old_path: Path::new(path).into(),
2772 diff: diff.to_string(),
2773 predicted: false,
2774 in_open_source_repo: false,
2775 }
2776 }
2777
2778 fn make_related_file(path: &str, content: &str) -> RelatedFile {
2779 RelatedFile {
2780 path: Path::new(path).into(),
2781 max_row: content.lines().count() as u32,
2782 excerpts: vec![RelatedExcerpt {
2783 row_range: 0..content.lines().count() as u32,
2784 text: content.into(),
2785 order: 0,
2786 }],
2787 in_open_source_repo: false,
2788 }
2789 }
2790
2791 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2792 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2793 }
2794
2795 #[test]
2796 fn test_no_truncation_when_within_budget() {
2797 let input = make_input(
2798 "prefix\neditable\nsuffix",
2799 7..15,
2800 10,
2801 vec![make_event("a.rs", "-old\n+new\n")],
2802 vec![make_related_file("related.rs", "fn helper() {}\n")],
2803 );
2804
2805 assert_eq!(
2806 format_with_budget(&input, 10000),
2807 indoc! {r#"
2808 <|file_sep|>related.rs
2809 fn helper() {}
2810 <|file_sep|>edit history
2811 --- a/a.rs
2812 +++ b/a.rs
2813 -old
2814 +new
2815 <|file_sep|>test.rs
2816 <|fim_prefix|>
2817 prefix
2818 <|fim_middle|>current
2819 edi<|user_cursor|>table
2820 <|fim_suffix|>
2821
2822 suffix
2823 <|fim_middle|>updated
2824 "#}
2825 );
2826 }
2827
2828 #[test]
2829 fn test_truncation_drops_edit_history_when_budget_tight() {
2830 let input = make_input(
2831 "code",
2832 0..4,
2833 2,
2834 vec![make_event("a.rs", "-x\n+y\n")],
2835 vec![
2836 make_related_file("r1.rs", "a\n"),
2837 make_related_file("r2.rs", "b\n"),
2838 ],
2839 );
2840
2841 assert_eq!(
2842 format_with_budget(&input, 10000),
2843 indoc! {r#"
2844 <|file_sep|>r1.rs
2845 a
2846 <|file_sep|>r2.rs
2847 b
2848 <|file_sep|>edit history
2849 --- a/a.rs
2850 +++ b/a.rs
2851 -x
2852 +y
2853 <|file_sep|>test.rs
2854 <|fim_prefix|>
2855 <|fim_middle|>current
2856 co<|user_cursor|>de
2857 <|fim_suffix|>
2858 <|fim_middle|>updated
2859 "#}
2860 );
2861
2862 assert_eq!(
2863 format_with_budget(&input, 50),
2864 indoc! {r#"
2865 <|file_sep|>r1.rs
2866 a
2867 <|file_sep|>r2.rs
2868 b
2869 <|file_sep|>test.rs
2870 <|fim_prefix|>
2871 <|fim_middle|>current
2872 co<|user_cursor|>de
2873 <|fim_suffix|>
2874 <|fim_middle|>updated
2875 "#}
2876 );
2877 }
2878
2879 #[test]
2880 fn test_truncation_includes_partial_excerpts() {
2881 let input = make_input(
2882 "x",
2883 0..1,
2884 0,
2885 vec![],
2886 vec![RelatedFile {
2887 path: Path::new("big.rs").into(),
2888 max_row: 30,
2889 in_open_source_repo: false,
2890 excerpts: vec![
2891 RelatedExcerpt {
2892 row_range: 0..10,
2893 text: "first excerpt\n".into(),
2894 order: 0,
2895 },
2896 RelatedExcerpt {
2897 row_range: 10..20,
2898 text: "second excerpt\n".into(),
2899 order: 0,
2900 },
2901 RelatedExcerpt {
2902 row_range: 20..30,
2903 text: "third excerpt\n".into(),
2904 order: 0,
2905 },
2906 ],
2907 }],
2908 );
2909
2910 assert_eq!(
2911 format_with_budget(&input, 10000),
2912 indoc! {r#"
2913 <|file_sep|>big.rs
2914 first excerpt
2915 ...
2916 second excerpt
2917 ...
2918 third excerpt
2919 <|file_sep|>test.rs
2920 <|fim_prefix|>
2921 <|fim_middle|>current
2922 <|user_cursor|>x
2923 <|fim_suffix|>
2924 <|fim_middle|>updated
2925 "#}
2926 );
2927
2928 assert_eq!(
2929 format_with_budget(&input, 50),
2930 indoc! {r#"
2931 <|file_sep|>big.rs
2932 first excerpt
2933 ...
2934 <|file_sep|>test.rs
2935 <|fim_prefix|>
2936 <|fim_middle|>current
2937 <|user_cursor|>x
2938 <|fim_suffix|>
2939 <|fim_middle|>updated
2940 "#}
2941 );
2942 }
2943
2944 #[test]
2945 fn test_truncation_prioritizes_lower_order_excerpts() {
2946 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
2947 // With tight budget, only the lower-order excerpt from file_b should be included.
2948 let input = make_input(
2949 "x",
2950 0..1,
2951 0,
2952 vec![],
2953 vec![
2954 RelatedFile {
2955 path: Path::new("file_a.rs").into(),
2956 max_row: 10,
2957 in_open_source_repo: false,
2958 excerpts: vec![RelatedExcerpt {
2959 row_range: 0..10,
2960 text: "low priority content\n".into(),
2961 order: 5,
2962 }],
2963 },
2964 RelatedFile {
2965 path: Path::new("file_b.rs").into(),
2966 max_row: 10,
2967 in_open_source_repo: false,
2968 excerpts: vec![RelatedExcerpt {
2969 row_range: 0..10,
2970 text: "high priority content\n".into(),
2971 order: 1,
2972 }],
2973 },
2974 ],
2975 );
2976
2977 // With large budget, both files included; rendered in stable lexicographic order.
2978 assert_eq!(
2979 format_with_budget(&input, 10000),
2980 indoc! {r#"
2981 <|file_sep|>file_a.rs
2982 low priority content
2983 <|file_sep|>file_b.rs
2984 high priority content
2985 <|file_sep|>test.rs
2986 <|fim_prefix|>
2987 <|fim_middle|>current
2988 <|user_cursor|>x
2989 <|fim_suffix|>
2990 <|fim_middle|>updated
2991 "#}
2992 );
2993
2994 // With tight budget, only file_b (lower order) fits.
2995 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
2996 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
2997 // file_a would need another 14 tokens, which doesn't fit.
2998 assert_eq!(
2999 format_with_budget(&input, 52),
3000 indoc! {r#"
3001 <|file_sep|>file_b.rs
3002 high priority content
3003 <|file_sep|>test.rs
3004 <|fim_prefix|>
3005 <|fim_middle|>current
3006 <|user_cursor|>x
3007 <|fim_suffix|>
3008 <|fim_middle|>updated
3009 "#}
3010 );
3011 }
3012
3013 #[test]
3014 fn test_truncation_drops_high_order_excerpts_within_file() {
3015 // A single file has excerpts at order 1 and order 3. With a tight budget,
3016 // only the order-1 excerpts are included while the order-3 excerpt is
3017 // dropped — even though they belong to the same file. This also preserves
3018 // the parent invariant: parent outline items have order ≤ their best
3019 // child, so they're always included when any child is.
3020 let input = make_input(
3021 "x",
3022 0..1,
3023 0,
3024 vec![],
3025 vec![RelatedFile {
3026 path: Path::new("mod.rs").into(),
3027 max_row: 30,
3028 in_open_source_repo: false,
3029 excerpts: vec![
3030 RelatedExcerpt {
3031 row_range: 0..5,
3032 text: "mod header\n".into(),
3033 order: 1,
3034 },
3035 RelatedExcerpt {
3036 row_range: 5..15,
3037 text: "important fn\n".into(),
3038 order: 1,
3039 },
3040 RelatedExcerpt {
3041 row_range: 15..30,
3042 text: "less important fn\n".into(),
3043 order: 3,
3044 },
3045 ],
3046 }],
3047 );
3048
3049 // With large budget, all three excerpts included.
3050 assert_eq!(
3051 format_with_budget(&input, 10000),
3052 indoc! {r#"
3053 <|file_sep|>mod.rs
3054 mod header
3055 ...
3056 important fn
3057 ...
3058 less important fn
3059 <|file_sep|>test.rs
3060 <|fim_prefix|>
3061 <|fim_middle|>current
3062 <|user_cursor|>x
3063 <|fim_suffix|>
3064 <|fim_middle|>updated
3065 "#}
3066 );
3067
3068 // With tight budget, only order<=1 excerpts included (header + important fn).
3069 assert_eq!(
3070 format_with_budget(&input, 55),
3071 indoc! {r#"
3072 <|file_sep|>mod.rs
3073 mod header
3074 ...
3075 important fn
3076 ...
3077 <|file_sep|>test.rs
3078 <|fim_prefix|>
3079 <|fim_middle|>current
3080 <|user_cursor|>x
3081 <|fim_suffix|>
3082 <|fim_middle|>updated
3083 "#}
3084 );
3085 }
3086
3087 #[test]
3088 fn test_truncation_drops_older_events_first() {
3089 let input = make_input(
3090 "x",
3091 0..1,
3092 0,
3093 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
3094 vec![],
3095 );
3096
3097 assert_eq!(
3098 format_with_budget(&input, 10000),
3099 indoc! {r#"
3100 <|file_sep|>edit history
3101 --- a/old.rs
3102 +++ b/old.rs
3103 -1
3104 --- a/new.rs
3105 +++ b/new.rs
3106 -2
3107 <|file_sep|>test.rs
3108 <|fim_prefix|>
3109 <|fim_middle|>current
3110 <|user_cursor|>x
3111 <|fim_suffix|>
3112 <|fim_middle|>updated
3113 "#}
3114 );
3115
3116 assert_eq!(
3117 format_with_budget(&input, 55),
3118 indoc! {r#"
3119 <|file_sep|>edit history
3120 --- a/new.rs
3121 +++ b/new.rs
3122 -2
3123 <|file_sep|>test.rs
3124 <|fim_prefix|>
3125 <|fim_middle|>current
3126 <|user_cursor|>x
3127 <|fim_suffix|>
3128 <|fim_middle|>updated
3129 "#}
3130 );
3131 }
3132
3133 #[test]
3134 fn test_cursor_excerpt_always_included_with_minimal_budget() {
3135 let input = make_input(
3136 "fn main() {}",
3137 0..12,
3138 3,
3139 vec![make_event("a.rs", "-old\n+new\n")],
3140 vec![make_related_file("related.rs", "helper\n")],
3141 );
3142
3143 assert_eq!(
3144 format_with_budget(&input, 30),
3145 indoc! {r#"
3146 <|file_sep|>test.rs
3147 <|fim_prefix|>
3148 <|fim_middle|>current
3149 fn <|user_cursor|>main() {}
3150 <|fim_suffix|>
3151 <|fim_middle|>updated
3152 "#}
3153 );
3154 }
3155
3156 fn format_seed_coder(input: &ZetaPromptInput) -> String {
3157 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3158 }
3159
3160 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3161 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3162 }
3163
3164 #[test]
3165 fn test_seed_coder_basic_format() {
3166 let input = make_input(
3167 "prefix\neditable\nsuffix",
3168 7..15,
3169 10,
3170 vec![make_event("a.rs", "-old\n+new\n")],
3171 vec![make_related_file("related.rs", "fn helper() {}\n")],
3172 );
3173
3174 assert_eq!(
3175 format_seed_coder(&input),
3176 indoc! {r#"
3177 <[fim-suffix]>
3178 suffix
3179 <[fim-prefix]><filename>related.rs
3180 fn helper() {}
3181
3182 <filename>edit_history
3183 --- a/a.rs
3184 +++ b/a.rs
3185 -old
3186 +new
3187
3188 <filename>test.rs
3189 prefix
3190 <<<<<<< CURRENT
3191 edi<|user_cursor|>table
3192 =======
3193 <[fim-middle]>"#}
3194 );
3195 }
3196
3197 #[test]
3198 fn test_seed_coder_no_context() {
3199 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
3200
3201 assert_eq!(
3202 format_seed_coder(&input),
3203 indoc! {r#"
3204 <[fim-suffix]>
3205 after
3206 <[fim-prefix]><filename>test.rs
3207 before
3208 <<<<<<< CURRENT
3209 mid<|user_cursor|>dle
3210 =======
3211 <[fim-middle]>"#}
3212 );
3213 }
3214
3215 #[test]
3216 fn test_seed_coder_truncation_drops_context() {
3217 let input = make_input(
3218 "code",
3219 0..4,
3220 2,
3221 vec![make_event("a.rs", "-x\n+y\n")],
3222 vec![make_related_file("r1.rs", "content\n")],
3223 );
3224
3225 // With large budget, everything is included
3226 assert_eq!(
3227 format_seed_coder(&input),
3228 indoc! {r#"
3229 <[fim-suffix]>
3230 <[fim-prefix]><filename>r1.rs
3231 content
3232
3233 <filename>edit_history
3234 --- a/a.rs
3235 +++ b/a.rs
3236 -x
3237 +y
3238
3239 <filename>test.rs
3240 <<<<<<< CURRENT
3241 co<|user_cursor|>de
3242 =======
3243 <[fim-middle]>"#}
3244 );
3245
3246 // With tight budget, context is dropped but cursor section remains
3247 assert_eq!(
3248 format_seed_coder_with_budget(&input, 30),
3249 indoc! {r#"
3250 <[fim-suffix]>
3251 <[fim-prefix]><filename>test.rs
3252 <<<<<<< CURRENT
3253 co<|user_cursor|>de
3254 =======
3255 <[fim-middle]>"#}
3256 );
3257 }
3258
3259 #[test]
3260 fn test_seed_coder_truncation_prioritizes_lower_order() {
3261 let input = make_input(
3262 "code",
3263 0..4,
3264 2,
3265 vec![],
3266 vec![
3267 RelatedFile {
3268 path: Path::new("low_prio.rs").into(),
3269 max_row: 5,
3270 in_open_source_repo: false,
3271 excerpts: vec![RelatedExcerpt {
3272 row_range: 0..5,
3273 text: "low prio\n".into(),
3274 order: 10,
3275 }],
3276 },
3277 RelatedFile {
3278 path: Path::new("high_prio.rs").into(),
3279 max_row: 5,
3280 in_open_source_repo: false,
3281 excerpts: vec![RelatedExcerpt {
3282 row_range: 0..5,
3283 text: "high prio\n".into(),
3284 order: 1,
3285 }],
3286 },
3287 ],
3288 );
3289
3290 // With large budget, both included; rendered in stable lexicographic order.
3291 assert_eq!(
3292 format_seed_coder(&input),
3293 indoc! {r#"
3294 <[fim-suffix]>
3295 <[fim-prefix]><filename>low_prio.rs
3296 low prio
3297 <filename>high_prio.rs
3298 high prio
3299
3300 <filename>test.rs
3301 <<<<<<< CURRENT
3302 co<|user_cursor|>de
3303 =======
3304 <[fim-middle]>"#}
3305 );
3306
3307 // With tight budget, only high_prio included.
3308 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
3309 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
3310 assert_eq!(
3311 format_seed_coder_with_budget(&input, 44),
3312 indoc! {r#"
3313 <[fim-suffix]>
3314 <[fim-prefix]><filename>high_prio.rs
3315 high prio
3316
3317 <filename>test.rs
3318 <<<<<<< CURRENT
3319 co<|user_cursor|>de
3320 =======
3321 <[fim-middle]>"#}
3322 );
3323 }
3324
3325 #[test]
3326 fn test_seed_coder_clean_output() {
3327 let output_with_marker = "new code\n>>>>>>> UPDATED\n";
3328 let output_without_marker = "new code\n";
3329
3330 assert_eq!(
3331 clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
3332 "new code\n"
3333 );
3334 assert_eq!(
3335 clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
3336 "new code\n"
3337 );
3338 }
3339
3340 #[test]
3341 fn test_format_zeta1_from_input_basic() {
3342 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
3343 let input = ZetaPromptInput {
3344 cursor_path: Path::new("src/main.rs").into(),
3345 cursor_excerpt: excerpt.into(),
3346 cursor_offset_in_excerpt: 30,
3347 excerpt_start_row: Some(0),
3348 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3349 related_files: vec![],
3350 excerpt_ranges: ExcerptRanges {
3351 editable_150: 15..41,
3352 editable_180: 15..41,
3353 editable_350: 15..41,
3354 editable_150_context_350: 0..excerpt.len(),
3355 editable_180_context_350: 0..excerpt.len(),
3356 editable_350_context_150: 0..excerpt.len(),
3357 ..Default::default()
3358 },
3359 experiment: None,
3360 in_open_source_repo: false,
3361 can_collect_data: false,
3362 };
3363
3364 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3365
3366 assert_eq!(
3367 prompt,
3368 concat!(
3369 "### Instruction:\n",
3370 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3371 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3372 "into account the cursor location.\n",
3373 "\n",
3374 "### User Edits:\n",
3375 "\n",
3376 "User edited other.rs:\n",
3377 "```diff\n",
3378 "-old\n",
3379 "+new\n",
3380 "\n",
3381 "```\n",
3382 "\n",
3383 "### User Excerpt:\n",
3384 "\n",
3385 "```src/main.rs\n",
3386 "<|start_of_file|>\n",
3387 "fn before() {}\n",
3388 "<|editable_region_start|>\n",
3389 "fn foo() {\n",
3390 " <|user_cursor_is_here|>let x = 1;\n",
3391 "\n",
3392 "<|editable_region_end|>}\n",
3393 "fn after() {}\n",
3394 "\n",
3395 "```\n",
3396 "\n",
3397 "### Response:\n",
3398 ),
3399 );
3400 }
3401
3402 #[test]
3403 fn test_format_zeta1_from_input_no_start_of_file() {
3404 let excerpt = "fn foo() {\n let x = 1;\n}\n";
3405 let input = ZetaPromptInput {
3406 cursor_path: Path::new("src/main.rs").into(),
3407 cursor_excerpt: excerpt.into(),
3408 cursor_offset_in_excerpt: 15,
3409 excerpt_start_row: Some(10),
3410 events: vec![],
3411 related_files: vec![],
3412 excerpt_ranges: ExcerptRanges {
3413 editable_150: 0..28,
3414 editable_180: 0..28,
3415 editable_350: 0..28,
3416 editable_150_context_350: 0..28,
3417 editable_180_context_350: 0..28,
3418 editable_350_context_150: 0..28,
3419 ..Default::default()
3420 },
3421 experiment: None,
3422 in_open_source_repo: false,
3423 can_collect_data: false,
3424 };
3425
3426 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3427
3428 assert_eq!(
3429 prompt,
3430 concat!(
3431 "### Instruction:\n",
3432 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3433 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3434 "into account the cursor location.\n",
3435 "\n",
3436 "### User Edits:\n",
3437 "\n",
3438 "\n",
3439 "\n",
3440 "### User Excerpt:\n",
3441 "\n",
3442 "```src/main.rs\n",
3443 "<|editable_region_start|>\n",
3444 "fn foo() {\n",
3445 " <|user_cursor_is_here|>let x = 1;\n",
3446 "}\n",
3447 "\n",
3448 "<|editable_region_end|>\n",
3449 "```\n",
3450 "\n",
3451 "### Response:\n",
3452 ),
3453 );
3454 }
3455
3456 #[test]
3457 fn test_format_zeta1_from_input_with_sub_ranges() {
3458 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
3459 let editable_range = 10..37;
3460 let context_range = 0..excerpt.len();
3461
3462 let input = ZetaPromptInput {
3463 cursor_path: Path::new("test.rs").into(),
3464 cursor_excerpt: excerpt.into(),
3465 cursor_offset_in_excerpt: 25,
3466 excerpt_start_row: Some(0),
3467 events: vec![],
3468 related_files: vec![],
3469 excerpt_ranges: ExcerptRanges {
3470 editable_150: editable_range.clone(),
3471 editable_180: editable_range.clone(),
3472 editable_350: editable_range.clone(),
3473 editable_150_context_350: context_range.clone(),
3474 editable_180_context_350: context_range.clone(),
3475 editable_350_context_150: context_range.clone(),
3476 ..Default::default()
3477 },
3478 experiment: None,
3479 in_open_source_repo: false,
3480 can_collect_data: false,
3481 };
3482
3483 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3484
3485 assert_eq!(
3486 prompt,
3487 concat!(
3488 "### Instruction:\n",
3489 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3490 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3491 "into account the cursor location.\n",
3492 "\n",
3493 "### User Edits:\n",
3494 "\n",
3495 "\n",
3496 "\n",
3497 "### User Excerpt:\n",
3498 "\n",
3499 "```test.rs\n",
3500 "<|start_of_file|>\n",
3501 "// prefix\n",
3502 "<|editable_region_start|>\n",
3503 "fn foo() {\n",
3504 " <|user_cursor_is_here|>let x = 1;\n",
3505 "}\n",
3506 "<|editable_region_end|>\n",
3507 "// suffix\n",
3508 "\n",
3509 "```\n",
3510 "\n",
3511 "### Response:\n",
3512 ),
3513 );
3514 }
3515
3516 #[test]
3517 fn test_clean_zeta1_model_output_basic() {
3518 let output = indoc! {"
3519 <|editable_region_start|>
3520 fn main() {
3521 println!(\"hello\");
3522 }
3523 <|editable_region_end|>
3524 "};
3525
3526 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3527 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
3528 }
3529
3530 #[test]
3531 fn test_clean_zeta1_model_output_with_cursor() {
3532 let output = indoc! {"
3533 <|editable_region_start|>
3534 fn main() {
3535 <|user_cursor_is_here|>println!(\"hello\");
3536 }
3537 <|editable_region_end|>
3538 "};
3539
3540 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3541 assert_eq!(
3542 cleaned,
3543 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
3544 );
3545 }
3546
3547 #[test]
3548 fn test_clean_zeta1_model_output_no_markers() {
3549 let output = "fn main() {}\n";
3550 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3551 assert_eq!(cleaned, "fn main() {}\n");
3552 }
3553
3554 #[test]
3555 fn test_clean_zeta1_model_output_empty_region() {
3556 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3557 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3558 assert_eq!(cleaned, "");
3559 }
3560}