1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker text written into the cursor excerpt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Estimates a token count from a byte length, assuming three bytes per
/// token and rounding down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Shrinks a token budget to 90% of its value (rounded down), leaving
/// some slack to avoid overflow.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const USABLE_FRACTION: f64 = 0.9;
    let scaled = max_tokens as f64 * USABLE_FRACTION;
    scaled.floor() as usize
}
32
/// Everything needed to format an edit-prediction prompt for one cursor
/// position, and to map the model's output back onto the excerpt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the cursor
    /// excerpt section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. Editable and context ranges are byte
    /// ranges into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to excerpt-relative rows before related-file excerpts
    /// are filtered against the cursor's context range. When `None`, no such
    /// filtering is performed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include in the prompt; `None` is treated
    /// as an empty list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// NOTE(review): presumably diagnostics for the buffer containing the
    /// cursor; not consumed by the formatting code visible in this file.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    /// They are used only when `syntax_ranges` is `None`.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
63
/// Identifies a prompt/output format version.
///
/// The variant selects the special tokens, token limits, stop tokens,
/// excerpt ranges, cursor-section layout and output parsing used by the
/// functions in this file. `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
97
98impl std::fmt::Display for ZetaFormat {
99 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100 write!(f, "{}", <&'static str>::from(self))
101 }
102}
103
104impl ZetaFormat {
105 pub fn parse(format_name: &str) -> Result<Self> {
106 let mut results = ZetaFormat::iter().filter(|version| {
107 <&'static str>::from(version)
108 .to_lowercase()
109 .contains(&format_name.to_lowercase())
110 });
111 let Some(result) = results.next() else {
112 anyhow::bail!(
113 "`{format_name}` did not match any of:\n{}",
114 Self::options_as_string()
115 );
116 };
117 if results.next().is_some() {
118 anyhow::bail!(
119 "`{format_name}` matched more than one of:\n{}",
120 Self::options_as_string()
121 );
122 }
123 Ok(result)
124 }
125
126 pub fn options_as_string() -> String {
127 ZetaFormat::iter()
128 .map(|format| format!("- {}\n", <&'static str>::from(format)))
129 .collect::<Vec<_>>()
130 .concat()
131 }
132}
133
/// An edit-history event that can be rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a unified-diff
    /// style entry.
    BufferChange {
        /// Path written on the `+++ b` line.
        path: Arc<Path>,
        /// Path written on the `--- a` line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the entry is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Not written into the prompt; exposed through
        /// `Event::in_open_source_repo`.
        in_open_source_repo: bool,
    },
}
145
146impl Event {
147 pub fn in_open_source_repo(&self) -> bool {
148 match self {
149 Event::BufferChange {
150 in_open_source_repo,
151 ..
152 } => *in_open_source_repo,
153 }
154 }
155}
156
157pub fn write_event(prompt: &mut String, event: &Event) {
158 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
159 for component in path.components() {
160 prompt.push('/');
161 write!(prompt, "{}", component.as_os_str().display()).ok();
162 }
163 }
164 match event {
165 Event::BufferChange {
166 path,
167 old_path,
168 diff,
169 predicted,
170 in_open_source_repo: _,
171 } => {
172 if *predicted {
173 prompt.push_str("// User accepted prediction:\n");
174 }
175 prompt.push_str("--- a");
176 write_path_as_unix_str(prompt, old_path.as_ref());
177 prompt.push_str("\n+++ b");
178 write_path_as_unix_str(prompt, path.as_ref());
179 prompt.push('\n');
180 prompt.push_str(diff);
181 }
182 }
183}
184
/// A diagnostic attached to a prompt input, together with a text snippet.
///
/// NOTE(review): field meanings below are inferred from names and types
/// only; this file's visible code never reads them. Confirm with the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Presumably a numeric severity level; `None` when unknown.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Presumably the source text surrounding the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet`.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's range as offsets within `snippet`.
    pub diagnostic_range_in_snippet: Range<usize>,
}
193
/// A file other than (or overlapping) the cursor file, with the excerpts
/// that may be rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file header when the file is rendered.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose row range ends before this
    /// row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file; files left with no excerpts are dropped by
    /// `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
202
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim into the prompt.
    pub text: Arc<str>,
    /// Inclusion priority: when the token budget is limited, excerpts with
    /// lower values are admitted first.
    #[serde(default)]
    pub order: usize,
}
210
211pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
212 special_tokens_for_format(format)
213 .iter()
214 .any(|token| input.cursor_excerpt.contains(token))
215}
216
217pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
218 format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
219}
220
221pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
222 match format {
223 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
224 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
225 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
226 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
227 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
228 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
229 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
230 ZetaFormat::v0226Hashline => hashline::special_tokens(),
231 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
232 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
233 ZetaFormat::V0316SeedMultiRegions => {
234 static TOKENS: &[&str] = &[
235 seed_coder::FIM_SUFFIX,
236 seed_coder::FIM_PREFIX,
237 seed_coder::FIM_MIDDLE,
238 seed_coder::FILE_MARKER,
239 multi_region::V0316_END_MARKER,
240 CURSOR_MARKER,
241 multi_region::MARKER_TAG_PREFIX,
242 ];
243 TOKENS
244 }
245 ZetaFormat::V0317SeedMultiRegions => {
246 static TOKENS: &[&str] = &[
247 seed_coder::FIM_SUFFIX,
248 seed_coder::FIM_PREFIX,
249 seed_coder::FIM_MIDDLE,
250 seed_coder::FILE_MARKER,
251 multi_region::V0317_END_MARKER,
252 CURSOR_MARKER,
253 multi_region::RELATIVE_MARKER_TAG_PREFIX,
254 ];
255 TOKENS
256 }
257 ZetaFormat::V0306SeedMultiRegions => {
258 static TOKENS: &[&str] = &[
259 seed_coder::FIM_SUFFIX,
260 seed_coder::FIM_PREFIX,
261 seed_coder::FIM_MIDDLE,
262 seed_coder::FILE_MARKER,
263 seed_coder::START_MARKER,
264 seed_coder::SEPARATOR,
265 seed_coder::END_MARKER,
266 CURSOR_MARKER,
267 multi_region::MARKER_TAG_PREFIX,
268 ];
269 TOKENS
270 }
271 }
272}
273
274/// Returns the (editable_token_limit, context_token_limit) for a given format.
275pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
276 match format {
277 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
278 ZetaFormat::V0114180EditableRegion => (180, 350),
279 ZetaFormat::V0120GitMergeMarkers
280 | ZetaFormat::V0131GitMergeMarkersPrefix
281 | ZetaFormat::V0211Prefill
282 | ZetaFormat::V0211SeedCoder
283 | ZetaFormat::v0226Hashline
284 | ZetaFormat::V0306SeedMultiRegions
285 | ZetaFormat::V0316SeedMultiRegions
286 | ZetaFormat::V0317SeedMultiRegions
287 | ZetaFormat::V0304SeedNoEdits => (350, 150),
288 ZetaFormat::V0304VariableEdit => (1024, 0),
289 }
290}
291
292pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
293 match format {
294 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
295 ZetaFormat::V0112MiddleAtEnd
296 | ZetaFormat::V0113Ordered
297 | ZetaFormat::V0114180EditableRegion
298 | ZetaFormat::V0120GitMergeMarkers
299 | ZetaFormat::V0131GitMergeMarkersPrefix
300 | ZetaFormat::V0211Prefill
301 | ZetaFormat::V0211SeedCoder
302 | ZetaFormat::V0304VariableEdit
303 | ZetaFormat::V0306SeedMultiRegions
304 | ZetaFormat::V0304SeedNoEdits => &[],
305 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
306 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
307 }
308}
309
310pub fn excerpt_ranges_for_format(
311 format: ZetaFormat,
312 ranges: &ExcerptRanges,
313) -> (Range<usize>, Range<usize>) {
314 match format {
315 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
316 ranges.editable_150.clone(),
317 ranges.editable_150_context_350.clone(),
318 ),
319 ZetaFormat::V0114180EditableRegion => (
320 ranges.editable_180.clone(),
321 ranges.editable_180_context_350.clone(),
322 ),
323 ZetaFormat::V0120GitMergeMarkers
324 | ZetaFormat::V0131GitMergeMarkersPrefix
325 | ZetaFormat::V0211Prefill
326 | ZetaFormat::V0211SeedCoder
327 | ZetaFormat::v0226Hashline
328 | ZetaFormat::V0304SeedNoEdits
329 | ZetaFormat::V0306SeedMultiRegions
330 | ZetaFormat::V0316SeedMultiRegions
331 | ZetaFormat::V0317SeedMultiRegions => (
332 ranges.editable_350.clone(),
333 ranges.editable_350_context_150.clone(),
334 ),
335 ZetaFormat::V0304VariableEdit => {
336 let context = ranges
337 .editable_350_context_1024
338 .clone()
339 .or(ranges.editable_350_context_512.clone())
340 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
341 (context.clone(), context)
342 }
343 }
344}
345
346pub fn write_cursor_excerpt_section_for_format(
347 format: ZetaFormat,
348 prompt: &mut String,
349 path: &Path,
350 context: &str,
351 editable_range: &Range<usize>,
352 cursor_offset: usize,
353) {
354 match format {
355 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
356 prompt,
357 path,
358 context,
359 editable_range,
360 cursor_offset,
361 ),
362 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
363 v0113_ordered::write_cursor_excerpt_section(
364 prompt,
365 path,
366 context,
367 editable_range,
368 cursor_offset,
369 )
370 }
371 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
372 prompt,
373 path,
374 context,
375 editable_range,
376 cursor_offset,
377 ),
378 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
379 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
380 prompt,
381 path,
382 context,
383 editable_range,
384 cursor_offset,
385 )
386 }
387 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
388 seed_coder::write_cursor_excerpt_section(
389 prompt,
390 path,
391 context,
392 editable_range,
393 cursor_offset,
394 )
395 }
396 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
397 prompt,
398 path,
399 context,
400 editable_range,
401 cursor_offset,
402 ),
403 ZetaFormat::V0304VariableEdit => {
404 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
405 }
406 ZetaFormat::V0306SeedMultiRegions => {
407 prompt.push_str(&build_v0306_cursor_prefix(
408 path,
409 context,
410 editable_range,
411 cursor_offset,
412 ));
413 }
414 ZetaFormat::V0316SeedMultiRegions => {
415 prompt.push_str(&build_v0316_cursor_prefix(
416 path,
417 context,
418 editable_range,
419 cursor_offset,
420 ));
421 }
422 ZetaFormat::V0317SeedMultiRegions => {
423 prompt.push_str(&build_v0317_cursor_prefix(
424 path,
425 context,
426 editable_range,
427 cursor_offset,
428 ));
429 }
430 }
431}
432
433fn build_v0306_cursor_prefix(
434 path: &Path,
435 context: &str,
436 editable_range: &Range<usize>,
437 cursor_offset: usize,
438) -> String {
439 let mut section = String::new();
440 let path_str = path.to_string_lossy();
441 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
442
443 section.push_str(&context[..editable_range.start]);
444 section.push_str(seed_coder::START_MARKER);
445
446 let editable_text = &context[editable_range.clone()];
447 let cursor_in_editable = cursor_offset - editable_range.start;
448 multi_region::write_editable_with_markers(
449 &mut section,
450 editable_text,
451 cursor_in_editable,
452 CURSOR_MARKER,
453 );
454
455 if !section.ends_with('\n') {
456 section.push('\n');
457 }
458 section.push_str(seed_coder::SEPARATOR);
459 section
460}
461
462fn build_v0316_cursor_prefix(
463 path: &Path,
464 context: &str,
465 editable_range: &Range<usize>,
466 cursor_offset: usize,
467) -> String {
468 let mut section = String::new();
469 let path_str = path.to_string_lossy();
470 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
471
472 section.push_str(&context[..editable_range.start]);
473
474 let editable_text = &context[editable_range.clone()];
475 let cursor_in_editable = cursor_offset - editable_range.start;
476 multi_region::write_editable_with_markers_v0316(
477 &mut section,
478 editable_text,
479 cursor_in_editable,
480 CURSOR_MARKER,
481 );
482
483 if !section.ends_with('\n') {
484 section.push('\n');
485 }
486 section
487}
488
489fn build_v0317_cursor_prefix(
490 path: &Path,
491 context: &str,
492 editable_range: &Range<usize>,
493 cursor_offset: usize,
494) -> String {
495 let mut section = String::new();
496 let path_str = path.to_string_lossy();
497 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
498
499 section.push_str(&context[..editable_range.start]);
500
501 let editable_text = &context[editable_range.clone()];
502 let cursor_in_editable = cursor_offset - editable_range.start;
503 multi_region::write_editable_with_markers_v0317(
504 &mut section,
505 editable_text,
506 cursor_in_editable,
507 CURSOR_MARKER,
508 );
509
510 if !section.ends_with('\n') {
511 section.push('\n');
512 }
513 section
514}
515
/// Converts a byte range within `text` into a half-open range of rows
/// (0-based, counted by `\n`).
///
/// The start row is the number of newlines before `range.start`. The end
/// row is exclusive: when the range stops in the middle of a line (the text
/// up to `range.end` does not end with `\n`), that partial line is included.
///
/// # Panics
///
/// Panics if the range is out of bounds, inverted, or not on UTF-8
/// character boundaries of `text`.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let Range { start, end } = range;
    let newline_count = |s: &str| s.matches('\n').count() as u32;

    let start_row = newline_count(&text[..start]);
    let spanned_rows = newline_count(&text[start..end]);
    // A range that stops mid-line still covers that line.
    let ends_mid_line = !text[..end].ends_with('\n');

    start_row..start_row + spanned_rows + u32::from(ends_mid_line)
}
524
525pub fn format_prompt_with_budget_for_format(
526 input: &ZetaPromptInput,
527 format: ZetaFormat,
528 max_tokens: usize,
529) -> Option<String> {
530 let (context, editable_range, context_range, cursor_offset) =
531 resolve_cursor_region(input, format);
532 let path = &*input.cursor_path;
533
534 let empty_files = Vec::new();
535 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
536 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
537 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
538 let row_range = relative_row_range.start + cursor_excerpt_start_row
539 ..relative_row_range.end + cursor_excerpt_start_row;
540 &filter_redundant_excerpts(
541 input_related_files.to_vec(),
542 input.cursor_path.as_ref(),
543 row_range,
544 )
545 } else {
546 input_related_files
547 };
548
549 let prompt = match format {
550 ZetaFormat::V0211SeedCoder
551 | ZetaFormat::V0304SeedNoEdits
552 | ZetaFormat::V0306SeedMultiRegions
553 | ZetaFormat::V0316SeedMultiRegions
554 | ZetaFormat::V0317SeedMultiRegions => {
555 let mut cursor_section = String::new();
556 write_cursor_excerpt_section_for_format(
557 format,
558 &mut cursor_section,
559 path,
560 context,
561 &editable_range,
562 cursor_offset,
563 );
564
565 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
566 seed_coder::assemble_fim_prompt(
567 context,
568 &editable_range,
569 &cursor_section,
570 &input.events,
571 related_files,
572 budget_with_margin,
573 )
574 }
575 _ => {
576 let mut cursor_section = String::new();
577 write_cursor_excerpt_section_for_format(
578 format,
579 &mut cursor_section,
580 path,
581 context,
582 &editable_range,
583 cursor_offset,
584 );
585
586 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
587 let cursor_tokens = estimate_tokens(cursor_section.len());
588 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
589
590 let edit_history_section = format_edit_history_within_budget(
591 &input.events,
592 "<|file_sep|>",
593 "edit history",
594 remaining_budget,
595 max_edit_event_count_for_format(&format),
596 );
597 let edit_history_tokens = estimate_tokens(edit_history_section.len());
598 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
599
600 let related_files_section = format_related_files_within_budget(
601 &related_files,
602 "<|file_sep|>",
603 "",
604 remaining_budget,
605 );
606
607 let mut prompt = String::new();
608 prompt.push_str(&related_files_section);
609 prompt.push_str(&edit_history_section);
610 prompt.push_str(&cursor_section);
611 prompt
612 }
613 };
614 let prompt_tokens = estimate_tokens(prompt.len());
615 if prompt_tokens > max_tokens {
616 return None;
617 }
618 return Some(prompt);
619}
620
621pub fn filter_redundant_excerpts(
622 mut related_files: Vec<RelatedFile>,
623 cursor_path: &Path,
624 cursor_row_range: Range<u32>,
625) -> Vec<RelatedFile> {
626 for file in &mut related_files {
627 if file.path.as_ref() == cursor_path {
628 file.excerpts.retain(|excerpt| {
629 excerpt.row_range.start < cursor_row_range.start
630 || excerpt.row_range.end > cursor_row_range.end
631 });
632 }
633 }
634 related_files.retain(|file| !file.excerpts.is_empty());
635 related_files
636}
637
638pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
639 match format {
640 ZetaFormat::V0112MiddleAtEnd
641 | ZetaFormat::V0113Ordered
642 | ZetaFormat::V0114180EditableRegion
643 | ZetaFormat::V0120GitMergeMarkers
644 | ZetaFormat::V0131GitMergeMarkersPrefix
645 | ZetaFormat::V0211Prefill
646 | ZetaFormat::V0211SeedCoder
647 | ZetaFormat::v0226Hashline
648 | ZetaFormat::V0304SeedNoEdits
649 | ZetaFormat::V0304VariableEdit
650 | ZetaFormat::V0306SeedMultiRegions
651 | ZetaFormat::V0316SeedMultiRegions
652 | ZetaFormat::V0317SeedMultiRegions => 6,
653 }
654}
655
656pub fn get_prefill_for_format(
657 format: ZetaFormat,
658 context: &str,
659 editable_range: &Range<usize>,
660) -> String {
661 match format {
662 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
663 ZetaFormat::V0112MiddleAtEnd
664 | ZetaFormat::V0113Ordered
665 | ZetaFormat::V0114180EditableRegion
666 | ZetaFormat::V0120GitMergeMarkers
667 | ZetaFormat::V0131GitMergeMarkersPrefix
668 | ZetaFormat::V0211SeedCoder
669 | ZetaFormat::v0226Hashline
670 | ZetaFormat::V0304VariableEdit => String::new(),
671 ZetaFormat::V0304SeedNoEdits
672 | ZetaFormat::V0306SeedMultiRegions
673 | ZetaFormat::V0316SeedMultiRegions
674 | ZetaFormat::V0317SeedMultiRegions => String::new(),
675 }
676}
677
678pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
679 match format {
680 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
681 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
682 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
683 ZetaFormat::V0211SeedCoder
684 | ZetaFormat::V0304SeedNoEdits
685 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
686 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
687 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
688 ZetaFormat::V0112MiddleAtEnd
689 | ZetaFormat::V0113Ordered
690 | ZetaFormat::V0114180EditableRegion
691 | ZetaFormat::v0226Hashline
692 | ZetaFormat::V0304VariableEdit => None,
693 }
694}
695
696pub fn encode_patch_as_output_for_format(
697 format: ZetaFormat,
698 old_editable_region: &str,
699 patch: &str,
700 cursor_offset: Option<usize>,
701) -> Result<Option<String>> {
702 match format {
703 ZetaFormat::v0226Hashline => {
704 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
705 }
706 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
707 old_editable_region,
708 patch,
709 cursor_offset,
710 )
711 .map(Some),
712 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
713 Ok(seed_coder::no_edits(patch))
714 }
715 ZetaFormat::V0316SeedMultiRegions => {
716 let empty_patch = patch.lines().count() <= 3;
717 if empty_patch {
718 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
719 let marker_num =
720 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
721 let tag = multi_region::marker_tag(marker_num);
722 Ok(Some(format!(
723 "{tag}{tag}{}",
724 multi_region::V0316_END_MARKER
725 )))
726 } else {
727 Ok(None)
728 }
729 }
730 ZetaFormat::V0317SeedMultiRegions => {
731 let empty_patch = patch.lines().count() <= 3;
732 if empty_patch {
733 let tag = multi_region::marker_tag_relative(0);
734 Ok(Some(format!(
735 "{tag}{tag}{}",
736 multi_region::V0317_END_MARKER
737 )))
738 } else {
739 Ok(None)
740 }
741 }
742 _ => Ok(None),
743 }
744}
745
/// Result of parsing model output: replacement text plus the range of the
/// cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
752
753/// Parse model output for the given zeta format
754pub fn parse_zeta2_model_output(
755 output: &str,
756 format: ZetaFormat,
757 prompt_inputs: &ZetaPromptInput,
758) -> Result<ParsedOutput> {
759 let output = match output_end_marker_for_format(format) {
760 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
761 None => output,
762 };
763
764 let (context, editable_range_in_context, context_range, cursor_offset) =
765 resolve_cursor_region(prompt_inputs, format);
766 let context_start = context_range.start;
767 let old_editable_region = &context[editable_range_in_context.clone()];
768 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
769
770 let (range_in_context, output) = match format {
771 ZetaFormat::v0226Hashline => (
772 editable_range_in_context,
773 if hashline::output_has_edit_commands(output) {
774 hashline::apply_edit_commands(old_editable_region, output)
775 } else {
776 output.to_string()
777 },
778 ),
779 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
780 ZetaFormat::V0304SeedNoEdits => (
781 editable_range_in_context,
782 if output.starts_with(seed_coder::NO_EDITS) {
783 old_editable_region.to_string()
784 } else {
785 output.to_string()
786 },
787 ),
788 ZetaFormat::V0306SeedMultiRegions => (
789 editable_range_in_context,
790 if output.starts_with(seed_coder::NO_EDITS) {
791 old_editable_region.to_string()
792 } else {
793 multi_region::apply_marker_span(old_editable_region, output)?
794 },
795 ),
796 ZetaFormat::V0316SeedMultiRegions => (
797 editable_range_in_context,
798 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
799 ),
800 ZetaFormat::V0317SeedMultiRegions => (
801 editable_range_in_context,
802 multi_region::apply_marker_span_v0317(
803 old_editable_region,
804 output,
805 Some(cursor_offset_in_editable),
806 )?,
807 ),
808 _ => (editable_range_in_context, output.to_string()),
809 };
810
811 let range_in_excerpt =
812 range_in_context.start + context_start..range_in_context.end + context_start;
813
814 Ok(ParsedOutput {
815 new_editable_region: output,
816 range_in_excerpt,
817 })
818}
819
820pub fn excerpt_range_for_format(
821 format: ZetaFormat,
822 ranges: &ExcerptRanges,
823) -> (Range<usize>, Range<usize>) {
824 excerpt_ranges_for_format(format, ranges)
825}
826
827pub fn resolve_cursor_region(
828 input: &ZetaPromptInput,
829 format: ZetaFormat,
830) -> (&str, Range<usize>, Range<usize>, usize) {
831 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
832 let (editable_tokens, context_tokens) = token_limits_for_format(format);
833 compute_editable_and_context_ranges(
834 &input.cursor_excerpt,
835 input.cursor_offset_in_excerpt,
836 syntax_ranges,
837 editable_tokens,
838 context_tokens,
839 )
840 } else {
841 excerpt_range_for_format(format, &input.excerpt_ranges)
842 };
843 let context_start = context_range.start;
844 let context_text = &input.cursor_excerpt[context_range.clone()];
845 let adjusted_editable =
846 (editable_range.start - context_start)..(editable_range.end - context_start);
847 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
848
849 (
850 context_text,
851 adjusted_editable,
852 context_range,
853 adjusted_cursor,
854 )
855}
856
857pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
858 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
859 get_prefill_for_format(format, context, &editable_range)
860}
861
862fn format_edit_history_within_budget(
863 events: &[Arc<Event>],
864 file_marker: &str,
865 edit_history_name: &str,
866 max_tokens: usize,
867 max_edit_event_count: usize,
868) -> String {
869 let header = format!("{}{}\n", file_marker, edit_history_name);
870 let header_tokens = estimate_tokens(header.len());
871 if header_tokens >= max_tokens {
872 return String::new();
873 }
874
875 let mut event_strings: Vec<String> = Vec::new();
876 let mut total_tokens = header_tokens;
877
878 for event in events.iter().rev().take(max_edit_event_count) {
879 let mut event_str = String::new();
880 write_event(&mut event_str, event);
881 let event_tokens = estimate_tokens(event_str.len());
882
883 if total_tokens + event_tokens > max_tokens {
884 break;
885 }
886 total_tokens += event_tokens;
887 event_strings.push(event_str);
888 }
889
890 if event_strings.is_empty() {
891 return String::new();
892 }
893
894 let mut result = header;
895 for event_str in event_strings.iter().rev() {
896 result.push_str(event_str);
897 }
898 result
899}
900
901fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
902 let needs_newline = !excerpt.text.ends_with('\n');
903 let needs_ellipsis = excerpt.row_range.end < file_max_row;
904 let len = excerpt.text.len()
905 + if needs_newline { "\n".len() } else { 0 }
906 + if needs_ellipsis { "...\n".len() } else { 0 };
907 estimate_tokens(len)
908}
909
910pub fn format_related_files_within_budget(
911 related_files: &[RelatedFile],
912 file_prefix: &str,
913 file_suffix: &str,
914 max_tokens: usize,
915) -> String {
916 struct ExcerptCandidate {
917 file_ix: usize,
918 excerpt_ix: usize,
919 order: usize,
920 }
921
922 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
923 .iter()
924 .enumerate()
925 .flat_map(|(file_ix, file)| {
926 file.excerpts
927 .iter()
928 .enumerate()
929 .map(move |(excerpt_ix, e)| ExcerptCandidate {
930 file_ix,
931 excerpt_ix,
932 order: e.order,
933 })
934 })
935 .collect();
936
937 // Pre-compute file header strings and their token costs.
938 let file_headers: Vec<String> = related_files
939 .iter()
940 .map(|file| {
941 let path_str = file.path.to_string_lossy();
942 format!("{}{}\n", file_prefix, path_str)
943 })
944 .collect();
945
946 // Sort the excerpts by their order and determine how many fit within the budget.
947 let mut total_tokens = 0;
948 let mut included_excerpt_count = 0_usize;
949 let mut included_file_indices = vec![false; related_files.len()];
950 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
951 for candidate in &excerpt_candidates {
952 let file = &related_files[candidate.file_ix];
953 let excerpt = &file.excerpts[candidate.excerpt_ix];
954 let file_already_included = included_file_indices[candidate.file_ix];
955 let header_cost = if file_already_included {
956 0
957 } else {
958 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
959 };
960 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
961 if total_tokens + header_cost + excerpt_cost > max_tokens {
962 break;
963 }
964 total_tokens += header_cost + excerpt_cost;
965 if !file_already_included {
966 included_file_indices[candidate.file_ix] = true;
967 }
968 included_excerpt_count += 1;
969 }
970
971 excerpt_candidates.truncate(included_excerpt_count);
972 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
973
974 // Render all of the files that fit within the token budget, in the original order.
975 let mut result = String::new();
976 let mut last_file_ix = None;
977 for candidate in &excerpt_candidates {
978 if last_file_ix != Some(candidate.file_ix) {
979 if last_file_ix.is_some() {
980 result.push_str(file_suffix);
981 }
982 result.push_str(&file_headers[candidate.file_ix]);
983 last_file_ix = Some(candidate.file_ix);
984 }
985 let file = &related_files[candidate.file_ix];
986 let excerpt = &file.excerpts[candidate.excerpt_ix];
987 result.push_str(&excerpt.text);
988 if !result.ends_with('\n') {
989 result.push('\n');
990 }
991 if excerpt.row_range.end < file.max_row {
992 result.push_str("...\n");
993 }
994 }
995
996 result
997}
998
999pub fn write_related_files(
1000 prompt: &mut String,
1001 related_files: &[RelatedFile],
1002) -> Vec<Range<usize>> {
1003 let mut ranges = Vec::new();
1004 for file in related_files {
1005 let start = prompt.len();
1006 let path_str = file.path.to_string_lossy();
1007 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1008 for excerpt in &file.excerpts {
1009 prompt.push_str(&excerpt.text);
1010 if !prompt.ends_with('\n') {
1011 prompt.push('\n');
1012 }
1013 if excerpt.row_range.end < file.max_row {
1014 prompt.push_str("...\n");
1015 }
1016 }
1017 let end = prompt.len();
1018 ranges.push(start..end);
1019 }
1020 ranges
1021}
1022
1023mod v0112_middle_at_end {
1024 use super::*;
1025
1026 pub fn special_tokens() -> &'static [&'static str] {
1027 &[
1028 "<|fim_prefix|>",
1029 "<|fim_suffix|>",
1030 "<|fim_middle|>",
1031 "<|file_sep|>",
1032 CURSOR_MARKER,
1033 ]
1034 }
1035
1036 pub fn write_cursor_excerpt_section(
1037 prompt: &mut String,
1038 path: &Path,
1039 context: &str,
1040 editable_range: &Range<usize>,
1041 cursor_offset: usize,
1042 ) {
1043 let path_str = path.to_string_lossy();
1044 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1045
1046 prompt.push_str("<|fim_prefix|>\n");
1047 prompt.push_str(&context[..editable_range.start]);
1048
1049 prompt.push_str("<|fim_suffix|>\n");
1050 prompt.push_str(&context[editable_range.end..]);
1051 if !prompt.ends_with('\n') {
1052 prompt.push('\n');
1053 }
1054
1055 prompt.push_str("<|fim_middle|>current\n");
1056 prompt.push_str(&context[editable_range.start..cursor_offset]);
1057 prompt.push_str(CURSOR_MARKER);
1058 prompt.push_str(&context[cursor_offset..editable_range.end]);
1059 if !prompt.ends_with('\n') {
1060 prompt.push('\n');
1061 }
1062
1063 prompt.push_str("<|fim_middle|>updated\n");
1064 }
1065}
1066
1067mod v0113_ordered {
1068 use super::*;
1069
1070 pub fn special_tokens() -> &'static [&'static str] {
1071 &[
1072 "<|fim_prefix|>",
1073 "<|fim_suffix|>",
1074 "<|fim_middle|>",
1075 "<|file_sep|>",
1076 CURSOR_MARKER,
1077 ]
1078 }
1079
1080 pub fn write_cursor_excerpt_section(
1081 prompt: &mut String,
1082 path: &Path,
1083 context: &str,
1084 editable_range: &Range<usize>,
1085 cursor_offset: usize,
1086 ) {
1087 let path_str = path.to_string_lossy();
1088 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1089
1090 prompt.push_str("<|fim_prefix|>\n");
1091 prompt.push_str(&context[..editable_range.start]);
1092 if !prompt.ends_with('\n') {
1093 prompt.push('\n');
1094 }
1095
1096 prompt.push_str("<|fim_middle|>current\n");
1097 prompt.push_str(&context[editable_range.start..cursor_offset]);
1098 prompt.push_str(CURSOR_MARKER);
1099 prompt.push_str(&context[cursor_offset..editable_range.end]);
1100 if !prompt.ends_with('\n') {
1101 prompt.push('\n');
1102 }
1103
1104 prompt.push_str("<|fim_suffix|>\n");
1105 prompt.push_str(&context[editable_range.end..]);
1106 if !prompt.ends_with('\n') {
1107 prompt.push('\n');
1108 }
1109
1110 prompt.push_str("<|fim_middle|>updated\n");
1111 }
1112}
1113
1114mod v0114180_editable_region {
1115 use super::*;
1116
1117 pub fn special_tokens() -> &'static [&'static str] {
1118 v0113_ordered::special_tokens()
1119 }
1120}
1121
1122pub mod v0120_git_merge_markers {
1123 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1124 //!
1125 //! Example prompt:
1126 //!
1127 //! <|file_sep|>path/to/target_file.py
1128 //! <|fim_prefix|>
1129 //! code before editable region
1130 //! <|fim_suffix|>
1131 //! code after editable region
1132 //! <|fim_middle|>
1133 //! <<<<<<< CURRENT
1134 //! code that
1135 //! needs to<|user_cursor|>
1136 //! be rewritten
1137 //! =======
1138 //!
1139 //! Expected output (should be generated by the model):
1140 //!
1141 //! updated
1142 //! code with
1143 //! changes applied
1144 //! >>>>>>> UPDATED
1145
1146 use super::*;
1147
1148 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1149 pub const SEPARATOR: &str = "=======\n";
1150 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1151
1152 pub fn special_tokens() -> &'static [&'static str] {
1153 &[
1154 "<|fim_prefix|>",
1155 "<|fim_suffix|>",
1156 "<|fim_middle|>",
1157 "<|file_sep|>",
1158 START_MARKER,
1159 SEPARATOR,
1160 END_MARKER,
1161 CURSOR_MARKER,
1162 ]
1163 }
1164
1165 pub fn write_cursor_excerpt_section(
1166 prompt: &mut String,
1167 path: &Path,
1168 context: &str,
1169 editable_range: &Range<usize>,
1170 cursor_offset: usize,
1171 ) {
1172 let path_str = path.to_string_lossy();
1173 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1174
1175 prompt.push_str("<|fim_prefix|>");
1176 prompt.push_str(&context[..editable_range.start]);
1177
1178 prompt.push_str("<|fim_suffix|>");
1179 prompt.push_str(&context[editable_range.end..]);
1180 if !prompt.ends_with('\n') {
1181 prompt.push('\n');
1182 }
1183
1184 prompt.push_str("<|fim_middle|>");
1185 prompt.push_str(START_MARKER);
1186 prompt.push_str(&context[editable_range.start..cursor_offset]);
1187 prompt.push_str(CURSOR_MARKER);
1188 prompt.push_str(&context[cursor_offset..editable_range.end]);
1189 if !prompt.ends_with('\n') {
1190 prompt.push('\n');
1191 }
1192 prompt.push_str(SEPARATOR);
1193 }
1194}
1195
1196pub mod v0131_git_merge_markers_prefix {
1197 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1198 //!
1199 //! Example prompt:
1200 //!
1201 //! <|file_sep|>path/to/target_file.py
1202 //! <|fim_prefix|>
1203 //! code before editable region
1204 //! <<<<<<< CURRENT
1205 //! code that
1206 //! needs to<|user_cursor|>
1207 //! be rewritten
1208 //! =======
1209 //! <|fim_suffix|>
1210 //! code after editable region
1211 //! <|fim_middle|>
1212 //!
1213 //! Expected output (should be generated by the model):
1214 //!
1215 //! updated
1216 //! code with
1217 //! changes applied
1218 //! >>>>>>> UPDATED
1219
1220 use super::*;
1221
1222 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1223 pub const SEPARATOR: &str = "=======\n";
1224 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1225
1226 pub fn special_tokens() -> &'static [&'static str] {
1227 &[
1228 "<|fim_prefix|>",
1229 "<|fim_suffix|>",
1230 "<|fim_middle|>",
1231 "<|file_sep|>",
1232 START_MARKER,
1233 SEPARATOR,
1234 END_MARKER,
1235 CURSOR_MARKER,
1236 ]
1237 }
1238
1239 pub fn write_cursor_excerpt_section(
1240 prompt: &mut String,
1241 path: &Path,
1242 context: &str,
1243 editable_range: &Range<usize>,
1244 cursor_offset: usize,
1245 ) {
1246 let path_str = path.to_string_lossy();
1247 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1248
1249 prompt.push_str("<|fim_prefix|>");
1250 prompt.push_str(&context[..editable_range.start]);
1251 prompt.push_str(START_MARKER);
1252 prompt.push_str(&context[editable_range.start..cursor_offset]);
1253 prompt.push_str(CURSOR_MARKER);
1254 prompt.push_str(&context[cursor_offset..editable_range.end]);
1255 if !prompt.ends_with('\n') {
1256 prompt.push('\n');
1257 }
1258 prompt.push_str(SEPARATOR);
1259
1260 prompt.push_str("<|fim_suffix|>");
1261 prompt.push_str(&context[editable_range.end..]);
1262 if !prompt.ends_with('\n') {
1263 prompt.push('\n');
1264 }
1265
1266 prompt.push_str("<|fim_middle|>");
1267 }
1268}
1269
1270pub mod v0211_prefill {
1271 use super::*;
1272
1273 pub fn special_tokens() -> &'static [&'static str] {
1274 v0131_git_merge_markers_prefix::special_tokens()
1275 }
1276
1277 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1278 let editable_region = &context[editable_range.start..editable_range.end];
1279
1280 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1281 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1282
1283 // Find a token boundary to avoid splitting tokens in the prefill.
1284 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1285 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1286 // the \n and consume any consecutive \n characters after it.
1287 let prefill = &editable_region[..prefill_len];
1288 match prefill.rfind('\n') {
1289 Some(pos) => {
1290 let mut end = pos + 1;
1291 while end < editable_region.len()
1292 && editable_region.as_bytes().get(end) == Some(&b'\n')
1293 {
1294 end += 1;
1295 }
1296 editable_region[..end].to_string()
1297 }
1298 // No newline found. Fall back to splitting before the last space
1299 // (word-level boundary)
1300 None => match prefill.rfind(' ') {
1301 Some(pos) => prefill[..pos].to_string(),
1302 None => prefill.to_string(),
1303 },
1304 }
1305 }
1306}
1307
1308pub mod hashline {
1309
1310 use std::fmt::Display;
1311
1312 pub const END_MARKER: &str = "<|fim_middle|>updated";
1313 pub const START_MARKER: &str = "<|fim_middle|>current";
1314
1315 use super::*;
1316
1317 const SET_COMMAND_MARKER: &str = "<|set|>";
1318 const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1319 pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1320
1321 pub fn special_tokens() -> &'static [&'static str] {
1322 return &[
1323 SET_COMMAND_MARKER,
1324 "<|set_range|>",
1325 INSERT_COMMAND_MARKER,
1326 NO_EDITS_COMMAND_MARKER,
1327 CURSOR_MARKER,
1328 "<|file_sep|>",
1329 "<|fim_prefix|>",
1330 "<|fim_suffix|>",
1331 "<|fim_middle|>",
1332 ];
1333 }
1334
1335 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1336 #[derive(Debug, Clone, PartialEq, Eq)]
1337 struct LineRef {
1338 index: usize,
1339 hash: u8,
1340 }
1341
1342 impl Display for LineRef {
1343 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1344 write!(f, "{}:{:02x}", self.index, self.hash)
1345 }
1346 }
1347
1348 pub fn hash_line(line: &[u8]) -> u8 {
1349 let mut h: u8 = 0;
1350 for &byte in line {
1351 h = h.wrapping_add(byte);
1352 }
1353 return h;
1354 }
1355
1356 /// Write the hashline-encoded editable region into `out`. Each line of
1357 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1358 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1359 /// to the start of `editable_text`).
1360 pub fn write_hashline_editable_region(
1361 out: &mut String,
1362 editable_text: &str,
1363 cursor_offset_in_editable: usize,
1364 ) {
1365 let mut offset = 0;
1366 for (i, line) in editable_text.lines().enumerate() {
1367 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1368 && cursor_offset_in_editable < offset + line.len()
1369 {
1370 (
1371 &line[..cursor_offset_in_editable - offset],
1372 CURSOR_MARKER,
1373 &line[cursor_offset_in_editable - offset..],
1374 )
1375 } else {
1376 (line, "", "")
1377 };
1378 write!(
1379 out,
1380 "\n{}|{head}{cursor}{tail}",
1381 LineRef {
1382 index: i,
1383 hash: hash_line(line.as_bytes())
1384 }
1385 )
1386 .unwrap();
1387 offset += line.len() + 1;
1388 }
1389 }
1390
1391 pub fn write_cursor_excerpt_section(
1392 prompt: &mut String,
1393 path: &Path,
1394 context: &str,
1395 editable_range: &Range<usize>,
1396 cursor_offset: usize,
1397 ) {
1398 let path_str = path.to_string_lossy();
1399 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1400
1401 prompt.push_str("<|fim_prefix|>\n");
1402 prompt.push_str(&context[..editable_range.start]);
1403 prompt.push_str(START_MARKER);
1404
1405 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1406 let editable_region = &context[editable_range.clone()];
1407 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1408
1409 if !prompt.ends_with('\n') {
1410 prompt.push('\n');
1411 }
1412
1413 prompt.push_str("<|fim_suffix|>\n");
1414 prompt.push_str(&context[editable_range.end..]);
1415 if !prompt.ends_with('\n') {
1416 prompt.push('\n');
1417 }
1418
1419 prompt.push_str(END_MARKER);
1420 prompt.push('\n');
1421 }
1422
1423 /// A single edit command parsed from the model output.
1424 #[derive(Debug)]
1425 enum EditCommand<'a> {
1426 /// Replace a range of lines (inclusive on both ends). Single-line set is
1427 /// represented by `start == end`.
1428 Set {
1429 start: LineRef,
1430 end: LineRef,
1431 content: &'a str,
1432 },
1433 /// Insert new lines after the given line, or before the first line if
1434 /// `after` is `None`.
1435 Insert {
1436 after: Option<LineRef>,
1437 content: &'a str,
1438 },
1439 }
1440
1441 /// Parse a line reference like `3:c3` into a `LineRef`.
1442 fn parse_line_ref(s: &str) -> Option<LineRef> {
1443 let (idx_str, hash_str) = s.split_once(':')?;
1444 let index = idx_str.parse::<usize>().ok()?;
1445 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1446 Some(LineRef { index, hash })
1447 }
1448
1449 /// Parse the model output into a list of `EditCommand`s.
1450 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1451 let mut commands = Vec::new();
1452 let mut offset = 0usize;
1453
1454 while offset < model_output.len() {
1455 let next_nl = model_output[offset..]
1456 .find('\n')
1457 .map(|i| offset + i)
1458 .unwrap_or(model_output.len());
1459 let line = &model_output[offset..next_nl];
1460 let line_end = if next_nl < model_output.len() {
1461 next_nl + 1
1462 } else {
1463 next_nl
1464 };
1465
1466 let trimmed = line.trim();
1467 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1468 (true, spec)
1469 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1470 (false, spec)
1471 } else {
1472 offset = line_end;
1473 continue;
1474 };
1475
1476 let mut content_end = line_end;
1477 let mut scan = line_end;
1478
1479 while scan < model_output.len() {
1480 let body_nl = model_output[scan..]
1481 .find('\n')
1482 .map(|i| scan + i)
1483 .unwrap_or(model_output.len());
1484 let body_line = &model_output[scan..body_nl];
1485 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1486 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1487 {
1488 break;
1489 }
1490 scan = if body_nl < model_output.len() {
1491 body_nl + 1
1492 } else {
1493 body_nl
1494 };
1495 content_end = scan;
1496 }
1497
1498 let content = &model_output[line_end..content_end];
1499
1500 if is_set {
1501 if let Some((start_str, end_str)) = specifier.split_once('-') {
1502 if let (Some(start), Some(end)) =
1503 (parse_line_ref(start_str), parse_line_ref(end_str))
1504 {
1505 commands.push(EditCommand::Set {
1506 start,
1507 end,
1508 content,
1509 });
1510 }
1511 } else if let Some(target) = parse_line_ref(specifier) {
1512 commands.push(EditCommand::Set {
1513 start: target.clone(),
1514 end: target,
1515 content,
1516 });
1517 }
1518 } else {
1519 let after = parse_line_ref(specifier);
1520 commands.push(EditCommand::Insert { after, content });
1521 }
1522
1523 offset = scan;
1524 }
1525
1526 commands
1527 }
1528
1529 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1530 /// (as opposed to being a plain full-replacement output).
1531 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1532 /// editable region, returning the plain text content.
1533 pub fn strip_hashline_prefixes(region: &str) -> String {
1534 let mut decoded: String = region
1535 .lines()
1536 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1537 .collect::<Vec<_>>()
1538 .join("\n");
1539 if region.ends_with('\n') {
1540 decoded.push('\n');
1541 }
1542 decoded
1543 }
1544
1545 pub fn output_has_edit_commands(model_output: &str) -> bool {
1546 model_output.contains(SET_COMMAND_MARKER)
1547 || model_output.contains(INSERT_COMMAND_MARKER)
1548 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1549 }
1550
1551 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1552 /// original editable region text.
1553 ///
1554 /// `editable_region` is the original text of the editable region (without hash
1555 /// prefixes). `model_output` is the raw model response containing edit commands.
1556 ///
1557 /// Returns the full replacement text for the editable region.
1558 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1559 if model_output
1560 .trim_start()
1561 .starts_with(NO_EDITS_COMMAND_MARKER)
1562 {
1563 return editable_region.to_string();
1564 }
1565
1566 let original_lines: Vec<&str> = editable_region.lines().collect();
1567 let old_hashes: Vec<u8> = original_lines
1568 .iter()
1569 .map(|line| hash_line(line.as_bytes()))
1570 .collect();
1571
1572 let commands = parse_edit_commands(model_output);
1573
1574 // For set operations: indexed by start line → Some((end line index, content))
1575 // For insert operations: indexed by line index → vec of content to insert after
1576 // Insert-before-first is tracked separately.
1577 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1578 let mut insert_before_first: Vec<&str> = Vec::new();
1579 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1580
1581 for command in &commands {
1582 match command {
1583 EditCommand::Set {
1584 start,
1585 end,
1586 content,
1587 } => {
1588 if start.index < old_hashes.len()
1589 && end.index < old_hashes.len()
1590 && start.index <= end.index
1591 && old_hashes[start.index] == start.hash
1592 && old_hashes[end.index] == end.hash
1593 {
1594 set_ops[start.index] = Some((end.index, *content));
1595 }
1596 }
1597 EditCommand::Insert { after, content } => match after {
1598 None => insert_before_first.push(*content),
1599 Some(line_ref) => {
1600 if line_ref.index < old_hashes.len()
1601 && old_hashes[line_ref.index] == line_ref.hash
1602 {
1603 insert_after[line_ref.index].push(*content);
1604 }
1605 }
1606 },
1607 }
1608 }
1609
1610 let mut result = String::new();
1611
1612 // Emit any insertions before the first line
1613 for content in &insert_before_first {
1614 result.push_str(content);
1615 if !content.ends_with('\n') {
1616 result.push('\n');
1617 }
1618 }
1619
1620 let mut i = 0;
1621 while i < original_lines.len() {
1622 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1623 // Replace lines i..=end_index with the replacement content
1624 result.push_str(replacement);
1625 if !replacement.is_empty() && !replacement.ends_with('\n') {
1626 result.push('\n');
1627 }
1628 // Emit any insertions after the end of this set range
1629 if *end_index < insert_after.len() {
1630 for content in &insert_after[*end_index] {
1631 result.push_str(content);
1632 if !content.ends_with('\n') {
1633 result.push('\n');
1634 }
1635 }
1636 }
1637 i = end_index + 1;
1638 } else {
1639 // Keep the original line
1640 result.push_str(original_lines[i]);
1641 result.push('\n');
1642 // Emit any insertions after this line
1643 for content in &insert_after[i] {
1644 result.push_str(content);
1645 if !content.ends_with('\n') {
1646 result.push('\n');
1647 }
1648 }
1649 i += 1;
1650 }
1651 }
1652
1653 // Preserve trailing newline behavior: if the original ended with a
1654 // newline the result already has one; if it didn't, trim the extra one
1655 // we added.
1656 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1657 result.pop();
1658 }
1659
1660 result
1661 }
1662
1663 /// Convert a unified diff patch into hashline edit commands.
1664 ///
1665 /// Parses the unified diff `patch` directly to determine which lines of
1666 /// `old_text` are deleted/replaced and what new lines are added, then emits
1667 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1668 /// `{index}:{hash}` identifiers.
1669 ///
1670 /// `cursor_offset` is an optional byte offset into the first hunk's new
1671 /// text (context + additions) where the cursor marker should be placed.
1672 pub fn patch_to_edit_commands(
1673 old_text: &str,
1674 patch: &str,
1675 cursor_offset: Option<usize>,
1676 ) -> Result<String> {
1677 let old_lines: Vec<&str> = old_text.lines().collect();
1678 let old_hashes: Vec<u8> = old_lines
1679 .iter()
1680 .map(|line| hash_line(line.as_bytes()))
1681 .collect();
1682
1683 let mut result = String::new();
1684 let mut first_hunk = true;
1685
1686 struct Hunk<'a> {
1687 line_range: Range<usize>,
1688 new_text_lines: Vec<&'a str>,
1689 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1690 }
1691
1692 // Parse the patch line by line. We only care about hunk headers,
1693 // context, deletions, and additions.
1694 let mut old_line_index: usize = 0;
1695 let mut current_hunk: Option<Hunk> = None;
1696 // Byte offset tracking within the hunk's new text for cursor placement.
1697 let mut new_text_byte_offset: usize = 0;
1698 // The line index of the last old line seen before/in the current hunk
1699 // (used for insert-after reference).
1700 let mut last_old_line_before_hunk: Option<usize> = None;
1701
1702 fn flush_hunk(
1703 hunk: Hunk,
1704 last_old_line: Option<usize>,
1705 result: &mut String,
1706 old_hashes: &[u8],
1707 ) {
1708 if hunk.line_range.is_empty() {
1709 // Pure insertion — reference the old line to insert after when in bounds.
1710 if let Some(after) = last_old_line
1711 && let Some(&hash) = old_hashes.get(after)
1712 {
1713 write!(
1714 result,
1715 "{INSERT_COMMAND_MARKER}{}\n",
1716 LineRef { index: after, hash }
1717 )
1718 .unwrap();
1719 } else {
1720 result.push_str(INSERT_COMMAND_MARKER);
1721 result.push('\n');
1722 }
1723 } else {
1724 let start = hunk.line_range.start;
1725 let end_exclusive = hunk.line_range.end;
1726 let deleted_line_count = end_exclusive.saturating_sub(start);
1727
1728 if deleted_line_count == 1 {
1729 if let Some(&hash) = old_hashes.get(start) {
1730 write!(
1731 result,
1732 "{SET_COMMAND_MARKER}{}\n",
1733 LineRef { index: start, hash }
1734 )
1735 .unwrap();
1736 } else {
1737 result.push_str(SET_COMMAND_MARKER);
1738 result.push('\n');
1739 }
1740 } else {
1741 let end_inclusive = end_exclusive - 1;
1742 match (
1743 old_hashes.get(start).copied(),
1744 old_hashes.get(end_inclusive).copied(),
1745 ) {
1746 (Some(start_hash), Some(end_hash)) => {
1747 write!(
1748 result,
1749 "{SET_COMMAND_MARKER}{}-{}\n",
1750 LineRef {
1751 index: start,
1752 hash: start_hash
1753 },
1754 LineRef {
1755 index: end_inclusive,
1756 hash: end_hash
1757 }
1758 )
1759 .unwrap();
1760 }
1761 _ => {
1762 result.push_str(SET_COMMAND_MARKER);
1763 result.push('\n');
1764 }
1765 }
1766 }
1767 }
1768 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1769 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1770 && line_offset == cursor_line_offset
1771 {
1772 result.push_str(&line[..char_offset]);
1773 result.push_str(CURSOR_MARKER);
1774 result.push_str(&line[char_offset..]);
1775 continue;
1776 }
1777
1778 result.push_str(line);
1779 }
1780 }
1781
1782 for raw_line in patch.split_inclusive('\n') {
1783 if raw_line.starts_with("@@") {
1784 // Flush any pending change hunk from a previous patch hunk.
1785 if let Some(hunk) = current_hunk.take() {
1786 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1787 }
1788
1789 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1790 // We intentionally do not trust old_start as a direct local index into `old_text`,
1791 // because some patches are produced against a larger file region and carry
1792 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1793 if first_hunk {
1794 new_text_byte_offset = 0;
1795 first_hunk = false;
1796 }
1797 continue;
1798 }
1799
1800 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1801 continue;
1802 }
1803 if raw_line.starts_with("\\ No newline") {
1804 continue;
1805 }
1806
1807 if raw_line.starts_with('-') {
1808 // Extend or start a change hunk with this deleted old line.
1809 match &mut current_hunk {
1810 Some(Hunk {
1811 line_range: range, ..
1812 }) => range.end = old_line_index + 1,
1813 None => {
1814 current_hunk = Some(Hunk {
1815 line_range: old_line_index..old_line_index + 1,
1816 new_text_lines: Vec::new(),
1817 cursor_line_offset_in_new_text: None,
1818 });
1819 }
1820 }
1821 old_line_index += 1;
1822 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1823 // Place cursor marker if cursor_offset falls within this line.
1824 let mut cursor_line_offset = None;
1825 if let Some(cursor_off) = cursor_offset
1826 && (first_hunk
1827 || cursor_off >= new_text_byte_offset
1828 && cursor_off <= new_text_byte_offset + added_content.len())
1829 {
1830 let line_offset = added_content.floor_char_boundary(
1831 cursor_off
1832 .saturating_sub(new_text_byte_offset)
1833 .min(added_content.len()),
1834 );
1835 cursor_line_offset = Some(line_offset);
1836 }
1837
1838 new_text_byte_offset += added_content.len();
1839
1840 let hunk = current_hunk.get_or_insert(Hunk {
1841 line_range: old_line_index..old_line_index,
1842 new_text_lines: vec![],
1843 cursor_line_offset_in_new_text: None,
1844 });
1845 hunk.new_text_lines.push(added_content);
1846 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1847 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1848 } else {
1849 // Context line (starts with ' ' or is empty).
1850 if let Some(hunk) = current_hunk.take() {
1851 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1852 }
1853 last_old_line_before_hunk = Some(old_line_index);
1854 old_line_index += 1;
1855 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1856 new_text_byte_offset += content.len();
1857 }
1858 }
1859
1860 // Flush final group.
1861 if let Some(hunk) = current_hunk.take() {
1862 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1863 }
1864
1865 // Trim a single trailing newline.
1866 if result.ends_with('\n') {
1867 result.pop();
1868 }
1869
1870 if result.is_empty() {
1871 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1872 }
1873
1874 Ok(result)
1875 }
1876
1877 #[cfg(test)]
1878 mod tests {
1879 use super::*;
1880 use indoc::indoc;
1881
1882 #[test]
1883 fn test_format_cursor_region() {
1884 struct Case {
1885 name: &'static str,
1886 context: &'static str,
1887 editable_range: Range<usize>,
1888 cursor_offset: usize,
1889 expected: &'static str,
1890 }
1891
1892 let cases = [
1893 Case {
1894 name: "basic_cursor_placement",
1895 context: "hello world\n",
1896 editable_range: 0..12,
1897 cursor_offset: 5,
1898 expected: indoc! {"
1899 <|file_sep|>test.rs
1900 <|fim_prefix|>
1901 <|fim_middle|>current
1902 0:5c|hello<|user_cursor|> world
1903 <|fim_suffix|>
1904 <|fim_middle|>updated
1905 "},
1906 },
1907 Case {
1908 name: "multiline_cursor_on_second_line",
1909 context: "aaa\nbbb\nccc\n",
1910 editable_range: 0..12,
1911 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1912 expected: indoc! {"
1913 <|file_sep|>test.rs
1914 <|fim_prefix|>
1915 <|fim_middle|>current
1916 0:23|aaa
1917 1:26|b<|user_cursor|>bb
1918 2:29|ccc
1919 <|fim_suffix|>
1920 <|fim_middle|>updated
1921 "},
1922 },
1923 Case {
1924 name: "no_trailing_newline_in_context",
1925 context: "line1\nline2",
1926 editable_range: 0..11,
1927 cursor_offset: 3,
1928 expected: indoc! {"
1929 <|file_sep|>test.rs
1930 <|fim_prefix|>
1931 <|fim_middle|>current
1932 0:d9|lin<|user_cursor|>e1
1933 1:da|line2
1934 <|fim_suffix|>
1935 <|fim_middle|>updated
1936 "},
1937 },
1938 Case {
1939 name: "leading_newline_in_editable_region",
1940 context: "\nabc\n",
1941 editable_range: 0..5,
1942 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1943 expected: indoc! {"
1944 <|file_sep|>test.rs
1945 <|fim_prefix|>
1946 <|fim_middle|>current
1947 0:00|
1948 1:26|a<|user_cursor|>bc
1949 <|fim_suffix|>
1950 <|fim_middle|>updated
1951 "},
1952 },
1953 Case {
1954 name: "with_suffix",
1955 context: "abc\ndef",
1956 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1957 cursor_offset: 2,
1958 expected: indoc! {"
1959 <|file_sep|>test.rs
1960 <|fim_prefix|>
1961 <|fim_middle|>current
1962 0:26|ab<|user_cursor|>c
1963 <|fim_suffix|>
1964 def
1965 <|fim_middle|>updated
1966 "},
1967 },
1968 Case {
1969 name: "unicode_two_byte_chars",
1970 context: "héllo\n",
1971 editable_range: 0..7,
1972 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1973 expected: indoc! {"
1974 <|file_sep|>test.rs
1975 <|fim_prefix|>
1976 <|fim_middle|>current
1977 0:1b|hé<|user_cursor|>llo
1978 <|fim_suffix|>
1979 <|fim_middle|>updated
1980 "},
1981 },
1982 Case {
1983 name: "unicode_three_byte_chars",
1984 context: "日本語\n",
1985 editable_range: 0..10,
1986 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1987 expected: indoc! {"
1988 <|file_sep|>test.rs
1989 <|fim_prefix|>
1990 <|fim_middle|>current
1991 0:80|日本<|user_cursor|>語
1992 <|fim_suffix|>
1993 <|fim_middle|>updated
1994 "},
1995 },
1996 Case {
1997 name: "unicode_four_byte_chars",
1998 context: "a🌍b\n",
1999 editable_range: 0..7,
2000 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2001 expected: indoc! {"
2002 <|file_sep|>test.rs
2003 <|fim_prefix|>
2004 <|fim_middle|>current
2005 0:6b|a🌍<|user_cursor|>b
2006 <|fim_suffix|>
2007 <|fim_middle|>updated
2008 "},
2009 },
2010 Case {
2011 name: "cursor_at_start_of_region_not_placed",
2012 context: "abc\n",
2013 editable_range: 0..4,
2014 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2015 expected: indoc! {"
2016 <|file_sep|>test.rs
2017 <|fim_prefix|>
2018 <|fim_middle|>current
2019 0:26|abc
2020 <|fim_suffix|>
2021 <|fim_middle|>updated
2022 "},
2023 },
2024 Case {
2025 name: "cursor_at_end_of_line_not_placed",
2026 context: "abc\ndef\n",
2027 editable_range: 0..8,
2028 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2029 expected: indoc! {"
2030 <|file_sep|>test.rs
2031 <|fim_prefix|>
2032 <|fim_middle|>current
2033 0:26|abc
2034 1:2f|def
2035 <|fim_suffix|>
2036 <|fim_middle|>updated
2037 "},
2038 },
2039 Case {
2040 name: "cursor_offset_relative_to_context_not_editable_region",
2041 // cursor_offset is relative to `context`, so when editable_range.start > 0,
2042 // write_cursor_excerpt_section must subtract it before comparing against
2043 // per-line offsets within the editable region.
2044 context: "pre\naaa\nbbb\nsuf\n",
2045 editable_range: 4..12, // editable region = "aaa\nbbb\n"
2046 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
2047 expected: indoc! {"
2048 <|file_sep|>test.rs
2049 <|fim_prefix|>
2050 pre
2051 <|fim_middle|>current
2052 0:23|aaa
2053 1:26|b<|user_cursor|>bb
2054 <|fim_suffix|>
2055 suf
2056 <|fim_middle|>updated
2057 "},
2058 },
2059 ];
2060
2061 for case in &cases {
2062 let mut prompt = String::new();
2063 hashline::write_cursor_excerpt_section(
2064 &mut prompt,
2065 Path::new("test.rs"),
2066 case.context,
2067 &case.editable_range,
2068 case.cursor_offset,
2069 );
2070 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2071 }
2072 }
2073
2074 #[test]
2075 fn test_apply_edit_commands() {
2076 struct Case {
2077 name: &'static str,
2078 original: &'static str,
2079 model_output: &'static str,
2080 expected: &'static str,
2081 }
2082
2083 let cases = vec![
2084 Case {
2085 name: "set_single_line",
2086 original: indoc! {"
2087 let mut total = 0;
2088 for product in products {
2089 total += ;
2090 }
2091 total
2092 "},
2093 model_output: indoc! {"
2094 <|set|>2:87
2095 total += product.price;
2096 "},
2097 expected: indoc! {"
2098 let mut total = 0;
2099 for product in products {
2100 total += product.price;
2101 }
2102 total
2103 "},
2104 },
2105 Case {
2106 name: "set_range",
2107 original: indoc! {"
2108 fn foo() {
2109 let x = 1;
2110 let y = 2;
2111 let z = 3;
2112 }
2113 "},
2114 model_output: indoc! {"
2115 <|set|>1:46-3:4a
2116 let sum = 6;
2117 "},
2118 expected: indoc! {"
2119 fn foo() {
2120 let sum = 6;
2121 }
2122 "},
2123 },
2124 Case {
2125 name: "insert_after_line",
2126 original: indoc! {"
2127 fn main() {
2128 let x = 1;
2129 }
2130 "},
2131 model_output: indoc! {"
2132 <|insert|>1:46
2133 let y = 2;
2134 "},
2135 expected: indoc! {"
2136 fn main() {
2137 let x = 1;
2138 let y = 2;
2139 }
2140 "},
2141 },
2142 Case {
2143 name: "insert_before_first",
2144 original: indoc! {"
2145 let x = 1;
2146 let y = 2;
2147 "},
2148 model_output: indoc! {"
2149 <|insert|>
2150 use std::io;
2151 "},
2152 expected: indoc! {"
2153 use std::io;
2154 let x = 1;
2155 let y = 2;
2156 "},
2157 },
2158 Case {
2159 name: "set_with_cursor_marker",
2160 original: indoc! {"
2161 fn main() {
2162 println!();
2163 }
2164 "},
2165 model_output: indoc! {"
2166 <|set|>1:34
2167 eprintln!(\"<|user_cursor|>\");
2168 "},
2169 expected: indoc! {"
2170 fn main() {
2171 eprintln!(\"<|user_cursor|>\");
2172 }
2173 "},
2174 },
2175 Case {
2176 name: "multiple_set_commands",
2177 original: indoc! {"
2178 aaa
2179 bbb
2180 ccc
2181 ddd
2182 "},
2183 model_output: indoc! {"
2184 <|set|>0:23
2185 AAA
2186 <|set|>2:29
2187 CCC
2188 "},
2189 expected: indoc! {"
2190 AAA
2191 bbb
2192 CCC
2193 ddd
2194 "},
2195 },
2196 Case {
2197 name: "set_range_multiline_replacement",
2198 original: indoc! {"
2199 fn handle_submit() {
2200 }
2201
2202 fn handle_keystroke() {
2203 "},
2204 model_output: indoc! {"
2205 <|set|>0:3f-1:7d
2206 fn handle_submit(modal_state: &mut ModalState) {
2207 <|user_cursor|>
2208 }
2209 "},
2210 expected: indoc! {"
2211 fn handle_submit(modal_state: &mut ModalState) {
2212 <|user_cursor|>
2213 }
2214
2215 fn handle_keystroke() {
2216 "},
2217 },
2218 Case {
2219 name: "no_edit_commands_returns_original",
2220 original: indoc! {"
2221 hello
2222 world
2223 "},
2224 model_output: "some random text with no commands",
2225 expected: indoc! {"
2226 hello
2227 world
2228 "},
2229 },
2230 Case {
2231 name: "no_edits_command_returns_original",
2232 original: indoc! {"
2233 hello
2234 world
2235 "},
2236 model_output: "<|no_edits|>",
2237 expected: indoc! {"
2238 hello
2239 world
2240 "},
2241 },
2242 Case {
2243 name: "wrong_hash_set_ignored",
2244 original: indoc! {"
2245 aaa
2246 bbb
2247 "},
2248 model_output: indoc! {"
2249 <|set|>0:ff
2250 ZZZ
2251 "},
2252 expected: indoc! {"
2253 aaa
2254 bbb
2255 "},
2256 },
2257 Case {
2258 name: "insert_and_set_combined",
2259 original: indoc! {"
2260 alpha
2261 beta
2262 gamma
2263 "},
2264 model_output: indoc! {"
2265 <|set|>0:06
2266 ALPHA
2267 <|insert|>1:9c
2268 beta_extra
2269 "},
2270 expected: indoc! {"
2271 ALPHA
2272 beta
2273 beta_extra
2274 gamma
2275 "},
2276 },
2277 Case {
2278 name: "no_trailing_newline_preserved",
2279 original: "hello\nworld",
2280 model_output: indoc! {"
2281 <|set|>0:14
2282 HELLO
2283 "},
2284 expected: "HELLO\nworld",
2285 },
2286 Case {
2287 name: "set_range_hash_mismatch_in_end_bound",
2288 original: indoc! {"
2289 one
2290 two
2291 three
2292 "},
2293 model_output: indoc! {"
2294 <|set|>0:42-2:ff
2295 ONE_TWO_THREE
2296 "},
2297 expected: indoc! {"
2298 one
2299 two
2300 three
2301 "},
2302 },
2303 Case {
2304 name: "set_range_start_greater_than_end_ignored",
2305 original: indoc! {"
2306 a
2307 b
2308 c
2309 "},
2310 model_output: indoc! {"
2311 <|set|>2:63-1:62
2312 X
2313 "},
2314 expected: indoc! {"
2315 a
2316 b
2317 c
2318 "},
2319 },
2320 Case {
2321 name: "insert_out_of_bounds_ignored",
2322 original: indoc! {"
2323 x
2324 y
2325 "},
2326 model_output: indoc! {"
2327 <|insert|>99:aa
2328 z
2329 "},
2330 expected: indoc! {"
2331 x
2332 y
2333 "},
2334 },
2335 Case {
2336 name: "set_out_of_bounds_ignored",
2337 original: indoc! {"
2338 x
2339 y
2340 "},
2341 model_output: indoc! {"
2342 <|set|>99:aa
2343 z
2344 "},
2345 expected: indoc! {"
2346 x
2347 y
2348 "},
2349 },
2350 Case {
2351 name: "malformed_set_command_ignored",
2352 original: indoc! {"
2353 alpha
2354 beta
2355 "},
2356 model_output: indoc! {"
2357 <|set|>not-a-line-ref
2358 UPDATED
2359 "},
2360 expected: indoc! {"
2361 alpha
2362 beta
2363 "},
2364 },
2365 Case {
2366 name: "malformed_insert_hash_treated_as_before_first",
2367 original: indoc! {"
2368 alpha
2369 beta
2370 "},
2371 model_output: indoc! {"
2372 <|insert|>1:nothex
2373 preamble
2374 "},
2375 expected: indoc! {"
2376 preamble
2377 alpha
2378 beta
2379 "},
2380 },
2381 Case {
2382 name: "set_then_insert_same_target_orders_insert_after_replacement",
2383 original: indoc! {"
2384 cat
2385 dog
2386 "},
2387 model_output: indoc! {"
2388 <|set|>0:38
2389 CAT
2390 <|insert|>0:38
2391 TAIL
2392 "},
2393 expected: indoc! {"
2394 CAT
2395 TAIL
2396 dog
2397 "},
2398 },
2399 Case {
2400 name: "overlapping_set_ranges_last_wins",
2401 original: indoc! {"
2402 a
2403 b
2404 c
2405 d
2406 "},
2407 model_output: indoc! {"
2408 <|set|>0:61-2:63
2409 FIRST
2410 <|set|>1:62-3:64
2411 SECOND
2412 "},
2413 expected: indoc! {"
2414 FIRST
2415 d
2416 "},
2417 },
2418 Case {
2419 name: "insert_before_first_and_after_line",
2420 original: indoc! {"
2421 a
2422 b
2423 "},
2424 model_output: indoc! {"
2425 <|insert|>
2426 HEAD
2427 <|insert|>0:61
2428 MID
2429 "},
2430 expected: indoc! {"
2431 HEAD
2432 a
2433 MID
2434 b
2435 "},
2436 },
2437 ];
2438
2439 for case in &cases {
2440 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2441 assert_eq!(result, case.expected, "failed case: {}", case.name);
2442 }
2443 }
2444
2445 #[test]
2446 fn test_output_has_edit_commands() {
2447 assert!(hashline::output_has_edit_commands(&format!(
2448 "{}0:ab\nnew",
2449 SET_COMMAND_MARKER
2450 )));
2451 assert!(hashline::output_has_edit_commands(&format!(
2452 "{}0:ab\nnew",
2453 INSERT_COMMAND_MARKER
2454 )));
2455 assert!(hashline::output_has_edit_commands(&format!(
2456 "some text\n{}1:cd\nstuff",
2457 SET_COMMAND_MARKER
2458 )));
2459 assert!(!hashline::output_has_edit_commands("just plain text"));
2460 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2461 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2462 }
2463
2464 // ---- hashline::patch_to_edit_commands round-trip tests ----
2465
        /// Round-trip test: each unified diff is converted to edit commands
        /// with `hashline::patch_to_edit_commands`, the commands are applied to
        /// `old` with `hashline::apply_edit_commands`, and the result must
        /// equal `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label used in failure messages.
                name: &'static str,
                // Text the patch is resolved against.
                old: &'static str,
                // Unified diff (hunk header plus body) to convert.
                patch: &'static str,
                // Expected text after applying the generated commands. When it
                // contains `CURSOR_MARKER`, the marker's byte position is
                // passed as the cursor offset.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    // The hunk header's line numbers do not match `old`; the
                    // expected output still has the insertion applied.
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                Case {
                    // A hunk header with no body: the generated output must
                    // still count as edit commands and leave `old` unchanged.
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2764 }
2765}
2766
2767pub mod seed_coder {
2768 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2769 //!
2770 //! Seed-Coder uses different FIM tokens and order than Qwen:
2771 //! - SPM order: suffix comes FIRST, then prefix, then middle
2772 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2773 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2774 //!
2775 //! All context (related files, edit history) goes in the PREFIX section.
2776 //! The suffix contains only code after the editable region.
2777 //!
2778 //! Example prompt:
2779 //!
2780 //! <[fim-suffix]>
2781 //! code after editable region
2782 //! <[fim-prefix]><filename>related/file.py
2783 //! related file content
2784 //!
2785 //! <filename>edit_history
2786 //! --- a/some_file.py
2787 //! +++ b/some_file.py
2788 //! -old
2789 //! +new
2790 //!
2791 //! <filename>path/to/target_file.py
2792 //! code before editable region
2793 //! <<<<<<< CURRENT
2794 //! code that
2795 //! needs to<|user_cursor|>
2796 //! be rewritten
2797 //! =======
2798 //! <[fim-middle]>
2799 //!
2800 //! Expected output (model generates):
2801 //!
2802 //! updated
2803 //! code with
2804 //! changes applied
2805 //! >>>>>>> UPDATED
2806
2807 use super::*;
2808
    /// FIM token that opens the suffix section; in SPM order it comes first
    /// in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// FIM token that opens the prefix section (related files, edit history
    /// and the cursor excerpt).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// FIM token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// StarCoder-style file marker, written as `<filename>path`.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written immediately before the editable region in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written immediately after the editable region in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the model's expected output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body that `no_edits` emits (followed by `END_MARKER`) for a
    /// patch it considers empty.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2819
2820 pub fn special_tokens() -> &'static [&'static str] {
2821 &[
2822 FIM_SUFFIX,
2823 FIM_PREFIX,
2824 FIM_MIDDLE,
2825 FILE_MARKER,
2826 START_MARKER,
2827 SEPARATOR,
2828 END_MARKER,
2829 CURSOR_MARKER,
2830 ]
2831 }
2832
2833 pub fn write_cursor_excerpt_section(
2834 prompt: &mut String,
2835 path: &Path,
2836 context: &str,
2837 editable_range: &Range<usize>,
2838 cursor_offset: usize,
2839 ) {
2840 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2841 prompt.push_str(§ion);
2842 }
2843
2844 pub fn format_prompt_with_budget(
2845 path: &Path,
2846 context: &str,
2847 editable_range: &Range<usize>,
2848 cursor_offset: usize,
2849 events: &[Arc<Event>],
2850 related_files: &[RelatedFile],
2851 max_tokens: usize,
2852 ) -> String {
2853 let cursor_prefix_section =
2854 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2855 assemble_fim_prompt(
2856 context,
2857 editable_range,
2858 &cursor_prefix_section,
2859 events,
2860 related_files,
2861 max_tokens,
2862 )
2863 }
2864
2865 pub fn assemble_fim_prompt(
2866 context: &str,
2867 editable_range: &Range<usize>,
2868 cursor_prefix_section: &str,
2869 events: &[Arc<Event>],
2870 related_files: &[RelatedFile],
2871 max_tokens: usize,
2872 ) -> String {
2873 let suffix_section = build_suffix_section(context, editable_range);
2874
2875 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2876 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2877 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2878
2879 let edit_history_section = super::format_edit_history_within_budget(
2880 events,
2881 FILE_MARKER,
2882 "edit_history",
2883 budget_after_cursor,
2884 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2885 );
2886 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2887 let budget_after_edit_history =
2888 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2889
2890 let related_files_section = super::format_related_files_within_budget(
2891 related_files,
2892 FILE_MARKER,
2893 "",
2894 budget_after_edit_history,
2895 );
2896
2897 let mut prompt = String::new();
2898 prompt.push_str(&suffix_section);
2899 prompt.push_str(FIM_PREFIX);
2900 prompt.push_str(&related_files_section);
2901 if !related_files_section.is_empty() {
2902 prompt.push('\n');
2903 }
2904 prompt.push_str(&edit_history_section);
2905 if !edit_history_section.is_empty() {
2906 prompt.push('\n');
2907 }
2908 prompt.push_str(cursor_prefix_section);
2909 prompt.push_str(FIM_MIDDLE);
2910
2911 prompt
2912 }
2913
2914 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2915 let mut section = String::new();
2916 section.push_str(FIM_SUFFIX);
2917 section.push_str(&context[editable_range.end..]);
2918 if !section.ends_with('\n') {
2919 section.push('\n');
2920 }
2921 section
2922 }
2923
2924 fn build_cursor_prefix_section(
2925 path: &Path,
2926 context: &str,
2927 editable_range: &Range<usize>,
2928 cursor_offset: usize,
2929 ) -> String {
2930 let mut section = String::new();
2931 let path_str = path.to_string_lossy();
2932 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2933
2934 section.push_str(&context[..editable_range.start]);
2935 section.push_str(START_MARKER);
2936 section.push_str(&context[editable_range.start..cursor_offset]);
2937 section.push_str(CURSOR_MARKER);
2938 section.push_str(&context[cursor_offset..editable_range.end]);
2939 if !section.ends_with('\n') {
2940 section.push('\n');
2941 }
2942 section.push_str(SEPARATOR);
2943 section
2944 }
2945
2946 /// Format patch as containing no changes if it's empty; otherwise return None.
2947 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2948 // Count lines in the patch
2949 let empty_patch = patch.lines().count() <= 3;
2950 if empty_patch {
2951 Some(format!("{NO_EDITS}{END_MARKER}"))
2952 } else {
2953 None
2954 }
2955 }
2956}
2957
2958pub mod v0304_variable_edit {
2959 //! A prompt format with no fixed editable region. The entire context is shown
2960 //! to the model, and it chooses which text to replace by outputting surrounding
2961 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2962 //! text.
2963 //!
2964 //! Example prompt:
2965 //!
2966 //! <|file_sep|>path/to/file.py
2967 //! zero
2968 //! one
2969 //! two
2970 //! three<|user_cursor|>
2971 //! four
2972 //! five
2973 //! <|fim_prefix|>
    //!
2975 //! Expected output (model generates):
2976 //!
2977 //! two
2978 //! <|fim_middle|>
2979 //! THREE
2980 //! <|fim_suffix|>
2981 //! four
2982 //!
2983 //! The output means: find "two\n...\nfour" in the context, and replace
2984 //! everything between "two\n" and "four" with "THREE\n".
2985
2986 use super::*;
2987
2988 pub fn special_tokens() -> &'static [&'static str] {
2989 &[
2990 "<|fim_prefix|>",
2991 "<|fim_suffix|>",
2992 "<|fim_middle|>",
2993 "<|file_sep|>",
2994 CURSOR_MARKER,
2995 ]
2996 }
2997
2998 pub fn write_cursor_excerpt_section(
2999 prompt: &mut String,
3000 path: &Path,
3001 context: &str,
3002 cursor_offset: usize,
3003 ) {
3004 let path_str = path.to_string_lossy();
3005 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3006
3007 prompt.push_str(&context[..cursor_offset]);
3008 prompt.push_str(CURSOR_MARKER);
3009 prompt.push_str(&context[cursor_offset..]);
3010 if !prompt.ends_with('\n') {
3011 prompt.push('\n');
3012 }
3013 prompt.push_str("<|fim_prefix|>\n")
3014 }
3015
3016 /// Apply a variable-edit model output to the original context text.
3017 ///
3018 /// The model output has the form:
3019 ///
3020 /// - prefix context lines
3021 /// - `<|fim_middle|>`
3022 /// - new text
3023 /// - `<|fim_suffix|>`
3024 /// - suffix context lines
3025 ///
3026 /// We locate the prefix/suffix context lines in the original text and replace
3027 /// everything between them with the new text.
3028 pub fn apply_variable_edit(
3029 context: &str,
3030 model_output: &str,
3031 ) -> Result<(Range<usize>, String)> {
3032 let (prefix_context, rest) = model_output
3033 .split_once("<|fim_middle|>\n")
3034 .or_else(|| model_output.split_once("<|fim_middle|>"))
3035 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3036
3037 let (new_text, suffix_context) = rest
3038 .split_once("<|fim_suffix|>\n")
3039 .or_else(|| rest.split_once("<|fim_suffix|>"))
3040 .unwrap_or((rest, ""));
3041
3042 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3043 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3044 } else {
3045 suffix_context
3046 };
3047
3048 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3049 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3050 + prefix_context.len();
3051 let suffix_offset = if suffix_context.is_empty() {
3052 context.len()
3053 } else {
3054 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3055 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3056 + prefix_offset
3057 };
3058
3059 let edit_range = prefix_offset..suffix_offset;
3060 return Ok((edit_range, new_text.to_string()));
3061 }
3062
3063 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3064 if needle.is_empty() {
3065 return Some(0);
3066 }
3067
3068 haystack.match_indices(needle).find_map(|(offset, _)| {
3069 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3070 matched_line_start.then_some(offset)
3071 })
3072 }
3073
3074 /// Convert a unified diff patch into the variable-edit output format.
3075 ///
3076 /// Parses `patch` as a unified diff against `old_text` and produces model
3077 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3078 /// delimiters. The diff is resolved by content matching rather than line
3079 /// numbers.
3080 pub fn patch_to_variable_edit_output(
3081 old_text: &str,
3082 patch: &str,
3083 cursor_offset: Option<usize>,
3084 ) -> Result<String> {
3085 // Parse the unified diff into hunks. Each hunk has an `old_context`
3086 // string (context + deleted lines interleaved in order) and a list of
3087 // edits expressed as byte ranges within that context plus replacement
3088 // text.
3089 let hunks = parse_hunks(patch);
3090 if hunks.is_empty() {
3091 return Ok(String::new());
3092 }
3093
3094 // Apply each hunk by finding its old_context in the text and
3095 // performing the edits. We search forward from where the previous
3096 // hunk ended so that hunks are applied in order.
3097 let mut new_text = old_text.to_string();
3098 let mut search_from: usize = 0;
3099 let mut first_hunk_pos: Option<usize> = None;
3100
3101 for hunk in &hunks {
3102 let context_pos = new_text[search_from..]
3103 .find(&hunk.old_context)
3104 .map(|pos| pos + search_from)
3105 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3106
3107 if first_hunk_pos.is_none() {
3108 first_hunk_pos = Some(context_pos);
3109 }
3110
3111 // Apply edits in reverse order so byte offsets remain valid.
3112 for edit in hunk.edits.iter().rev() {
3113 let abs_start = context_pos + edit.range.start;
3114 let abs_end = context_pos + edit.range.end;
3115 new_text.replace_range(abs_start..abs_end, &edit.text);
3116 }
3117
3118 // Advance past this hunk's region in the (now modified) text.
3119 let new_region_len: usize =
3120 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3121 len + edit.text.len() - (edit.range.end - edit.range.start)
3122 });
3123 search_from = context_pos + new_region_len;
3124 }
3125
3126 // Now we have old_text and new_text. Find the changed line range by
3127 // comparing them.
3128 let old_lines: Vec<&str> = old_text.lines().collect();
3129 let new_lines: Vec<&str> = new_text.lines().collect();
3130
3131 // Find first differing line.
3132 let first_changed_row = old_lines
3133 .iter()
3134 .zip(new_lines.iter())
3135 .position(|(a, b)| a != b)
3136 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3137
3138 // Find last differing line (from the end).
3139 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3140 let common_suffix = old_lines
3141 .iter()
3142 .rev()
3143 .zip(new_lines.iter().rev())
3144 .take(max_suffix)
3145 .take_while(|(a, b)| a == b)
3146 .count();
3147
3148 let old_end = old_lines.len() - common_suffix;
3149 let new_end = new_lines.len() - common_suffix;
3150
3151 if first_changed_row == old_end && first_changed_row == new_end {
3152 return Ok(String::new());
3153 }
3154
3155 // Build the replacement text from new_lines[first_diff..new_end].
3156 let mut merged_new_text = String::new();
3157 for line in &new_lines[first_changed_row..new_end] {
3158 merged_new_text.push_str(line);
3159 merged_new_text.push('\n');
3160 }
3161
3162 // cursor_offset is relative to the first hunk's new content in
3163 // new_text. Translate it to an offset within merged_new_text, which
3164 // only contains lines first_diff..new_end of new_text.
3165 if let Some(hunk_offset) = cursor_offset {
3166 let hunk_start = first_hunk_pos.unwrap_or(0);
3167 let absolute_pos = hunk_start + hunk_offset;
3168
3169 // Byte offset where first_diff starts in new_text.
3170 let merged_start: usize = new_lines[..first_changed_row]
3171 .iter()
3172 .map(|line| line.len() + 1)
3173 .sum();
3174
3175 if absolute_pos >= merged_start {
3176 let relative_offset = absolute_pos - merged_start;
3177 if relative_offset <= merged_new_text.len() {
3178 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3179 }
3180 }
3181 }
3182
3183 // Build output with 2 lines of context above and below.
3184 let context_lines_count = 2;
3185 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3186 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3187
3188 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3189 let pattern = &lines[line_range];
3190 let pattern_len = pattern.len();
3191
3192 let mut count = 0;
3193 for offset in 0..=lines.len() - pattern_len {
3194 if &lines[offset..offset + pattern_len] == pattern {
3195 count += 1;
3196 }
3197 }
3198 count
3199 }
3200
3201 // Expand prefix and suffix until they are unique
3202 while prefix_start > 0 {
3203 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3204 prefix_start -= 1;
3205 } else {
3206 break;
3207 }
3208 }
3209 while suffix_end < old_lines.len() {
3210 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3211 suffix_end += 1;
3212 } else {
3213 break;
3214 }
3215 }
3216
3217 let mut output = String::new();
3218 for line in &old_lines[prefix_start..first_changed_row] {
3219 output.push_str(line);
3220 output.push('\n');
3221 }
3222 output.push_str("<|fim_middle|>\n");
3223 output.push_str(&merged_new_text);
3224 output.push_str("<|fim_suffix|>\n");
3225 for line in &old_lines[old_end..suffix_end] {
3226 output.push_str(line);
3227 output.push('\n');
3228 }
3229
3230 Ok(output)
3231 }
3232
3233 struct ParsedHunk {
3234 old_context: String,
3235 edits: Vec<ParsedEdit>,
3236 }
3237
3238 struct ParsedEdit {
3239 range: Range<usize>,
3240 text: String,
3241 }
3242
3243 /// Parse a unified diff into content-based hunks. Each hunk contains an
3244 /// `old_context` string (context lines + deleted lines, which together
3245 /// form the text that should be found in the original) and a list of edits
3246 /// expressed as byte ranges within that context.
3247 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3248 let mut hunks = Vec::new();
3249 let mut current: Option<ParsedHunk> = None;
3250
3251 for line in patch.lines() {
3252 if line.starts_with("@@") {
3253 if let Some(hunk) = current.take() {
3254 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3255 hunks.push(hunk);
3256 }
3257 }
3258 current = Some(ParsedHunk {
3259 old_context: String::new(),
3260 edits: Vec::new(),
3261 });
3262 } else if line.starts_with("---") || line.starts_with("+++") {
3263 continue;
3264 } else if let Some(hunk) = &mut current {
3265 if let Some(added) = line.strip_prefix('+') {
3266 let pos = hunk.old_context.len();
3267 if let Some(last_edit) = hunk.edits.last_mut() {
3268 if last_edit.range.end == pos {
3269 writeln!(&mut last_edit.text, "{added}").ok();
3270 continue;
3271 }
3272 }
3273 hunk.edits.push(ParsedEdit {
3274 range: pos..pos,
3275 text: format!("{added}\n"),
3276 });
3277 } else if let Some(removed) = line.strip_prefix('-') {
3278 let start = hunk.old_context.len();
3279 writeln!(&mut hunk.old_context, "{removed}").ok();
3280 let end = hunk.old_context.len();
3281 if let Some(last_edit) = hunk.edits.last_mut() {
3282 if last_edit.range.end == start {
3283 last_edit.range.end = end;
3284 continue;
3285 }
3286 }
3287 hunk.edits.push(ParsedEdit {
3288 range: start..end,
3289 text: String::new(),
3290 });
3291 } else {
3292 let ctx = line.strip_prefix(' ').unwrap_or(line);
3293 writeln!(&mut hunk.old_context, "{ctx}").ok();
3294 }
3295 }
3296 }
3297
3298 if let Some(hunk) = current {
3299 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3300 hunks.push(hunk);
3301 }
3302 }
3303
3304 hunks
3305 }
3306
3307 #[cfg(test)]
3308 mod tests {
3309 use super::*;
3310 use indoc::indoc;
3311
        /// Table-driven test for `apply_variable_edit`: each case resolves
        /// `model_output` against `original`, splices the returned replacement
        /// into the returned range, and compares the result to `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label used in the failure message.
                name: &'static str,
                // Context text the edit is resolved against.
                original: &'static str,
                // Model output in the prefix / fim_middle / fim_suffix format.
                model_output: &'static str,
                // `original` with the edit applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    // Empty prefix context: the edit starts at offset 0.
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    // Empty suffix context: the edit runs to the end of the text.
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                // Splice the replacement into a copy of the original text.
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3528
3529 #[test]
3530 fn test_patch_to_variable_edit() {
3531 struct Case {
3532 name: &'static str,
3533 old: &'static str,
3534 patch: &'static str,
3535 cursor_offset: Option<usize>,
3536 expected_variable_edit: &'static str,
3537 expected_after_apply: &'static str,
3538 }
3539
3540 let cases = [
3541 Case {
3542 name: "simple_replacement",
3543 old: indoc! {"
3544 zero
3545 one
3546 two
3547 three
3548 four
3549 five
3550 "},
3551 patch: indoc! {"
3552 @@ -3,3 +3,3 @@
3553 two
3554 -three
3555 +THREE
3556 four
3557 "},
3558 cursor_offset: None,
3559 expected_variable_edit: indoc! {"
3560 one
3561 two
3562 <|fim_middle|>
3563 THREE
3564 <|fim_suffix|>
3565 four
3566 five
3567 "},
3568 expected_after_apply: indoc! {"
3569 zero
3570 one
3571 two
3572 THREE
3573 four
3574 five
3575 "},
3576 },
3577 Case {
3578 name: "insertion",
3579 old: indoc! {"
3580 a
3581 b
3582 c
3583 d
3584 e
3585 "},
3586 patch: indoc! {"
3587 @@ -2,0 +3,1 @@
3588 b
3589 +X
3590 c
3591 "},
3592 cursor_offset: None,
3593 expected_variable_edit: indoc! {"
3594 a
3595 b
3596 <|fim_middle|>
3597 X
3598 <|fim_suffix|>
3599 c
3600 d
3601 "},
3602 expected_after_apply: indoc! {"
3603 a
3604 b
3605 X
3606 c
3607 d
3608 e
3609 "},
3610 },
3611 Case {
3612 name: "deletion",
3613 old: indoc! {"
3614 a
3615 b
3616 c
3617 d
3618 e
3619 "},
3620 patch: indoc! {"
3621 @@ -2,3 +2,2 @@
3622 b
3623 -c
3624 d
3625 "},
3626 cursor_offset: None,
3627 expected_variable_edit: indoc! {"
3628 a
3629 b
3630 <|fim_middle|>
3631 <|fim_suffix|>
3632 d
3633 e
3634 "},
3635 expected_after_apply: indoc! {"
3636 a
3637 b
3638 d
3639 e
3640 "},
3641 },
3642 Case {
3643 name: "edit_near_start",
3644 old: indoc! {"
3645 first
3646 second
3647 third
3648 fourth
3649 "},
3650 patch: indoc! {"
3651 @@ -1,1 +1,1 @@
3652 -first
3653 +FIRST
3654 "},
3655 cursor_offset: None,
3656 expected_variable_edit: indoc! {"
3657 <|fim_middle|>
3658 FIRST
3659 <|fim_suffix|>
3660 second
3661 third
3662 "},
3663 expected_after_apply: indoc! {"
3664 FIRST
3665 second
3666 third
3667 fourth
3668 "},
3669 },
3670 Case {
3671 name: "edit_near_end",
3672 old: indoc! {"
3673 first
3674 second
3675 third
3676 fourth
3677 "},
3678 patch: indoc! {"
3679 @@ -4,1 +4,1 @@
3680 -fourth
3681 +FOURTH
3682 "},
3683 cursor_offset: None,
3684 expected_variable_edit: indoc! {"
3685 second
3686 third
3687 <|fim_middle|>
3688 FOURTH
3689 <|fim_suffix|>
3690 "},
3691 expected_after_apply: indoc! {"
3692 first
3693 second
3694 third
3695 FOURTH
3696 "},
3697 },
3698 Case {
3699 name: "cursor_at_start_of_replacement",
3700 old: indoc! {"
3701 zero
3702 one
3703 two
3704 three
3705 four
3706 five
3707 "},
3708 patch: indoc! {"
3709 @@ -3,3 +3,3 @@
3710 two
3711 -three
3712 +THREE
3713 four
3714 "},
3715 cursor_offset: Some(4),
3716 expected_variable_edit: indoc! {"
3717 one
3718 two
3719 <|fim_middle|>
3720 <|user_cursor|>THREE
3721 <|fim_suffix|>
3722 four
3723 five
3724 "},
3725 expected_after_apply: indoc! {"
3726 zero
3727 one
3728 two
3729 <|user_cursor|>THREE
3730 four
3731 five
3732 "},
3733 },
3734 Case {
3735 name: "cursor_in_middle_of_replacement",
3736 old: indoc! {"
3737 zero
3738 one
3739 two
3740 three
3741 four
3742 five
3743 "},
3744 patch: indoc! {"
3745 @@ -3,3 +3,3 @@
3746 two
3747 -three
3748 +THREE
3749 four
3750 "},
3751 cursor_offset: Some(6),
3752 expected_variable_edit: indoc! {"
3753 one
3754 two
3755 <|fim_middle|>
3756 TH<|user_cursor|>REE
3757 <|fim_suffix|>
3758 four
3759 five
3760 "},
3761 expected_after_apply: indoc! {"
3762 zero
3763 one
3764 two
3765 TH<|user_cursor|>REE
3766 four
3767 five
3768 "},
3769 },
3770 Case {
3771 name: "expands_context_when_two_lines_not_unique_before_and_after",
3772 old: indoc! {"
3773 one
3774 a
3775 b
3776 c
3777 d
3778 two
3779 a
3780 b
3781 c
3782 d
3783 three
3784 a
3785 b
3786 c
3787 d
3788 four
3789 "},
3790 patch: indoc! {"
3791 @@ -4,5 +4,5 @@
3792 two
3793 a
3794 b
3795 -c
3796 +C
3797 d
3798 three
3799 "},
3800 cursor_offset: None,
3801 expected_variable_edit: indoc! {"
3802 two
3803 a
3804 b
3805 <|fim_middle|>
3806 C
3807 <|fim_suffix|>
3808 d
3809 three
3810 "},
3811 expected_after_apply: indoc! {"
3812 one
3813 a
3814 b
3815 c
3816 d
3817 two
3818 a
3819 b
3820 C
3821 d
3822 three
3823 a
3824 b
3825 c
3826 d
3827 four
3828 "},
3829 },
3830 Case {
3831 name: "expands_context_when_two_lines_not_unique_before_and_after",
3832 old: indoc! {"
3833 {
3834 {
3835 one();
3836 }
3837 }
3838 {
3839 {
3840 two();
3841 }
3842 }
3843 {
3844 {
3845 three();
3846 }
3847 }
3848 {
3849 {
3850 four();
3851 }
3852 }
3853 "},
3854 patch: indoc! {"
3855 @@ -4,5 +4,5 @@
3856 {
3857 - two();
3858 + TWO();
3859 }
3860 "},
3861 cursor_offset: None,
3862 expected_variable_edit: indoc! {"
3863 one();
3864 }
3865 }
3866 {
3867 {
3868 <|fim_middle|>
3869 TWO();
3870 <|fim_suffix|>
3871 }
3872 }
3873 {
3874 {
3875 three();
3876 "},
3877 expected_after_apply: indoc! {"
3878 {
3879 {
3880 one();
3881 }
3882 }
3883 {
3884 {
3885 TWO();
3886 }
3887 }
3888 {
3889 {
3890 three();
3891 }
3892 }
3893 {
3894 {
3895 four();
3896 }
3897 }
3898 "},
3899 },
3900 ];
3901
3902 for case in cases {
3903 let output =
3904 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3905 .unwrap_or_else(|error| {
3906 panic!("failed converting patch for {}: {error}", case.name)
3907 });
3908 assert_eq!(
3909 output, case.expected_variable_edit,
3910 "patch->variable_edit mismatch for {}",
3911 case.name
3912 );
3913
3914 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3915 .unwrap_or_else(|error| {
3916 panic!("failed applying variable_edit for {}: {error}", case.name)
3917 });
3918 let mut edited_by_variable_edit = case.old.to_string();
3919 edited_by_variable_edit.replace_range(edit_range, &replacement);
3920 assert_eq!(
3921 edited_by_variable_edit, case.expected_after_apply,
3922 "variable_edit apply mismatch for {}",
3923 case.name
3924 );
3925
3926 let (expected_edit_range, expected_replacement) =
3927 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3928 |error| {
3929 panic!(
3930 "failed applying expected variable_edit for {}: {error}",
3931 case.name
3932 )
3933 },
3934 );
3935 let mut edited_by_expected_variable_edit = case.old.to_string();
3936 edited_by_expected_variable_edit
3937 .replace_range(expected_edit_range, &expected_replacement);
3938 assert_eq!(
3939 edited_by_expected_variable_edit, case.expected_after_apply,
3940 "expected variable_edit apply mismatch for {}",
3941 case.name
3942 );
3943 }
3944 }
3945
3946 #[test]
3947 fn test_write_cursor_excerpt_section() {
3948 let path = Path::new("test.rs");
3949 let context = "fn main() {\n hello();\n}\n";
3950 let cursor_offset = 17;
3951 let mut prompt = String::new();
3952 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3953 assert_eq!(
3954 prompt,
3955 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3956 );
3957 }
3958 }
3959}
3960
3961/// The zeta1 prompt format
3962pub mod zeta1 {
3963 use super::*;
3964 use std::fmt::Write;
3965
3966 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3967 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3968 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3969 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3970
3971 const INSTRUCTION_HEADER: &str = concat!(
3972 "### Instruction:\n",
3973 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3974 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3975 "into account the cursor location.\n\n",
3976 "### User Edits:\n\n"
3977 );
3978 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3979 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3980
3981 /// Formats a complete zeta1 prompt from the input events and excerpt.
3982 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3983 let mut prompt = String::with_capacity(
3984 INSTRUCTION_HEADER.len()
3985 + input_events.len()
3986 + EXCERPT_HEADER.len()
3987 + input_excerpt.len()
3988 + RESPONSE_HEADER.len(),
3989 );
3990 prompt.push_str(INSTRUCTION_HEADER);
3991 prompt.push_str(input_events);
3992 prompt.push_str(EXCERPT_HEADER);
3993 prompt.push_str(input_excerpt);
3994 prompt.push_str(RESPONSE_HEADER);
3995 prompt
3996 }
3997
3998 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3999 /// editable and context byte-offset ranges within `cursor_excerpt`.
4000 pub fn format_zeta1_from_input(
4001 input: &ZetaPromptInput,
4002 editable_range: Range<usize>,
4003 context_range: Range<usize>,
4004 ) -> String {
4005 let events = format_zeta1_events(&input.events);
4006 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4007 format_zeta1_prompt(&events, &excerpt)
4008 }
4009
4010 /// Formats events in zeta1 style (oldest first).
4011 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4012 let mut result = String::new();
4013 for event in
4014 events
4015 .iter()
4016 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4017 &ZetaFormat::V0114180EditableRegion,
4018 )))
4019 {
4020 let event_string = format_zeta1_event(event);
4021 if event_string.is_empty() {
4022 continue;
4023 }
4024 if !result.is_empty() {
4025 result.push_str("\n\n");
4026 }
4027 result.push_str(&event_string);
4028 }
4029 result
4030 }
4031
4032 fn format_zeta1_event(event: &Event) -> String {
4033 match event {
4034 Event::BufferChange {
4035 path,
4036 old_path,
4037 diff,
4038 ..
4039 } => {
4040 let mut prompt = String::new();
4041 if old_path != path {
4042 writeln!(
4043 prompt,
4044 "User renamed {} to {}\n",
4045 old_path.display(),
4046 path.display()
4047 )
4048 .ok();
4049 }
4050 if !diff.is_empty() {
4051 write!(
4052 prompt,
4053 "User edited {}:\n```diff\n{}\n```",
4054 path.display(),
4055 diff
4056 )
4057 .ok();
4058 }
4059 prompt
4060 }
4061 }
4062 }
4063
4064 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4065 /// within `cursor_excerpt`.
4066 fn format_zeta1_excerpt(
4067 input: &ZetaPromptInput,
4068 editable_range: Range<usize>,
4069 context_range: Range<usize>,
4070 ) -> String {
4071 let path_str = input.cursor_path.to_string_lossy();
4072 let excerpt = &*input.cursor_excerpt;
4073 let cursor_offset = input.cursor_offset_in_excerpt;
4074
4075 let mut prompt = String::new();
4076 writeln!(&mut prompt, "```{path_str}").ok();
4077
4078 let starts_at_file_beginning =
4079 input.excerpt_start_row == Some(0) && context_range.start == 0;
4080 if starts_at_file_beginning {
4081 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4082 }
4083
4084 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4085
4086 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4087 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4088 prompt.push_str(CURSOR_MARKER);
4089 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4090 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4091
4092 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4093 write!(prompt, "\n```").ok();
4094
4095 prompt
4096 }
4097
4098 /// Cleans zeta1 model output by extracting content between editable region
4099 /// markers and converting the zeta1 cursor marker to the universal one.
4100 /// Returns `None` if the output doesn't contain the expected markers.
4101 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4102 let content = output.replace(CURSOR_MARKER, "");
4103
4104 let content_start = content
4105 .find(EDITABLE_REGION_START_MARKER)
4106 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4107 .map(|pos| {
4108 if content.as_bytes().get(pos) == Some(&b'\n') {
4109 pos + 1
4110 } else {
4111 pos
4112 }
4113 })
4114 .unwrap_or(0);
4115
4116 let content_end = content
4117 .find(EDITABLE_REGION_END_MARKER)
4118 .map(|pos| {
4119 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4120 pos - 1
4121 } else {
4122 pos
4123 }
4124 })
4125 .unwrap_or(content.len());
4126
4127 if content_start > content_end {
4128 return Some(String::new());
4129 }
4130
4131 let extracted = &content[content_start..content_end];
4132
4133 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4134 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4135 let text_before_cursor = text_before_cursor
4136 .find(EDITABLE_REGION_START_MARKER)
4137 .map(|pos| {
4138 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4139 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4140 after_marker + 1
4141 } else {
4142 after_marker
4143 }
4144 })
4145 .unwrap_or(0);
4146 let offset_in_extracted = zeta1_cursor_pos
4147 .saturating_sub(text_before_cursor)
4148 .min(extracted.len());
4149 offset_in_extracted
4150 });
4151
4152 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4153 if let Some(offset) = cursor_offset {
4154 result.push_str(&extracted[..offset]);
4155 result.push_str(super::CURSOR_MARKER);
4156 result.push_str(&extracted[offset..]);
4157 } else {
4158 result.push_str(extracted);
4159 }
4160
4161 Some(result)
4162 }
4163}
4164
4165#[cfg(test)]
4166mod tests {
4167 use super::*;
4168 use indoc::indoc;
4169
4170 fn make_input(
4171 cursor_excerpt: &str,
4172 editable_range: Range<usize>,
4173 cursor_offset: usize,
4174 events: Vec<Event>,
4175 related_files: Vec<RelatedFile>,
4176 ) -> ZetaPromptInput {
4177 let context_range = 0..cursor_excerpt.len();
4178 ZetaPromptInput {
4179 cursor_path: Path::new("test.rs").into(),
4180 cursor_excerpt: cursor_excerpt.into(),
4181 cursor_offset_in_excerpt: cursor_offset,
4182 excerpt_start_row: None,
4183 events: events.into_iter().map(Arc::new).collect(),
4184 related_files: Some(related_files),
4185 active_buffer_diagnostics: vec![],
4186 excerpt_ranges: ExcerptRanges {
4187 editable_150: editable_range.clone(),
4188 editable_180: editable_range.clone(),
4189 editable_350: editable_range,
4190 editable_150_context_350: context_range.clone(),
4191 editable_180_context_350: context_range.clone(),
4192 editable_350_context_150: context_range,
4193 ..Default::default()
4194 },
4195 syntax_ranges: None,
4196 experiment: None,
4197 in_open_source_repo: false,
4198 can_collect_data: false,
4199 repo_url: None,
4200 }
4201 }
4202
4203 fn make_input_with_context_range(
4204 excerpt: &str,
4205 editable_range: Range<usize>,
4206 context_range: Range<usize>,
4207 cursor_offset: usize,
4208 ) -> ZetaPromptInput {
4209 ZetaPromptInput {
4210 cursor_path: Path::new("test.rs").into(),
4211 cursor_excerpt: excerpt.into(),
4212 cursor_offset_in_excerpt: cursor_offset,
4213 excerpt_start_row: None,
4214 events: vec![],
4215 related_files: Some(vec![]),
4216 active_buffer_diagnostics: vec![],
4217 excerpt_ranges: ExcerptRanges {
4218 editable_150: editable_range.clone(),
4219 editable_180: editable_range.clone(),
4220 editable_350: editable_range,
4221 editable_150_context_350: context_range.clone(),
4222 editable_180_context_350: context_range.clone(),
4223 editable_350_context_150: context_range,
4224 ..Default::default()
4225 },
4226 syntax_ranges: None,
4227 experiment: None,
4228 in_open_source_repo: false,
4229 can_collect_data: false,
4230 repo_url: None,
4231 }
4232 }
4233
4234 fn make_event(path: &str, diff: &str) -> Event {
4235 Event::BufferChange {
4236 path: Path::new(path).into(),
4237 old_path: Path::new(path).into(),
4238 diff: diff.to_string(),
4239 predicted: false,
4240 in_open_source_repo: false,
4241 }
4242 }
4243
4244 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4245 RelatedFile {
4246 path: Path::new(path).into(),
4247 max_row: content.lines().count() as u32,
4248 excerpts: vec![RelatedExcerpt {
4249 row_range: 0..content.lines().count() as u32,
4250 text: content.into(),
4251 order: 0,
4252 }],
4253 in_open_source_repo: false,
4254 }
4255 }
4256
4257 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4258 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4259 }
4260
4261 fn budget_with_margin(requested_tokens: usize) -> usize {
4262 ((requested_tokens as f64) / 0.9).ceil() as usize
4263 }
4264
4265 #[test]
4266 fn test_no_truncation_when_within_budget() {
4267 let input = make_input(
4268 "prefix\neditable\nsuffix",
4269 7..15,
4270 10,
4271 vec![make_event("a.rs", "-old\n+new\n")],
4272 vec![make_related_file("related.rs", "fn helper() {}\n")],
4273 );
4274
4275 assert_eq!(
4276 format_with_budget(&input, 10000).unwrap(),
4277 indoc! {r#"
4278 <|file_sep|>related.rs
4279 fn helper() {}
4280 <|file_sep|>edit history
4281 --- a/a.rs
4282 +++ b/a.rs
4283 -old
4284 +new
4285 <|file_sep|>test.rs
4286 <|fim_prefix|>
4287 prefix
4288 <|fim_middle|>current
4289 edi<|user_cursor|>table
4290 <|fim_suffix|>
4291
4292 suffix
4293 <|fim_middle|>updated
4294 "#}
4295 .to_string()
4296 );
4297 }
4298
4299 #[test]
4300 fn test_truncation_drops_edit_history_when_budget_tight() {
4301 let input = make_input(
4302 "code",
4303 0..4,
4304 2,
4305 vec![make_event("a.rs", "-x\n+y\n")],
4306 vec![
4307 make_related_file("r1.rs", "aaaaaaa\n"),
4308 make_related_file("r2.rs", "bbbbbbb\n"),
4309 ],
4310 );
4311
4312 assert_eq!(
4313 format_with_budget(&input, 10000).unwrap(),
4314 indoc! {r#"
4315 <|file_sep|>r1.rs
4316 aaaaaaa
4317 <|file_sep|>r2.rs
4318 bbbbbbb
4319 <|file_sep|>edit history
4320 --- a/a.rs
4321 +++ b/a.rs
4322 -x
4323 +y
4324 <|file_sep|>test.rs
4325 <|fim_prefix|>
4326 <|fim_middle|>current
4327 co<|user_cursor|>de
4328 <|fim_suffix|>
4329 <|fim_middle|>updated
4330 "#}
4331 .to_string()
4332 );
4333
4334 assert_eq!(
4335 format_with_budget(&input, budget_with_margin(55)),
4336 Some(
4337 indoc! {r#"
4338 <|file_sep|>edit history
4339 --- a/a.rs
4340 +++ b/a.rs
4341 -x
4342 +y
4343 <|file_sep|>test.rs
4344 <|fim_prefix|>
4345 <|fim_middle|>current
4346 co<|user_cursor|>de
4347 <|fim_suffix|>
4348 <|fim_middle|>updated
4349 "#}
4350 .to_string()
4351 )
4352 );
4353 }
4354
4355 #[test]
4356 fn test_truncation_includes_partial_excerpts() {
4357 let input = make_input(
4358 "x",
4359 0..1,
4360 0,
4361 vec![],
4362 vec![RelatedFile {
4363 path: Path::new("big.rs").into(),
4364 max_row: 30,
4365 in_open_source_repo: false,
4366 excerpts: vec![
4367 RelatedExcerpt {
4368 row_range: 0..10,
4369 text: "first excerpt\n".into(),
4370 order: 0,
4371 },
4372 RelatedExcerpt {
4373 row_range: 10..20,
4374 text: "second excerpt\n".into(),
4375 order: 0,
4376 },
4377 RelatedExcerpt {
4378 row_range: 20..30,
4379 text: "third excerpt\n".into(),
4380 order: 0,
4381 },
4382 ],
4383 }],
4384 );
4385
4386 assert_eq!(
4387 format_with_budget(&input, 10000).unwrap(),
4388 indoc! {r#"
4389 <|file_sep|>big.rs
4390 first excerpt
4391 ...
4392 second excerpt
4393 ...
4394 third excerpt
4395 <|file_sep|>test.rs
4396 <|fim_prefix|>
4397 <|fim_middle|>current
4398 <|user_cursor|>x
4399 <|fim_suffix|>
4400 <|fim_middle|>updated
4401 "#}
4402 .to_string()
4403 );
4404
4405 assert_eq!(
4406 format_with_budget(&input, budget_with_margin(50)).unwrap(),
4407 indoc! {r#"
4408 <|file_sep|>big.rs
4409 first excerpt
4410 ...
4411 <|file_sep|>test.rs
4412 <|fim_prefix|>
4413 <|fim_middle|>current
4414 <|user_cursor|>x
4415 <|fim_suffix|>
4416 <|fim_middle|>updated
4417 "#}
4418 .to_string()
4419 );
4420 }
4421
4422 #[test]
4423 fn test_truncation_prioritizes_lower_order_excerpts() {
4424 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4425 // With tight budget, only the lower-order excerpt from file_b should be included.
4426 let input = make_input(
4427 "x",
4428 0..1,
4429 0,
4430 vec![],
4431 vec![
4432 RelatedFile {
4433 path: Path::new("file_a.rs").into(),
4434 max_row: 10,
4435 in_open_source_repo: false,
4436 excerpts: vec![RelatedExcerpt {
4437 row_range: 0..10,
4438 text: "low priority content\n".into(),
4439 order: 5,
4440 }],
4441 },
4442 RelatedFile {
4443 path: Path::new("file_b.rs").into(),
4444 max_row: 10,
4445 in_open_source_repo: false,
4446 excerpts: vec![RelatedExcerpt {
4447 row_range: 0..10,
4448 text: "high priority content\n".into(),
4449 order: 1,
4450 }],
4451 },
4452 ],
4453 );
4454
4455 // With large budget, both files included; rendered in stable lexicographic order.
4456 assert_eq!(
4457 format_with_budget(&input, 10000).unwrap(),
4458 indoc! {r#"
4459 <|file_sep|>file_a.rs
4460 low priority content
4461 <|file_sep|>file_b.rs
4462 high priority content
4463 <|file_sep|>test.rs
4464 <|fim_prefix|>
4465 <|fim_middle|>current
4466 <|user_cursor|>x
4467 <|fim_suffix|>
4468 <|fim_middle|>updated
4469 "#}
4470 .to_string()
4471 );
4472
4473 // With tight budget, only file_b (lower order) fits.
4474 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4475 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4476 // file_a would need another 14 tokens, which doesn't fit.
4477 assert_eq!(
4478 format_with_budget(&input, budget_with_margin(52)).unwrap(),
4479 indoc! {r#"
4480 <|file_sep|>file_b.rs
4481 high priority content
4482 <|file_sep|>test.rs
4483 <|fim_prefix|>
4484 <|fim_middle|>current
4485 <|user_cursor|>x
4486 <|fim_suffix|>
4487 <|fim_middle|>updated
4488 "#}
4489 .to_string()
4490 );
4491 }
4492
4493 #[test]
4494 fn test_truncation_drops_high_order_excerpts_within_file() {
4495 // A single file has excerpts at order 1 and order 3. With a tight budget,
4496 // only the order-1 excerpts are included while the order-3 excerpt is
4497 // dropped — even though they belong to the same file. This also preserves
4498 // the parent invariant: parent outline items have order ≤ their best
4499 // child, so they're always included when any child is.
4500 let input = make_input(
4501 "x",
4502 0..1,
4503 0,
4504 vec![],
4505 vec![RelatedFile {
4506 path: Path::new("mod.rs").into(),
4507 max_row: 30,
4508 in_open_source_repo: false,
4509 excerpts: vec![
4510 RelatedExcerpt {
4511 row_range: 0..5,
4512 text: "mod header\n".into(),
4513 order: 1,
4514 },
4515 RelatedExcerpt {
4516 row_range: 5..15,
4517 text: "important fn\n".into(),
4518 order: 1,
4519 },
4520 RelatedExcerpt {
4521 row_range: 15..30,
4522 text: "less important fn\n".into(),
4523 order: 3,
4524 },
4525 ],
4526 }],
4527 );
4528
4529 // With large budget, all three excerpts included.
4530 assert_eq!(
4531 format_with_budget(&input, 10000).unwrap(),
4532 indoc! {r#"
4533 <|file_sep|>mod.rs
4534 mod header
4535 ...
4536 important fn
4537 ...
4538 less important fn
4539 <|file_sep|>test.rs
4540 <|fim_prefix|>
4541 <|fim_middle|>current
4542 <|user_cursor|>x
4543 <|fim_suffix|>
4544 <|fim_middle|>updated
4545 "#}
4546 .to_string()
4547 );
4548
4549 // With tight budget, only order<=1 excerpts included (header + important fn).
4550 assert_eq!(
4551 format_with_budget(&input, budget_with_margin(55)).unwrap(),
4552 indoc! {r#"
4553 <|file_sep|>mod.rs
4554 mod header
4555 ...
4556 important fn
4557 ...
4558 <|file_sep|>test.rs
4559 <|fim_prefix|>
4560 <|fim_middle|>current
4561 <|user_cursor|>x
4562 <|fim_suffix|>
4563 <|fim_middle|>updated
4564 "#}
4565 .to_string()
4566 );
4567 }
4568
4569 #[test]
4570 fn test_truncation_drops_older_events_first() {
4571 let input = make_input(
4572 "x",
4573 0..1,
4574 0,
4575 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4576 vec![],
4577 );
4578
4579 assert_eq!(
4580 format_with_budget(&input, 10000).unwrap(),
4581 indoc! {r#"
4582 <|file_sep|>edit history
4583 --- a/old.rs
4584 +++ b/old.rs
4585 -1
4586 --- a/new.rs
4587 +++ b/new.rs
4588 -2
4589 <|file_sep|>test.rs
4590 <|fim_prefix|>
4591 <|fim_middle|>current
4592 <|user_cursor|>x
4593 <|fim_suffix|>
4594 <|fim_middle|>updated
4595 "#}
4596 .to_string()
4597 );
4598
4599 assert_eq!(
4600 format_with_budget(&input, 60).unwrap(),
4601 indoc! {r#"
4602 <|file_sep|>edit history
4603 --- a/new.rs
4604 +++ b/new.rs
4605 -2
4606 <|file_sep|>test.rs
4607 <|fim_prefix|>
4608 <|fim_middle|>current
4609 <|user_cursor|>x
4610 <|fim_suffix|>
4611 <|fim_middle|>updated
4612 "#}
4613 .to_string()
4614 );
4615 }
4616
4617 #[test]
4618 fn test_cursor_excerpt_always_included_with_minimal_budget() {
4619 let input = make_input(
4620 "fn main() {}",
4621 0..12,
4622 3,
4623 vec![make_event("a.rs", "-old\n+new\n")],
4624 vec![make_related_file("related.rs", "helper\n")],
4625 );
4626
4627 assert!(format_with_budget(&input, 30).is_none())
4628 }
4629
4630 #[track_caller]
4631 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4632 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4633 .expect("seed coder prompt formatting should succeed")
4634 }
4635
4636 #[track_caller]
4637 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4638 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4639 .expect("seed coder prompt formatting should succeed")
4640 }
4641
4642 #[test]
4643 fn test_seed_coder_basic_format() {
4644 let input = make_input(
4645 "prefix\neditable\nsuffix",
4646 7..15,
4647 10,
4648 vec![make_event("a.rs", "-old\n+new\n")],
4649 vec![make_related_file("related.rs", "fn helper() {}\n")],
4650 );
4651
4652 assert_eq!(
4653 format_seed_coder(&input),
4654 indoc! {r#"
4655 <[fim-suffix]>
4656 suffix
4657 <[fim-prefix]><filename>related.rs
4658 fn helper() {}
4659
4660 <filename>edit_history
4661 --- a/a.rs
4662 +++ b/a.rs
4663 -old
4664 +new
4665
4666 <filename>test.rs
4667 prefix
4668 <<<<<<< CURRENT
4669 edi<|user_cursor|>table
4670 =======
4671 <[fim-middle]>"#}
4672 );
4673 }
4674
4675 #[test]
4676 fn test_v0317_formats_prompt_with_many_related_files() {
4677 let related_files = (0..900)
4678 .map(|index| {
4679 make_related_file(
4680 &format!("related_{index}.rs"),
4681 "fn helper() {\n let value = 1;\n}\n",
4682 )
4683 })
4684 .collect();
4685
4686 let input = make_input(
4687 "code",
4688 0..4,
4689 2,
4690 vec![make_event("a.rs", "-x\n+y\n")],
4691 related_files,
4692 );
4693
4694 let prompt =
4695 format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
4696
4697 assert!(prompt.is_some());
4698 let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
4699 assert!(prompt.contains("test.rs"));
4700 assert!(prompt.contains(CURSOR_MARKER));
4701 }
4702
4703 #[test]
4704 fn test_seed_coder_no_context() {
4705 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4706
4707 assert_eq!(
4708 format_seed_coder(&input),
4709 indoc! {r#"
4710 <[fim-suffix]>
4711 after
4712 <[fim-prefix]><filename>test.rs
4713 before
4714 <<<<<<< CURRENT
4715 mid<|user_cursor|>dle
4716 =======
4717 <[fim-middle]>"#}
4718 );
4719 }
4720
4721 #[test]
4722 fn test_seed_coder_truncation_drops_context() {
4723 let input = make_input(
4724 "code",
4725 0..4,
4726 2,
4727 vec![make_event("a.rs", "-x\n+y\n")],
4728 vec![make_related_file("r1.rs", "content\n")],
4729 );
4730
4731 // With large budget, everything is included
4732 assert_eq!(
4733 format_seed_coder(&input),
4734 indoc! {r#"
4735 <[fim-suffix]>
4736 <[fim-prefix]><filename>r1.rs
4737 content
4738
4739 <filename>edit_history
4740 --- a/a.rs
4741 +++ b/a.rs
4742 -x
4743 +y
4744
4745 <filename>test.rs
4746 <<<<<<< CURRENT
4747 co<|user_cursor|>de
4748 =======
4749 <[fim-middle]>"#}
4750 );
4751
4752 assert_eq!(
4753 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4754 None
4755 );
4756
4757 assert_eq!(
4758 format_seed_coder_with_budget(&input, 40),
4759 indoc! {r#"
4760 <[fim-suffix]>
4761 <[fim-prefix]><filename>test.rs
4762 <<<<<<< CURRENT
4763 co<|user_cursor|>de
4764 =======
4765 <[fim-middle]>"#
4766 }
4767 )
4768 }
4769
4770 #[test]
4771 fn test_seed_coder_truncation_prioritizes_lower_order() {
4772 let input = make_input(
4773 "code",
4774 0..4,
4775 2,
4776 vec![],
4777 vec![
4778 RelatedFile {
4779 path: Path::new("low_prio.rs").into(),
4780 max_row: 5,
4781 in_open_source_repo: false,
4782 excerpts: vec![RelatedExcerpt {
4783 row_range: 0..5,
4784 text: "low prio\n".into(),
4785 order: 10,
4786 }],
4787 },
4788 RelatedFile {
4789 path: Path::new("high_prio.rs").into(),
4790 max_row: 5,
4791 in_open_source_repo: false,
4792 excerpts: vec![RelatedExcerpt {
4793 row_range: 0..5,
4794 text: "high prio\n".into(),
4795 order: 1,
4796 }],
4797 },
4798 ],
4799 );
4800
4801 // With large budget, both included; rendered in stable lexicographic order.
4802 assert_eq!(
4803 format_seed_coder(&input),
4804 indoc! {r#"
4805 <[fim-suffix]>
4806 <[fim-prefix]><filename>low_prio.rs
4807 low prio
4808 <filename>high_prio.rs
4809 high prio
4810
4811 <filename>test.rs
4812 <<<<<<< CURRENT
4813 co<|user_cursor|>de
4814 =======
4815 <[fim-middle]>"#}
4816 );
4817
4818 // With tight budget under the generic heuristic, context is dropped but the
4819 // minimal cursor section still fits.
4820 assert_eq!(
4821 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4822 Some(
4823 indoc! {r#"
4824 <[fim-suffix]>
4825 <[fim-prefix]><filename>test.rs
4826 <<<<<<< CURRENT
4827 co<|user_cursor|>de
4828 =======
4829 <[fim-middle]>"#}
4830 .to_string()
4831 )
4832 );
4833 }
4834
4835 #[test]
4836 fn test_format_zeta1_from_input_basic() {
4837 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
4838 let input = ZetaPromptInput {
4839 cursor_path: Path::new("src/main.rs").into(),
4840 cursor_excerpt: excerpt.into(),
4841 cursor_offset_in_excerpt: 30,
4842 excerpt_start_row: Some(0),
4843 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4844 related_files: Some(vec![]),
4845 active_buffer_diagnostics: vec![],
4846 excerpt_ranges: ExcerptRanges {
4847 editable_150: 15..41,
4848 editable_180: 15..41,
4849 editable_350: 15..41,
4850 editable_150_context_350: 0..excerpt.len(),
4851 editable_180_context_350: 0..excerpt.len(),
4852 editable_350_context_150: 0..excerpt.len(),
4853 ..Default::default()
4854 },
4855 syntax_ranges: None,
4856 experiment: None,
4857 in_open_source_repo: false,
4858 can_collect_data: false,
4859 repo_url: None,
4860 };
4861
4862 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4863
4864 assert_eq!(
4865 prompt,
4866 concat!(
4867 "### Instruction:\n",
4868 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4869 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4870 "into account the cursor location.\n",
4871 "\n",
4872 "### User Edits:\n",
4873 "\n",
4874 "User edited other.rs:\n",
4875 "```diff\n",
4876 "-old\n",
4877 "+new\n",
4878 "\n",
4879 "```\n",
4880 "\n",
4881 "### User Excerpt:\n",
4882 "\n",
4883 "```src/main.rs\n",
4884 "<|start_of_file|>\n",
4885 "fn before() {}\n",
4886 "<|editable_region_start|>\n",
4887 "fn foo() {\n",
4888 " <|user_cursor_is_here|>let x = 1;\n",
4889 "\n",
4890 "<|editable_region_end|>}\n",
4891 "fn after() {}\n",
4892 "\n",
4893 "```\n",
4894 "\n",
4895 "### Response:\n",
4896 ),
4897 );
4898 }
4899
4900 #[test]
4901 fn test_format_zeta1_from_input_no_start_of_file() {
4902 let excerpt = "fn foo() {\n let x = 1;\n}\n";
4903 let input = ZetaPromptInput {
4904 cursor_path: Path::new("src/main.rs").into(),
4905 cursor_excerpt: excerpt.into(),
4906 cursor_offset_in_excerpt: 15,
4907 excerpt_start_row: Some(10),
4908 events: vec![],
4909 related_files: Some(vec![]),
4910 active_buffer_diagnostics: vec![],
4911 excerpt_ranges: ExcerptRanges {
4912 editable_150: 0..28,
4913 editable_180: 0..28,
4914 editable_350: 0..28,
4915 editable_150_context_350: 0..28,
4916 editable_180_context_350: 0..28,
4917 editable_350_context_150: 0..28,
4918 ..Default::default()
4919 },
4920 syntax_ranges: None,
4921 experiment: None,
4922 in_open_source_repo: false,
4923 can_collect_data: false,
4924 repo_url: None,
4925 };
4926
4927 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4928
4929 assert_eq!(
4930 prompt,
4931 concat!(
4932 "### Instruction:\n",
4933 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4934 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4935 "into account the cursor location.\n",
4936 "\n",
4937 "### User Edits:\n",
4938 "\n",
4939 "\n",
4940 "\n",
4941 "### User Excerpt:\n",
4942 "\n",
4943 "```src/main.rs\n",
4944 "<|editable_region_start|>\n",
4945 "fn foo() {\n",
4946 " <|user_cursor_is_here|>let x = 1;\n",
4947 "}\n",
4948 "\n",
4949 "<|editable_region_end|>\n",
4950 "```\n",
4951 "\n",
4952 "### Response:\n",
4953 ),
4954 );
4955 }
4956
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Formats a zeta1 prompt where the editable region is a strict sub-range of
    // the context: the `// prefix` line sits above the editable-region marker and
    // the `// suffix` line sits below it.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";

    // Offsets are located in the excerpt instead of being hard-coded so they can
    // never drift away from the literal. For this excerpt they evaluate to
    // 10..37 for the editable range and 25 for the cursor.
    let editable_start = excerpt.find("fn foo").unwrap();
    // The editable region stops right after `}`, before its trailing newline.
    let editable_end = excerpt.find("\n// suffix").unwrap();
    let editable_range = editable_start..editable_end;
    // The cursor sits immediately before `let`, after the indentation.
    let cursor_offset = excerpt.find("let x").unwrap();
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: cursor_offset,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5019
#[test]
fn test_max_event_count() {
    // Checks how the final argument of `format_edit_history_within_budget`
    // (the event-count limit) affects which events are rendered. The budget
    // argument is `usize::MAX` throughout, so only the count limit varies.

    // Builds an event for `event-{index}.rs` whose diff swaps `old-{index}`
    // for `new-{index}`.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Renders the edit history for the three events above with the given limit.
    let edit_history_with_limit = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // A limit above the number of events renders all three, in order.
    let edit_history_section = edit_history_with_limit(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // A limit of two keeps only the last two events (event-1 and event-2).
    let edit_history_section = edit_history_with_limit(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // A limit of zero produces no section at all, not even the header.
    let edit_history_section = edit_history_with_limit(0);
    assert_eq!(&edit_history_section, "");
}
5100
#[test]
fn test_clean_zeta1_model_output_basic() {
    // Model output wrapped in editable-region markers: the expected result is the
    // text between the markers, without the marker lines and without a trailing
    // newline.
    //
    // The input is spelled out line by line so the leading whitespace of the
    // `println!` line is explicit and identical to the expected output below.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        " println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
}
5114
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // Same shape as the basic case, but the model output contains the zeta1
    // cursor marker `<|user_cursor_is_here|>`; the expected result carries
    // `<|user_cursor|>` at the same position instead.
    //
    // The input is spelled out line by line so the whitespace in front of the
    // cursor marker is explicit and identical to the expected output below.
    let output = concat!(
        "<|editable_region_start|>\n",
        "fn main() {\n",
        " <|user_cursor_is_here|>println!(\"hello\");\n",
        "}\n",
        "<|editable_region_end|>\n",
    );

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
    );
}
5131
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    // Output without any editable-region markers is expected back unchanged,
    // trailing newline included.
    let raw_output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(raw_output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
5138
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    // Start and end markers with nothing between them clean to an empty string.
    let markers_only = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(markers_only).unwrap();
    assert_eq!(cleaned, "");
}
5145
5146 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5147 let mut result = excerpt.to_string();
5148 result.replace_range(
5149 parsed_output.range_in_excerpt.clone(),
5150 &parsed_output.new_editable_region,
5151 );
5152 result
5153 }
5154
#[test]
fn test_parse_zeta2_model_output() {
    // The context range is a strict sub-range of the excerpt and the editable
    // range is the single `editable old` line inside it. Applying the parsed
    // output must change only that line.
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();

    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        context_range,
        editable_start,
    );

    let model_output = "editable new\n>>>>>>> UPDATED\n";
    let parsed =
        parse_zeta2_model_output(model_output, ZetaFormat::V0131GitMergeMarkersPrefix, &input)
            .unwrap();

    let expected = "before ctx\nctx start\neditable new\nctx end\nafter ctx\n";
    assert_eq!(apply_edit(excerpt, &parsed), expected);
}
5181
#[test]
fn test_parse_zeta2_model_output_identity() {
    // When the model echoes the editable region (`bbb\nccc\n`) verbatim,
    // applying the parsed output must reproduce the excerpt unchanged.
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    let editable_range = excerpt.find("bbb").unwrap()..excerpt.find("ddd").unwrap();
    let cursor_offset = editable_range.start;

    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        cursor_offset,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5200
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    // Output that ends with the `>>>>>>> UPDATED` marker and output that omits
    // it must lead to the same edited text.
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input =
        make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    let applied_with_marker = apply_edit(excerpt, &with_marker);
    let applied_without_marker = apply_edit(excerpt, &without_marker);

    assert_eq!(applied_with_marker, applied_without_marker);
    assert_eq!(applied_with_marker, "new content\n");
}
5214}