1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker text that the cursor-excerpt writers insert into the prompt at the
/// cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that [`format_zeta_prompt`] passes to
/// [`format_prompt_with_budget_for_format`].
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for a text of `bytes` bytes: three bytes per token,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Input from which a prompt is built for one cursor position.
///
/// All byte offsets and byte ranges stored here are relative to
/// `cursor_excerpt`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file that contains the cursor.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor; the prompt's context and editable region are
    /// slices of this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows computed within `cursor_excerpt` before they
    /// are compared with related-file excerpt rows. When `None`, related files
    /// are not filtered against the cursor context.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered as the edit-history section; when the list has to
    /// be truncated, the trailing entries are the ones kept.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from files to include in the prompt; `None` is treated like an
    /// empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer.
    // NOTE(review): not read by the formatting code visible in this part of
    // the file — confirm where it is consumed.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
58
/// Identifies a prompt/output format version.
///
/// The variant name doubles as the format's textual name: `IntoStaticStr`
/// supplies the string used by the `Display` impl and by `ZetaFormat::parse`,
/// and `EnumIter` lets `parse` and `options_as_string` enumerate all variants.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, which would otherwise trip
// the `non_camel_case_types` lint.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format returned by `ZetaFormat::default()`.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
    V0316SeedMultiRegions,
}
88
89impl std::fmt::Display for ZetaFormat {
90 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91 write!(f, "{}", <&'static str>::from(self))
92 }
93}
94
95impl ZetaFormat {
96 pub fn parse(format_name: &str) -> Result<Self> {
97 let mut results = ZetaFormat::iter().filter(|version| {
98 <&'static str>::from(version)
99 .to_lowercase()
100 .contains(&format_name.to_lowercase())
101 });
102 let Some(result) = results.next() else {
103 anyhow::bail!(
104 "`{format_name}` did not match any of:\n{}",
105 Self::options_as_string()
106 );
107 };
108 if results.next().is_some() {
109 anyhow::bail!(
110 "`{format_name}` matched more than one of:\n{}",
111 Self::options_as_string()
112 );
113 }
114 Ok(result)
115 }
116
117 pub fn options_as_string() -> String {
118 ZetaFormat::iter()
119 .map(|format| format!("- {}\n", <&'static str>::from(format)))
120 .collect::<Vec<_>>()
121 .concat()
122 }
123}
124
/// An entry of the edit history.
///
/// Serialized with an `event` tag field (`#[serde(tag = "event")]`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described by a diff.
    BufferChange {
        /// Path written on the `+++ b` line by [`write_event`].
        path: Arc<Path>,
        /// Path written on the `--- a` line by [`write_event`].
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When `true`, [`write_event`] prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Not rendered into the prompt; exposed via
        /// [`Event::in_open_source_repo`].
        in_open_source_repo: bool,
    },
}
136
137impl Event {
138 pub fn in_open_source_repo(&self) -> bool {
139 match self {
140 Event::BufferChange {
141 in_open_source_repo,
142 ..
143 } => *in_open_source_repo,
144 }
145 }
146}
147
148pub fn write_event(prompt: &mut String, event: &Event) {
149 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
150 for component in path.components() {
151 prompt.push('/');
152 write!(prompt, "{}", component.as_os_str().display()).ok();
153 }
154 }
155 match event {
156 Event::BufferChange {
157 path,
158 old_path,
159 diff,
160 predicted,
161 in_open_source_repo: _,
162 } => {
163 if *predicted {
164 prompt.push_str("// User accepted prediction:\n");
165 }
166 prompt.push_str("--- a");
167 write_path_as_unix_str(prompt, old_path.as_ref());
168 prompt.push_str("\n+++ b");
169 write_path_as_unix_str(prompt, path.as_ref());
170 prompt.push('\n');
171 prompt.push_str(diff);
172 }
173 }
174}
175
/// A diagnostic attached to the active buffer, together with a snippet of the
/// text it refers to.
// NOTE(review): no code visible in this part of the file reads these fields;
// the per-field notes below are inferred from names and types — confirm.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Severity code, if any (numeric scheme not visible here — TODO confirm).
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Snippet of buffer text associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet` — verify against the producer.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the range of the diagnostic within `snippet` — verify against the producer.
    pub diagnostic_range_in_snippet: Range<usize>,
}
184
/// A file contributing excerpts to the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path rendered in the file's header line.
    pub path: Arc<Path>,
    /// Row bound of the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file; a file left with no excerpts is dropped by
    /// [`filter_redundant_excerpts`].
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
193
/// One excerpt of a [`RelatedFile`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt within its file.
    pub row_range: Range<u32>,
    /// Excerpt text; a trailing newline is added on rendering if missing.
    pub text: Arc<str>,
    /// Priority used when the budget is limited: excerpts with a lower
    /// `order` are selected first by [`format_related_files_within_budget`].
    #[serde(default)]
    pub order: usize,
}
201
202pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
203 special_tokens_for_format(format)
204 .iter()
205 .any(|token| input.cursor_excerpt.contains(token))
206}
207
/// Formats `input` as a prompt in the given `format`, using
/// [`MAX_PROMPT_TOKENS`] as the token budget.
///
/// Returns `None` whenever [`format_prompt_with_budget_for_format`] does,
/// i.e. when the estimated size of the assembled prompt exceeds the budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
211
212pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
213 match format {
214 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
215 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
216 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
217 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
218 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
219 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
220 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
221 ZetaFormat::v0226Hashline => hashline::special_tokens(),
222 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
223 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
224 ZetaFormat::V0306SeedMultiRegions => {
225 static TOKENS: &[&str] = &[
226 seed_coder::FIM_SUFFIX,
227 seed_coder::FIM_PREFIX,
228 seed_coder::FIM_MIDDLE,
229 seed_coder::FILE_MARKER,
230 seed_coder::START_MARKER,
231 seed_coder::SEPARATOR,
232 seed_coder::END_MARKER,
233 CURSOR_MARKER,
234 multi_region::MARKER_TAG_PREFIX,
235 ];
236 TOKENS
237 }
238 ZetaFormat::V0316SeedMultiRegions => {
239 static TOKENS: &[&str] = &[
240 seed_coder::FIM_SUFFIX,
241 seed_coder::FIM_PREFIX,
242 seed_coder::FIM_MIDDLE,
243 seed_coder::FILE_MARKER,
244 CURSOR_MARKER,
245 multi_region::MARKER_TAG_PREFIX,
246 ];
247 TOKENS
248 }
249 }
250}
251
252/// Returns the (editable_token_limit, context_token_limit) for a given format.
253pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
254 match format {
255 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
256 ZetaFormat::V0114180EditableRegion => (180, 350),
257 ZetaFormat::V0120GitMergeMarkers
258 | ZetaFormat::V0131GitMergeMarkersPrefix
259 | ZetaFormat::V0211Prefill
260 | ZetaFormat::V0211SeedCoder
261 | ZetaFormat::v0226Hashline
262 | ZetaFormat::V0306SeedMultiRegions
263 | ZetaFormat::V0316SeedMultiRegions
264 | ZetaFormat::V0304SeedNoEdits => (350, 150),
265 ZetaFormat::V0304VariableEdit => (1024, 0),
266 }
267}
268
269pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
270 match format {
271 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
272 ZetaFormat::V0112MiddleAtEnd
273 | ZetaFormat::V0113Ordered
274 | ZetaFormat::V0114180EditableRegion
275 | ZetaFormat::V0120GitMergeMarkers
276 | ZetaFormat::V0131GitMergeMarkersPrefix
277 | ZetaFormat::V0211Prefill
278 | ZetaFormat::V0211SeedCoder
279 | ZetaFormat::V0304VariableEdit
280 | ZetaFormat::V0306SeedMultiRegions
281 | ZetaFormat::V0316SeedMultiRegions
282 | ZetaFormat::V0304SeedNoEdits => &[],
283 }
284}
285
286pub fn excerpt_ranges_for_format(
287 format: ZetaFormat,
288 ranges: &ExcerptRanges,
289) -> (Range<usize>, Range<usize>) {
290 match format {
291 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
292 ranges.editable_150.clone(),
293 ranges.editable_150_context_350.clone(),
294 ),
295 ZetaFormat::V0114180EditableRegion => (
296 ranges.editable_180.clone(),
297 ranges.editable_180_context_350.clone(),
298 ),
299 ZetaFormat::V0120GitMergeMarkers
300 | ZetaFormat::V0131GitMergeMarkersPrefix
301 | ZetaFormat::V0211Prefill
302 | ZetaFormat::V0211SeedCoder
303 | ZetaFormat::v0226Hashline
304 | ZetaFormat::V0304SeedNoEdits
305 | ZetaFormat::V0306SeedMultiRegions
306 | ZetaFormat::V0316SeedMultiRegions => (
307 ranges.editable_350.clone(),
308 ranges.editable_350_context_150.clone(),
309 ),
310 ZetaFormat::V0304VariableEdit => {
311 let context = ranges
312 .editable_350_context_1024
313 .clone()
314 .or(ranges.editable_350_context_512.clone())
315 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
316 (context.clone(), context)
317 }
318 }
319}
320
/// Appends the cursor-excerpt section for `format` to `prompt`.
///
/// This is a dispatch table: each format forwards to the writer of the module
/// that defines its layout. Several formats share a writer.
///
/// * `context` — the context text the section is built from.
/// * `editable_range` — byte range of the editable region within `context`.
/// * `cursor_offset` — byte offset of the cursor within `context`.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180 reuses the V0113 layout.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The prefill format reuses the V0131 layout.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer does not take an editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
        // The multi-region builders return the section as a `String`, which is
        // appended here.
        ZetaFormat::V0306SeedMultiRegions => {
            prompt.push_str(&build_v0306_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
        ZetaFormat::V0316SeedMultiRegions => {
            prompt.push_str(&build_v0316_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
    }
}
399
400fn build_v0306_cursor_prefix(
401 path: &Path,
402 context: &str,
403 editable_range: &Range<usize>,
404 cursor_offset: usize,
405) -> String {
406 let mut section = String::new();
407 let path_str = path.to_string_lossy();
408 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
409
410 section.push_str(&context[..editable_range.start]);
411 section.push_str(seed_coder::START_MARKER);
412
413 let editable_text = &context[editable_range.clone()];
414 let cursor_in_editable = cursor_offset - editable_range.start;
415 multi_region::write_editable_with_markers(
416 &mut section,
417 editable_text,
418 cursor_in_editable,
419 CURSOR_MARKER,
420 );
421
422 if !section.ends_with('\n') {
423 section.push('\n');
424 }
425 section.push_str(seed_coder::SEPARATOR);
426 section
427}
428
429fn build_v0316_cursor_prefix(
430 path: &Path,
431 context: &str,
432 editable_range: &Range<usize>,
433 cursor_offset: usize,
434) -> String {
435 let mut section = String::new();
436 let path_str = path.to_string_lossy();
437 write!(
438 section,
439 "{}{}
440",
441 seed_coder::FILE_MARKER,
442 path_str
443 )
444 .ok();
445
446 section.push_str(&context[..editable_range.start]);
447
448 let editable_text = &context[editable_range.clone()];
449 let cursor_in_editable = cursor_offset - editable_range.start;
450 multi_region::write_editable_with_markers(
451 &mut section,
452 editable_text,
453 cursor_in_editable,
454 CURSOR_MARKER,
455 );
456
457 if !section.ends_with('\n') {
458 section.push('\n');
459 }
460 section
461}
462
/// Converts a byte range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// is exclusive: it adds the newlines inside the range, plus one more when
/// the text up to `range.end` does not end with a newline (the range stops
/// mid-line).
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.matches('\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let stops_mid_line = !text[..range.end].ends_with('\n');
    let rows_spanned = newline_count(&text[range]);

    let mut end_row = first_row + rows_spanned;
    if stops_mid_line {
        end_row += 1;
    }
    first_row..end_row
}
471
472pub fn format_prompt_with_budget_for_format(
473 input: &ZetaPromptInput,
474 format: ZetaFormat,
475 max_tokens: usize,
476) -> Option<String> {
477 let (context, editable_range, context_range, cursor_offset) =
478 resolve_cursor_region(input, format);
479 let path = &*input.cursor_path;
480
481 let empty_files = Vec::new();
482 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
483 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
484 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
485 let row_range = relative_row_range.start + cursor_excerpt_start_row
486 ..relative_row_range.end + cursor_excerpt_start_row;
487 &filter_redundant_excerpts(
488 input_related_files.to_vec(),
489 input.cursor_path.as_ref(),
490 row_range,
491 )
492 } else {
493 input_related_files
494 };
495
496 let prompt = match format {
497 ZetaFormat::V0211SeedCoder
498 | ZetaFormat::V0304SeedNoEdits
499 | ZetaFormat::V0306SeedMultiRegions
500 | ZetaFormat::V0316SeedMultiRegions => {
501 let mut cursor_section = String::new();
502 write_cursor_excerpt_section_for_format(
503 format,
504 &mut cursor_section,
505 path,
506 context,
507 &editable_range,
508 cursor_offset,
509 );
510
511 seed_coder::assemble_fim_prompt(
512 context,
513 &editable_range,
514 &cursor_section,
515 &input.events,
516 related_files,
517 max_tokens,
518 )
519 }
520 _ => {
521 let mut cursor_section = String::new();
522 write_cursor_excerpt_section_for_format(
523 format,
524 &mut cursor_section,
525 path,
526 context,
527 &editable_range,
528 cursor_offset,
529 );
530
531 let max_bytes = max_tokens * 3;
532 let content_budget_tokens =
533 estimate_tokens(max_bytes.saturating_sub(cursor_section.len()));
534
535 let edit_history_section = format_edit_history_within_budget(
536 &input.events,
537 "<|file_sep|>",
538 "edit history",
539 content_budget_tokens,
540 max_edit_event_count_for_format(&format),
541 );
542 let remaining_budget_tokens = estimate_tokens(
543 max_bytes
544 .saturating_sub(cursor_section.len())
545 .saturating_sub(edit_history_section.len()),
546 );
547
548 let related_files_section = format_related_files_within_budget(
549 &related_files,
550 "<|file_sep|>",
551 "",
552 remaining_budget_tokens,
553 );
554
555 let mut prompt = String::new();
556 prompt.push_str(&related_files_section);
557 prompt.push_str(&edit_history_section);
558 prompt.push_str(&cursor_section);
559 prompt
560 }
561 };
562 let prompt_tokens = estimate_tokens(prompt.len());
563 if prompt_tokens > max_tokens {
564 return None;
565 }
566 return Some(prompt);
567}
568
569pub fn filter_redundant_excerpts(
570 mut related_files: Vec<RelatedFile>,
571 cursor_path: &Path,
572 cursor_row_range: Range<u32>,
573) -> Vec<RelatedFile> {
574 for file in &mut related_files {
575 if file.path.as_ref() == cursor_path {
576 file.excerpts.retain(|excerpt| {
577 excerpt.row_range.start < cursor_row_range.start
578 || excerpt.row_range.end > cursor_row_range.end
579 });
580 }
581 }
582 related_files.retain(|file| !file.excerpts.is_empty());
583 related_files
584}
585
586pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
587 match format {
588 ZetaFormat::V0112MiddleAtEnd
589 | ZetaFormat::V0113Ordered
590 | ZetaFormat::V0114180EditableRegion
591 | ZetaFormat::V0120GitMergeMarkers
592 | ZetaFormat::V0131GitMergeMarkersPrefix
593 | ZetaFormat::V0211Prefill
594 | ZetaFormat::V0211SeedCoder
595 | ZetaFormat::v0226Hashline
596 | ZetaFormat::V0304SeedNoEdits
597 | ZetaFormat::V0304VariableEdit
598 | ZetaFormat::V0306SeedMultiRegions
599 | ZetaFormat::V0316SeedMultiRegions => 6,
600 }
601}
602
603pub fn get_prefill_for_format(
604 format: ZetaFormat,
605 context: &str,
606 editable_range: &Range<usize>,
607) -> String {
608 match format {
609 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
610 ZetaFormat::V0112MiddleAtEnd
611 | ZetaFormat::V0113Ordered
612 | ZetaFormat::V0114180EditableRegion
613 | ZetaFormat::V0120GitMergeMarkers
614 | ZetaFormat::V0131GitMergeMarkersPrefix
615 | ZetaFormat::V0211SeedCoder
616 | ZetaFormat::v0226Hashline
617 | ZetaFormat::V0304VariableEdit => String::new(),
618 ZetaFormat::V0304SeedNoEdits
619 | ZetaFormat::V0306SeedMultiRegions
620 | ZetaFormat::V0316SeedMultiRegions => String::new(),
621 }
622}
623
624pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
625 match format {
626 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
627 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
628 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
629 ZetaFormat::V0211SeedCoder
630 | ZetaFormat::V0304SeedNoEdits
631 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
632 ZetaFormat::V0112MiddleAtEnd
633 | ZetaFormat::V0113Ordered
634 | ZetaFormat::V0114180EditableRegion
635 | ZetaFormat::v0226Hashline
636 | ZetaFormat::V0304VariableEdit
637 | ZetaFormat::V0316SeedMultiRegions => None,
638 }
639}
640
641pub fn encode_patch_as_output_for_format(
642 format: ZetaFormat,
643 old_editable_region: &str,
644 patch: &str,
645 cursor_offset: Option<usize>,
646) -> Result<Option<String>> {
647 match format {
648 ZetaFormat::v0226Hashline => {
649 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
650 }
651 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
652 old_editable_region,
653 patch,
654 cursor_offset,
655 )
656 .map(Some),
657 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
658 Ok(seed_coder::no_edits(patch))
659 }
660 // V0316 teacher prompt encoding is not yet implemented.
661 ZetaFormat::V0316SeedMultiRegions => Ok(None),
662 _ => Ok(None),
663 }
664}
665
/// Result of [`parse_zeta2_model_output`]: a replacement text and the range of
/// the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
672
673/// Parse model output for the given zeta format
674pub fn parse_zeta2_model_output(
675 output: &str,
676 format: ZetaFormat,
677 prompt_inputs: &ZetaPromptInput,
678) -> Result<ParsedOutput> {
679 let output = match output_end_marker_for_format(format) {
680 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
681 None => output,
682 };
683
684 let (context, editable_range_in_context, context_range, _) =
685 resolve_cursor_region(prompt_inputs, format);
686 let context_start = context_range.start;
687 let old_editable_region = &context[editable_range_in_context.clone()];
688
689 let (range_in_context, output) = match format {
690 ZetaFormat::v0226Hashline => (
691 editable_range_in_context,
692 if hashline::output_has_edit_commands(output) {
693 hashline::apply_edit_commands(old_editable_region, output)
694 } else {
695 output.to_string()
696 },
697 ),
698 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
699 ZetaFormat::V0304SeedNoEdits => (
700 editable_range_in_context,
701 if output.starts_with(seed_coder::NO_EDITS) {
702 old_editable_region.to_string()
703 } else {
704 output.to_string()
705 },
706 ),
707 ZetaFormat::V0306SeedMultiRegions => (
708 editable_range_in_context,
709 if output.starts_with(seed_coder::NO_EDITS) {
710 old_editable_region.to_string()
711 } else {
712 multi_region::apply_marker_span(old_editable_region, output)?
713 },
714 ),
715 ZetaFormat::V0316SeedMultiRegions => (
716 editable_range_in_context,
717 if multi_region::is_repeated_final_marker(output) {
718 old_editable_region.to_string()
719 } else {
720 multi_region::apply_marker_span(old_editable_region, output)?
721 },
722 ),
723 _ => (editable_range_in_context, output.to_string()),
724 };
725
726 let range_in_excerpt =
727 range_in_context.start + context_start..range_in_context.end + context_start;
728
729 Ok(ParsedOutput {
730 new_editable_region: output,
731 range_in_excerpt,
732 })
733}
734
/// Singular-named alias of [`excerpt_ranges_for_format`]; forwards its
/// arguments unchanged and returns the same `(editable, context)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
741
742pub fn resolve_cursor_region(
743 input: &ZetaPromptInput,
744 format: ZetaFormat,
745) -> (&str, Range<usize>, Range<usize>, usize) {
746 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
747 let (editable_tokens, context_tokens) = token_limits_for_format(format);
748 compute_editable_and_context_ranges(
749 &input.cursor_excerpt,
750 input.cursor_offset_in_excerpt,
751 syntax_ranges,
752 editable_tokens,
753 context_tokens,
754 )
755 } else {
756 excerpt_range_for_format(format, &input.excerpt_ranges)
757 };
758 let context_start = context_range.start;
759 let context_text = &input.cursor_excerpt[context_range.clone()];
760 let adjusted_editable =
761 (editable_range.start - context_start)..(editable_range.end - context_start);
762 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
763
764 (
765 context_text,
766 adjusted_editable,
767 context_range,
768 adjusted_cursor,
769 )
770}
771
/// Returns the prefill text for `input` in `format`.
///
/// Resolves the context text and editable range with
/// [`resolve_cursor_region`] and forwards them to [`get_prefill_for_format`].
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
776
777fn format_edit_history_within_budget(
778 events: &[Arc<Event>],
779 file_marker: &str,
780 edit_history_name: &str,
781 max_tokens: usize,
782 max_edit_event_count: usize,
783) -> String {
784 let max_bytes = max_tokens.saturating_mul(3);
785 let header = format!("{}{}\n", file_marker, edit_history_name);
786 if header.len() >= max_bytes {
787 return String::new();
788 }
789
790 let mut event_strings: Vec<String> = Vec::new();
791 let mut total_bytes = header.len();
792
793 for event in events.iter().rev().take(max_edit_event_count) {
794 let mut event_str = String::new();
795 write_event(&mut event_str, event);
796
797 if total_bytes + event_str.len() > max_bytes {
798 break;
799 }
800 total_bytes += event_str.len();
801 event_strings.push(event_str);
802 }
803
804 if event_strings.is_empty() {
805 return String::new();
806 }
807
808 let mut result = header;
809 for event_str in event_strings.iter().rev() {
810 result.push_str(event_str);
811 }
812 result
813}
814
815fn excerpt_rendered_bytes(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
816 excerpt.text.len()
817 + if !excerpt.text.ends_with('\n') {
818 "\n".len()
819 } else {
820 0
821 }
822 + if excerpt.row_range.end < file_max_row {
823 "...\n".len()
824 } else {
825 0
826 }
827}
828
829pub fn format_related_files_within_budget(
830 related_files: &[RelatedFile],
831 file_prefix: &str,
832 file_suffix: &str,
833 max_tokens: usize,
834) -> String {
835 struct ExcerptCandidate {
836 file_ix: usize,
837 excerpt_ix: usize,
838 order: usize,
839 }
840
841 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
842 .iter()
843 .enumerate()
844 .flat_map(|(file_ix, file)| {
845 file.excerpts
846 .iter()
847 .enumerate()
848 .map(move |(excerpt_ix, e)| ExcerptCandidate {
849 file_ix,
850 excerpt_ix,
851 order: e.order,
852 })
853 })
854 .collect();
855
856 // Pre-compute file header strings and their token costs.
857 let file_headers: Vec<String> = related_files
858 .iter()
859 .map(|file| {
860 let path_str = file.path.to_string_lossy();
861 format!("{}{}\n", file_prefix, path_str)
862 })
863 .collect();
864
865 // Sort the excerpts by their order and determine how many fit within the budget.
866 let max_bytes = max_tokens.saturating_mul(3);
867 let mut total_bytes = 0;
868 let mut included_excerpt_count = 0_usize;
869 let mut included_file_indices = vec![false; related_files.len()];
870 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
871 for candidate in &excerpt_candidates {
872 let file = &related_files[candidate.file_ix];
873 let excerpt = &file.excerpts[candidate.excerpt_ix];
874 let file_already_included = included_file_indices[candidate.file_ix];
875 let header_cost = if file_already_included {
876 0
877 } else {
878 file_headers[candidate.file_ix].len() + file_suffix.len()
879 };
880 let excerpt_cost = excerpt_rendered_bytes(excerpt, file.max_row);
881 if total_bytes + header_cost + excerpt_cost > max_bytes {
882 break;
883 }
884 total_bytes += header_cost + excerpt_cost;
885 if !file_already_included {
886 included_file_indices[candidate.file_ix] = true;
887 }
888 included_excerpt_count += 1;
889 }
890
891 excerpt_candidates.truncate(included_excerpt_count);
892 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
893
894 // Render all of the files that fit within the token budget, in the original order.
895 let mut result = String::new();
896 let mut last_file_ix = None;
897 for candidate in &excerpt_candidates {
898 if last_file_ix != Some(candidate.file_ix) {
899 if last_file_ix.is_some() {
900 result.push_str(file_suffix);
901 }
902 result.push_str(&file_headers[candidate.file_ix]);
903 last_file_ix = Some(candidate.file_ix);
904 }
905 let file = &related_files[candidate.file_ix];
906 let excerpt = &file.excerpts[candidate.excerpt_ix];
907 result.push_str(&excerpt.text);
908 if !result.ends_with('\n') {
909 result.push('\n');
910 }
911 if excerpt.row_range.end < file.max_row {
912 result.push_str("...\n");
913 }
914 }
915
916 result
917}
918
919pub fn write_related_files(
920 prompt: &mut String,
921 related_files: &[RelatedFile],
922) -> Vec<Range<usize>> {
923 let mut ranges = Vec::new();
924 for file in related_files {
925 let start = prompt.len();
926 let path_str = file.path.to_string_lossy();
927 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
928 for excerpt in &file.excerpts {
929 prompt.push_str(&excerpt.text);
930 if !prompt.ends_with('\n') {
931 prompt.push('\n');
932 }
933 if excerpt.row_range.end < file.max_row {
934 prompt.push_str("...\n");
935 }
936 }
937 let end = prompt.len();
938 ranges.push(start..end);
939 }
940 ranges
941}
942
943mod v0112_middle_at_end {
944 use super::*;
945
946 pub fn special_tokens() -> &'static [&'static str] {
947 &[
948 "<|fim_prefix|>",
949 "<|fim_suffix|>",
950 "<|fim_middle|>",
951 "<|file_sep|>",
952 CURSOR_MARKER,
953 ]
954 }
955
956 pub fn write_cursor_excerpt_section(
957 prompt: &mut String,
958 path: &Path,
959 context: &str,
960 editable_range: &Range<usize>,
961 cursor_offset: usize,
962 ) {
963 let path_str = path.to_string_lossy();
964 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
965
966 prompt.push_str("<|fim_prefix|>\n");
967 prompt.push_str(&context[..editable_range.start]);
968
969 prompt.push_str("<|fim_suffix|>\n");
970 prompt.push_str(&context[editable_range.end..]);
971 if !prompt.ends_with('\n') {
972 prompt.push('\n');
973 }
974
975 prompt.push_str("<|fim_middle|>current\n");
976 prompt.push_str(&context[editable_range.start..cursor_offset]);
977 prompt.push_str(CURSOR_MARKER);
978 prompt.push_str(&context[cursor_offset..editable_range.end]);
979 if !prompt.ends_with('\n') {
980 prompt.push('\n');
981 }
982
983 prompt.push_str("<|fim_middle|>updated\n");
984 }
985}
986
987mod v0113_ordered {
988 use super::*;
989
990 pub fn special_tokens() -> &'static [&'static str] {
991 &[
992 "<|fim_prefix|>",
993 "<|fim_suffix|>",
994 "<|fim_middle|>",
995 "<|file_sep|>",
996 CURSOR_MARKER,
997 ]
998 }
999
1000 pub fn write_cursor_excerpt_section(
1001 prompt: &mut String,
1002 path: &Path,
1003 context: &str,
1004 editable_range: &Range<usize>,
1005 cursor_offset: usize,
1006 ) {
1007 let path_str = path.to_string_lossy();
1008 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1009
1010 prompt.push_str("<|fim_prefix|>\n");
1011 prompt.push_str(&context[..editable_range.start]);
1012 if !prompt.ends_with('\n') {
1013 prompt.push('\n');
1014 }
1015
1016 prompt.push_str("<|fim_middle|>current\n");
1017 prompt.push_str(&context[editable_range.start..cursor_offset]);
1018 prompt.push_str(CURSOR_MARKER);
1019 prompt.push_str(&context[cursor_offset..editable_range.end]);
1020 if !prompt.ends_with('\n') {
1021 prompt.push('\n');
1022 }
1023
1024 prompt.push_str("<|fim_suffix|>\n");
1025 prompt.push_str(&context[editable_range.end..]);
1026 if !prompt.ends_with('\n') {
1027 prompt.push('\n');
1028 }
1029
1030 prompt.push_str("<|fim_middle|>updated\n");
1031 }
1032}
1033
mod v0114180_editable_region {
    //! This format reuses the `v0113_ordered` special tokens.

    use super::*;

    /// Tokens with special meaning in this format; identical to the
    /// `v0113_ordered` list.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1041
1042pub mod v0120_git_merge_markers {
1043 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1044 //!
1045 //! Example prompt:
1046 //!
1047 //! <|file_sep|>path/to/target_file.py
1048 //! <|fim_prefix|>
1049 //! code before editable region
1050 //! <|fim_suffix|>
1051 //! code after editable region
1052 //! <|fim_middle|>
1053 //! <<<<<<< CURRENT
1054 //! code that
1055 //! needs to<|user_cursor|>
1056 //! be rewritten
1057 //! =======
1058 //!
1059 //! Expected output (should be generated by the model):
1060 //!
1061 //! updated
1062 //! code with
1063 //! changes applied
1064 //! >>>>>>> UPDATED
1065
1066 use super::*;
1067
1068 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1069 pub const SEPARATOR: &str = "=======\n";
1070 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1071
1072 pub fn special_tokens() -> &'static [&'static str] {
1073 &[
1074 "<|fim_prefix|>",
1075 "<|fim_suffix|>",
1076 "<|fim_middle|>",
1077 "<|file_sep|>",
1078 START_MARKER,
1079 SEPARATOR,
1080 END_MARKER,
1081 CURSOR_MARKER,
1082 ]
1083 }
1084
1085 pub fn write_cursor_excerpt_section(
1086 prompt: &mut String,
1087 path: &Path,
1088 context: &str,
1089 editable_range: &Range<usize>,
1090 cursor_offset: usize,
1091 ) {
1092 let path_str = path.to_string_lossy();
1093 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1094
1095 prompt.push_str("<|fim_prefix|>");
1096 prompt.push_str(&context[..editable_range.start]);
1097
1098 prompt.push_str("<|fim_suffix|>");
1099 prompt.push_str(&context[editable_range.end..]);
1100 if !prompt.ends_with('\n') {
1101 prompt.push('\n');
1102 }
1103
1104 prompt.push_str("<|fim_middle|>");
1105 prompt.push_str(START_MARKER);
1106 prompt.push_str(&context[editable_range.start..cursor_offset]);
1107 prompt.push_str(CURSOR_MARKER);
1108 prompt.push_str(&context[cursor_offset..editable_range.end]);
1109 if !prompt.ends_with('\n') {
1110 prompt.push('\n');
1111 }
1112 prompt.push_str(SEPARATOR);
1113 }
1114}
1115
1116pub mod v0131_git_merge_markers_prefix {
1117 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1118 //!
1119 //! Example prompt:
1120 //!
1121 //! <|file_sep|>path/to/target_file.py
1122 //! <|fim_prefix|>
1123 //! code before editable region
1124 //! <<<<<<< CURRENT
1125 //! code that
1126 //! needs to<|user_cursor|>
1127 //! be rewritten
1128 //! =======
1129 //! <|fim_suffix|>
1130 //! code after editable region
1131 //! <|fim_middle|>
1132 //!
1133 //! Expected output (should be generated by the model):
1134 //!
1135 //! updated
1136 //! code with
1137 //! changes applied
1138 //! >>>>>>> UPDATED
1139
1140 use super::*;
1141
1142 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1143 pub const SEPARATOR: &str = "=======\n";
1144 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1145
1146 pub fn special_tokens() -> &'static [&'static str] {
1147 &[
1148 "<|fim_prefix|>",
1149 "<|fim_suffix|>",
1150 "<|fim_middle|>",
1151 "<|file_sep|>",
1152 START_MARKER,
1153 SEPARATOR,
1154 END_MARKER,
1155 CURSOR_MARKER,
1156 ]
1157 }
1158
1159 pub fn write_cursor_excerpt_section(
1160 prompt: &mut String,
1161 path: &Path,
1162 context: &str,
1163 editable_range: &Range<usize>,
1164 cursor_offset: usize,
1165 ) {
1166 let path_str = path.to_string_lossy();
1167 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1168
1169 prompt.push_str("<|fim_prefix|>");
1170 prompt.push_str(&context[..editable_range.start]);
1171 prompt.push_str(START_MARKER);
1172 prompt.push_str(&context[editable_range.start..cursor_offset]);
1173 prompt.push_str(CURSOR_MARKER);
1174 prompt.push_str(&context[cursor_offset..editable_range.end]);
1175 if !prompt.ends_with('\n') {
1176 prompt.push('\n');
1177 }
1178 prompt.push_str(SEPARATOR);
1179
1180 prompt.push_str("<|fim_suffix|>");
1181 prompt.push_str(&context[editable_range.end..]);
1182 if !prompt.ends_with('\n') {
1183 prompt.push('\n');
1184 }
1185
1186 prompt.push_str("<|fim_middle|>");
1187 }
1188}
1189
1190pub mod v0211_prefill {
1191 use super::*;
1192
1193 pub fn special_tokens() -> &'static [&'static str] {
1194 v0131_git_merge_markers_prefix::special_tokens()
1195 }
1196
1197 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1198 let editable_region = &context[editable_range.start..editable_range.end];
1199
1200 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1201 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1202
1203 // Find a token boundary to avoid splitting tokens in the prefill.
1204 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1205 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1206 // the \n and consume any consecutive \n characters after it.
1207 let prefill = &editable_region[..prefill_len];
1208 match prefill.rfind('\n') {
1209 Some(pos) => {
1210 let mut end = pos + 1;
1211 while end < editable_region.len()
1212 && editable_region.as_bytes().get(end) == Some(&b'\n')
1213 {
1214 end += 1;
1215 }
1216 editable_region[..end].to_string()
1217 }
1218 // No newline found. Fall back to splitting before the last space
1219 // (word-level boundary)
1220 None => match prefill.rfind(' ') {
1221 Some(pos) => prefill[..pos].to_string(),
1222 None => prefill.to_string(),
1223 },
1224 }
1225 }
1226}
1227
1228pub mod hashline {
1229
1230 use std::fmt::Display;
1231
    /// Written, followed by a newline, as the last element of the prompt by
    /// `write_cursor_excerpt_section`.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written after the prefix context, immediately before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a command line that replaces one line (`<|set|>3:c3`) or an
    /// inclusive range of lines (`<|set|>1:46-3:4a`).
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts text after the referenced line, or
    /// before the first line when no valid line reference follows.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Output that stands for "leave the editable region unchanged".
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1240
1241 pub fn special_tokens() -> &'static [&'static str] {
1242 return &[
1243 SET_COMMAND_MARKER,
1244 "<|set_range|>",
1245 INSERT_COMMAND_MARKER,
1246 NO_EDITS_COMMAND_MARKER,
1247 CURSOR_MARKER,
1248 "<|file_sep|>",
1249 "<|fim_prefix|>",
1250 "<|fim_suffix|>",
1251 "<|fim_middle|>",
1252 ];
1253 }
1254
1255 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1256 #[derive(Debug, Clone, PartialEq, Eq)]
1257 struct LineRef {
1258 index: usize,
1259 hash: u8,
1260 }
1261
1262 impl Display for LineRef {
1263 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1264 write!(f, "{}:{:02x}", self.index, self.hash)
1265 }
1266 }
1267
1268 pub fn hash_line(line: &[u8]) -> u8 {
1269 let mut h: u8 = 0;
1270 for &byte in line {
1271 h = h.wrapping_add(byte);
1272 }
1273 return h;
1274 }
1275
1276 /// Write the hashline-encoded editable region into `out`. Each line of
1277 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1278 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1279 /// to the start of `editable_text`).
1280 pub fn write_hashline_editable_region(
1281 out: &mut String,
1282 editable_text: &str,
1283 cursor_offset_in_editable: usize,
1284 ) {
1285 let mut offset = 0;
1286 for (i, line) in editable_text.lines().enumerate() {
1287 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1288 && cursor_offset_in_editable < offset + line.len()
1289 {
1290 (
1291 &line[..cursor_offset_in_editable - offset],
1292 CURSOR_MARKER,
1293 &line[cursor_offset_in_editable - offset..],
1294 )
1295 } else {
1296 (line, "", "")
1297 };
1298 write!(
1299 out,
1300 "\n{}|{head}{cursor}{tail}",
1301 LineRef {
1302 index: i,
1303 hash: hash_line(line.as_bytes())
1304 }
1305 )
1306 .unwrap();
1307 offset += line.len() + 1;
1308 }
1309 }
1310
1311 pub fn write_cursor_excerpt_section(
1312 prompt: &mut String,
1313 path: &Path,
1314 context: &str,
1315 editable_range: &Range<usize>,
1316 cursor_offset: usize,
1317 ) {
1318 let path_str = path.to_string_lossy();
1319 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1320
1321 prompt.push_str("<|fim_prefix|>\n");
1322 prompt.push_str(&context[..editable_range.start]);
1323 prompt.push_str(START_MARKER);
1324
1325 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1326 let editable_region = &context[editable_range.clone()];
1327 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1328
1329 if !prompt.ends_with('\n') {
1330 prompt.push('\n');
1331 }
1332
1333 prompt.push_str("<|fim_suffix|>\n");
1334 prompt.push_str(&context[editable_range.end..]);
1335 if !prompt.ends_with('\n') {
1336 prompt.push('\n');
1337 }
1338
1339 prompt.push_str(END_MARKER);
1340 prompt.push('\n');
1341 }
1342
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of either variant is a slice of the model output itself
    /// (lifetime `'a`): everything after the command line up to the next command
    /// line or the end of the output. It may span several lines or be empty.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1360
1361 /// Parse a line reference like `3:c3` into a `LineRef`.
1362 fn parse_line_ref(s: &str) -> Option<LineRef> {
1363 let (idx_str, hash_str) = s.split_once(':')?;
1364 let index = idx_str.parse::<usize>().ok()?;
1365 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1366 Some(LineRef { index, hash })
1367 }
1368
1369 /// Parse the model output into a list of `EditCommand`s.
1370 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1371 let mut commands = Vec::new();
1372 let mut offset = 0usize;
1373
1374 while offset < model_output.len() {
1375 let next_nl = model_output[offset..]
1376 .find('\n')
1377 .map(|i| offset + i)
1378 .unwrap_or(model_output.len());
1379 let line = &model_output[offset..next_nl];
1380 let line_end = if next_nl < model_output.len() {
1381 next_nl + 1
1382 } else {
1383 next_nl
1384 };
1385
1386 let trimmed = line.trim();
1387 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1388 (true, spec)
1389 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1390 (false, spec)
1391 } else {
1392 offset = line_end;
1393 continue;
1394 };
1395
1396 let mut content_end = line_end;
1397 let mut scan = line_end;
1398
1399 while scan < model_output.len() {
1400 let body_nl = model_output[scan..]
1401 .find('\n')
1402 .map(|i| scan + i)
1403 .unwrap_or(model_output.len());
1404 let body_line = &model_output[scan..body_nl];
1405 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1406 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1407 {
1408 break;
1409 }
1410 scan = if body_nl < model_output.len() {
1411 body_nl + 1
1412 } else {
1413 body_nl
1414 };
1415 content_end = scan;
1416 }
1417
1418 let content = &model_output[line_end..content_end];
1419
1420 if is_set {
1421 if let Some((start_str, end_str)) = specifier.split_once('-') {
1422 if let (Some(start), Some(end)) =
1423 (parse_line_ref(start_str), parse_line_ref(end_str))
1424 {
1425 commands.push(EditCommand::Set {
1426 start,
1427 end,
1428 content,
1429 });
1430 }
1431 } else if let Some(target) = parse_line_ref(specifier) {
1432 commands.push(EditCommand::Set {
1433 start: target.clone(),
1434 end: target,
1435 content,
1436 });
1437 }
1438 } else {
1439 let after = parse_line_ref(specifier);
1440 commands.push(EditCommand::Insert { after, content });
1441 }
1442
1443 offset = scan;
1444 }
1445
1446 commands
1447 }
1448
1449 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1450 /// (as opposed to being a plain full-replacement output).
1451 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1452 /// editable region, returning the plain text content.
1453 pub fn strip_hashline_prefixes(region: &str) -> String {
1454 let mut decoded: String = region
1455 .lines()
1456 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1457 .collect::<Vec<_>>()
1458 .join("\n");
1459 if region.ends_with('\n') {
1460 decoded.push('\n');
1461 }
1462 decoded
1463 }
1464
1465 pub fn output_has_edit_commands(model_output: &str) -> bool {
1466 model_output.contains(SET_COMMAND_MARKER)
1467 || model_output.contains(INSERT_COMMAND_MARKER)
1468 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1469 }
1470
1471 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1472 /// original editable region text.
1473 ///
1474 /// `editable_region` is the original text of the editable region (without hash
1475 /// prefixes). `model_output` is the raw model response containing edit commands.
1476 ///
1477 /// Returns the full replacement text for the editable region.
1478 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1479 if model_output
1480 .trim_start()
1481 .starts_with(NO_EDITS_COMMAND_MARKER)
1482 {
1483 return editable_region.to_string();
1484 }
1485
1486 let original_lines: Vec<&str> = editable_region.lines().collect();
1487 let old_hashes: Vec<u8> = original_lines
1488 .iter()
1489 .map(|line| hash_line(line.as_bytes()))
1490 .collect();
1491
1492 let commands = parse_edit_commands(model_output);
1493
1494 // For set operations: indexed by start line → Some((end line index, content))
1495 // For insert operations: indexed by line index → vec of content to insert after
1496 // Insert-before-first is tracked separately.
1497 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1498 let mut insert_before_first: Vec<&str> = Vec::new();
1499 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1500
1501 for command in &commands {
1502 match command {
1503 EditCommand::Set {
1504 start,
1505 end,
1506 content,
1507 } => {
1508 if start.index < old_hashes.len()
1509 && end.index < old_hashes.len()
1510 && start.index <= end.index
1511 && old_hashes[start.index] == start.hash
1512 && old_hashes[end.index] == end.hash
1513 {
1514 set_ops[start.index] = Some((end.index, *content));
1515 }
1516 }
1517 EditCommand::Insert { after, content } => match after {
1518 None => insert_before_first.push(*content),
1519 Some(line_ref) => {
1520 if line_ref.index < old_hashes.len()
1521 && old_hashes[line_ref.index] == line_ref.hash
1522 {
1523 insert_after[line_ref.index].push(*content);
1524 }
1525 }
1526 },
1527 }
1528 }
1529
1530 let mut result = String::new();
1531
1532 // Emit any insertions before the first line
1533 for content in &insert_before_first {
1534 result.push_str(content);
1535 if !content.ends_with('\n') {
1536 result.push('\n');
1537 }
1538 }
1539
1540 let mut i = 0;
1541 while i < original_lines.len() {
1542 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1543 // Replace lines i..=end_index with the replacement content
1544 result.push_str(replacement);
1545 if !replacement.is_empty() && !replacement.ends_with('\n') {
1546 result.push('\n');
1547 }
1548 // Emit any insertions after the end of this set range
1549 if *end_index < insert_after.len() {
1550 for content in &insert_after[*end_index] {
1551 result.push_str(content);
1552 if !content.ends_with('\n') {
1553 result.push('\n');
1554 }
1555 }
1556 }
1557 i = end_index + 1;
1558 } else {
1559 // Keep the original line
1560 result.push_str(original_lines[i]);
1561 result.push('\n');
1562 // Emit any insertions after this line
1563 for content in &insert_after[i] {
1564 result.push_str(content);
1565 if !content.ends_with('\n') {
1566 result.push('\n');
1567 }
1568 }
1569 i += 1;
1570 }
1571 }
1572
1573 // Preserve trailing newline behavior: if the original ended with a
1574 // newline the result already has one; if it didn't, trim the extra one
1575 // we added.
1576 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1577 result.pop();
1578 }
1579
1580 result
1581 }
1582
1583 /// Convert a unified diff patch into hashline edit commands.
1584 ///
1585 /// Parses the unified diff `patch` directly to determine which lines of
1586 /// `old_text` are deleted/replaced and what new lines are added, then emits
1587 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1588 /// `{index}:{hash}` identifiers.
1589 ///
1590 /// `cursor_offset` is an optional byte offset into the first hunk's new
1591 /// text (context + additions) where the cursor marker should be placed.
1592 pub fn patch_to_edit_commands(
1593 old_text: &str,
1594 patch: &str,
1595 cursor_offset: Option<usize>,
1596 ) -> Result<String> {
1597 let old_lines: Vec<&str> = old_text.lines().collect();
1598 let old_hashes: Vec<u8> = old_lines
1599 .iter()
1600 .map(|line| hash_line(line.as_bytes()))
1601 .collect();
1602
1603 let mut result = String::new();
1604 let mut first_hunk = true;
1605
1606 struct Hunk<'a> {
1607 line_range: Range<usize>,
1608 new_text_lines: Vec<&'a str>,
1609 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1610 }
1611
1612 // Parse the patch line by line. We only care about hunk headers,
1613 // context, deletions, and additions.
1614 let mut old_line_index: usize = 0;
1615 let mut current_hunk: Option<Hunk> = None;
1616 // Byte offset tracking within the hunk's new text for cursor placement.
1617 let mut new_text_byte_offset: usize = 0;
1618 // The line index of the last old line seen before/in the current hunk
1619 // (used for insert-after reference).
1620 let mut last_old_line_before_hunk: Option<usize> = None;
1621
1622 fn flush_hunk(
1623 hunk: Hunk,
1624 last_old_line: Option<usize>,
1625 result: &mut String,
1626 old_hashes: &[u8],
1627 ) {
1628 if hunk.line_range.is_empty() {
1629 // Pure insertion — reference the old line to insert after when in bounds.
1630 if let Some(after) = last_old_line
1631 && let Some(&hash) = old_hashes.get(after)
1632 {
1633 write!(
1634 result,
1635 "{INSERT_COMMAND_MARKER}{}\n",
1636 LineRef { index: after, hash }
1637 )
1638 .unwrap();
1639 } else {
1640 result.push_str(INSERT_COMMAND_MARKER);
1641 result.push('\n');
1642 }
1643 } else {
1644 let start = hunk.line_range.start;
1645 let end_exclusive = hunk.line_range.end;
1646 let deleted_line_count = end_exclusive.saturating_sub(start);
1647
1648 if deleted_line_count == 1 {
1649 if let Some(&hash) = old_hashes.get(start) {
1650 write!(
1651 result,
1652 "{SET_COMMAND_MARKER}{}\n",
1653 LineRef { index: start, hash }
1654 )
1655 .unwrap();
1656 } else {
1657 result.push_str(SET_COMMAND_MARKER);
1658 result.push('\n');
1659 }
1660 } else {
1661 let end_inclusive = end_exclusive - 1;
1662 match (
1663 old_hashes.get(start).copied(),
1664 old_hashes.get(end_inclusive).copied(),
1665 ) {
1666 (Some(start_hash), Some(end_hash)) => {
1667 write!(
1668 result,
1669 "{SET_COMMAND_MARKER}{}-{}\n",
1670 LineRef {
1671 index: start,
1672 hash: start_hash
1673 },
1674 LineRef {
1675 index: end_inclusive,
1676 hash: end_hash
1677 }
1678 )
1679 .unwrap();
1680 }
1681 _ => {
1682 result.push_str(SET_COMMAND_MARKER);
1683 result.push('\n');
1684 }
1685 }
1686 }
1687 }
1688 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1689 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1690 && line_offset == cursor_line_offset
1691 {
1692 result.push_str(&line[..char_offset]);
1693 result.push_str(CURSOR_MARKER);
1694 result.push_str(&line[char_offset..]);
1695 continue;
1696 }
1697
1698 result.push_str(line);
1699 }
1700 }
1701
1702 for raw_line in patch.split_inclusive('\n') {
1703 if raw_line.starts_with("@@") {
1704 // Flush any pending change hunk from a previous patch hunk.
1705 if let Some(hunk) = current_hunk.take() {
1706 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1707 }
1708
1709 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1710 // We intentionally do not trust old_start as a direct local index into `old_text`,
1711 // because some patches are produced against a larger file region and carry
1712 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1713 if first_hunk {
1714 new_text_byte_offset = 0;
1715 first_hunk = false;
1716 }
1717 continue;
1718 }
1719
1720 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1721 continue;
1722 }
1723 if raw_line.starts_with("\\ No newline") {
1724 continue;
1725 }
1726
1727 if raw_line.starts_with('-') {
1728 // Extend or start a change hunk with this deleted old line.
1729 match &mut current_hunk {
1730 Some(Hunk {
1731 line_range: range, ..
1732 }) => range.end = old_line_index + 1,
1733 None => {
1734 current_hunk = Some(Hunk {
1735 line_range: old_line_index..old_line_index + 1,
1736 new_text_lines: Vec::new(),
1737 cursor_line_offset_in_new_text: None,
1738 });
1739 }
1740 }
1741 old_line_index += 1;
1742 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1743 // Place cursor marker if cursor_offset falls within this line.
1744 let mut cursor_line_offset = None;
1745 if let Some(cursor_off) = cursor_offset
1746 && (first_hunk
1747 || cursor_off >= new_text_byte_offset
1748 && cursor_off <= new_text_byte_offset + added_content.len())
1749 {
1750 let line_offset = added_content.floor_char_boundary(
1751 cursor_off
1752 .saturating_sub(new_text_byte_offset)
1753 .min(added_content.len()),
1754 );
1755 cursor_line_offset = Some(line_offset);
1756 }
1757
1758 new_text_byte_offset += added_content.len();
1759
1760 let hunk = current_hunk.get_or_insert(Hunk {
1761 line_range: old_line_index..old_line_index,
1762 new_text_lines: vec![],
1763 cursor_line_offset_in_new_text: None,
1764 });
1765 hunk.new_text_lines.push(added_content);
1766 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1767 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1768 } else {
1769 // Context line (starts with ' ' or is empty).
1770 if let Some(hunk) = current_hunk.take() {
1771 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1772 }
1773 last_old_line_before_hunk = Some(old_line_index);
1774 old_line_index += 1;
1775 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1776 new_text_byte_offset += content.len();
1777 }
1778 }
1779
1780 // Flush final group.
1781 if let Some(hunk) = current_hunk.take() {
1782 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1783 }
1784
1785 // Trim a single trailing newline.
1786 if result.ends_with('\n') {
1787 result.pop();
1788 }
1789
1790 if result.is_empty() {
1791 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1792 }
1793
1794 Ok(result)
1795 }
1796
1797 #[cfg(test)]
1798 mod tests {
1799 use super::*;
1800 use indoc::indoc;
1801
1802 #[test]
1803 fn test_format_cursor_region() {
1804 struct Case {
1805 name: &'static str,
1806 context: &'static str,
1807 editable_range: Range<usize>,
1808 cursor_offset: usize,
1809 expected: &'static str,
1810 }
1811
1812 let cases = [
1813 Case {
1814 name: "basic_cursor_placement",
1815 context: "hello world\n",
1816 editable_range: 0..12,
1817 cursor_offset: 5,
1818 expected: indoc! {"
1819 <|file_sep|>test.rs
1820 <|fim_prefix|>
1821 <|fim_middle|>current
1822 0:5c|hello<|user_cursor|> world
1823 <|fim_suffix|>
1824 <|fim_middle|>updated
1825 "},
1826 },
1827 Case {
1828 name: "multiline_cursor_on_second_line",
1829 context: "aaa\nbbb\nccc\n",
1830 editable_range: 0..12,
1831 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1832 expected: indoc! {"
1833 <|file_sep|>test.rs
1834 <|fim_prefix|>
1835 <|fim_middle|>current
1836 0:23|aaa
1837 1:26|b<|user_cursor|>bb
1838 2:29|ccc
1839 <|fim_suffix|>
1840 <|fim_middle|>updated
1841 "},
1842 },
1843 Case {
1844 name: "no_trailing_newline_in_context",
1845 context: "line1\nline2",
1846 editable_range: 0..11,
1847 cursor_offset: 3,
1848 expected: indoc! {"
1849 <|file_sep|>test.rs
1850 <|fim_prefix|>
1851 <|fim_middle|>current
1852 0:d9|lin<|user_cursor|>e1
1853 1:da|line2
1854 <|fim_suffix|>
1855 <|fim_middle|>updated
1856 "},
1857 },
1858 Case {
1859 name: "leading_newline_in_editable_region",
1860 context: "\nabc\n",
1861 editable_range: 0..5,
1862 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1863 expected: indoc! {"
1864 <|file_sep|>test.rs
1865 <|fim_prefix|>
1866 <|fim_middle|>current
1867 0:00|
1868 1:26|a<|user_cursor|>bc
1869 <|fim_suffix|>
1870 <|fim_middle|>updated
1871 "},
1872 },
1873 Case {
1874 name: "with_suffix",
1875 context: "abc\ndef",
1876 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1877 cursor_offset: 2,
1878 expected: indoc! {"
1879 <|file_sep|>test.rs
1880 <|fim_prefix|>
1881 <|fim_middle|>current
1882 0:26|ab<|user_cursor|>c
1883 <|fim_suffix|>
1884 def
1885 <|fim_middle|>updated
1886 "},
1887 },
1888 Case {
1889 name: "unicode_two_byte_chars",
1890 context: "héllo\n",
1891 editable_range: 0..7,
1892 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1893 expected: indoc! {"
1894 <|file_sep|>test.rs
1895 <|fim_prefix|>
1896 <|fim_middle|>current
1897 0:1b|hé<|user_cursor|>llo
1898 <|fim_suffix|>
1899 <|fim_middle|>updated
1900 "},
1901 },
1902 Case {
1903 name: "unicode_three_byte_chars",
1904 context: "日本語\n",
1905 editable_range: 0..10,
1906 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1907 expected: indoc! {"
1908 <|file_sep|>test.rs
1909 <|fim_prefix|>
1910 <|fim_middle|>current
1911 0:80|日本<|user_cursor|>語
1912 <|fim_suffix|>
1913 <|fim_middle|>updated
1914 "},
1915 },
1916 Case {
1917 name: "unicode_four_byte_chars",
1918 context: "a🌍b\n",
1919 editable_range: 0..7,
1920 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1921 expected: indoc! {"
1922 <|file_sep|>test.rs
1923 <|fim_prefix|>
1924 <|fim_middle|>current
1925 0:6b|a🌍<|user_cursor|>b
1926 <|fim_suffix|>
1927 <|fim_middle|>updated
1928 "},
1929 },
1930 Case {
1931 name: "cursor_at_start_of_region_not_placed",
1932 context: "abc\n",
1933 editable_range: 0..4,
1934 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1935 expected: indoc! {"
1936 <|file_sep|>test.rs
1937 <|fim_prefix|>
1938 <|fim_middle|>current
1939 0:26|abc
1940 <|fim_suffix|>
1941 <|fim_middle|>updated
1942 "},
1943 },
1944 Case {
1945 name: "cursor_at_end_of_line_not_placed",
1946 context: "abc\ndef\n",
1947 editable_range: 0..8,
1948 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1949 expected: indoc! {"
1950 <|file_sep|>test.rs
1951 <|fim_prefix|>
1952 <|fim_middle|>current
1953 0:26|abc
1954 1:2f|def
1955 <|fim_suffix|>
1956 <|fim_middle|>updated
1957 "},
1958 },
1959 Case {
1960 name: "cursor_offset_relative_to_context_not_editable_region",
1961 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1962 // write_cursor_excerpt_section must subtract it before comparing against
1963 // per-line offsets within the editable region.
1964 context: "pre\naaa\nbbb\nsuf\n",
1965 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1966 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1967 expected: indoc! {"
1968 <|file_sep|>test.rs
1969 <|fim_prefix|>
1970 pre
1971 <|fim_middle|>current
1972 0:23|aaa
1973 1:26|b<|user_cursor|>bb
1974 <|fim_suffix|>
1975 suf
1976 <|fim_middle|>updated
1977 "},
1978 },
1979 ];
1980
1981 for case in &cases {
1982 let mut prompt = String::new();
1983 hashline::write_cursor_excerpt_section(
1984 &mut prompt,
1985 Path::new("test.rs"),
1986 case.context,
1987 &case.editable_range,
1988 case.cursor_offset,
1989 );
1990 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1991 }
1992 }
1993
1994 #[test]
1995 fn test_apply_edit_commands() {
1996 struct Case {
1997 name: &'static str,
1998 original: &'static str,
1999 model_output: &'static str,
2000 expected: &'static str,
2001 }
2002
2003 let cases = vec![
2004 Case {
2005 name: "set_single_line",
2006 original: indoc! {"
2007 let mut total = 0;
2008 for product in products {
2009 total += ;
2010 }
2011 total
2012 "},
2013 model_output: indoc! {"
2014 <|set|>2:87
2015 total += product.price;
2016 "},
2017 expected: indoc! {"
2018 let mut total = 0;
2019 for product in products {
2020 total += product.price;
2021 }
2022 total
2023 "},
2024 },
2025 Case {
2026 name: "set_range",
2027 original: indoc! {"
2028 fn foo() {
2029 let x = 1;
2030 let y = 2;
2031 let z = 3;
2032 }
2033 "},
2034 model_output: indoc! {"
2035 <|set|>1:46-3:4a
2036 let sum = 6;
2037 "},
2038 expected: indoc! {"
2039 fn foo() {
2040 let sum = 6;
2041 }
2042 "},
2043 },
2044 Case {
2045 name: "insert_after_line",
2046 original: indoc! {"
2047 fn main() {
2048 let x = 1;
2049 }
2050 "},
2051 model_output: indoc! {"
2052 <|insert|>1:46
2053 let y = 2;
2054 "},
2055 expected: indoc! {"
2056 fn main() {
2057 let x = 1;
2058 let y = 2;
2059 }
2060 "},
2061 },
2062 Case {
2063 name: "insert_before_first",
2064 original: indoc! {"
2065 let x = 1;
2066 let y = 2;
2067 "},
2068 model_output: indoc! {"
2069 <|insert|>
2070 use std::io;
2071 "},
2072 expected: indoc! {"
2073 use std::io;
2074 let x = 1;
2075 let y = 2;
2076 "},
2077 },
2078 Case {
2079 name: "set_with_cursor_marker",
2080 original: indoc! {"
2081 fn main() {
2082 println!();
2083 }
2084 "},
2085 model_output: indoc! {"
2086 <|set|>1:34
2087 eprintln!(\"<|user_cursor|>\");
2088 "},
2089 expected: indoc! {"
2090 fn main() {
2091 eprintln!(\"<|user_cursor|>\");
2092 }
2093 "},
2094 },
2095 Case {
2096 name: "multiple_set_commands",
2097 original: indoc! {"
2098 aaa
2099 bbb
2100 ccc
2101 ddd
2102 "},
2103 model_output: indoc! {"
2104 <|set|>0:23
2105 AAA
2106 <|set|>2:29
2107 CCC
2108 "},
2109 expected: indoc! {"
2110 AAA
2111 bbb
2112 CCC
2113 ddd
2114 "},
2115 },
2116 Case {
2117 name: "set_range_multiline_replacement",
2118 original: indoc! {"
2119 fn handle_submit() {
2120 }
2121
2122 fn handle_keystroke() {
2123 "},
2124 model_output: indoc! {"
2125 <|set|>0:3f-1:7d
2126 fn handle_submit(modal_state: &mut ModalState) {
2127 <|user_cursor|>
2128 }
2129 "},
2130 expected: indoc! {"
2131 fn handle_submit(modal_state: &mut ModalState) {
2132 <|user_cursor|>
2133 }
2134
2135 fn handle_keystroke() {
2136 "},
2137 },
2138 Case {
2139 name: "no_edit_commands_returns_original",
2140 original: indoc! {"
2141 hello
2142 world
2143 "},
2144 model_output: "some random text with no commands",
2145 expected: indoc! {"
2146 hello
2147 world
2148 "},
2149 },
2150 Case {
2151 name: "no_edits_command_returns_original",
2152 original: indoc! {"
2153 hello
2154 world
2155 "},
2156 model_output: "<|no_edits|>",
2157 expected: indoc! {"
2158 hello
2159 world
2160 "},
2161 },
2162 Case {
2163 name: "wrong_hash_set_ignored",
2164 original: indoc! {"
2165 aaa
2166 bbb
2167 "},
2168 model_output: indoc! {"
2169 <|set|>0:ff
2170 ZZZ
2171 "},
2172 expected: indoc! {"
2173 aaa
2174 bbb
2175 "},
2176 },
2177 Case {
2178 name: "insert_and_set_combined",
2179 original: indoc! {"
2180 alpha
2181 beta
2182 gamma
2183 "},
2184 model_output: indoc! {"
2185 <|set|>0:06
2186 ALPHA
2187 <|insert|>1:9c
2188 beta_extra
2189 "},
2190 expected: indoc! {"
2191 ALPHA
2192 beta
2193 beta_extra
2194 gamma
2195 "},
2196 },
2197 Case {
2198 name: "no_trailing_newline_preserved",
2199 original: "hello\nworld",
2200 model_output: indoc! {"
2201 <|set|>0:14
2202 HELLO
2203 "},
2204 expected: "HELLO\nworld",
2205 },
2206 Case {
2207 name: "set_range_hash_mismatch_in_end_bound",
2208 original: indoc! {"
2209 one
2210 two
2211 three
2212 "},
2213 model_output: indoc! {"
2214 <|set|>0:42-2:ff
2215 ONE_TWO_THREE
2216 "},
2217 expected: indoc! {"
2218 one
2219 two
2220 three
2221 "},
2222 },
2223 Case {
2224 name: "set_range_start_greater_than_end_ignored",
2225 original: indoc! {"
2226 a
2227 b
2228 c
2229 "},
2230 model_output: indoc! {"
2231 <|set|>2:63-1:62
2232 X
2233 "},
2234 expected: indoc! {"
2235 a
2236 b
2237 c
2238 "},
2239 },
2240 Case {
2241 name: "insert_out_of_bounds_ignored",
2242 original: indoc! {"
2243 x
2244 y
2245 "},
2246 model_output: indoc! {"
2247 <|insert|>99:aa
2248 z
2249 "},
2250 expected: indoc! {"
2251 x
2252 y
2253 "},
2254 },
2255 Case {
2256 name: "set_out_of_bounds_ignored",
2257 original: indoc! {"
2258 x
2259 y
2260 "},
2261 model_output: indoc! {"
2262 <|set|>99:aa
2263 z
2264 "},
2265 expected: indoc! {"
2266 x
2267 y
2268 "},
2269 },
2270 Case {
2271 name: "malformed_set_command_ignored",
2272 original: indoc! {"
2273 alpha
2274 beta
2275 "},
2276 model_output: indoc! {"
2277 <|set|>not-a-line-ref
2278 UPDATED
2279 "},
2280 expected: indoc! {"
2281 alpha
2282 beta
2283 "},
2284 },
2285 Case {
2286 name: "malformed_insert_hash_treated_as_before_first",
2287 original: indoc! {"
2288 alpha
2289 beta
2290 "},
2291 model_output: indoc! {"
2292 <|insert|>1:nothex
2293 preamble
2294 "},
2295 expected: indoc! {"
2296 preamble
2297 alpha
2298 beta
2299 "},
2300 },
2301 Case {
2302 name: "set_then_insert_same_target_orders_insert_after_replacement",
2303 original: indoc! {"
2304 cat
2305 dog
2306 "},
2307 model_output: indoc! {"
2308 <|set|>0:38
2309 CAT
2310 <|insert|>0:38
2311 TAIL
2312 "},
2313 expected: indoc! {"
2314 CAT
2315 TAIL
2316 dog
2317 "},
2318 },
2319 Case {
2320 name: "overlapping_set_ranges_last_wins",
2321 original: indoc! {"
2322 a
2323 b
2324 c
2325 d
2326 "},
2327 model_output: indoc! {"
2328 <|set|>0:61-2:63
2329 FIRST
2330 <|set|>1:62-3:64
2331 SECOND
2332 "},
2333 expected: indoc! {"
2334 FIRST
2335 d
2336 "},
2337 },
2338 Case {
2339 name: "insert_before_first_and_after_line",
2340 original: indoc! {"
2341 a
2342 b
2343 "},
2344 model_output: indoc! {"
2345 <|insert|>
2346 HEAD
2347 <|insert|>0:61
2348 MID
2349 "},
2350 expected: indoc! {"
2351 HEAD
2352 a
2353 MID
2354 b
2355 "},
2356 },
2357 ];
2358
2359 for case in &cases {
2360 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2361 assert_eq!(result, case.expected, "failed case: {}", case.name);
2362 }
2363 }
2364
#[test]
fn test_output_has_edit_commands() {
    // Outputs containing a set/insert marker, at the start or after other
    // text, as well as the explicit no-edits token, count as command output.
    let set_at_start = format!("{}0:ab\nnew", SET_COMMAND_MARKER);
    let insert_at_start = format!("{}0:ab\nnew", INSERT_COMMAND_MARKER);
    let set_after_text = format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER);
    let recognized = [
        set_at_start.as_str(),
        insert_at_start.as_str(),
        set_after_text.as_str(),
        "<|no_edits|>",
    ];
    for output in recognized {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Plain text and the bare `NO_EDITS` word are not command output.
    for output in ["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
2383
2384 // ---- hashline::patch_to_edit_commands round-trip tests ----
2385
#[test]
fn test_patch_to_edit_commands() {
    /// One round-trip scenario: `patch` is converted to edit commands, the
    /// commands are applied to `old`, and the result must equal
    /// `expected_new`.
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        expected_new: &'static str,
    }

    let cases = [
        Case {
            name: "single_line_replacement",
            old: indoc! {"
                let mut total = 0;
                for product in products {
                    total += ;
                }
                total
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                 let mut total = 0;
                 for product in products {
                -    total += ;
                +    total += product.price;
                 }
                 total
            "},
            expected_new: indoc! {"
                let mut total = 0;
                for product in products {
                    total += product.price;
                }
                total
            "},
        },
        Case {
            name: "multiline_replacement",
            old: indoc! {"
                fn foo() {
                    let x = 1;
                    let y = 2;
                    let z = 3;
                }
            "},
            patch: indoc! {"
                @@ -1,5 +1,3 @@
                 fn foo() {
                -    let x = 1;
                -    let y = 2;
                -    let z = 3;
                +    let sum = 1 + 2 + 3;
                 }
            "},
            expected_new: indoc! {"
                fn foo() {
                    let sum = 1 + 2 + 3;
                }
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                fn main() {
                    let x = 1;
                }
            "},
            patch: indoc! {"
                @@ -1,3 +1,4 @@
                 fn main() {
                     let x = 1;
                +    let y = 2;
                 }
            "},
            expected_new: indoc! {"
                fn main() {
                    let x = 1;
                    let y = 2;
                }
            "},
        },
        Case {
            name: "insertion_before_first",
            old: indoc! {"
                let x = 1;
                let y = 2;
            "},
            patch: indoc! {"
                @@ -1,2 +1,3 @@
                +use std::io;
                 let x = 1;
                 let y = 2;
            "},
            expected_new: indoc! {"
                use std::io;
                let x = 1;
                let y = 2;
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                aaa
                bbb
                ccc
                ddd
            "},
            patch: indoc! {"
                @@ -1,4 +1,2 @@
                 aaa
                -bbb
                -ccc
                 ddd
            "},
            expected_new: indoc! {"
                aaa
                ddd
            "},
        },
        Case {
            name: "multiple_changes",
            old: indoc! {"
                alpha
                beta
                gamma
                delta
                epsilon
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                -alpha
                +ALPHA
                 beta
                 gamma
                -delta
                +DELTA
                 epsilon
            "},
            expected_new: indoc! {"
                ALPHA
                beta
                gamma
                DELTA
                epsilon
            "},
        },
        Case {
            name: "replace_with_insertion",
            old: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    modal_state.dismiss();
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,4 @@
                 fn handle() {
                     modal_state.close();
                +    eprintln!("");
                     modal_state.dismiss();
            "#},
            expected_new: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    eprintln!("");
                    modal_state.dismiss();
            "#},
        },
        Case {
            name: "complete_replacement",
            old: indoc! {"
                aaa
                bbb
                ccc
            "},
            patch: indoc! {"
                @@ -1,3 +1,3 @@
                -aaa
                -bbb
                -ccc
                +xxx
                +yyy
                +zzz
            "},
            expected_new: indoc! {"
                xxx
                yyy
                zzz
            "},
        },
        Case {
            name: "add_function_body",
            old: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn

                fn handle_keystroke() {
            "},
            patch: indoc! {"
                @@ -1,6 +1,8 @@
                 fn foo() {
                     modal_state.dismiss();
                 }

                -fn
                +fn handle_submit() {
                +    todo()
                +}

                 fn handle_keystroke() {
            "},
            expected_new: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn handle_submit() {
                    todo()
                }

                fn handle_keystroke() {
            "},
        },
        Case {
            name: "with_cursor_offset",
            old: indoc! {r#"
                fn main() {
                    println!();
                }
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,3 @@
                 fn main() {
                -    println!();
                +    eprintln!("");
                 }
            "#},
            expected_new: indoc! {r#"
                fn main() {
                    eprintln!("<|user_cursor|>");
                }
            "#},
        },
        Case {
            name: "non_local_hunk_header_pure_insertion_repro",
            old: indoc! {"
                aaa
                bbb
            "},
            patch: indoc! {"
                @@ -20,2 +20,3 @@
                 aaa
                +xxx
                 bbb
            "},
            expected_new: indoc! {"
                aaa
                xxx
                bbb
            "},
        },
        Case {
            name: "empty_patch_produces_no_edits_marker",
            old: indoc! {"
                aaa
                bbb
            "},
            patch: "@@ -20,2 +20,3 @@\n",
            expected_new: indoc! {"
                aaa
                bbb
            "},
        },
    ];

    for case in cases.iter() {
        // `patch_to_edit_commands` takes the cursor offset relative to the
        // first hunk's new text (context + additions). The expected output
        // mirrors that text, so the marker's position in it is the offset;
        // cases without a marker pass `None`.
        let cursor_offset = case.expected_new.find(CURSOR_MARKER);

        let commands = match hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
        {
            Ok(commands) => commands,
            Err(e) => panic!("failed case {}: {e}", case.name),
        };

        // Every case, including the header-only patch, must yield output that
        // is recognized as edit commands.
        assert!(
            hashline::output_has_edit_commands(&commands),
            "case {}: expected edit commands, got: {commands:?}",
            case.name,
        );

        // Round trip: applying the generated commands reproduces the patch.
        let applied = hashline::apply_edit_commands(case.old, &commands);
        assert_eq!(applied, case.expected_new, "case {}", case.name);
    }
}
2684 }
2685}
2686
2687pub mod seed_coder {
2688 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2689 //!
2690 //! Seed-Coder uses different FIM tokens and order than Qwen:
2691 //! - SPM order: suffix comes FIRST, then prefix, then middle
2692 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2693 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2694 //!
2695 //! All context (related files, edit history) goes in the PREFIX section.
2696 //! The suffix contains only code after the editable region.
2697 //!
2698 //! Example prompt:
2699 //!
2700 //! <[fim-suffix]>
2701 //! code after editable region
2702 //! <[fim-prefix]><filename>related/file.py
2703 //! related file content
2704 //!
2705 //! <filename>edit_history
2706 //! --- a/some_file.py
2707 //! +++ b/some_file.py
2708 //! -old
2709 //! +new
2710 //!
2711 //! <filename>path/to/target_file.py
2712 //! code before editable region
2713 //! <<<<<<< CURRENT
2714 //! code that
2715 //! needs to<|user_cursor|>
2716 //! be rewritten
2717 //! =======
2718 //! <[fim-middle]>
2719 //!
2720 //! Expected output (model generates):
2721 //!
2722 //! updated
2723 //! code with
2724 //! changes applied
2725 //! >>>>>>> UPDATED
2726
2727 use super::*;
2728
/// FIM token that opens the suffix section, which comes first in the prompt.
pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
/// FIM token that opens the prefix section (related files, edit history, cursor file).
pub const FIM_PREFIX: &str = "<[fim-prefix]>";
/// FIM token that ends the prompt; the model's output follows it.
pub const FIM_MIDDLE: &str = "<[fim-middle]>";
/// Marker written before a file path, and before the `edit_history` label.
pub const FILE_MARKER: &str = "<filename>";

/// Line that opens the current text of the editable region.
pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
/// Line that closes the current text of the editable region in the prompt.
pub const SEPARATOR: &str = "=======\n";
/// Line that terminates the updated text in the expected model output.
pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

/// Output body that `no_edits` emits for an empty patch.
pub const NO_EDITS: &str = "NO_EDITS\n";
2739
2740 pub fn special_tokens() -> &'static [&'static str] {
2741 &[
2742 FIM_SUFFIX,
2743 FIM_PREFIX,
2744 FIM_MIDDLE,
2745 FILE_MARKER,
2746 START_MARKER,
2747 SEPARATOR,
2748 END_MARKER,
2749 CURSOR_MARKER,
2750 ]
2751 }
2752
2753 pub fn write_cursor_excerpt_section(
2754 prompt: &mut String,
2755 path: &Path,
2756 context: &str,
2757 editable_range: &Range<usize>,
2758 cursor_offset: usize,
2759 ) {
2760 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2761 prompt.push_str(§ion);
2762 }
2763
2764 pub fn format_prompt_with_budget(
2765 path: &Path,
2766 context: &str,
2767 editable_range: &Range<usize>,
2768 cursor_offset: usize,
2769 events: &[Arc<Event>],
2770 related_files: &[RelatedFile],
2771 max_tokens: usize,
2772 ) -> String {
2773 let cursor_prefix_section =
2774 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2775 assemble_fim_prompt(
2776 context,
2777 editable_range,
2778 &cursor_prefix_section,
2779 events,
2780 related_files,
2781 max_tokens,
2782 )
2783 }
2784
2785 pub fn assemble_fim_prompt(
2786 context: &str,
2787 editable_range: &Range<usize>,
2788 cursor_prefix_section: &str,
2789 events: &[Arc<Event>],
2790 related_files: &[RelatedFile],
2791 max_tokens: usize,
2792 ) -> String {
2793 let suffix_section = build_suffix_section(context, editable_range);
2794
2795 // Use byte-level budgeting to avoid accumulated rounding errors from
2796 // multiple estimate_tokens (floor division) calls across components.
2797 let max_bytes = max_tokens * 3;
2798 let fixed_bytes = suffix_section.len()
2799 + FIM_PREFIX.len()
2800 + cursor_prefix_section.len()
2801 + FIM_MIDDLE.len()
2802 + 2; // two potential newline separators
2803 let content_budget_tokens = estimate_tokens(max_bytes.saturating_sub(fixed_bytes));
2804
2805 let edit_history_section = super::format_edit_history_within_budget(
2806 events,
2807 FILE_MARKER,
2808 "edit_history",
2809 content_budget_tokens,
2810 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2811 );
2812 let remaining_budget_tokens = estimate_tokens(
2813 max_bytes
2814 .saturating_sub(fixed_bytes)
2815 .saturating_sub(edit_history_section.len()),
2816 );
2817
2818 let related_files_section = super::format_related_files_within_budget(
2819 related_files,
2820 FILE_MARKER,
2821 "",
2822 remaining_budget_tokens,
2823 );
2824
2825 let mut prompt = String::new();
2826 prompt.push_str(&suffix_section);
2827 prompt.push_str(FIM_PREFIX);
2828 prompt.push_str(&related_files_section);
2829 if !related_files_section.is_empty() {
2830 prompt.push('\n');
2831 }
2832 prompt.push_str(&edit_history_section);
2833 if !edit_history_section.is_empty() {
2834 prompt.push('\n');
2835 }
2836 prompt.push_str(cursor_prefix_section);
2837 prompt.push_str(FIM_MIDDLE);
2838
2839 prompt
2840 }
2841
2842 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2843 let mut section = String::new();
2844 section.push_str(FIM_SUFFIX);
2845 section.push_str(&context[editable_range.end..]);
2846 if !section.ends_with('\n') {
2847 section.push('\n');
2848 }
2849 section
2850 }
2851
2852 fn build_cursor_prefix_section(
2853 path: &Path,
2854 context: &str,
2855 editable_range: &Range<usize>,
2856 cursor_offset: usize,
2857 ) -> String {
2858 let mut section = String::new();
2859 let path_str = path.to_string_lossy();
2860 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2861
2862 section.push_str(&context[..editable_range.start]);
2863 section.push_str(START_MARKER);
2864 section.push_str(&context[editable_range.start..cursor_offset]);
2865 section.push_str(CURSOR_MARKER);
2866 section.push_str(&context[cursor_offset..editable_range.end]);
2867 if !section.ends_with('\n') {
2868 section.push('\n');
2869 }
2870 section.push_str(SEPARATOR);
2871 section
2872 }
2873
2874 /// Format patch as containing no changes if it's empty; otherwise return None.
2875 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2876 // Count lines in the patch
2877 let empty_patch = patch.lines().count() <= 3;
2878 if empty_patch {
2879 Some(format!("{NO_EDITS}{END_MARKER}"))
2880 } else {
2881 None
2882 }
2883 }
2884}
2885
2886pub mod v0304_variable_edit {
2887 //! A prompt format with no fixed editable region. The entire context is shown
2888 //! to the model, and it chooses which text to replace by outputting surrounding
2889 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2890 //! text.
2891 //!
2892 //! Example prompt:
2893 //!
2894 //! <|file_sep|>path/to/file.py
2895 //! zero
2896 //! one
2897 //! two
2898 //! three<|user_cursor|>
2899 //! four
2900 //! five
2901 //! <|fim_prefix|>
//!
2903 //! Expected output (model generates):
2904 //!
2905 //! two
2906 //! <|fim_middle|>
2907 //! THREE
2908 //! <|fim_suffix|>
2909 //! four
2910 //!
2911 //! The output means: find "two\n...\nfour" in the context, and replace
2912 //! everything between "two\n" and "four" with "THREE\n".
2913
2914 use super::*;
2915
2916 pub fn special_tokens() -> &'static [&'static str] {
2917 &[
2918 "<|fim_prefix|>",
2919 "<|fim_suffix|>",
2920 "<|fim_middle|>",
2921 "<|file_sep|>",
2922 CURSOR_MARKER,
2923 ]
2924 }
2925
2926 pub fn write_cursor_excerpt_section(
2927 prompt: &mut String,
2928 path: &Path,
2929 context: &str,
2930 cursor_offset: usize,
2931 ) {
2932 let path_str = path.to_string_lossy();
2933 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2934
2935 prompt.push_str(&context[..cursor_offset]);
2936 prompt.push_str(CURSOR_MARKER);
2937 prompt.push_str(&context[cursor_offset..]);
2938 if !prompt.ends_with('\n') {
2939 prompt.push('\n');
2940 }
2941 prompt.push_str("<|fim_prefix|>\n")
2942 }
2943
2944 /// Apply a variable-edit model output to the original context text.
2945 ///
2946 /// The model output has the form:
2947 ///
2948 /// - prefix context lines
2949 /// - `<|fim_middle|>`
2950 /// - new text
2951 /// - `<|fim_suffix|>`
2952 /// - suffix context lines
2953 ///
2954 /// We locate the prefix/suffix context lines in the original text and replace
2955 /// everything between them with the new text.
2956 pub fn apply_variable_edit(
2957 context: &str,
2958 model_output: &str,
2959 ) -> Result<(Range<usize>, String)> {
2960 let (prefix_context, rest) = model_output
2961 .split_once("<|fim_middle|>\n")
2962 .or_else(|| model_output.split_once("<|fim_middle|>"))
2963 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2964
2965 let (new_text, suffix_context) = rest
2966 .split_once("<|fim_suffix|>\n")
2967 .or_else(|| rest.split_once("<|fim_suffix|>"))
2968 .unwrap_or((rest, ""));
2969
2970 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2971 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2972 } else {
2973 suffix_context
2974 };
2975
2976 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2977 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2978 + prefix_context.len();
2979 let suffix_offset = if suffix_context.is_empty() {
2980 context.len()
2981 } else {
2982 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2983 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2984 + prefix_offset
2985 };
2986
2987 let edit_range = prefix_offset..suffix_offset;
2988 return Ok((edit_range, new_text.to_string()));
2989 }
2990
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that starts at the beginning of a line (offset 0 or right after a `'\n'`).
///
/// An empty `needle` matches at offset 0. Returns `None` when no occurrence
/// starts on a line boundary.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Test every line start directly. Scanning with `match_indices` is not
    // enough: it yields non-overlapping matches, so a match that begins
    // mid-line can hide an overlapping match that begins on the next line.
    let mut line_starts =
        std::iter::once(0).chain(haystack.match_indices('\n').map(|(newline, _)| newline + 1));
    line_starts.find(|&line_start| haystack[line_start..].starts_with(needle))
}
3001
3002 /// Convert a unified diff patch into the variable-edit output format.
3003 ///
3004 /// Parses `patch` as a unified diff against `old_text` and produces model
3005 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3006 /// delimiters. The diff is resolved by content matching rather than line
3007 /// numbers.
3008 pub fn patch_to_variable_edit_output(
3009 old_text: &str,
3010 patch: &str,
3011 cursor_offset: Option<usize>,
3012 ) -> Result<String> {
3013 // Parse the unified diff into hunks. Each hunk has an `old_context`
3014 // string (context + deleted lines interleaved in order) and a list of
3015 // edits expressed as byte ranges within that context plus replacement
3016 // text.
3017 let hunks = parse_hunks(patch);
3018 if hunks.is_empty() {
3019 return Ok(String::new());
3020 }
3021
3022 // Apply each hunk by finding its old_context in the text and
3023 // performing the edits. We search forward from where the previous
3024 // hunk ended so that hunks are applied in order.
3025 let mut new_text = old_text.to_string();
3026 let mut search_from: usize = 0;
3027 let mut first_hunk_pos: Option<usize> = None;
3028
3029 for hunk in &hunks {
3030 let context_pos = new_text[search_from..]
3031 .find(&hunk.old_context)
3032 .map(|pos| pos + search_from)
3033 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3034
3035 if first_hunk_pos.is_none() {
3036 first_hunk_pos = Some(context_pos);
3037 }
3038
3039 // Apply edits in reverse order so byte offsets remain valid.
3040 for edit in hunk.edits.iter().rev() {
3041 let abs_start = context_pos + edit.range.start;
3042 let abs_end = context_pos + edit.range.end;
3043 new_text.replace_range(abs_start..abs_end, &edit.text);
3044 }
3045
3046 // Advance past this hunk's region in the (now modified) text.
3047 let new_region_len: usize =
3048 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3049 len + edit.text.len() - (edit.range.end - edit.range.start)
3050 });
3051 search_from = context_pos + new_region_len;
3052 }
3053
3054 // Now we have old_text and new_text. Find the changed line range by
3055 // comparing them.
3056 let old_lines: Vec<&str> = old_text.lines().collect();
3057 let new_lines: Vec<&str> = new_text.lines().collect();
3058
3059 // Find first differing line.
3060 let first_changed_row = old_lines
3061 .iter()
3062 .zip(new_lines.iter())
3063 .position(|(a, b)| a != b)
3064 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3065
3066 // Find last differing line (from the end).
3067 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3068 let common_suffix = old_lines
3069 .iter()
3070 .rev()
3071 .zip(new_lines.iter().rev())
3072 .take(max_suffix)
3073 .take_while(|(a, b)| a == b)
3074 .count();
3075
3076 let old_end = old_lines.len() - common_suffix;
3077 let new_end = new_lines.len() - common_suffix;
3078
3079 if first_changed_row == old_end && first_changed_row == new_end {
3080 return Ok(String::new());
3081 }
3082
3083 // Build the replacement text from new_lines[first_diff..new_end].
3084 let mut merged_new_text = String::new();
3085 for line in &new_lines[first_changed_row..new_end] {
3086 merged_new_text.push_str(line);
3087 merged_new_text.push('\n');
3088 }
3089
3090 // cursor_offset is relative to the first hunk's new content in
3091 // new_text. Translate it to an offset within merged_new_text, which
3092 // only contains lines first_diff..new_end of new_text.
3093 if let Some(hunk_offset) = cursor_offset {
3094 let hunk_start = first_hunk_pos.unwrap_or(0);
3095 let absolute_pos = hunk_start + hunk_offset;
3096
3097 // Byte offset where first_diff starts in new_text.
3098 let merged_start: usize = new_lines[..first_changed_row]
3099 .iter()
3100 .map(|line| line.len() + 1)
3101 .sum();
3102
3103 if absolute_pos >= merged_start {
3104 let relative_offset = absolute_pos - merged_start;
3105 if relative_offset <= merged_new_text.len() {
3106 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3107 }
3108 }
3109 }
3110
3111 // Build output with 2 lines of context above and below.
3112 let context_lines_count = 2;
3113 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3114 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3115
3116 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3117 let pattern = &lines[line_range];
3118 let pattern_len = pattern.len();
3119
3120 let mut count = 0;
3121 for offset in 0..=lines.len() - pattern_len {
3122 if &lines[offset..offset + pattern_len] == pattern {
3123 count += 1;
3124 }
3125 }
3126 count
3127 }
3128
3129 // Expand prefix and suffix until they are unique
3130 while prefix_start > 0 {
3131 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3132 prefix_start -= 1;
3133 } else {
3134 break;
3135 }
3136 }
3137 while suffix_end < old_lines.len() {
3138 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3139 suffix_end += 1;
3140 } else {
3141 break;
3142 }
3143 }
3144
3145 let mut output = String::new();
3146 for line in &old_lines[prefix_start..first_changed_row] {
3147 output.push_str(line);
3148 output.push('\n');
3149 }
3150 output.push_str("<|fim_middle|>\n");
3151 output.push_str(&merged_new_text);
3152 output.push_str("<|fim_suffix|>\n");
3153 for line in &old_lines[old_end..suffix_end] {
3154 output.push_str(line);
3155 output.push('\n');
3156 }
3157
3158 Ok(output)
3159 }
3160
/// One hunk of a unified diff, expressed by content rather than line numbers.
struct ParsedHunk {
    /// The hunk's context and removed lines, in order, each terminated by a
    /// newline. This is the text that is searched for in the original.
    old_context: String,
    /// Edits to apply, as byte ranges within `old_context`.
    edits: Vec<ParsedEdit>,
}
3165
/// A single replacement within a hunk's `old_context`.
struct ParsedEdit {
    /// Byte range within `old_context` that is replaced; empty for a pure insertion.
    range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    text: String,
}
3170
3171 /// Parse a unified diff into content-based hunks. Each hunk contains an
3172 /// `old_context` string (context lines + deleted lines, which together
3173 /// form the text that should be found in the original) and a list of edits
3174 /// expressed as byte ranges within that context.
3175 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3176 let mut hunks = Vec::new();
3177 let mut current: Option<ParsedHunk> = None;
3178
3179 for line in patch.lines() {
3180 if line.starts_with("@@") {
3181 if let Some(hunk) = current.take() {
3182 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3183 hunks.push(hunk);
3184 }
3185 }
3186 current = Some(ParsedHunk {
3187 old_context: String::new(),
3188 edits: Vec::new(),
3189 });
3190 } else if line.starts_with("---") || line.starts_with("+++") {
3191 continue;
3192 } else if let Some(hunk) = &mut current {
3193 if let Some(added) = line.strip_prefix('+') {
3194 let pos = hunk.old_context.len();
3195 if let Some(last_edit) = hunk.edits.last_mut() {
3196 if last_edit.range.end == pos {
3197 writeln!(&mut last_edit.text, "{added}").ok();
3198 continue;
3199 }
3200 }
3201 hunk.edits.push(ParsedEdit {
3202 range: pos..pos,
3203 text: format!("{added}\n"),
3204 });
3205 } else if let Some(removed) = line.strip_prefix('-') {
3206 let start = hunk.old_context.len();
3207 writeln!(&mut hunk.old_context, "{removed}").ok();
3208 let end = hunk.old_context.len();
3209 if let Some(last_edit) = hunk.edits.last_mut() {
3210 if last_edit.range.end == start {
3211 last_edit.range.end = end;
3212 continue;
3213 }
3214 }
3215 hunk.edits.push(ParsedEdit {
3216 range: start..end,
3217 text: String::new(),
3218 });
3219 } else {
3220 let ctx = line.strip_prefix(' ').unwrap_or(line);
3221 writeln!(&mut hunk.old_context, "{ctx}").ok();
3222 }
3223 }
3224 }
3225
3226 if let Some(hunk) = current {
3227 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3228 hunks.push(hunk);
3229 }
3230 }
3231
3232 hunks
3233 }
3234
3235 #[cfg(test)]
3236 mod tests {
3237 use super::*;
3238 use indoc::indoc;
3239
#[test]
fn test_apply_variable_edit() {
    /// One scenario: applying `model_output` to `original` must produce
    /// `expected`.
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for Case {
        name,
        original,
        model_output,
        expected,
    } in cases
    {
        let (edit_range, replacement) = apply_variable_edit(original, model_output).unwrap();

        // Splice the replacement into a copy of the original text.
        let mut edited = String::from(original);
        edited.replace_range(edit_range, &replacement);

        assert_eq!(edited, expected, "{}", name);
    }
}
3456
#[test]
fn test_patch_to_variable_edit() {
    /// One scenario. `patch` (with optional `cursor_offset`) is converted to
    /// variable-edit output, which must equal `expected_variable_edit`.
    /// Applying that output to `old` must produce `expected_after_apply`.
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        cursor_offset: Option<usize>,
        expected_variable_edit: &'static str,
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                 b
                +X
                 c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                 b
                -c
                 d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                 two
                 a
                 b
                -c
                +C
                 d
                 three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            // Distinct name so a failure message identifies this case; it
            // previously shared its name with the case above.
            name: "expands_context_when_nested_braces_not_unique_before_and_after",
            old: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        two();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                     {
                -        two();
                +        TWO();
                     }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                        one();
                    }
                }
                {
                    {
                <|fim_middle|>
                        TWO();
                <|fim_suffix|>
                    }
                }
                {
                    {
                        three();
            "},
            expected_after_apply: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        TWO();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
        },
    ];

    for case in cases {
        // Step 1: the patch converts to exactly the expected variable-edit output.
        let output = patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
            .unwrap_or_else(|error| {
                panic!("failed converting patch for {}: {error}", case.name)
            });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // Step 2: applying the generated output yields the expected text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // Step 3: the hand-written expected output applies to the same text,
        // which checks the fixture independently of the conversion.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
3873
/// Checks the text `write_cursor_excerpt_section` appends for a small excerpt:
/// a `<|file_sep|>` header with the path, the excerpt with the cursor marker
/// spliced in at the given byte offset, and a closing `<|fim_prefix|>` line.
#[test]
fn test_write_cursor_excerpt_section() {
    let path = Path::new("test.rs");
    // The body line is indented with four spaces: "fn main() {\n" is 12 bytes,
    // the indent is 4 more, so byte 17 sits right after the `h` of `hello`.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;

    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);

    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
3886 }
3887}
3888
3889/// The zeta1 prompt format
3890pub mod zeta1 {
3891 use super::*;
3892 use std::fmt::Write;
3893
3894 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3895 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3896 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3897 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3898
3899 const INSTRUCTION_HEADER: &str = concat!(
3900 "### Instruction:\n",
3901 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3902 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3903 "into account the cursor location.\n\n",
3904 "### User Edits:\n\n"
3905 );
3906 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3907 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3908
3909 /// Formats a complete zeta1 prompt from the input events and excerpt.
3910 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3911 let mut prompt = String::with_capacity(
3912 INSTRUCTION_HEADER.len()
3913 + input_events.len()
3914 + EXCERPT_HEADER.len()
3915 + input_excerpt.len()
3916 + RESPONSE_HEADER.len(),
3917 );
3918 prompt.push_str(INSTRUCTION_HEADER);
3919 prompt.push_str(input_events);
3920 prompt.push_str(EXCERPT_HEADER);
3921 prompt.push_str(input_excerpt);
3922 prompt.push_str(RESPONSE_HEADER);
3923 prompt
3924 }
3925
3926 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3927 /// editable and context byte-offset ranges within `cursor_excerpt`.
3928 pub fn format_zeta1_from_input(
3929 input: &ZetaPromptInput,
3930 editable_range: Range<usize>,
3931 context_range: Range<usize>,
3932 ) -> String {
3933 let events = format_zeta1_events(&input.events);
3934 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3935 format_zeta1_prompt(&events, &excerpt)
3936 }
3937
3938 /// Formats events in zeta1 style (oldest first).
3939 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3940 let mut result = String::new();
3941 for event in
3942 events
3943 .iter()
3944 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3945 &ZetaFormat::V0114180EditableRegion,
3946 )))
3947 {
3948 let event_string = format_zeta1_event(event);
3949 if event_string.is_empty() {
3950 continue;
3951 }
3952 if !result.is_empty() {
3953 result.push_str("\n\n");
3954 }
3955 result.push_str(&event_string);
3956 }
3957 result
3958 }
3959
3960 fn format_zeta1_event(event: &Event) -> String {
3961 match event {
3962 Event::BufferChange {
3963 path,
3964 old_path,
3965 diff,
3966 ..
3967 } => {
3968 let mut prompt = String::new();
3969 if old_path != path {
3970 writeln!(
3971 prompt,
3972 "User renamed {} to {}\n",
3973 old_path.display(),
3974 path.display()
3975 )
3976 .ok();
3977 }
3978 if !diff.is_empty() {
3979 write!(
3980 prompt,
3981 "User edited {}:\n```diff\n{}\n```",
3982 path.display(),
3983 diff
3984 )
3985 .ok();
3986 }
3987 prompt
3988 }
3989 }
3990 }
3991
3992 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3993 /// within `cursor_excerpt`.
3994 fn format_zeta1_excerpt(
3995 input: &ZetaPromptInput,
3996 editable_range: Range<usize>,
3997 context_range: Range<usize>,
3998 ) -> String {
3999 let path_str = input.cursor_path.to_string_lossy();
4000 let excerpt = &*input.cursor_excerpt;
4001 let cursor_offset = input.cursor_offset_in_excerpt;
4002
4003 let mut prompt = String::new();
4004 writeln!(&mut prompt, "```{path_str}").ok();
4005
4006 let starts_at_file_beginning =
4007 input.excerpt_start_row == Some(0) && context_range.start == 0;
4008 if starts_at_file_beginning {
4009 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4010 }
4011
4012 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4013
4014 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4015 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4016 prompt.push_str(CURSOR_MARKER);
4017 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4018 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4019
4020 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4021 write!(prompt, "\n```").ok();
4022
4023 prompt
4024 }
4025
4026 /// Cleans zeta1 model output by extracting content between editable region
4027 /// markers and converting the zeta1 cursor marker to the universal one.
4028 /// Returns `None` if the output doesn't contain the expected markers.
4029 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4030 let content = output.replace(CURSOR_MARKER, "");
4031
4032 let content_start = content
4033 .find(EDITABLE_REGION_START_MARKER)
4034 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4035 .map(|pos| {
4036 if content.as_bytes().get(pos) == Some(&b'\n') {
4037 pos + 1
4038 } else {
4039 pos
4040 }
4041 })
4042 .unwrap_or(0);
4043
4044 let content_end = content
4045 .find(EDITABLE_REGION_END_MARKER)
4046 .map(|pos| {
4047 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4048 pos - 1
4049 } else {
4050 pos
4051 }
4052 })
4053 .unwrap_or(content.len());
4054
4055 if content_start > content_end {
4056 return Some(String::new());
4057 }
4058
4059 let extracted = &content[content_start..content_end];
4060
4061 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4062 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4063 let text_before_cursor = text_before_cursor
4064 .find(EDITABLE_REGION_START_MARKER)
4065 .map(|pos| {
4066 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4067 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4068 after_marker + 1
4069 } else {
4070 after_marker
4071 }
4072 })
4073 .unwrap_or(0);
4074 let offset_in_extracted = zeta1_cursor_pos
4075 .saturating_sub(text_before_cursor)
4076 .min(extracted.len());
4077 offset_in_extracted
4078 });
4079
4080 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4081 if let Some(offset) = cursor_offset {
4082 result.push_str(&extracted[..offset]);
4083 result.push_str(super::CURSOR_MARKER);
4084 result.push_str(&extracted[offset..]);
4085 } else {
4086 result.push_str(extracted);
4087 }
4088
4089 Some(result)
4090 }
4091}
4092
4093#[cfg(test)]
4094mod tests {
4095 use super::*;
4096 use indoc::indoc;
4097
4098 fn make_input(
4099 cursor_excerpt: &str,
4100 editable_range: Range<usize>,
4101 cursor_offset: usize,
4102 events: Vec<Event>,
4103 related_files: Vec<RelatedFile>,
4104 ) -> ZetaPromptInput {
4105 let context_range = 0..cursor_excerpt.len();
4106 ZetaPromptInput {
4107 cursor_path: Path::new("test.rs").into(),
4108 cursor_excerpt: cursor_excerpt.into(),
4109 cursor_offset_in_excerpt: cursor_offset,
4110 excerpt_start_row: None,
4111 events: events.into_iter().map(Arc::new).collect(),
4112 related_files: Some(related_files),
4113 active_buffer_diagnostics: vec![],
4114 excerpt_ranges: ExcerptRanges {
4115 editable_150: editable_range.clone(),
4116 editable_180: editable_range.clone(),
4117 editable_350: editable_range,
4118 editable_150_context_350: context_range.clone(),
4119 editable_180_context_350: context_range.clone(),
4120 editable_350_context_150: context_range,
4121 ..Default::default()
4122 },
4123 syntax_ranges: None,
4124 experiment: None,
4125 in_open_source_repo: false,
4126 can_collect_data: false,
4127 repo_url: None,
4128 }
4129 }
4130
4131 fn make_input_with_context_range(
4132 excerpt: &str,
4133 editable_range: Range<usize>,
4134 context_range: Range<usize>,
4135 cursor_offset: usize,
4136 ) -> ZetaPromptInput {
4137 ZetaPromptInput {
4138 cursor_path: Path::new("test.rs").into(),
4139 cursor_excerpt: excerpt.into(),
4140 cursor_offset_in_excerpt: cursor_offset,
4141 excerpt_start_row: None,
4142 events: vec![],
4143 related_files: Some(vec![]),
4144 active_buffer_diagnostics: vec![],
4145 excerpt_ranges: ExcerptRanges {
4146 editable_150: editable_range.clone(),
4147 editable_180: editable_range.clone(),
4148 editable_350: editable_range,
4149 editable_150_context_350: context_range.clone(),
4150 editable_180_context_350: context_range.clone(),
4151 editable_350_context_150: context_range,
4152 ..Default::default()
4153 },
4154 syntax_ranges: None,
4155 experiment: None,
4156 in_open_source_repo: false,
4157 can_collect_data: false,
4158 repo_url: None,
4159 }
4160 }
4161
4162 fn make_event(path: &str, diff: &str) -> Event {
4163 Event::BufferChange {
4164 path: Path::new(path).into(),
4165 old_path: Path::new(path).into(),
4166 diff: diff.to_string(),
4167 predicted: false,
4168 in_open_source_repo: false,
4169 }
4170 }
4171
4172 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4173 RelatedFile {
4174 path: Path::new(path).into(),
4175 max_row: content.lines().count() as u32,
4176 excerpts: vec![RelatedExcerpt {
4177 row_range: 0..content.lines().count() as u32,
4178 text: content.into(),
4179 order: 0,
4180 }],
4181 in_open_source_repo: false,
4182 }
4183 }
4184
4185 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4186 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4187 }
4188
/// With a generous budget the prompt contains the related file, the edit
/// history and the full cursor section.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let prompt_input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    let actual = format_with_budget(&prompt_input, 10000).unwrap();
    assert_eq!(actual, expected.to_string());
}
4222
/// Compares a roomy budget with a budget of 57 tokens. Note that in the tight
/// case asserted here it is the related files that disappear; the edit
/// history and the cursor section are still present.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let prompt_input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![
            make_related_file("r1.rs", "aaaaaaa\n"),
            make_related_file("r2.rs", "bbbbbbb\n"),
        ],
    );

    // Roomy budget: everything is rendered.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        aaaaaaa
        <|file_sep|>r2.rs
        bbbbbbb
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 10000).unwrap(),
        untruncated.to_string()
    );

    // Budget of 57: both related files are gone.
    let truncated = indoc! {r#"
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 57),
        Some(truncated.to_string())
    );
}
4278
/// A related file with three equally ordered excerpts is rendered in full
/// under a roomy budget; under a budget of 50 only its first excerpt is kept.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpt = |row_range: Range<u32>, text: &str| RelatedExcerpt {
        row_range,
        text: text.into(),
        order: 0,
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let prompt_input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 10000).unwrap(),
        all_excerpts.to_string()
    );

    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 50).unwrap(),
        first_excerpt_only.to_string()
    );
}
4345
/// Two related files compete for a tight budget: `file_a.rs` carries an
/// order-5 excerpt and `file_b.rs` an order-1 excerpt. Only the lower-order
/// excerpt (from `file_b.rs`) should survive truncation.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };
    let prompt_input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![
            single_excerpt_file("file_a.rs", "low priority content\n", 5),
            single_excerpt_file("file_b.rs", "high priority content\n", 1),
        ],
    );

    // Roomy budget: both files are rendered, file_a.rs before file_b.rs.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 10000).unwrap(),
        both_files.to_string()
    );

    // Tight budget: the cursor section is ~37 tokens, so a budget of 52 leaves
    // ~15 for related files. file_b's header (7) plus excerpt (7) is 14 tokens
    // and fits; file_a would need another 14 and does not.
    let high_priority_only = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 52).unwrap(),
        high_priority_only.to_string()
    );
}
4416
/// One file holds two order-1 excerpts and one order-3 excerpt. Under a tight
/// budget the order-3 excerpt is dropped while both order-1 excerpts stay,
/// even though all three belong to the same file. This keeps the parent
/// invariant intact: a parent outline item has an order no greater than its
/// best child, so it is included whenever any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let excerpt = |row_range: Range<u32>, text: &str, order| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..5, "mod header\n", 1),
            excerpt(5..15, "important fn\n", 1),
            excerpt(15..30, "less important fn\n", 3),
        ],
    };
    let prompt_input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Roomy budget: all three excerpts are rendered.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 10000).unwrap(),
        all_excerpts.to_string()
    );

    // Tight budget: only the order-1 excerpts (header + important fn) remain.
    let order_one_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 55).unwrap(),
        order_one_only.to_string()
    );
}
4492
/// With two events and a budget of 60, the first event in the list (`old.rs`)
/// is dropped and the last one (`new.rs`) is kept.
#[test]
fn test_truncation_drops_older_events_first() {
    let history = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let prompt_input = make_input("x", 0..1, 0, history, vec![]);

    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 10000).unwrap(),
        both_events.to_string()
    );

    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&prompt_input, 60).unwrap(),
        newest_event_only.to_string()
    );
}
4540
/// A budget of 30 tokens is too small for this input: the formatter yields
/// `None` rather than a prompt.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let prompt_input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let formatted = format_with_budget(&prompt_input, 30);
    assert!(formatted.is_none());
}
4553
4554 #[track_caller]
4555 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4556 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4557 .expect("seed coder prompt formatting should succeed")
4558 }
4559
4560 #[track_caller]
4561 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4562 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4563 .expect("seed coder prompt formatting should succeed")
4564 }
4565
/// Seed-coder layout with a related file, one edit event and context on both
/// sides of the editable region: the suffix comes first, then the prefix
/// section, ending right after the `<[fim-middle]>` marker with no newline.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let prompt_input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&prompt_input), expected);
}
4598
/// Seed-coder layout when there are neither events nor related files: only
/// the suffix and the cursor file section are emitted.
#[test]
fn test_seed_coder_no_context() {
    let prompt_input = make_input("before\nmiddle\nafter", 7..13, 10, Vec::new(), Vec::new());

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&prompt_input), expected);
}
4616
/// Seed-coder truncation at three budgets: everything fits at the default
/// budget, nothing can be produced at 24 tokens, and at 40 tokens only the
/// cursor section is left.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let prompt_input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![make_related_file("r1.rs", "content\n")],
    );

    // Large budget: related file, edit history and cursor section are all present.
    let everything = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&prompt_input), everything);

    // 24 tokens: no prompt at all.
    let too_small =
        format_prompt_with_budget_for_format(&prompt_input, ZetaFormat::V0211SeedCoder, 24);
    assert_eq!(too_small, None);

    // 40 tokens: only the cursor section remains.
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_seed_coder_with_budget(&prompt_input, 40),
        cursor_section_only
    );
}
4665
/// Seed-coder formatting with an order-10 file and an order-1 file. Both are
/// rendered under the default budget; at 44 tokens all related-file context
/// is dropped and only the cursor section is produced.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: text.into(),
            order,
        }],
    };
    let prompt_input = make_input(
        "code",
        0..4,
        2,
        vec![],
        vec![
            single_excerpt_file("low_prio.rs", "low prio\n", 10),
            single_excerpt_file("high_prio.rs", "high prio\n", 1),
        ],
    );

    // Large budget: both files appear, in the order they were supplied
    // (low_prio.rs first, then high_prio.rs).
    let both_files = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&prompt_input), both_files);

    // Tight budget under the generic heuristic: context is dropped but the
    // minimal cursor section still fits.
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_prompt_with_budget_for_format(&prompt_input, ZetaFormat::V0211SeedCoder, 44),
        Some(cursor_section_only.to_string())
    );
}
4730
/// Full zeta1 prompt for an excerpt that starts at row 0, with one edit event
/// and an editable region (`fn foo() {` plus its body line) sitting between
/// context on both sides.
#[test]
fn test_format_zeta1_from_input_basic() {
    // Byte layout (the body line is indented four spaces):
    //   0..15  "fn before() {}\n"
    //  15..26  "fn foo() {\n"
    //  26..41  "    let x = 1;\n"
    //  41..    "}\nfn after() {}\n"
    // so the editable range 15..41 is exactly the two `foo` lines and the
    // cursor at 30 sits just before `let`.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 15..41,
            editable_180: 15..41,
            editable_350: 15..41,
            editable_150_context_350: 0..excerpt.len(),
            editable_180_context_350: 0..excerpt.len(),
            editable_350_context_150: 0..excerpt.len(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4795
/// Zeta1 prompt for an excerpt that starts at row 10: no `<|start_of_file|>`
/// marker is emitted, and with no events the "User Edits" section is empty.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // Byte layout (the body line is indented four spaces):
    //   0..11  "fn foo() {\n"
    //  11..26  "    let x = 1;\n"
    //  26..28  "}\n"
    // so the excerpt is 28 bytes long and the cursor at 15 sits just before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4852
/// Zeta1 prompt where the editable range is a strict sub-range of the
/// context range: the comment lines before and after `foo` are rendered
/// outside the editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Byte layout (the body line is indented four spaces):
    //   0..10  "// prefix\n"
    //  10..21  "fn foo() {\n"
    //  21..36  "    let x = 1;\n"
    //  36..38  "}\n"
    // The editable range 10..37 ends right after `}` (its newline is left
    // out), and the cursor at 25 sits just before `let`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4915
/// Exercises the event-count cap of `format_edit_history_within_budget` with
/// an unlimited token budget: a cap of 5 keeps all three events, a cap of 2
/// keeps the last two, and a cap of 0 produces an empty section.
#[test]
fn test_max_event_count() {
    fn make_numbered_event(index: usize) -> Event {
        let path = format!("event-{index}.rs");
        let diff = format!("-old-{index}\n+new-{index}\n");
        make_event(&path, &diff)
    }

    let numbered_events = (0..3).map(make_numbered_event).collect();
    let input = make_input("x", 0..1, 0, numbered_events, vec![]);

    // Cap above the number of events: nothing is dropped.
    let all_events = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        5,
    );
    assert_eq!(
        &all_events,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of two: event-0 is dropped.
    let last_two_events = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        2,
    );
    assert_eq!(
        &last_two_events,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of zero: the section is empty.
    let no_events = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        0,
    );
    assert_eq!(&no_events, "");
}
4996
/// The text between the editable-region markers is returned without the
/// marker lines and without the newline that precedes the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    let model_output = indoc! {"
        <|editable_region_start|>
        fn main() {
         println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let expected = "fn main() {\n println!(\"hello\");\n}";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5010
/// A zeta1 cursor marker inside the editable region is replaced, in place, by
/// the universal `<|user_cursor|>` marker.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    let model_output = indoc! {"
        <|editable_region_start|>
        fn main() {
         <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let expected = "fn main() {\n <|user_cursor|>println!(\"hello\");\n}";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5027
/// Output without any region markers is returned unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let model_output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
5034
/// Start and end markers on adjacent lines yield an empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let model_output = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, "");
}
5041
5042 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5043 let mut result = excerpt.to_string();
5044 result.replace_range(
5045 parsed_output.range_in_excerpt.clone(),
5046 &parsed_output.new_editable_region,
5047 );
5048 result
5049 }
5050
/// Parsing git-merge-marker output replaces only the editable line, leaving
/// the text outside the editable range (inside and outside the context
/// range) untouched.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let edited = apply_edit(excerpt, &parsed);
    assert_eq!(
        edited,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
5077
/// When the model echoes the editable region verbatim, applying the parsed
/// output leaves the excerpt unchanged.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_range = editable_start..excerpt.find("ddd").unwrap();
    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        editable_start,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5096
/// Output with and without the trailing `>>>>>>> UPDATED` marker parses to
/// the same edit.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input =
        make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);
    let format = ZetaFormat::V0131GitMergeMarkersPrefix;

    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    assert_eq!(
        apply_edit(excerpt, &with_marker),
        apply_edit(excerpt, &without_marker)
    );
    assert_eq!(apply_edit(excerpt, &with_marker), "new content\n");
}
5110}