1pub mod excerpt_ranges;
2pub mod multi_region;
3pub mod udiff;
4
5use anyhow::{Result, anyhow};
6use serde::{Deserialize, Serialize};
7use std::fmt::Write;
8use std::ops::Range;
9use std::path::Path;
10use std::sync::Arc;
11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
12
13pub use crate::excerpt_ranges::{
14 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
15};
16
/// Marker text that stands for the cursor position; it is inserted into
/// generated text at the cursor offset and searched for when parsing
/// model output.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
24
/// Rough token estimate for `bytes` bytes of text, assuming about three bytes
/// per token. Uses integer division, so the result is rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
28
/// Leave some slack to avoid overflow.
///
/// Returns 90% of `max_tokens`, rounded down to a whole number of tokens.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const USABLE_FRACTION: f64 = 0.9;
    let usable = max_tokens as f64 * USABLE_FRACTION;
    usable.floor() as usize
}
33
/// Serializable input from which a prediction prompt is built for the file
/// under the cursor.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the cursor excerpt section; related-file excerpts
    /// with the same path are filtered against the cursor excerpt's rows.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt around the cursor. The offsets and ranges below
    /// index into this string.
    pub cursor_excerpt: Arc<str>,
    /// Offset of the cursor within `cursor_excerpt`
    /// (presumably a byte offset, like `syntax_ranges` — confirm).
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows computed inside `cursor_excerpt` before they
    /// are compared with related-file row ranges. When absent, related files
    /// are not filtered.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events rendered into the edit-history section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files; `None` is treated as an empty list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
61
/// Identifies a prompt/output format version.
///
/// The variant name doubles as the format's textual name (via
/// `IntoStaticStr`), and `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
}
99
100impl std::fmt::Display for ZetaFormat {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 write!(f, "{}", <&'static str>::from(self))
103 }
104}
105
106impl ZetaFormat {
107 pub fn parse(format_name: &str) -> Result<Self> {
108 let lower = format_name.to_lowercase();
109
110 // Exact case-insensitive match takes priority, bypassing ambiguity checks.
111 for variant in ZetaFormat::iter() {
112 if <&'static str>::from(&variant).to_lowercase() == lower {
113 return Ok(variant);
114 }
115 }
116
117 let mut results = ZetaFormat::iter().filter(|version| {
118 <&'static str>::from(version)
119 .to_lowercase()
120 .contains(&lower)
121 });
122 let Some(result) = results.next() else {
123 anyhow::bail!(
124 "`{format_name}` did not match any of:\n{}",
125 Self::options_as_string()
126 );
127 };
128 if results.next().is_some() {
129 anyhow::bail!(
130 "`{format_name}` matched more than one of:\n{}",
131 Self::options_as_string()
132 );
133 }
134 Ok(result)
135 }
136
137 pub fn options_as_string() -> String {
138 ZetaFormat::iter()
139 .map(|format| format!("- {}\n", <&'static str>::from(format)))
140 .collect::<Vec<_>>()
141 .concat()
142 }
143}
144
/// An edit-history event. Serialized by serde as an internally tagged enum
/// whose tag field is `event`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` header line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` header line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        in_open_source_repo: bool,
    },
}
156
157impl Event {
158 pub fn in_open_source_repo(&self) -> bool {
159 match self {
160 Event::BufferChange {
161 in_open_source_repo,
162 ..
163 } => *in_open_source_repo,
164 }
165 }
166}
167
168pub fn write_event(prompt: &mut String, event: &Event) {
169 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
170 for component in path.components() {
171 prompt.push('/');
172 write!(prompt, "{}", component.as_os_str().display()).ok();
173 }
174 }
175 match event {
176 Event::BufferChange {
177 path,
178 old_path,
179 diff,
180 predicted,
181 in_open_source_repo: _,
182 } => {
183 if *predicted {
184 prompt.push_str("// User accepted prediction:\n");
185 }
186 prompt.push_str("--- a");
187 write_path_as_unix_str(prompt, old_path.as_ref());
188 prompt.push_str("\n+++ b");
189 write_path_as_unix_str(prompt, path.as_ref());
190 prompt.push('\n');
191 prompt.push_str(diff);
192 }
193 }
194}
195
/// A diagnostic attached to the active buffer, together with a text snippet.
// NOTE(review): field meanings below are inferred from names and types only;
// nothing in the visible code reads them — confirm against the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    pub severity: Option<i32>,
    pub message: String,
    pub snippet: String,
    // Presumably the buffer rows covered by `snippet` — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    // Presumably the diagnostic's span as offsets into `snippet` — TODO confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
204
/// A file other than (or overlapping with) the cursor file, carried as a set
/// of excerpts.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Compared with the cursor path when filtering redundant excerpts.
    pub path: Arc<Path>,
    // Presumably the last row of the file — TODO confirm.
    pub max_row: u32,
    /// Excerpts taken from this file. A file left with no excerpts after
    /// redundancy filtering is dropped.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
213
/// One excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor excerpt's row
    /// range when filtering redundant excerpts.
    pub row_range: Range<u32>,
    /// Excerpt text.
    pub text: Arc<str>,
    // Defaults to 0 when missing from serialized data.
    // NOTE(review): presumably a priority/ordering key — confirm with the consumer.
    #[serde(default)]
    pub order: usize,
}
221
222pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
223 special_tokens_for_format(format).iter().any(|token| {
224 if let Some(line_token) = token.strip_suffix('\n') {
225 input.cursor_excerpt.lines().any(|line| line == line_token)
226 } else {
227 input.cursor_excerpt.contains(token)
228 }
229 })
230}
231
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
///
/// Returns whatever `format_prompt_with_budget_for_format` returns for that
/// budget, which is `None` when the estimated prompt size exceeds it.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
235
236pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
237 match format {
238 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
239 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
240 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
241 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
242 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
243 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
244 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
245 ZetaFormat::v0226Hashline => hashline::special_tokens(),
246 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
247 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
248 ZetaFormat::V0316SeedMultiRegions => {
249 static TOKENS: &[&str] = &[
250 seed_coder::FIM_SUFFIX,
251 seed_coder::FIM_PREFIX,
252 seed_coder::FIM_MIDDLE,
253 seed_coder::FILE_MARKER,
254 multi_region::V0316_END_MARKER,
255 CURSOR_MARKER,
256 multi_region::MARKER_TAG_PREFIX,
257 ];
258 TOKENS
259 }
260 ZetaFormat::V0318SeedMultiRegions => {
261 static TOKENS: &[&str] = &[
262 seed_coder::FIM_SUFFIX,
263 seed_coder::FIM_PREFIX,
264 seed_coder::FIM_MIDDLE,
265 seed_coder::FILE_MARKER,
266 multi_region::V0318_END_MARKER,
267 CURSOR_MARKER,
268 multi_region::MARKER_TAG_PREFIX,
269 ];
270 TOKENS
271 }
272 ZetaFormat::V0317SeedMultiRegions => {
273 static TOKENS: &[&str] = &[
274 seed_coder::FIM_SUFFIX,
275 seed_coder::FIM_PREFIX,
276 seed_coder::FIM_MIDDLE,
277 seed_coder::FILE_MARKER,
278 multi_region::V0317_END_MARKER,
279 CURSOR_MARKER,
280 multi_region::RELATIVE_MARKER_TAG_PREFIX,
281 ];
282 TOKENS
283 }
284 ZetaFormat::V0327SingleFile => {
285 static TOKENS: &[&str] = &[
286 seed_coder::FIM_SUFFIX,
287 seed_coder::FIM_PREFIX,
288 seed_coder::FIM_MIDDLE,
289 seed_coder::FILE_MARKER,
290 multi_region::V0327_END_MARKER,
291 CURSOR_MARKER,
292 multi_region::MARKER_TAG_PREFIX,
293 ];
294 TOKENS
295 }
296 ZetaFormat::V0306SeedMultiRegions => {
297 static TOKENS: &[&str] = &[
298 seed_coder::FIM_SUFFIX,
299 seed_coder::FIM_PREFIX,
300 seed_coder::FIM_MIDDLE,
301 seed_coder::FILE_MARKER,
302 seed_coder::START_MARKER,
303 seed_coder::SEPARATOR,
304 seed_coder::END_MARKER,
305 CURSOR_MARKER,
306 multi_region::MARKER_TAG_PREFIX,
307 ];
308 TOKENS
309 }
310 }
311}
312
313/// Returns the (editable_token_limit, context_token_limit) for a given format.
314pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
315 match format {
316 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
317 ZetaFormat::V0114180EditableRegion => (180, 350),
318 ZetaFormat::V0120GitMergeMarkers
319 | ZetaFormat::V0131GitMergeMarkersPrefix
320 | ZetaFormat::V0211Prefill
321 | ZetaFormat::V0211SeedCoder
322 | ZetaFormat::v0226Hashline
323 | ZetaFormat::V0306SeedMultiRegions
324 | ZetaFormat::V0316SeedMultiRegions
325 | ZetaFormat::V0318SeedMultiRegions
326 | ZetaFormat::V0317SeedMultiRegions
327 | ZetaFormat::V0327SingleFile
328 | ZetaFormat::V0304SeedNoEdits => (350, 150),
329
330 ZetaFormat::V0304VariableEdit => (1024, 0),
331 }
332}
333
334pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
335 match format {
336 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
337 ZetaFormat::V0112MiddleAtEnd
338 | ZetaFormat::V0113Ordered
339 | ZetaFormat::V0114180EditableRegion
340 | ZetaFormat::V0120GitMergeMarkers
341 | ZetaFormat::V0131GitMergeMarkersPrefix
342 | ZetaFormat::V0211Prefill
343 | ZetaFormat::V0211SeedCoder
344 | ZetaFormat::V0304VariableEdit
345 | ZetaFormat::V0306SeedMultiRegions
346 | ZetaFormat::V0304SeedNoEdits => &[],
347 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
348 ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
349 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
350 ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
351 }
352}
353
354/// Return (editable_range, context_range) for the prompt format
355pub fn excerpt_ranges_for_format(
356 format: ZetaFormat,
357 ranges: &ExcerptRanges,
358) -> (Range<usize>, Range<usize>) {
359 match format {
360 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
361 ranges.editable_150.clone(),
362 ranges.editable_150_context_350.clone(),
363 ),
364 ZetaFormat::V0114180EditableRegion => (
365 ranges.editable_180.clone(),
366 ranges.editable_180_context_350.clone(),
367 ),
368 ZetaFormat::V0120GitMergeMarkers
369 | ZetaFormat::V0131GitMergeMarkersPrefix
370 | ZetaFormat::V0211Prefill
371 | ZetaFormat::V0211SeedCoder
372 | ZetaFormat::v0226Hashline
373 | ZetaFormat::V0304SeedNoEdits
374 | ZetaFormat::V0306SeedMultiRegions
375 | ZetaFormat::V0316SeedMultiRegions
376 | ZetaFormat::V0318SeedMultiRegions
377 | ZetaFormat::V0317SeedMultiRegions => (
378 ranges.editable_350.clone(),
379 ranges.editable_350_context_150.clone(),
380 ),
381 ZetaFormat::V0327SingleFile => (
382 ranges.editable_350_context_150.clone(),
383 ranges.context_8192.clone().unwrap_or(
384 // shouldn't be used, only for compat with old data/clients
385 ranges.editable_350_context_150.clone(),
386 ),
387 ),
388
389 ZetaFormat::V0304VariableEdit => {
390 let context = ranges
391 .editable_350_context_1024
392 .clone()
393 .or(ranges.editable_350_context_512.clone())
394 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
395 (context.clone(), context)
396 }
397 }
398}
399
400pub fn write_cursor_excerpt_section_for_format(
401 format: ZetaFormat,
402 prompt: &mut String,
403 path: &Path,
404 context: &str,
405 editable_range: &Range<usize>,
406 cursor_offset: usize,
407) {
408 match format {
409 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
410 prompt,
411 path,
412 context,
413 editable_range,
414 cursor_offset,
415 ),
416 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
417 v0113_ordered::write_cursor_excerpt_section(
418 prompt,
419 path,
420 context,
421 editable_range,
422 cursor_offset,
423 )
424 }
425 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
426 prompt,
427 path,
428 context,
429 editable_range,
430 cursor_offset,
431 ),
432 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
433 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
434 prompt,
435 path,
436 context,
437 editable_range,
438 cursor_offset,
439 )
440 }
441 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
442 seed_coder::write_cursor_excerpt_section(
443 prompt,
444 path,
445 context,
446 editable_range,
447 cursor_offset,
448 )
449 }
450 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
451 prompt,
452 path,
453 context,
454 editable_range,
455 cursor_offset,
456 ),
457 ZetaFormat::V0304VariableEdit => {
458 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
459 }
460 ZetaFormat::V0306SeedMultiRegions => {
461 prompt.push_str(&build_v0306_cursor_prefix(
462 path,
463 context,
464 editable_range,
465 cursor_offset,
466 ));
467 }
468 ZetaFormat::V0316SeedMultiRegions => {
469 prompt.push_str(&build_v0316_cursor_prefix(
470 path,
471 context,
472 editable_range,
473 cursor_offset,
474 ));
475 }
476 ZetaFormat::V0318SeedMultiRegions => {
477 prompt.push_str(&build_v0318_cursor_prefix(
478 path,
479 context,
480 editable_range,
481 cursor_offset,
482 ));
483 }
484 ZetaFormat::V0317SeedMultiRegions => {
485 prompt.push_str(&build_v0317_cursor_prefix(
486 path,
487 context,
488 editable_range,
489 cursor_offset,
490 ));
491 }
492 ZetaFormat::V0327SingleFile => {
493 prompt.push_str(&build_v0318_cursor_prefix(
494 path,
495 context,
496 editable_range,
497 cursor_offset,
498 ));
499 }
500 }
501}
502
503fn build_v0306_cursor_prefix(
504 path: &Path,
505 context: &str,
506 editable_range: &Range<usize>,
507 cursor_offset: usize,
508) -> String {
509 let mut section = String::new();
510 let path_str = path.to_string_lossy();
511 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
512
513 section.push_str(&context[..editable_range.start]);
514 section.push_str(seed_coder::START_MARKER);
515
516 let editable_text = &context[editable_range.clone()];
517 let cursor_in_editable = cursor_offset - editable_range.start;
518 multi_region::write_editable_with_markers(
519 &mut section,
520 editable_text,
521 cursor_in_editable,
522 CURSOR_MARKER,
523 );
524
525 if !section.ends_with('\n') {
526 section.push('\n');
527 }
528 section.push_str(seed_coder::SEPARATOR);
529 section
530}
531
532fn build_v0316_cursor_prefix(
533 path: &Path,
534 context: &str,
535 editable_range: &Range<usize>,
536 cursor_offset: usize,
537) -> String {
538 let mut section = String::new();
539 let path_str = path.to_string_lossy();
540 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
541
542 section.push_str(&context[..editable_range.start]);
543
544 let editable_text = &context[editable_range.clone()];
545 let cursor_in_editable = cursor_offset - editable_range.start;
546 multi_region::write_editable_with_markers_v0316(
547 &mut section,
548 editable_text,
549 cursor_in_editable,
550 CURSOR_MARKER,
551 );
552
553 if !section.ends_with('\n') {
554 section.push('\n');
555 }
556 section
557}
558
559fn build_v0318_cursor_prefix(
560 path: &Path,
561 context: &str,
562 editable_range: &Range<usize>,
563 cursor_offset: usize,
564) -> String {
565 let mut section = String::new();
566 let path_str = path.to_string_lossy();
567 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
568
569 section.push_str(&context[..editable_range.start]);
570
571 let editable_text = &context[editable_range.clone()];
572 let cursor_in_editable = cursor_offset - editable_range.start;
573 multi_region::write_editable_with_markers_v0318(
574 &mut section,
575 editable_text,
576 cursor_in_editable,
577 CURSOR_MARKER,
578 );
579
580 if !section.ends_with('\n') {
581 section.push('\n');
582 }
583 section
584}
585
586fn build_v0317_cursor_prefix(
587 path: &Path,
588 context: &str,
589 editable_range: &Range<usize>,
590 cursor_offset: usize,
591) -> String {
592 let mut section = String::new();
593 let path_str = path.to_string_lossy();
594 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
595
596 section.push_str(&context[..editable_range.start]);
597
598 let editable_text = &context[editable_range.clone()];
599 let cursor_in_editable = cursor_offset - editable_range.start;
600 multi_region::write_editable_with_markers_v0317(
601 &mut section,
602 editable_text,
603 cursor_in_editable,
604 CURSOR_MARKER,
605 );
606
607 if !section.ends_with('\n') {
608 section.push('\n');
609 }
610 section
611}
612
/// Converts a byte-offset range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline (so a trailing partial line is
/// included).
///
/// Panics if the range bounds are out of bounds or not on char boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |piece: &str| piece.matches('\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let rows_spanned = newline_count(&text[range.start..range.end]);
    let partial_last_line = u32::from(!text[..range.end].ends_with('\n'));

    first_row..first_row + rows_spanned + partial_last_line
}
621
622fn assemble_single_file_fim_prompt(
623 context: &str,
624 editable_range: &Range<usize>,
625 cursor_prefix_section: &str,
626 events: &[Arc<Event>],
627 max_tokens: usize,
628) -> String {
629 let suffix_section = seed_coder::build_suffix_section(context, editable_range);
630
631 let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
632 let cursor_prefix_tokens =
633 estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
634 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
635
636 let edit_history_section = format_edit_history_within_budget(
637 events,
638 seed_coder::FILE_MARKER,
639 "edit_history",
640 budget_after_cursor,
641 max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
642 );
643
644 let mut prompt = String::new();
645 prompt.push_str(&suffix_section);
646 prompt.push_str(seed_coder::FIM_PREFIX);
647 prompt.push_str(&edit_history_section);
648 if !edit_history_section.is_empty() {
649 prompt.push('\n');
650 }
651 prompt.push_str(cursor_prefix_section);
652 prompt.push_str(seed_coder::FIM_MIDDLE);
653 prompt
654}
655
656pub fn format_prompt_with_budget_for_format(
657 input: &ZetaPromptInput,
658 format: ZetaFormat,
659 max_tokens: usize,
660) -> Option<String> {
661 let (context, editable_range, context_range, cursor_offset) =
662 resolve_cursor_region(input, format);
663 let path = &*input.cursor_path;
664
665 let empty_files = Vec::new();
666 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
667 let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
668 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
669 let row_range = relative_row_range.start + cursor_excerpt_start_row
670 ..relative_row_range.end + cursor_excerpt_start_row;
671 filter_redundant_excerpts(
672 input_related_files.to_vec(),
673 input.cursor_path.as_ref(),
674 row_range,
675 )
676 } else {
677 input_related_files.to_vec()
678 };
679 let related_files = filtered_related_files.as_slice();
680
681 let prompt = match format {
682 ZetaFormat::V0211SeedCoder
683 | ZetaFormat::V0304SeedNoEdits
684 | ZetaFormat::V0306SeedMultiRegions
685 | ZetaFormat::V0316SeedMultiRegions
686 | ZetaFormat::V0318SeedMultiRegions
687 | ZetaFormat::V0317SeedMultiRegions => {
688 let mut cursor_section = String::new();
689 write_cursor_excerpt_section_for_format(
690 format,
691 &mut cursor_section,
692 path,
693 context,
694 &editable_range,
695 cursor_offset,
696 );
697
698 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
699 seed_coder::assemble_fim_prompt(
700 context,
701 &editable_range,
702 &cursor_section,
703 &input.events,
704 related_files,
705 budget_with_margin,
706 )
707 }
708 ZetaFormat::V0327SingleFile => {
709 let mut cursor_section = String::new();
710 write_cursor_excerpt_section_for_format(
711 format,
712 &mut cursor_section,
713 path,
714 context,
715 &editable_range,
716 cursor_offset,
717 );
718
719 assemble_single_file_fim_prompt(
720 context,
721 &editable_range,
722 &cursor_section,
723 &input.events,
724 apply_prompt_budget_margin(max_tokens),
725 )
726 }
727 _ => {
728 let mut cursor_section = String::new();
729 write_cursor_excerpt_section_for_format(
730 format,
731 &mut cursor_section,
732 path,
733 context,
734 &editable_range,
735 cursor_offset,
736 );
737
738 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
739 let cursor_tokens = estimate_tokens(cursor_section.len());
740 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
741
742 let edit_history_section = format_edit_history_within_budget(
743 &input.events,
744 "<|file_sep|>",
745 "edit history",
746 remaining_budget,
747 max_edit_event_count_for_format(&format),
748 );
749 let edit_history_tokens = estimate_tokens(edit_history_section.len());
750 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
751
752 let related_files_section = format_related_files_within_budget(
753 &related_files,
754 "<|file_sep|>",
755 "",
756 remaining_budget,
757 );
758
759 let mut prompt = String::new();
760 prompt.push_str(&related_files_section);
761 prompt.push_str(&edit_history_section);
762 prompt.push_str(&cursor_section);
763 prompt
764 }
765 };
766 let prompt_tokens = estimate_tokens(prompt.len());
767 if prompt_tokens > max_tokens {
768 return None;
769 }
770 return Some(prompt);
771}
772
773pub fn filter_redundant_excerpts(
774 mut related_files: Vec<RelatedFile>,
775 cursor_path: &Path,
776 cursor_row_range: Range<u32>,
777) -> Vec<RelatedFile> {
778 for file in &mut related_files {
779 if file.path.as_ref() == cursor_path {
780 file.excerpts.retain(|excerpt| {
781 excerpt.row_range.start < cursor_row_range.start
782 || excerpt.row_range.end > cursor_row_range.end
783 });
784 }
785 }
786 related_files.retain(|file| !file.excerpts.is_empty());
787 related_files
788}
789
790pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
791 match format {
792 ZetaFormat::V0112MiddleAtEnd
793 | ZetaFormat::V0113Ordered
794 | ZetaFormat::V0114180EditableRegion
795 | ZetaFormat::V0120GitMergeMarkers
796 | ZetaFormat::V0131GitMergeMarkersPrefix
797 | ZetaFormat::V0211Prefill
798 | ZetaFormat::V0211SeedCoder
799 | ZetaFormat::v0226Hashline
800 | ZetaFormat::V0304SeedNoEdits
801 | ZetaFormat::V0304VariableEdit
802 | ZetaFormat::V0306SeedMultiRegions
803 | ZetaFormat::V0316SeedMultiRegions
804 | ZetaFormat::V0318SeedMultiRegions
805 | ZetaFormat::V0317SeedMultiRegions
806 | ZetaFormat::V0327SingleFile => 6,
807 }
808}
809
810pub fn get_prefill_for_format(
811 format: ZetaFormat,
812 context: &str,
813 editable_range: &Range<usize>,
814) -> String {
815 match format {
816 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
817 ZetaFormat::V0112MiddleAtEnd
818 | ZetaFormat::V0113Ordered
819 | ZetaFormat::V0114180EditableRegion
820 | ZetaFormat::V0120GitMergeMarkers
821 | ZetaFormat::V0131GitMergeMarkersPrefix
822 | ZetaFormat::V0211SeedCoder
823 | ZetaFormat::v0226Hashline
824 | ZetaFormat::V0304VariableEdit => String::new(),
825 ZetaFormat::V0304SeedNoEdits
826 | ZetaFormat::V0306SeedMultiRegions
827 | ZetaFormat::V0316SeedMultiRegions
828 | ZetaFormat::V0318SeedMultiRegions
829 | ZetaFormat::V0317SeedMultiRegions
830 | ZetaFormat::V0327SingleFile => String::new(),
831 }
832}
833
834pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
835 match format {
836 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
837 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
838 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
839 ZetaFormat::V0211SeedCoder
840 | ZetaFormat::V0304SeedNoEdits
841 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
842 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
843 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
844 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
845 ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
846
847 ZetaFormat::V0112MiddleAtEnd
848 | ZetaFormat::V0113Ordered
849 | ZetaFormat::V0114180EditableRegion
850 | ZetaFormat::v0226Hashline
851 | ZetaFormat::V0304VariableEdit => None,
852 }
853}
854
855pub fn encode_patch_as_output_for_format(
856 format: ZetaFormat,
857 old_editable_region: &str,
858 patch: &str,
859 cursor_offset: Option<usize>,
860) -> Result<Option<String>> {
861 match format {
862 ZetaFormat::v0226Hashline => {
863 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
864 }
865 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
866 old_editable_region,
867 patch,
868 cursor_offset,
869 )
870 .map(Some),
871 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
872 Ok(seed_coder::no_edits(patch))
873 }
874 ZetaFormat::V0316SeedMultiRegions => {
875 let empty_patch = patch.lines().count() <= 3;
876 if empty_patch {
877 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
878 let marker_num =
879 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
880 let tag = multi_region::marker_tag(marker_num);
881 Ok(Some(format!(
882 "{tag}{tag}{}",
883 multi_region::V0316_END_MARKER
884 )))
885 } else {
886 Ok(None)
887 }
888 }
889 ZetaFormat::V0318SeedMultiRegions => {
890 let empty_patch = patch.lines().count() <= 3;
891 if empty_patch {
892 let marker_offsets =
893 multi_region::compute_marker_offsets_v0318(old_editable_region);
894 let marker_num =
895 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
896 let tag = multi_region::marker_tag(marker_num);
897 Ok(Some(format!(
898 "{tag}{tag}{}",
899 multi_region::V0318_END_MARKER
900 )))
901 } else {
902 Ok(None)
903 }
904 }
905 ZetaFormat::V0317SeedMultiRegions => {
906 let empty_patch = patch.lines().count() <= 3;
907 if empty_patch {
908 let tag = multi_region::marker_tag_relative(0);
909 Ok(Some(format!(
910 "{tag}{tag}{}",
911 multi_region::V0317_END_MARKER
912 )))
913 } else {
914 Ok(None)
915 }
916 }
917 ZetaFormat::V0327SingleFile => {
918 let empty_patch = patch.lines().count() <= 3;
919 if empty_patch {
920 let marker_offsets =
921 multi_region::compute_marker_offsets_v0318(old_editable_region);
922 let marker_num =
923 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
924 let tag = multi_region::marker_tag(marker_num);
925 Ok(Some(format!(
926 "{tag}{tag}{}",
927 multi_region::V0327_END_MARKER
928 )))
929 } else {
930 Ok(None)
931 }
932 }
933 _ => Ok(None),
934 }
935}
936
937/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
938/// extracted), produce the expected model output string for training.
939pub fn format_expected_output(
940 input: &ZetaPromptInput,
941 format: ZetaFormat,
942 patch: &str,
943 cursor_offset: Option<usize>,
944) -> Result<String> {
945 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
946 let mut old_editable = context[editable_range].to_string();
947 if !old_editable.is_empty() && !old_editable.ends_with('\n') {
948 old_editable.push('\n');
949 }
950
951 // Formats with their own output encoding (hashline, variable-edit,
952 // multi-region empty patches) are handled here.
953 if let Some(output) =
954 encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
955 {
956 return Ok(output);
957 }
958
959 let empty_patch = patch.lines().count() <= 3;
960
961 match format {
962 // Multi-region formats: non-empty patches need diff application
963 // then marker-span encoding.
964 ZetaFormat::V0316SeedMultiRegions => {
965 let (new_editable, first_hunk_offset) =
966 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
967 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
968 multi_region::encode_from_old_and_new_v0316(
969 &old_editable,
970 &new_editable,
971 cursor_in_new,
972 CURSOR_MARKER,
973 multi_region::V0316_END_MARKER,
974 )
975 }
976 ZetaFormat::V0318SeedMultiRegions => {
977 let (new_editable, first_hunk_offset) =
978 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
979 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
980 multi_region::encode_from_old_and_new_v0318(
981 &old_editable,
982 &new_editable,
983 cursor_in_new,
984 CURSOR_MARKER,
985 multi_region::V0318_END_MARKER,
986 )
987 }
988 ZetaFormat::V0317SeedMultiRegions => {
989 let (new_editable, first_hunk_offset) =
990 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
991 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
992 multi_region::encode_from_old_and_new_v0317(
993 &old_editable,
994 &new_editable,
995 cursor_in_new,
996 CURSOR_MARKER,
997 multi_region::V0317_END_MARKER,
998 )
999 }
1000 // V0131-style formats and fallback: produce new editable text with
1001 // cursor marker inserted, followed by the end marker.
1002 _ => {
1003 let (mut result, first_hunk_offset) = if empty_patch {
1004 (old_editable.clone(), None)
1005 } else {
1006 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1007 };
1008
1009 if let Some(cursor) = cursor_offset {
1010 let hunk_start = if !empty_patch {
1011 first_hunk_offset.unwrap_or(0)
1012 } else {
1013 0
1014 };
1015 let offset = (hunk_start + cursor).min(result.len());
1016 result.insert_str(offset, CURSOR_MARKER);
1017 }
1018
1019 if !result.is_empty() && !result.ends_with('\n') {
1020 result.push('\n');
1021 }
1022
1023 if let Some(end_marker) = output_end_marker_for_format(format) {
1024 result.push_str(end_marker);
1025 }
1026
1027 Ok(result)
1028 }
1029 }
1030}
1031
/// Compute the cursor position within the new text after diff application.
///
/// The cursor offset is shifted by the first hunk's offset (0 when there is
/// none) and clamped to the length of `new_text`. Returns `None` when there
/// is no cursor.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let shifted = first_hunk_offset.unwrap_or(0) + cursor;
    Some(shifted.min(new_text.len()))
}
1043
/// Result of parsing model output: replacement text for a range of the
/// cursor excerpt, plus the cursor position found in it, if any.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1053
/// A cursor location described by path, row/column and offsets.
// NOTE(review): nothing in the visible code constructs or reads this type;
// the field notes below are inferred from names — confirm against callers.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    pub path: String,
    pub row: usize,
    pub column: usize,
    // Presumably an offset into the whole file or excerpt — TODO confirm which.
    pub offset: usize,
    // Presumably the same position relative to the editable region — TODO confirm.
    pub editable_region_offset: usize,
}
1062
1063pub fn parsed_output_from_editable_region(
1064 range_in_excerpt: Range<usize>,
1065 mut new_editable_region: String,
1066) -> ParsedOutput {
1067 let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1068 if let Some(offset) = cursor_offset_in_new_editable_region {
1069 new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1070 }
1071
1072 ParsedOutput {
1073 new_editable_region,
1074 range_in_excerpt,
1075 cursor_offset_in_new_editable_region,
1076 }
1077}
1078
/// Parse model output for the given zeta format.
///
/// Steps, as implemented below:
/// 1. If `format` has an output end marker and `output` ends with it, the
///    marker is stripped.
/// 2. The context, editable range and cursor are resolved from
///    `prompt_inputs` via `resolve_cursor_region`.
/// 3. The output is interpreted according to `format`, yielding the
///    replacement text and the range (relative to the context) it replaces.
/// 4. That range is shifted into `cursor_excerpt` coordinates and the cursor
///    marker, if any, is extracted by `parsed_output_from_editable_region`.
///
/// Returns an error when a format-specific helper fails to apply the output.
pub fn parse_zeta2_model_output(
    output: &str,
    format: ZetaFormat,
    prompt_inputs: &ZetaPromptInput,
) -> Result<ParsedOutput> {
    // Drop the trailing end marker when present; output without it is used as is.
    let output = match output_end_marker_for_format(format) {
        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
        None => output,
    };

    let (context, editable_range_in_context, context_range, cursor_offset) =
        resolve_cursor_region(prompt_inputs, format);
    let context_start = context_range.start;
    let old_editable_region = &context[editable_range_in_context.clone()];
    // Cursor relative to the editable region; clamps to 0 if the cursor
    // precedes the region.
    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);

    let (range_in_context, output) = match format {
        // Hashline output is either a list of edit commands to apply to the
        // old region, or a plain full replacement.
        ZetaFormat::v0226Hashline => (
            editable_range_in_context,
            if hashline::output_has_edit_commands(output) {
                hashline::apply_edit_commands(old_editable_region, output)
            } else {
                output.to_string()
            },
        ),
        // This helper receives the whole context and returns both the range
        // and the new text.
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
        // A leading "no edits" token means the region is kept unchanged.
        ZetaFormat::V0304SeedNoEdits => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                output.to_string()
            },
        ),
        ZetaFormat::V0306SeedMultiRegions => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                multi_region::apply_marker_span(old_editable_region, output)?
            },
        ),
        ZetaFormat::V0316SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
        ),
        ZetaFormat::V0318SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
        ),
        // The only variant whose helper is also given the cursor offset.
        ZetaFormat::V0317SeedMultiRegions => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0317(
                old_editable_region,
                output,
                Some(cursor_offset_in_editable),
            )?,
        ),
        // Parsed with the same helper as `V0318SeedMultiRegions`.
        ZetaFormat::V0327SingleFile => (
            editable_range_in_context,
            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
        ),
        // All remaining formats: the output is the new editable region verbatim.
        _ => (editable_range_in_context, output.to_string()),
    };

    // Convert from context-relative to excerpt-relative offsets.
    let range_in_excerpt =
        range_in_context.start + context_start..range_in_context.end + context_start;

    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
}
1150
1151pub fn parse_zeta2_model_output_as_patch(
1152 output: &str,
1153 format: ZetaFormat,
1154 prompt_inputs: &ZetaPromptInput,
1155) -> Result<String> {
1156 let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1157 parsed_output_to_patch(prompt_inputs, parsed)
1158}
1159
1160pub fn cursor_position_from_parsed_output(
1161 prompt_inputs: &ZetaPromptInput,
1162 parsed: &ParsedOutput,
1163) -> Option<CursorPosition> {
1164 let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1165 let editable_region_offset = parsed.range_in_excerpt.start;
1166 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1167
1168 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1169
1170 let new_editable_region = &parsed.new_editable_region;
1171 let prefix_end = cursor_offset.min(new_editable_region.len());
1172 let new_region_prefix = &new_editable_region[..prefix_end];
1173
1174 let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1175
1176 let column = match new_region_prefix.rfind('\n') {
1177 Some(last_newline) => cursor_offset - last_newline - 1,
1178 None => {
1179 let content_prefix = &excerpt[..editable_region_offset];
1180 let content_column = match content_prefix.rfind('\n') {
1181 Some(last_newline) => editable_region_offset - last_newline - 1,
1182 None => editable_region_offset,
1183 };
1184 content_column + cursor_offset
1185 }
1186 };
1187
1188 Some(CursorPosition {
1189 path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1190 row,
1191 column,
1192 offset: editable_region_offset + cursor_offset,
1193 editable_region_offset: cursor_offset,
1194 })
1195}
1196
1197pub fn parsed_output_to_patch(
1198 prompt_inputs: &ZetaPromptInput,
1199 parsed: ParsedOutput,
1200) -> Result<String> {
1201 let range_in_excerpt = parsed.range_in_excerpt;
1202 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1203 let old_text = excerpt[range_in_excerpt.clone()].to_string();
1204 let mut new_text = parsed.new_editable_region;
1205
1206 let mut old_text_normalized = old_text;
1207 if !new_text.is_empty() && !new_text.ends_with('\n') {
1208 new_text.push('\n');
1209 }
1210 if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1211 old_text_normalized.push('\n');
1212 }
1213
1214 let editable_region_offset = range_in_excerpt.start;
1215 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1216 let editable_region_lines = old_text_normalized.lines().count() as u32;
1217
1218 let diff = udiff::unified_diff_with_context(
1219 &old_text_normalized,
1220 &new_text,
1221 editable_region_start_line,
1222 editable_region_start_line,
1223 editable_region_lines,
1224 );
1225
1226 let path = prompt_inputs
1227 .cursor_path
1228 .to_string_lossy()
1229 .trim_start_matches('/')
1230 .to_string();
1231 let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1232
1233 Ok(udiff::encode_cursor_in_patch(
1234 &formatted_diff,
1235 parsed.cursor_offset_in_new_editable_region,
1236 ))
1237}
1238
/// Returns the pair of ranges that `excerpt_ranges_for_format` selects for
/// `format` out of `ranges`.
///
/// `resolve_cursor_region` destructures the pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1245
1246pub fn resolve_cursor_region(
1247 input: &ZetaPromptInput,
1248 format: ZetaFormat,
1249) -> (&str, Range<usize>, Range<usize>, usize) {
1250 let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1251 let (editable_tokens, _) = token_limits_for_format(format);
1252 let context_range = 0..input.cursor_excerpt.len();
1253 let editable_range = multi_region::compute_v0327_editable_range(
1254 &input.cursor_excerpt,
1255 input.cursor_offset_in_excerpt,
1256 editable_tokens,
1257 );
1258 (editable_range, context_range)
1259 } else if let Some(syntax_ranges) = &input.syntax_ranges {
1260 let (editable_tokens, context_tokens) = token_limits_for_format(format);
1261 compute_editable_and_context_ranges(
1262 &input.cursor_excerpt,
1263 input.cursor_offset_in_excerpt,
1264 syntax_ranges,
1265 editable_tokens,
1266 context_tokens,
1267 )
1268 } else {
1269 excerpt_range_for_format(format, &input.excerpt_ranges)
1270 };
1271
1272 let context_start = context_range.start;
1273 let context_text = &input.cursor_excerpt[context_range.clone()];
1274 let adjusted_editable =
1275 (editable_range.start - context_start)..(editable_range.end - context_start);
1276 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1277
1278 (
1279 context_text,
1280 adjusted_editable,
1281 context_range,
1282 adjusted_cursor,
1283 )
1284}
1285
/// Returns the prefill text for `format`, computed by
/// `get_prefill_for_format` from the context and editable range that
/// `resolve_cursor_region` selects for `input`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
1290
1291fn format_edit_history_within_budget(
1292 events: &[Arc<Event>],
1293 file_marker: &str,
1294 edit_history_name: &str,
1295 max_tokens: usize,
1296 max_edit_event_count: usize,
1297) -> String {
1298 let header = format!("{}{}\n", file_marker, edit_history_name);
1299 let header_tokens = estimate_tokens(header.len());
1300 if header_tokens >= max_tokens {
1301 return String::new();
1302 }
1303
1304 let mut event_strings: Vec<String> = Vec::new();
1305 let mut total_tokens = header_tokens;
1306
1307 for event in events.iter().rev().take(max_edit_event_count) {
1308 let mut event_str = String::new();
1309 write_event(&mut event_str, event);
1310 let event_tokens = estimate_tokens(event_str.len());
1311
1312 if total_tokens + event_tokens > max_tokens {
1313 break;
1314 }
1315 total_tokens += event_tokens;
1316 event_strings.push(event_str);
1317 }
1318
1319 if event_strings.is_empty() {
1320 return String::new();
1321 }
1322
1323 let mut result = header;
1324 for event_str in event_strings.iter().rev() {
1325 result.push_str(event_str);
1326 }
1327 result
1328}
1329
1330fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1331 let needs_newline = !excerpt.text.ends_with('\n');
1332 let needs_ellipsis = excerpt.row_range.end < file_max_row;
1333 let len = excerpt.text.len()
1334 + if needs_newline { "\n".len() } else { 0 }
1335 + if needs_ellipsis { "...\n".len() } else { 0 };
1336 estimate_tokens(len)
1337}
1338
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts from all files are ranked by their `order` value (ties broken by
/// file index, then excerpt index) and accepted in that ranking until one
/// does not fit; that excerpt and everything ranked after it is dropped.
/// The accepted excerpts are then rendered grouped by file, in the original
/// file and excerpt order.
///
/// Each rendered file starts with `{file_prefix}{path}` on its own line.
/// `file_suffix` is written between consecutive files; nothing is written
/// after the last one. An excerpt that ends before the file's `max_row` is
/// followed by an `...` line.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its ranking key.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (and one suffix) is charged once per file, when its
        // first excerpt is accepted.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-ranked excerpts
        // are not tried even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the accepted prefix of the ranking, then restore file order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        // A change of file index starts a new file section.
        if last_file_ix != Some(candidate.file_ix) {
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
1427
1428pub fn write_related_files(
1429 prompt: &mut String,
1430 related_files: &[RelatedFile],
1431) -> Vec<Range<usize>> {
1432 let mut ranges = Vec::new();
1433 for file in related_files {
1434 let start = prompt.len();
1435 let path_str = file.path.to_string_lossy();
1436 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1437 for excerpt in &file.excerpts {
1438 prompt.push_str(&excerpt.text);
1439 if !prompt.ends_with('\n') {
1440 prompt.push('\n');
1441 }
1442 if excerpt.row_range.end < file.max_row {
1443 prompt.push_str("...\n");
1444 }
1445 }
1446 let end = prompt.len();
1447 ranges.push(start..end);
1448 }
1449 ranges
1450}
1451
1452mod v0112_middle_at_end {
1453 use super::*;
1454
1455 pub fn special_tokens() -> &'static [&'static str] {
1456 &[
1457 "<|fim_prefix|>",
1458 "<|fim_suffix|>",
1459 "<|fim_middle|>",
1460 "<|file_sep|>",
1461 CURSOR_MARKER,
1462 ]
1463 }
1464
1465 pub fn write_cursor_excerpt_section(
1466 prompt: &mut String,
1467 path: &Path,
1468 context: &str,
1469 editable_range: &Range<usize>,
1470 cursor_offset: usize,
1471 ) {
1472 let path_str = path.to_string_lossy();
1473 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1474
1475 prompt.push_str("<|fim_prefix|>\n");
1476 prompt.push_str(&context[..editable_range.start]);
1477
1478 prompt.push_str("<|fim_suffix|>\n");
1479 prompt.push_str(&context[editable_range.end..]);
1480 if !prompt.ends_with('\n') {
1481 prompt.push('\n');
1482 }
1483
1484 prompt.push_str("<|fim_middle|>current\n");
1485 prompt.push_str(&context[editable_range.start..cursor_offset]);
1486 prompt.push_str(CURSOR_MARKER);
1487 prompt.push_str(&context[cursor_offset..editable_range.end]);
1488 if !prompt.ends_with('\n') {
1489 prompt.push('\n');
1490 }
1491
1492 prompt.push_str("<|fim_middle|>updated\n");
1493 }
1494}
1495
1496mod v0113_ordered {
1497 use super::*;
1498
1499 pub fn special_tokens() -> &'static [&'static str] {
1500 &[
1501 "<|fim_prefix|>",
1502 "<|fim_suffix|>",
1503 "<|fim_middle|>",
1504 "<|file_sep|>",
1505 CURSOR_MARKER,
1506 ]
1507 }
1508
1509 pub fn write_cursor_excerpt_section(
1510 prompt: &mut String,
1511 path: &Path,
1512 context: &str,
1513 editable_range: &Range<usize>,
1514 cursor_offset: usize,
1515 ) {
1516 let path_str = path.to_string_lossy();
1517 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1518
1519 prompt.push_str("<|fim_prefix|>\n");
1520 prompt.push_str(&context[..editable_range.start]);
1521 if !prompt.ends_with('\n') {
1522 prompt.push('\n');
1523 }
1524
1525 prompt.push_str("<|fim_middle|>current\n");
1526 prompt.push_str(&context[editable_range.start..cursor_offset]);
1527 prompt.push_str(CURSOR_MARKER);
1528 prompt.push_str(&context[cursor_offset..editable_range.end]);
1529 if !prompt.ends_with('\n') {
1530 prompt.push('\n');
1531 }
1532
1533 prompt.push_str("<|fim_suffix|>\n");
1534 prompt.push_str(&context[editable_range.end..]);
1535 if !prompt.ends_with('\n') {
1536 prompt.push('\n');
1537 }
1538
1539 prompt.push_str("<|fim_middle|>updated\n");
1540 }
1541}
1542
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; identical to those of `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1550
1551pub mod v0120_git_merge_markers {
1552 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1553 //!
1554 //! Example prompt:
1555 //!
1556 //! <|file_sep|>path/to/target_file.py
1557 //! <|fim_prefix|>
1558 //! code before editable region
1559 //! <|fim_suffix|>
1560 //! code after editable region
1561 //! <|fim_middle|>
1562 //! <<<<<<< CURRENT
1563 //! code that
1564 //! needs to<|user_cursor|>
1565 //! be rewritten
1566 //! =======
1567 //!
1568 //! Expected output (should be generated by the model):
1569 //!
1570 //! updated
1571 //! code with
1572 //! changes applied
1573 //! >>>>>>> UPDATED
1574
1575 use super::*;
1576
1577 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1578 pub const SEPARATOR: &str = "=======\n";
1579 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1580
1581 pub fn special_tokens() -> &'static [&'static str] {
1582 &[
1583 "<|fim_prefix|>",
1584 "<|fim_suffix|>",
1585 "<|fim_middle|>",
1586 "<|file_sep|>",
1587 START_MARKER,
1588 SEPARATOR,
1589 END_MARKER,
1590 CURSOR_MARKER,
1591 ]
1592 }
1593
1594 pub fn write_cursor_excerpt_section(
1595 prompt: &mut String,
1596 path: &Path,
1597 context: &str,
1598 editable_range: &Range<usize>,
1599 cursor_offset: usize,
1600 ) {
1601 let path_str = path.to_string_lossy();
1602 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1603
1604 prompt.push_str("<|fim_prefix|>");
1605 prompt.push_str(&context[..editable_range.start]);
1606
1607 prompt.push_str("<|fim_suffix|>");
1608 prompt.push_str(&context[editable_range.end..]);
1609 if !prompt.ends_with('\n') {
1610 prompt.push('\n');
1611 }
1612
1613 prompt.push_str("<|fim_middle|>");
1614 prompt.push_str(START_MARKER);
1615 prompt.push_str(&context[editable_range.start..cursor_offset]);
1616 prompt.push_str(CURSOR_MARKER);
1617 prompt.push_str(&context[cursor_offset..editable_range.end]);
1618 if !prompt.ends_with('\n') {
1619 prompt.push('\n');
1620 }
1621 prompt.push_str(SEPARATOR);
1622 }
1623}
1624
1625pub mod v0131_git_merge_markers_prefix {
1626 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1627 //!
1628 //! Example prompt:
1629 //!
1630 //! <|file_sep|>path/to/target_file.py
1631 //! <|fim_prefix|>
1632 //! code before editable region
1633 //! <<<<<<< CURRENT
1634 //! code that
1635 //! needs to<|user_cursor|>
1636 //! be rewritten
1637 //! =======
1638 //! <|fim_suffix|>
1639 //! code after editable region
1640 //! <|fim_middle|>
1641 //!
1642 //! Expected output (should be generated by the model):
1643 //!
1644 //! updated
1645 //! code with
1646 //! changes applied
1647 //! >>>>>>> UPDATED
1648
1649 use super::*;
1650
1651 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1652 pub const SEPARATOR: &str = "=======\n";
1653 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1654
1655 pub fn special_tokens() -> &'static [&'static str] {
1656 &[
1657 "<|fim_prefix|>",
1658 "<|fim_suffix|>",
1659 "<|fim_middle|>",
1660 "<|file_sep|>",
1661 START_MARKER,
1662 SEPARATOR,
1663 END_MARKER,
1664 CURSOR_MARKER,
1665 ]
1666 }
1667
1668 pub fn write_cursor_excerpt_section(
1669 prompt: &mut String,
1670 path: &Path,
1671 context: &str,
1672 editable_range: &Range<usize>,
1673 cursor_offset: usize,
1674 ) {
1675 let path_str = path.to_string_lossy();
1676 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1677
1678 prompt.push_str("<|fim_prefix|>");
1679 prompt.push_str(&context[..editable_range.start]);
1680 prompt.push_str(START_MARKER);
1681 prompt.push_str(&context[editable_range.start..cursor_offset]);
1682 prompt.push_str(CURSOR_MARKER);
1683 prompt.push_str(&context[cursor_offset..editable_range.end]);
1684 if !prompt.ends_with('\n') {
1685 prompt.push('\n');
1686 }
1687 prompt.push_str(SEPARATOR);
1688
1689 prompt.push_str("<|fim_suffix|>");
1690 prompt.push_str(&context[editable_range.end..]);
1691 if !prompt.ends_with('\n') {
1692 prompt.push('\n');
1693 }
1694
1695 prompt.push_str("<|fim_middle|>");
1696 }
1697}
1698
1699pub mod v0211_prefill {
1700 use super::*;
1701
1702 pub fn special_tokens() -> &'static [&'static str] {
1703 v0131_git_merge_markers_prefix::special_tokens()
1704 }
1705
1706 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1707 let editable_region = &context[editable_range.start..editable_range.end];
1708
1709 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1710 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1711
1712 // Find a token boundary to avoid splitting tokens in the prefill.
1713 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1714 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1715 // the \n and consume any consecutive \n characters after it.
1716 let prefill = &editable_region[..prefill_len];
1717 match prefill.rfind('\n') {
1718 Some(pos) => {
1719 let mut end = pos + 1;
1720 while end < editable_region.len()
1721 && editable_region.as_bytes().get(end) == Some(&b'\n')
1722 {
1723 end += 1;
1724 }
1725 editable_region[..end].to_string()
1726 }
1727 // No newline found. Fall back to splitting before the last space
1728 // (word-level boundary)
1729 None => match prefill.rfind(' ') {
1730 Some(pos) => prefill[..pos].to_string(),
1731 None => prefill.to_string(),
1732 },
1733 }
1734 }
1735}
1736
1737pub mod hashline {
1738
1739 use std::fmt::Display;
1740
    /// Marker written as the last line of the cursor excerpt section, after
    /// the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written directly before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1743
1744 use super::*;
1745
    /// Starts a command that replaces one line or an inclusive range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Starts a command that inserts content after a referenced line, or
    /// before the first line when no valid reference follows.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// When the output starts with this marker, the editable region is kept
    /// unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1749
1750 pub fn special_tokens() -> &'static [&'static str] {
1751 return &[
1752 SET_COMMAND_MARKER,
1753 "<|set_range|>",
1754 INSERT_COMMAND_MARKER,
1755 NO_EDITS_COMMAND_MARKER,
1756 CURSOR_MARKER,
1757 "<|file_sep|>",
1758 "<|fim_prefix|>",
1759 "<|fim_suffix|>",
1760 "<|fim_middle|>",
1761 ];
1762 }
1763
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Zero-based index of the line within the editable region.
        index: usize,
        /// Hash of the line's bytes, as computed by `hash_line`.
        hash: u8,
    }

    /// Formats the reference as `{index}:{hash}`, with the hash written as
    /// two lowercase hex digits.
    impl Display for LineRef {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}:{:02x}", self.index, self.hash)
        }
    }
1776
1777 pub fn hash_line(line: &[u8]) -> u8 {
1778 let mut h: u8 = 0;
1779 for &byte in line {
1780 h = h.wrapping_add(byte);
1781 }
1782 return h;
1783 }
1784
1785 /// Write the hashline-encoded editable region into `out`. Each line of
1786 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1787 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1788 /// to the start of `editable_text`).
1789 pub fn write_hashline_editable_region(
1790 out: &mut String,
1791 editable_text: &str,
1792 cursor_offset_in_editable: usize,
1793 ) {
1794 let mut offset = 0;
1795 for (i, line) in editable_text.lines().enumerate() {
1796 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1797 && cursor_offset_in_editable < offset + line.len()
1798 {
1799 (
1800 &line[..cursor_offset_in_editable - offset],
1801 CURSOR_MARKER,
1802 &line[cursor_offset_in_editable - offset..],
1803 )
1804 } else {
1805 (line, "", "")
1806 };
1807 write!(
1808 out,
1809 "\n{}|{head}{cursor}{tail}",
1810 LineRef {
1811 index: i,
1812 hash: hash_line(line.as_bytes())
1813 }
1814 )
1815 .unwrap();
1816 offset += line.len() + 1;
1817 }
1818 }
1819
1820 pub fn write_cursor_excerpt_section(
1821 prompt: &mut String,
1822 path: &Path,
1823 context: &str,
1824 editable_range: &Range<usize>,
1825 cursor_offset: usize,
1826 ) {
1827 let path_str = path.to_string_lossy();
1828 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1829
1830 prompt.push_str("<|fim_prefix|>\n");
1831 prompt.push_str(&context[..editable_range.start]);
1832 prompt.push_str(START_MARKER);
1833
1834 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1835 let editable_region = &context[editable_range.clone()];
1836 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1837
1838 if !prompt.ends_with('\n') {
1839 prompt.push('\n');
1840 }
1841
1842 prompt.push_str("<|fim_suffix|>\n");
1843 prompt.push_str(&context[editable_range.end..]);
1844 if !prompt.ends_with('\n') {
1845 prompt.push('\n');
1846 }
1847
1848 prompt.push_str(END_MARKER);
1849 prompt.push('\n');
1850 }
1851
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output: it is the text between the
    /// command line and the next command line (or the end of the output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1869
1870 /// Parse a line reference like `3:c3` into a `LineRef`.
1871 fn parse_line_ref(s: &str) -> Option<LineRef> {
1872 let (idx_str, hash_str) = s.split_once(':')?;
1873 let index = idx_str.parse::<usize>().ok()?;
1874 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1875 Some(LineRef { index, hash })
1876 }
1877
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line that (after trimming)
    /// starts with the set or insert marker opens a command; every following
    /// line up to the next command line, or the end of the output, is that
    /// command's content. Lines before the first command are ignored.
    ///
    /// A set command whose line reference(s) cannot be parsed is dropped
    /// together with its content. An insert command whose reference cannot be
    /// parsed is kept with `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Isolate the current line; `line_end` is the start of the next one.
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            // Anything that is not a command line is skipped.
            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                offset = line_end;
                continue;
            };

            // Extend the command's content over all following lines until
            // the next command line.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                // `a-b` is a range set; a single reference sets one line.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command line (or the end of the output).
            offset = scan;
        }

        commands
    }
1957
1958 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1959 /// (as opposed to being a plain full-replacement output).
1960 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1961 /// editable region, returning the plain text content.
1962 pub fn strip_hashline_prefixes(region: &str) -> String {
1963 let mut decoded: String = region
1964 .lines()
1965 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1966 .collect::<Vec<_>>()
1967 .join("\n");
1968 if region.ends_with('\n') {
1969 decoded.push('\n');
1970 }
1971 decoded
1972 }
1973
    /// Returns `true` if the model output contains a `<|set|>`, `<|insert|>`
    /// or `<|no_edits|>` marker anywhere, as opposed to being a plain
    /// full-replacement output.
    pub fn output_has_edit_commands(model_output: &str) -> bool {
        model_output.contains(SET_COMMAND_MARKER)
            || model_output.contains(INSERT_COMMAND_MARKER)
            || model_output.contains(NO_EDITS_COMMAND_MARKER)
    }
1979
    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
    /// original editable region text.
    ///
    /// `editable_region` is the original text of the editable region (without hash
    /// prefixes). `model_output` is the raw model response containing edit commands.
    ///
    /// If the output (ignoring leading whitespace) starts with `<|no_edits|>`,
    /// the region is returned unchanged. Commands whose line index is out of
    /// bounds, whose hash does not match the referenced original line, or
    /// (for sets) whose start lies after its end are ignored.
    ///
    /// Returns the full replacement text for the editable region.
    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
        if model_output
            .trim_start()
            .starts_with(NO_EDITS_COMMAND_MARKER)
        {
            return editable_region.to_string();
        }

        // Hash every original line so command references can be validated.
        let original_lines: Vec<&str> = editable_region.lines().collect();
        let old_hashes: Vec<u8> = original_lines
            .iter()
            .map(|line| hash_line(line.as_bytes()))
            .collect();

        let commands = parse_edit_commands(model_output);

        // For set operations: indexed by start line → Some((end line index, content))
        // For insert operations: indexed by line index → vec of content to insert after
        // Insert-before-first is tracked separately.
        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
        let mut insert_before_first: Vec<&str> = Vec::new();
        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];

        for command in &commands {
            match command {
                EditCommand::Set {
                    start,
                    end,
                    content,
                } => {
                    // Only accept a set whose range is in bounds, ordered,
                    // and whose hashes match the lines at both ends. A later
                    // set with the same start line overwrites an earlier one.
                    if start.index < old_hashes.len()
                        && end.index < old_hashes.len()
                        && start.index <= end.index
                        && old_hashes[start.index] == start.hash
                        && old_hashes[end.index] == end.hash
                    {
                        set_ops[start.index] = Some((end.index, *content));
                    }
                }
                EditCommand::Insert { after, content } => match after {
                    None => insert_before_first.push(*content),
                    Some(line_ref) => {
                        if line_ref.index < old_hashes.len()
                            && old_hashes[line_ref.index] == line_ref.hash
                        {
                            insert_after[line_ref.index].push(*content);
                        }
                    }
                },
            }
        }

        let mut result = String::new();

        // Emit any insertions before the first line
        for content in &insert_before_first {
            result.push_str(content);
            if !content.ends_with('\n') {
                result.push('\n');
            }
        }

        let mut i = 0;
        while i < original_lines.len() {
            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
                // Replace lines i..=end_index with the replacement content
                result.push_str(replacement);
                // An empty replacement deletes the lines without leaving a
                // blank line behind.
                if !replacement.is_empty() && !replacement.ends_with('\n') {
                    result.push('\n');
                }
                // Emit any insertions after the end of this set range
                if *end_index < insert_after.len() {
                    for content in &insert_after[*end_index] {
                        result.push_str(content);
                        if !content.ends_with('\n') {
                            result.push('\n');
                        }
                    }
                }
                // Skip the replaced lines; insertions anchored to lines
                // inside the range before `end_index` are not emitted.
                i = end_index + 1;
            } else {
                // Keep the original line
                result.push_str(original_lines[i]);
                result.push('\n');
                // Emit any insertions after this line
                for content in &insert_after[i] {
                    result.push_str(content);
                    if !content.ends_with('\n') {
                        result.push('\n');
                    }
                }
                i += 1;
            }
        }

        // Preserve trailing newline behavior: if the original ended with a
        // newline the result already has one; if it didn't, trim the extra one
        // we added.
        if !editable_region.ends_with('\n') && result.ends_with('\n') {
            result.pop();
        }

        result
    }
2091
2092 /// Convert a unified diff patch into hashline edit commands.
2093 ///
2094 /// Parses the unified diff `patch` directly to determine which lines of
2095 /// `old_text` are deleted/replaced and what new lines are added, then emits
2096 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2097 /// `{index}:{hash}` identifiers.
2098 ///
2099 /// `cursor_offset` is an optional byte offset into the first hunk's new
2100 /// text (context + additions) where the cursor marker should be placed.
2101 pub fn patch_to_edit_commands(
2102 old_text: &str,
2103 patch: &str,
2104 cursor_offset: Option<usize>,
2105 ) -> Result<String> {
2106 let old_lines: Vec<&str> = old_text.lines().collect();
2107 let old_hashes: Vec<u8> = old_lines
2108 .iter()
2109 .map(|line| hash_line(line.as_bytes()))
2110 .collect();
2111
2112 let mut result = String::new();
2113 let mut first_hunk = true;
2114
2115 struct Hunk<'a> {
2116 line_range: Range<usize>,
2117 new_text_lines: Vec<&'a str>,
2118 cursor_line_offset_in_new_text: Option<(usize, usize)>,
2119 }
2120
2121 // Parse the patch line by line. We only care about hunk headers,
2122 // context, deletions, and additions.
2123 let mut old_line_index: usize = 0;
2124 let mut current_hunk: Option<Hunk> = None;
2125 // Byte offset tracking within the hunk's new text for cursor placement.
2126 let mut new_text_byte_offset: usize = 0;
2127 // The line index of the last old line seen before/in the current hunk
2128 // (used for insert-after reference).
2129 let mut last_old_line_before_hunk: Option<usize> = None;
2130
2131 fn flush_hunk(
2132 hunk: Hunk,
2133 last_old_line: Option<usize>,
2134 result: &mut String,
2135 old_hashes: &[u8],
2136 ) {
2137 if hunk.line_range.is_empty() {
2138 // Pure insertion — reference the old line to insert after when in bounds.
2139 if let Some(after) = last_old_line
2140 && let Some(&hash) = old_hashes.get(after)
2141 {
2142 write!(
2143 result,
2144 "{INSERT_COMMAND_MARKER}{}\n",
2145 LineRef { index: after, hash }
2146 )
2147 .unwrap();
2148 } else {
2149 result.push_str(INSERT_COMMAND_MARKER);
2150 result.push('\n');
2151 }
2152 } else {
2153 let start = hunk.line_range.start;
2154 let end_exclusive = hunk.line_range.end;
2155 let deleted_line_count = end_exclusive.saturating_sub(start);
2156
2157 if deleted_line_count == 1 {
2158 if let Some(&hash) = old_hashes.get(start) {
2159 write!(
2160 result,
2161 "{SET_COMMAND_MARKER}{}\n",
2162 LineRef { index: start, hash }
2163 )
2164 .unwrap();
2165 } else {
2166 result.push_str(SET_COMMAND_MARKER);
2167 result.push('\n');
2168 }
2169 } else {
2170 let end_inclusive = end_exclusive - 1;
2171 match (
2172 old_hashes.get(start).copied(),
2173 old_hashes.get(end_inclusive).copied(),
2174 ) {
2175 (Some(start_hash), Some(end_hash)) => {
2176 write!(
2177 result,
2178 "{SET_COMMAND_MARKER}{}-{}\n",
2179 LineRef {
2180 index: start,
2181 hash: start_hash
2182 },
2183 LineRef {
2184 index: end_inclusive,
2185 hash: end_hash
2186 }
2187 )
2188 .unwrap();
2189 }
2190 _ => {
2191 result.push_str(SET_COMMAND_MARKER);
2192 result.push('\n');
2193 }
2194 }
2195 }
2196 }
2197 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2198 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2199 && line_offset == cursor_line_offset
2200 {
2201 result.push_str(&line[..char_offset]);
2202 result.push_str(CURSOR_MARKER);
2203 result.push_str(&line[char_offset..]);
2204 continue;
2205 }
2206
2207 result.push_str(line);
2208 }
2209 }
2210
2211 for raw_line in patch.split_inclusive('\n') {
2212 if raw_line.starts_with("@@") {
2213 // Flush any pending change hunk from a previous patch hunk.
2214 if let Some(hunk) = current_hunk.take() {
2215 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2216 }
2217
2218 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2219 // We intentionally do not trust old_start as a direct local index into `old_text`,
2220 // because some patches are produced against a larger file region and carry
2221 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2222 if first_hunk {
2223 new_text_byte_offset = 0;
2224 first_hunk = false;
2225 }
2226 continue;
2227 }
2228
2229 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2230 continue;
2231 }
2232 if raw_line.starts_with("\\ No newline") {
2233 continue;
2234 }
2235
2236 if raw_line.starts_with('-') {
2237 // Extend or start a change hunk with this deleted old line.
2238 match &mut current_hunk {
2239 Some(Hunk {
2240 line_range: range, ..
2241 }) => range.end = old_line_index + 1,
2242 None => {
2243 current_hunk = Some(Hunk {
2244 line_range: old_line_index..old_line_index + 1,
2245 new_text_lines: Vec::new(),
2246 cursor_line_offset_in_new_text: None,
2247 });
2248 }
2249 }
2250 old_line_index += 1;
2251 } else if let Some(added_content) = raw_line.strip_prefix('+') {
2252 // Place cursor marker if cursor_offset falls within this line.
2253 let mut cursor_line_offset = None;
2254 if let Some(cursor_off) = cursor_offset
2255 && (first_hunk
2256 || cursor_off >= new_text_byte_offset
2257 && cursor_off <= new_text_byte_offset + added_content.len())
2258 {
2259 let line_offset = added_content.floor_char_boundary(
2260 cursor_off
2261 .saturating_sub(new_text_byte_offset)
2262 .min(added_content.len()),
2263 );
2264 cursor_line_offset = Some(line_offset);
2265 }
2266
2267 new_text_byte_offset += added_content.len();
2268
2269 let hunk = current_hunk.get_or_insert(Hunk {
2270 line_range: old_line_index..old_line_index,
2271 new_text_lines: vec![],
2272 cursor_line_offset_in_new_text: None,
2273 });
2274 hunk.new_text_lines.push(added_content);
2275 hunk.cursor_line_offset_in_new_text = cursor_line_offset
2276 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2277 } else {
2278 // Context line (starts with ' ' or is empty).
2279 if let Some(hunk) = current_hunk.take() {
2280 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2281 }
2282 last_old_line_before_hunk = Some(old_line_index);
2283 old_line_index += 1;
2284 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2285 new_text_byte_offset += content.len();
2286 }
2287 }
2288
2289 // Flush final group.
2290 if let Some(hunk) = current_hunk.take() {
2291 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2292 }
2293
2294 // Trim a single trailing newline.
2295 if result.ends_with('\n') {
2296 result.pop();
2297 }
2298
2299 if result.is_empty() {
2300 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2301 }
2302
2303 Ok(result)
2304 }
2305
2306 #[cfg(test)]
2307 mod tests {
2308 use super::*;
2309 use indoc::indoc;
2310
2311 #[test]
2312 fn test_format_cursor_region() {
2313 struct Case {
2314 name: &'static str,
2315 context: &'static str,
2316 editable_range: Range<usize>,
2317 cursor_offset: usize,
2318 expected: &'static str,
2319 }
2320
2321 let cases = [
2322 Case {
2323 name: "basic_cursor_placement",
2324 context: "hello world\n",
2325 editable_range: 0..12,
2326 cursor_offset: 5,
2327 expected: indoc! {"
2328 <|file_sep|>test.rs
2329 <|fim_prefix|>
2330 <|fim_middle|>current
2331 0:5c|hello<|user_cursor|> world
2332 <|fim_suffix|>
2333 <|fim_middle|>updated
2334 "},
2335 },
2336 Case {
2337 name: "multiline_cursor_on_second_line",
2338 context: "aaa\nbbb\nccc\n",
2339 editable_range: 0..12,
2340 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2341 expected: indoc! {"
2342 <|file_sep|>test.rs
2343 <|fim_prefix|>
2344 <|fim_middle|>current
2345 0:23|aaa
2346 1:26|b<|user_cursor|>bb
2347 2:29|ccc
2348 <|fim_suffix|>
2349 <|fim_middle|>updated
2350 "},
2351 },
2352 Case {
2353 name: "no_trailing_newline_in_context",
2354 context: "line1\nline2",
2355 editable_range: 0..11,
2356 cursor_offset: 3,
2357 expected: indoc! {"
2358 <|file_sep|>test.rs
2359 <|fim_prefix|>
2360 <|fim_middle|>current
2361 0:d9|lin<|user_cursor|>e1
2362 1:da|line2
2363 <|fim_suffix|>
2364 <|fim_middle|>updated
2365 "},
2366 },
2367 Case {
2368 name: "leading_newline_in_editable_region",
2369 context: "\nabc\n",
2370 editable_range: 0..5,
2371 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2372 expected: indoc! {"
2373 <|file_sep|>test.rs
2374 <|fim_prefix|>
2375 <|fim_middle|>current
2376 0:00|
2377 1:26|a<|user_cursor|>bc
2378 <|fim_suffix|>
2379 <|fim_middle|>updated
2380 "},
2381 },
2382 Case {
2383 name: "with_suffix",
2384 context: "abc\ndef",
2385 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2386 cursor_offset: 2,
2387 expected: indoc! {"
2388 <|file_sep|>test.rs
2389 <|fim_prefix|>
2390 <|fim_middle|>current
2391 0:26|ab<|user_cursor|>c
2392 <|fim_suffix|>
2393 def
2394 <|fim_middle|>updated
2395 "},
2396 },
2397 Case {
2398 name: "unicode_two_byte_chars",
2399 context: "héllo\n",
2400 editable_range: 0..7,
2401 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2402 expected: indoc! {"
2403 <|file_sep|>test.rs
2404 <|fim_prefix|>
2405 <|fim_middle|>current
2406 0:1b|hé<|user_cursor|>llo
2407 <|fim_suffix|>
2408 <|fim_middle|>updated
2409 "},
2410 },
2411 Case {
2412 name: "unicode_three_byte_chars",
2413 context: "日本語\n",
2414 editable_range: 0..10,
2415 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2416 expected: indoc! {"
2417 <|file_sep|>test.rs
2418 <|fim_prefix|>
2419 <|fim_middle|>current
2420 0:80|日本<|user_cursor|>語
2421 <|fim_suffix|>
2422 <|fim_middle|>updated
2423 "},
2424 },
2425 Case {
2426 name: "unicode_four_byte_chars",
2427 context: "a🌍b\n",
2428 editable_range: 0..7,
2429 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2430 expected: indoc! {"
2431 <|file_sep|>test.rs
2432 <|fim_prefix|>
2433 <|fim_middle|>current
2434 0:6b|a🌍<|user_cursor|>b
2435 <|fim_suffix|>
2436 <|fim_middle|>updated
2437 "},
2438 },
2439 Case {
2440 name: "cursor_at_start_of_region_not_placed",
2441 context: "abc\n",
2442 editable_range: 0..4,
2443 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2444 expected: indoc! {"
2445 <|file_sep|>test.rs
2446 <|fim_prefix|>
2447 <|fim_middle|>current
2448 0:26|abc
2449 <|fim_suffix|>
2450 <|fim_middle|>updated
2451 "},
2452 },
2453 Case {
2454 name: "cursor_at_end_of_line_not_placed",
2455 context: "abc\ndef\n",
2456 editable_range: 0..8,
2457 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2458 expected: indoc! {"
2459 <|file_sep|>test.rs
2460 <|fim_prefix|>
2461 <|fim_middle|>current
2462 0:26|abc
2463 1:2f|def
2464 <|fim_suffix|>
2465 <|fim_middle|>updated
2466 "},
2467 },
2468 Case {
2469 name: "cursor_offset_relative_to_context_not_editable_region",
2470 // cursor_offset is relative to `context`, so when editable_range.start > 0,
2471 // write_cursor_excerpt_section must subtract it before comparing against
2472 // per-line offsets within the editable region.
2473 context: "pre\naaa\nbbb\nsuf\n",
2474 editable_range: 4..12, // editable region = "aaa\nbbb\n"
2475 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
2476 expected: indoc! {"
2477 <|file_sep|>test.rs
2478 <|fim_prefix|>
2479 pre
2480 <|fim_middle|>current
2481 0:23|aaa
2482 1:26|b<|user_cursor|>bb
2483 <|fim_suffix|>
2484 suf
2485 <|fim_middle|>updated
2486 "},
2487 },
2488 ];
2489
2490 for case in &cases {
2491 let mut prompt = String::new();
2492 hashline::write_cursor_excerpt_section(
2493 &mut prompt,
2494 Path::new("test.rs"),
2495 case.context,
2496 &case.editable_range,
2497 case.cursor_offset,
2498 );
2499 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2500 }
2501 }
2502
2503 #[test]
2504 fn test_apply_edit_commands() {
2505 struct Case {
2506 name: &'static str,
2507 original: &'static str,
2508 model_output: &'static str,
2509 expected: &'static str,
2510 }
2511
2512 let cases = vec![
2513 Case {
2514 name: "set_single_line",
2515 original: indoc! {"
2516 let mut total = 0;
2517 for product in products {
2518 total += ;
2519 }
2520 total
2521 "},
2522 model_output: indoc! {"
2523 <|set|>2:87
2524 total += product.price;
2525 "},
2526 expected: indoc! {"
2527 let mut total = 0;
2528 for product in products {
2529 total += product.price;
2530 }
2531 total
2532 "},
2533 },
2534 Case {
2535 name: "set_range",
2536 original: indoc! {"
2537 fn foo() {
2538 let x = 1;
2539 let y = 2;
2540 let z = 3;
2541 }
2542 "},
2543 model_output: indoc! {"
2544 <|set|>1:46-3:4a
2545 let sum = 6;
2546 "},
2547 expected: indoc! {"
2548 fn foo() {
2549 let sum = 6;
2550 }
2551 "},
2552 },
2553 Case {
2554 name: "insert_after_line",
2555 original: indoc! {"
2556 fn main() {
2557 let x = 1;
2558 }
2559 "},
2560 model_output: indoc! {"
2561 <|insert|>1:46
2562 let y = 2;
2563 "},
2564 expected: indoc! {"
2565 fn main() {
2566 let x = 1;
2567 let y = 2;
2568 }
2569 "},
2570 },
2571 Case {
2572 name: "insert_before_first",
2573 original: indoc! {"
2574 let x = 1;
2575 let y = 2;
2576 "},
2577 model_output: indoc! {"
2578 <|insert|>
2579 use std::io;
2580 "},
2581 expected: indoc! {"
2582 use std::io;
2583 let x = 1;
2584 let y = 2;
2585 "},
2586 },
2587 Case {
2588 name: "set_with_cursor_marker",
2589 original: indoc! {"
2590 fn main() {
2591 println!();
2592 }
2593 "},
2594 model_output: indoc! {"
2595 <|set|>1:34
2596 eprintln!(\"<|user_cursor|>\");
2597 "},
2598 expected: indoc! {"
2599 fn main() {
2600 eprintln!(\"<|user_cursor|>\");
2601 }
2602 "},
2603 },
2604 Case {
2605 name: "multiple_set_commands",
2606 original: indoc! {"
2607 aaa
2608 bbb
2609 ccc
2610 ddd
2611 "},
2612 model_output: indoc! {"
2613 <|set|>0:23
2614 AAA
2615 <|set|>2:29
2616 CCC
2617 "},
2618 expected: indoc! {"
2619 AAA
2620 bbb
2621 CCC
2622 ddd
2623 "},
2624 },
2625 Case {
2626 name: "set_range_multiline_replacement",
2627 original: indoc! {"
2628 fn handle_submit() {
2629 }
2630
2631 fn handle_keystroke() {
2632 "},
2633 model_output: indoc! {"
2634 <|set|>0:3f-1:7d
2635 fn handle_submit(modal_state: &mut ModalState) {
2636 <|user_cursor|>
2637 }
2638 "},
2639 expected: indoc! {"
2640 fn handle_submit(modal_state: &mut ModalState) {
2641 <|user_cursor|>
2642 }
2643
2644 fn handle_keystroke() {
2645 "},
2646 },
2647 Case {
2648 name: "no_edit_commands_returns_original",
2649 original: indoc! {"
2650 hello
2651 world
2652 "},
2653 model_output: "some random text with no commands",
2654 expected: indoc! {"
2655 hello
2656 world
2657 "},
2658 },
2659 Case {
2660 name: "no_edits_command_returns_original",
2661 original: indoc! {"
2662 hello
2663 world
2664 "},
2665 model_output: "<|no_edits|>",
2666 expected: indoc! {"
2667 hello
2668 world
2669 "},
2670 },
2671 Case {
2672 name: "wrong_hash_set_ignored",
2673 original: indoc! {"
2674 aaa
2675 bbb
2676 "},
2677 model_output: indoc! {"
2678 <|set|>0:ff
2679 ZZZ
2680 "},
2681 expected: indoc! {"
2682 aaa
2683 bbb
2684 "},
2685 },
2686 Case {
2687 name: "insert_and_set_combined",
2688 original: indoc! {"
2689 alpha
2690 beta
2691 gamma
2692 "},
2693 model_output: indoc! {"
2694 <|set|>0:06
2695 ALPHA
2696 <|insert|>1:9c
2697 beta_extra
2698 "},
2699 expected: indoc! {"
2700 ALPHA
2701 beta
2702 beta_extra
2703 gamma
2704 "},
2705 },
2706 Case {
2707 name: "no_trailing_newline_preserved",
2708 original: "hello\nworld",
2709 model_output: indoc! {"
2710 <|set|>0:14
2711 HELLO
2712 "},
2713 expected: "HELLO\nworld",
2714 },
2715 Case {
2716 name: "set_range_hash_mismatch_in_end_bound",
2717 original: indoc! {"
2718 one
2719 two
2720 three
2721 "},
2722 model_output: indoc! {"
2723 <|set|>0:42-2:ff
2724 ONE_TWO_THREE
2725 "},
2726 expected: indoc! {"
2727 one
2728 two
2729 three
2730 "},
2731 },
2732 Case {
2733 name: "set_range_start_greater_than_end_ignored",
2734 original: indoc! {"
2735 a
2736 b
2737 c
2738 "},
2739 model_output: indoc! {"
2740 <|set|>2:63-1:62
2741 X
2742 "},
2743 expected: indoc! {"
2744 a
2745 b
2746 c
2747 "},
2748 },
2749 Case {
2750 name: "insert_out_of_bounds_ignored",
2751 original: indoc! {"
2752 x
2753 y
2754 "},
2755 model_output: indoc! {"
2756 <|insert|>99:aa
2757 z
2758 "},
2759 expected: indoc! {"
2760 x
2761 y
2762 "},
2763 },
2764 Case {
2765 name: "set_out_of_bounds_ignored",
2766 original: indoc! {"
2767 x
2768 y
2769 "},
2770 model_output: indoc! {"
2771 <|set|>99:aa
2772 z
2773 "},
2774 expected: indoc! {"
2775 x
2776 y
2777 "},
2778 },
2779 Case {
2780 name: "malformed_set_command_ignored",
2781 original: indoc! {"
2782 alpha
2783 beta
2784 "},
2785 model_output: indoc! {"
2786 <|set|>not-a-line-ref
2787 UPDATED
2788 "},
2789 expected: indoc! {"
2790 alpha
2791 beta
2792 "},
2793 },
2794 Case {
2795 name: "malformed_insert_hash_treated_as_before_first",
2796 original: indoc! {"
2797 alpha
2798 beta
2799 "},
2800 model_output: indoc! {"
2801 <|insert|>1:nothex
2802 preamble
2803 "},
2804 expected: indoc! {"
2805 preamble
2806 alpha
2807 beta
2808 "},
2809 },
2810 Case {
2811 name: "set_then_insert_same_target_orders_insert_after_replacement",
2812 original: indoc! {"
2813 cat
2814 dog
2815 "},
2816 model_output: indoc! {"
2817 <|set|>0:38
2818 CAT
2819 <|insert|>0:38
2820 TAIL
2821 "},
2822 expected: indoc! {"
2823 CAT
2824 TAIL
2825 dog
2826 "},
2827 },
2828 Case {
2829 name: "overlapping_set_ranges_last_wins",
2830 original: indoc! {"
2831 a
2832 b
2833 c
2834 d
2835 "},
2836 model_output: indoc! {"
2837 <|set|>0:61-2:63
2838 FIRST
2839 <|set|>1:62-3:64
2840 SECOND
2841 "},
2842 expected: indoc! {"
2843 FIRST
2844 d
2845 "},
2846 },
2847 Case {
2848 name: "insert_before_first_and_after_line",
2849 original: indoc! {"
2850 a
2851 b
2852 "},
2853 model_output: indoc! {"
2854 <|insert|>
2855 HEAD
2856 <|insert|>0:61
2857 MID
2858 "},
2859 expected: indoc! {"
2860 HEAD
2861 a
2862 MID
2863 b
2864 "},
2865 },
2866 ];
2867
2868 for case in &cases {
2869 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2870 assert_eq!(result, case.expected, "failed case: {}", case.name);
2871 }
2872 }
2873
2874 #[test]
2875 fn test_output_has_edit_commands() {
2876 assert!(hashline::output_has_edit_commands(&format!(
2877 "{}0:ab\nnew",
2878 SET_COMMAND_MARKER
2879 )));
2880 assert!(hashline::output_has_edit_commands(&format!(
2881 "{}0:ab\nnew",
2882 INSERT_COMMAND_MARKER
2883 )));
2884 assert!(hashline::output_has_edit_commands(&format!(
2885 "some text\n{}1:cd\nstuff",
2886 SET_COMMAND_MARKER
2887 )));
2888 assert!(!hashline::output_has_edit_commands("just plain text"));
2889 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2890 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2891 }
2892
2893 // ---- hashline::patch_to_edit_commands round-trip tests ----
2894
2895 #[test]
2896 fn test_patch_to_edit_commands() {
2897 struct Case {
2898 name: &'static str,
2899 old: &'static str,
2900 patch: &'static str,
2901 expected_new: &'static str,
2902 }
2903
2904 let cases = [
2905 Case {
2906 name: "single_line_replacement",
2907 old: indoc! {"
2908 let mut total = 0;
2909 for product in products {
2910 total += ;
2911 }
2912 total
2913 "},
2914 patch: indoc! {"
2915 @@ -1,5 +1,5 @@
2916 let mut total = 0;
2917 for product in products {
2918 - total += ;
2919 + total += product.price;
2920 }
2921 total
2922 "},
2923 expected_new: indoc! {"
2924 let mut total = 0;
2925 for product in products {
2926 total += product.price;
2927 }
2928 total
2929 "},
2930 },
2931 Case {
2932 name: "multiline_replacement",
2933 old: indoc! {"
2934 fn foo() {
2935 let x = 1;
2936 let y = 2;
2937 let z = 3;
2938 }
2939 "},
2940 patch: indoc! {"
2941 @@ -1,5 +1,3 @@
2942 fn foo() {
2943 - let x = 1;
2944 - let y = 2;
2945 - let z = 3;
2946 + let sum = 1 + 2 + 3;
2947 }
2948 "},
2949 expected_new: indoc! {"
2950 fn foo() {
2951 let sum = 1 + 2 + 3;
2952 }
2953 "},
2954 },
2955 Case {
2956 name: "insertion",
2957 old: indoc! {"
2958 fn main() {
2959 let x = 1;
2960 }
2961 "},
2962 patch: indoc! {"
2963 @@ -1,3 +1,4 @@
2964 fn main() {
2965 let x = 1;
2966 + let y = 2;
2967 }
2968 "},
2969 expected_new: indoc! {"
2970 fn main() {
2971 let x = 1;
2972 let y = 2;
2973 }
2974 "},
2975 },
2976 Case {
2977 name: "insertion_before_first",
2978 old: indoc! {"
2979 let x = 1;
2980 let y = 2;
2981 "},
2982 patch: indoc! {"
2983 @@ -1,2 +1,3 @@
2984 +use std::io;
2985 let x = 1;
2986 let y = 2;
2987 "},
2988 expected_new: indoc! {"
2989 use std::io;
2990 let x = 1;
2991 let y = 2;
2992 "},
2993 },
2994 Case {
2995 name: "deletion",
2996 old: indoc! {"
2997 aaa
2998 bbb
2999 ccc
3000 ddd
3001 "},
3002 patch: indoc! {"
3003 @@ -1,4 +1,2 @@
3004 aaa
3005 -bbb
3006 -ccc
3007 ddd
3008 "},
3009 expected_new: indoc! {"
3010 aaa
3011 ddd
3012 "},
3013 },
3014 Case {
3015 name: "multiple_changes",
3016 old: indoc! {"
3017 alpha
3018 beta
3019 gamma
3020 delta
3021 epsilon
3022 "},
3023 patch: indoc! {"
3024 @@ -1,5 +1,5 @@
3025 -alpha
3026 +ALPHA
3027 beta
3028 gamma
3029 -delta
3030 +DELTA
3031 epsilon
3032 "},
3033 expected_new: indoc! {"
3034 ALPHA
3035 beta
3036 gamma
3037 DELTA
3038 epsilon
3039 "},
3040 },
3041 Case {
3042 name: "replace_with_insertion",
3043 old: indoc! {r#"
3044 fn handle() {
3045 modal_state.close();
3046 modal_state.dismiss();
3047 "#},
3048 patch: indoc! {r#"
3049 @@ -1,3 +1,4 @@
3050 fn handle() {
3051 modal_state.close();
3052 + eprintln!("");
3053 modal_state.dismiss();
3054 "#},
3055 expected_new: indoc! {r#"
3056 fn handle() {
3057 modal_state.close();
3058 eprintln!("");
3059 modal_state.dismiss();
3060 "#},
3061 },
3062 Case {
3063 name: "complete_replacement",
3064 old: indoc! {"
3065 aaa
3066 bbb
3067 ccc
3068 "},
3069 patch: indoc! {"
3070 @@ -1,3 +1,3 @@
3071 -aaa
3072 -bbb
3073 -ccc
3074 +xxx
3075 +yyy
3076 +zzz
3077 "},
3078 expected_new: indoc! {"
3079 xxx
3080 yyy
3081 zzz
3082 "},
3083 },
3084 Case {
3085 name: "add_function_body",
3086 old: indoc! {"
3087 fn foo() {
3088 modal_state.dismiss();
3089 }
3090
3091 fn
3092
3093 fn handle_keystroke() {
3094 "},
3095 patch: indoc! {"
3096 @@ -1,6 +1,8 @@
3097 fn foo() {
3098 modal_state.dismiss();
3099 }
3100
3101 -fn
3102 +fn handle_submit() {
3103 + todo()
3104 +}
3105
3106 fn handle_keystroke() {
3107 "},
3108 expected_new: indoc! {"
3109 fn foo() {
3110 modal_state.dismiss();
3111 }
3112
3113 fn handle_submit() {
3114 todo()
3115 }
3116
3117 fn handle_keystroke() {
3118 "},
3119 },
3120 Case {
3121 name: "with_cursor_offset",
3122 old: indoc! {r#"
3123 fn main() {
3124 println!();
3125 }
3126 "#},
3127 patch: indoc! {r#"
3128 @@ -1,3 +1,3 @@
3129 fn main() {
3130 - println!();
3131 + eprintln!("");
3132 }
3133 "#},
3134 expected_new: indoc! {r#"
3135 fn main() {
3136 eprintln!("<|user_cursor|>");
3137 }
3138 "#},
3139 },
3140 Case {
3141 name: "non_local_hunk_header_pure_insertion_repro",
3142 old: indoc! {"
3143 aaa
3144 bbb
3145 "},
3146 patch: indoc! {"
3147 @@ -20,2 +20,3 @@
3148 aaa
3149 +xxx
3150 bbb
3151 "},
3152 expected_new: indoc! {"
3153 aaa
3154 xxx
3155 bbb
3156 "},
3157 },
3158 Case {
3159 name: "empty_patch_produces_no_edits_marker",
3160 old: indoc! {"
3161 aaa
3162 bbb
3163 "},
3164 patch: "@@ -20,2 +20,3 @@\n",
3165 expected_new: indoc! {"
3166 aaa
3167 bbb
3168 "},
3169 },
3170 ];
3171
3172 for case in &cases {
3173 // The cursor_offset for patch_to_edit_commands is relative to
3174 // the first hunk's new text (context + additions). We compute
3175 // it by finding where the marker sits in the expected output
3176 // (which mirrors the new text of the hunk).
3177 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3178
3179 let commands =
3180 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3181 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3182
3183 assert!(
3184 hashline::output_has_edit_commands(&commands),
3185 "case {}: expected edit commands, got: {commands:?}",
3186 case.name,
3187 );
3188
3189 let applied = hashline::apply_edit_commands(case.old, &commands);
3190 assert_eq!(applied, case.expected_new, "case {}", case.name);
3191 }
3192 }
3193 }
3194}
3195
3196pub mod seed_coder {
3197 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3198 //!
3199 //! Seed-Coder uses different FIM tokens and order than Qwen:
3200 //! - SPM order: suffix comes FIRST, then prefix, then middle
3201 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3202 //! - File markers: StarCoder-style `<filename>path` (single token + path)
3203 //!
3204 //! All context (related files, edit history) goes in the PREFIX section.
3205 //! The suffix contains only code after the editable region.
3206 //!
3207 //! Example prompt:
3208 //!
3209 //! <[fim-suffix]>
3210 //! code after editable region
3211 //! <[fim-prefix]><filename>related/file.py
3212 //! related file content
3213 //!
3214 //! <filename>edit_history
3215 //! --- a/some_file.py
3216 //! +++ b/some_file.py
3217 //! -old
3218 //! +new
3219 //!
3220 //! <filename>path/to/target_file.py
3221 //! code before editable region
3222 //! <<<<<<< CURRENT
3223 //! code that
3224 //! needs to<|user_cursor|>
3225 //! be rewritten
3226 //! =======
3227 //! <[fim-middle]>
3228 //!
3229 //! Expected output (model generates):
3230 //!
3231 //! updated
3232 //! code with
3233 //! changes applied
3234 //! >>>>>>> UPDATED
3235
3236 use super::*;
3237
    /// Token that opens the suffix section; it comes first in the SPM prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// StarCoder-style file marker, written immediately before a path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the current (to-be-rewritten) editable text in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that closes the current editable text in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the updated text in the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output line used by `no_edits` to signal that nothing should change.
    pub const NO_EDITS: &str = "NO_EDITS\n";
3248
3249 pub fn special_tokens() -> &'static [&'static str] {
3250 &[
3251 FIM_SUFFIX,
3252 FIM_PREFIX,
3253 FIM_MIDDLE,
3254 FILE_MARKER,
3255 START_MARKER,
3256 SEPARATOR,
3257 END_MARKER,
3258 CURSOR_MARKER,
3259 ]
3260 }
3261
3262 pub fn write_cursor_excerpt_section(
3263 prompt: &mut String,
3264 path: &Path,
3265 context: &str,
3266 editable_range: &Range<usize>,
3267 cursor_offset: usize,
3268 ) {
3269 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3270 prompt.push_str(§ion);
3271 }
3272
3273 pub fn format_prompt_with_budget(
3274 path: &Path,
3275 context: &str,
3276 editable_range: &Range<usize>,
3277 cursor_offset: usize,
3278 events: &[Arc<Event>],
3279 related_files: &[RelatedFile],
3280 max_tokens: usize,
3281 ) -> String {
3282 let cursor_prefix_section =
3283 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3284 assemble_fim_prompt(
3285 context,
3286 editable_range,
3287 &cursor_prefix_section,
3288 events,
3289 related_files,
3290 max_tokens,
3291 )
3292 }
3293
3294 pub fn assemble_fim_prompt(
3295 context: &str,
3296 editable_range: &Range<usize>,
3297 cursor_prefix_section: &str,
3298 events: &[Arc<Event>],
3299 related_files: &[RelatedFile],
3300 max_tokens: usize,
3301 ) -> String {
3302 let suffix_section = build_suffix_section(context, editable_range);
3303
3304 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3305 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3306 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3307
3308 let edit_history_section = super::format_edit_history_within_budget(
3309 events,
3310 FILE_MARKER,
3311 "edit_history",
3312 budget_after_cursor,
3313 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3314 );
3315 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3316 let budget_after_edit_history =
3317 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3318
3319 let related_files_section = super::format_related_files_within_budget(
3320 related_files,
3321 FILE_MARKER,
3322 "",
3323 budget_after_edit_history,
3324 );
3325
3326 let mut prompt = String::new();
3327 prompt.push_str(&suffix_section);
3328 prompt.push_str(FIM_PREFIX);
3329 prompt.push_str(&related_files_section);
3330 if !related_files_section.is_empty() {
3331 prompt.push('\n');
3332 }
3333 prompt.push_str(&edit_history_section);
3334 if !edit_history_section.is_empty() {
3335 prompt.push('\n');
3336 }
3337 prompt.push_str(cursor_prefix_section);
3338 prompt.push_str(FIM_MIDDLE);
3339
3340 prompt
3341 }
3342
3343 pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3344 let mut section = String::new();
3345 section.push_str(FIM_SUFFIX);
3346 section.push_str(&context[editable_range.end..]);
3347 if !section.ends_with('\n') {
3348 section.push('\n');
3349 }
3350 section
3351 }
3352
3353 fn build_cursor_prefix_section(
3354 path: &Path,
3355 context: &str,
3356 editable_range: &Range<usize>,
3357 cursor_offset: usize,
3358 ) -> String {
3359 let mut section = String::new();
3360 let path_str = path.to_string_lossy();
3361 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3362
3363 section.push_str(&context[..editable_range.start]);
3364 section.push_str(START_MARKER);
3365 section.push_str(&context[editable_range.start..cursor_offset]);
3366 section.push_str(CURSOR_MARKER);
3367 section.push_str(&context[cursor_offset..editable_range.end]);
3368 if !section.ends_with('\n') {
3369 section.push('\n');
3370 }
3371 section.push_str(SEPARATOR);
3372 section
3373 }
3374
3375 /// Format patch as containing no changes if it's empty; otherwise return None.
3376 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3377 // Count lines in the patch
3378 let empty_patch = patch.lines().count() <= 3;
3379 if empty_patch {
3380 Some(format!("{NO_EDITS}{END_MARKER}"))
3381 } else {
3382 None
3383 }
3384 }
3385}
3386
3387pub mod v0304_variable_edit {
3388 //! A prompt format with no fixed editable region. The entire context is shown
3389 //! to the model, and it chooses which text to replace by outputting surrounding
3390 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3391 //! text.
3392 //!
3393 //! Example prompt:
3394 //!
3395 //! <|file_sep|>path/to/file.py
3396 //! zero
3397 //! one
3398 //! two
3399 //! three<|user_cursor|>
3400 //! four
3401 //! five
3402 //! <|fim_prefix|>
    //!
3404 //! Expected output (model generates):
3405 //!
3406 //! two
3407 //! <|fim_middle|>
3408 //! THREE
3409 //! <|fim_suffix|>
3410 //! four
3411 //!
3412 //! The output means: find "two\n...\nfour" in the context, and replace
3413 //! everything between "two\n" and "four" with "THREE\n".
3414
3415 use super::*;
3416
3417 pub fn special_tokens() -> &'static [&'static str] {
3418 &[
3419 "<|fim_prefix|>",
3420 "<|fim_suffix|>",
3421 "<|fim_middle|>",
3422 "<|file_sep|>",
3423 CURSOR_MARKER,
3424 ]
3425 }
3426
3427 pub fn write_cursor_excerpt_section(
3428 prompt: &mut String,
3429 path: &Path,
3430 context: &str,
3431 cursor_offset: usize,
3432 ) {
3433 let path_str = path.to_string_lossy();
3434 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3435
3436 prompt.push_str(&context[..cursor_offset]);
3437 prompt.push_str(CURSOR_MARKER);
3438 prompt.push_str(&context[cursor_offset..]);
3439 if !prompt.ends_with('\n') {
3440 prompt.push('\n');
3441 }
3442 prompt.push_str("<|fim_prefix|>\n")
3443 }
3444
3445 /// Apply a variable-edit model output to the original context text.
3446 ///
3447 /// The model output has the form:
3448 ///
3449 /// - prefix context lines
3450 /// - `<|fim_middle|>`
3451 /// - new text
3452 /// - `<|fim_suffix|>`
3453 /// - suffix context lines
3454 ///
3455 /// We locate the prefix/suffix context lines in the original text and replace
3456 /// everything between them with the new text.
3457 pub fn apply_variable_edit(
3458 context: &str,
3459 model_output: &str,
3460 ) -> Result<(Range<usize>, String)> {
3461 let (prefix_context, rest) = model_output
3462 .split_once("<|fim_middle|>\n")
3463 .or_else(|| model_output.split_once("<|fim_middle|>"))
3464 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3465
3466 let (new_text, suffix_context) = rest
3467 .split_once("<|fim_suffix|>\n")
3468 .or_else(|| rest.split_once("<|fim_suffix|>"))
3469 .unwrap_or((rest, ""));
3470
3471 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3472 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3473 } else {
3474 suffix_context
3475 };
3476
3477 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3478 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3479 + prefix_context.len();
3480 let suffix_offset = if suffix_context.is_empty() {
3481 context.len()
3482 } else {
3483 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3484 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3485 + prefix_offset
3486 };
3487
3488 let edit_range = prefix_offset..suffix_offset;
3489 return Ok((edit_range, new_text.to_string()));
3490 }
3491
3492 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3493 if needle.is_empty() {
3494 return Some(0);
3495 }
3496
3497 haystack.match_indices(needle).find_map(|(offset, _)| {
3498 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3499 matched_line_start.then_some(offset)
3500 })
3501 }
3502
3503 /// Convert a unified diff patch into the variable-edit output format.
3504 ///
3505 /// Parses `patch` as a unified diff against `old_text` and produces model
3506 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3507 /// delimiters. The diff is resolved by content matching rather than line
3508 /// numbers.
3509 pub fn patch_to_variable_edit_output(
3510 old_text: &str,
3511 patch: &str,
3512 cursor_offset: Option<usize>,
3513 ) -> Result<String> {
3514 // Parse the unified diff into hunks. Each hunk has an `old_context`
3515 // string (context + deleted lines interleaved in order) and a list of
3516 // edits expressed as byte ranges within that context plus replacement
3517 // text.
3518 let hunks = parse_hunks(patch);
3519 if hunks.is_empty() {
3520 return Ok(String::new());
3521 }
3522
3523 // Apply each hunk by finding its old_context in the text and
3524 // performing the edits. We search forward from where the previous
3525 // hunk ended so that hunks are applied in order.
3526 let mut new_text = old_text.to_string();
3527 let mut search_from: usize = 0;
3528 let mut first_hunk_pos: Option<usize> = None;
3529
3530 for hunk in &hunks {
3531 let context_pos = new_text[search_from..]
3532 .find(&hunk.old_context)
3533 .map(|pos| pos + search_from)
3534 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3535
3536 if first_hunk_pos.is_none() {
3537 first_hunk_pos = Some(context_pos);
3538 }
3539
3540 // Apply edits in reverse order so byte offsets remain valid.
3541 for edit in hunk.edits.iter().rev() {
3542 let abs_start = context_pos + edit.range.start;
3543 let abs_end = context_pos + edit.range.end;
3544 new_text.replace_range(abs_start..abs_end, &edit.text);
3545 }
3546
3547 // Advance past this hunk's region in the (now modified) text.
3548 let new_region_len: usize =
3549 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3550 len + edit.text.len() - (edit.range.end - edit.range.start)
3551 });
3552 search_from = context_pos + new_region_len;
3553 }
3554
3555 // Now we have old_text and new_text. Find the changed line range by
3556 // comparing them.
3557 let old_lines: Vec<&str> = old_text.lines().collect();
3558 let new_lines: Vec<&str> = new_text.lines().collect();
3559
3560 // Find first differing line.
3561 let first_changed_row = old_lines
3562 .iter()
3563 .zip(new_lines.iter())
3564 .position(|(a, b)| a != b)
3565 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3566
3567 // Find last differing line (from the end).
3568 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3569 let common_suffix = old_lines
3570 .iter()
3571 .rev()
3572 .zip(new_lines.iter().rev())
3573 .take(max_suffix)
3574 .take_while(|(a, b)| a == b)
3575 .count();
3576
3577 let old_end = old_lines.len() - common_suffix;
3578 let new_end = new_lines.len() - common_suffix;
3579
3580 if first_changed_row == old_end && first_changed_row == new_end {
3581 return Ok(String::new());
3582 }
3583
3584 // Build the replacement text from new_lines[first_diff..new_end].
3585 let mut merged_new_text = String::new();
3586 for line in &new_lines[first_changed_row..new_end] {
3587 merged_new_text.push_str(line);
3588 merged_new_text.push('\n');
3589 }
3590
3591 // cursor_offset is relative to the first hunk's new content in
3592 // new_text. Translate it to an offset within merged_new_text, which
3593 // only contains lines first_diff..new_end of new_text.
3594 if let Some(hunk_offset) = cursor_offset {
3595 let hunk_start = first_hunk_pos.unwrap_or(0);
3596 let absolute_pos = hunk_start + hunk_offset;
3597
3598 // Byte offset where first_diff starts in new_text.
3599 let merged_start: usize = new_lines[..first_changed_row]
3600 .iter()
3601 .map(|line| line.len() + 1)
3602 .sum();
3603
3604 if absolute_pos >= merged_start {
3605 let relative_offset = absolute_pos - merged_start;
3606 if relative_offset <= merged_new_text.len() {
3607 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3608 }
3609 }
3610 }
3611
3612 // Build output with 2 lines of context above and below.
3613 let context_lines_count = 2;
3614 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3615 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3616
3617 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3618 let pattern = &lines[line_range];
3619 let pattern_len = pattern.len();
3620
3621 let mut count = 0;
3622 for offset in 0..=lines.len() - pattern_len {
3623 if &lines[offset..offset + pattern_len] == pattern {
3624 count += 1;
3625 }
3626 }
3627 count
3628 }
3629
3630 // Expand prefix and suffix until they are unique
3631 while prefix_start > 0 {
3632 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3633 prefix_start -= 1;
3634 } else {
3635 break;
3636 }
3637 }
3638 while suffix_end < old_lines.len() {
3639 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3640 suffix_end += 1;
3641 } else {
3642 break;
3643 }
3644 }
3645
3646 let mut output = String::new();
3647 for line in &old_lines[prefix_start..first_changed_row] {
3648 output.push_str(line);
3649 output.push('\n');
3650 }
3651 output.push_str("<|fim_middle|>\n");
3652 output.push_str(&merged_new_text);
3653 output.push_str("<|fim_suffix|>\n");
3654 for line in &old_lines[old_end..suffix_end] {
3655 output.push_str(line);
3656 output.push('\n');
3657 }
3658
3659 Ok(output)
3660 }
3661
3662 struct ParsedHunk {
3663 old_context: String,
3664 edits: Vec<ParsedEdit>,
3665 }
3666
3667 struct ParsedEdit {
3668 range: Range<usize>,
3669 text: String,
3670 }
3671
3672 /// Parse a unified diff into content-based hunks. Each hunk contains an
3673 /// `old_context` string (context lines + deleted lines, which together
3674 /// form the text that should be found in the original) and a list of edits
3675 /// expressed as byte ranges within that context.
3676 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3677 let mut hunks = Vec::new();
3678 let mut current: Option<ParsedHunk> = None;
3679
3680 for line in patch.lines() {
3681 if line.starts_with("@@") {
3682 if let Some(hunk) = current.take() {
3683 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3684 hunks.push(hunk);
3685 }
3686 }
3687 current = Some(ParsedHunk {
3688 old_context: String::new(),
3689 edits: Vec::new(),
3690 });
3691 } else if line.starts_with("---") || line.starts_with("+++") {
3692 continue;
3693 } else if let Some(hunk) = &mut current {
3694 if let Some(added) = line.strip_prefix('+') {
3695 let pos = hunk.old_context.len();
3696 if let Some(last_edit) = hunk.edits.last_mut() {
3697 if last_edit.range.end == pos {
3698 writeln!(&mut last_edit.text, "{added}").ok();
3699 continue;
3700 }
3701 }
3702 hunk.edits.push(ParsedEdit {
3703 range: pos..pos,
3704 text: format!("{added}\n"),
3705 });
3706 } else if let Some(removed) = line.strip_prefix('-') {
3707 let start = hunk.old_context.len();
3708 writeln!(&mut hunk.old_context, "{removed}").ok();
3709 let end = hunk.old_context.len();
3710 if let Some(last_edit) = hunk.edits.last_mut() {
3711 if last_edit.range.end == start {
3712 last_edit.range.end = end;
3713 continue;
3714 }
3715 }
3716 hunk.edits.push(ParsedEdit {
3717 range: start..end,
3718 text: String::new(),
3719 });
3720 } else {
3721 let ctx = line.strip_prefix(' ').unwrap_or(line);
3722 writeln!(&mut hunk.old_context, "{ctx}").ok();
3723 }
3724 }
3725 }
3726
3727 if let Some(hunk) = current {
3728 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3729 hunks.push(hunk);
3730 }
3731 }
3732
3733 hunks
3734 }
3735
3736 #[cfg(test)]
3737 mod tests {
3738 use super::*;
3739 use indoc::indoc;
3740
        // Table-driven test for `apply_variable_edit`: each case applies the
        // returned (range, replacement) pair to `original` and compares the
        // result with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label included in the assertion message on failure.
                name: &'static str,
                // Text the edit is resolved against.
                original: &'static str,
                // Model output in the variable-edit format.
                model_output: &'static str,
                // `original` after the edit has been applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                // No prefix context: the edit starts at the beginning of the
                // original text.
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                // No suffix context: the edit extends to the end of the
                // original text.
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                // The cursor marker is part of the replacement text and is not
                // interpreted by `apply_variable_edit`.
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3957
3958 #[test]
3959 fn test_patch_to_variable_edit() {
3960 struct Case {
3961 name: &'static str,
3962 old: &'static str,
3963 patch: &'static str,
3964 cursor_offset: Option<usize>,
3965 expected_variable_edit: &'static str,
3966 expected_after_apply: &'static str,
3967 }
3968
3969 let cases = [
3970 Case {
3971 name: "simple_replacement",
3972 old: indoc! {"
3973 zero
3974 one
3975 two
3976 three
3977 four
3978 five
3979 "},
3980 patch: indoc! {"
3981 @@ -3,3 +3,3 @@
3982 two
3983 -three
3984 +THREE
3985 four
3986 "},
3987 cursor_offset: None,
3988 expected_variable_edit: indoc! {"
3989 one
3990 two
3991 <|fim_middle|>
3992 THREE
3993 <|fim_suffix|>
3994 four
3995 five
3996 "},
3997 expected_after_apply: indoc! {"
3998 zero
3999 one
4000 two
4001 THREE
4002 four
4003 five
4004 "},
4005 },
4006 Case {
4007 name: "insertion",
4008 old: indoc! {"
4009 a
4010 b
4011 c
4012 d
4013 e
4014 "},
4015 patch: indoc! {"
4016 @@ -2,0 +3,1 @@
4017 b
4018 +X
4019 c
4020 "},
4021 cursor_offset: None,
4022 expected_variable_edit: indoc! {"
4023 a
4024 b
4025 <|fim_middle|>
4026 X
4027 <|fim_suffix|>
4028 c
4029 d
4030 "},
4031 expected_after_apply: indoc! {"
4032 a
4033 b
4034 X
4035 c
4036 d
4037 e
4038 "},
4039 },
4040 Case {
4041 name: "deletion",
4042 old: indoc! {"
4043 a
4044 b
4045 c
4046 d
4047 e
4048 "},
4049 patch: indoc! {"
4050 @@ -2,3 +2,2 @@
4051 b
4052 -c
4053 d
4054 "},
4055 cursor_offset: None,
4056 expected_variable_edit: indoc! {"
4057 a
4058 b
4059 <|fim_middle|>
4060 <|fim_suffix|>
4061 d
4062 e
4063 "},
4064 expected_after_apply: indoc! {"
4065 a
4066 b
4067 d
4068 e
4069 "},
4070 },
4071 Case {
4072 name: "edit_near_start",
4073 old: indoc! {"
4074 first
4075 second
4076 third
4077 fourth
4078 "},
4079 patch: indoc! {"
4080 @@ -1,1 +1,1 @@
4081 -first
4082 +FIRST
4083 "},
4084 cursor_offset: None,
4085 expected_variable_edit: indoc! {"
4086 <|fim_middle|>
4087 FIRST
4088 <|fim_suffix|>
4089 second
4090 third
4091 "},
4092 expected_after_apply: indoc! {"
4093 FIRST
4094 second
4095 third
4096 fourth
4097 "},
4098 },
4099 Case {
4100 name: "edit_near_end",
4101 old: indoc! {"
4102 first
4103 second
4104 third
4105 fourth
4106 "},
4107 patch: indoc! {"
4108 @@ -4,1 +4,1 @@
4109 -fourth
4110 +FOURTH
4111 "},
4112 cursor_offset: None,
4113 expected_variable_edit: indoc! {"
4114 second
4115 third
4116 <|fim_middle|>
4117 FOURTH
4118 <|fim_suffix|>
4119 "},
4120 expected_after_apply: indoc! {"
4121 first
4122 second
4123 third
4124 FOURTH
4125 "},
4126 },
4127 Case {
4128 name: "cursor_at_start_of_replacement",
4129 old: indoc! {"
4130 zero
4131 one
4132 two
4133 three
4134 four
4135 five
4136 "},
4137 patch: indoc! {"
4138 @@ -3,3 +3,3 @@
4139 two
4140 -three
4141 +THREE
4142 four
4143 "},
4144 cursor_offset: Some(4),
4145 expected_variable_edit: indoc! {"
4146 one
4147 two
4148 <|fim_middle|>
4149 <|user_cursor|>THREE
4150 <|fim_suffix|>
4151 four
4152 five
4153 "},
4154 expected_after_apply: indoc! {"
4155 zero
4156 one
4157 two
4158 <|user_cursor|>THREE
4159 four
4160 five
4161 "},
4162 },
4163 Case {
4164 name: "cursor_in_middle_of_replacement",
4165 old: indoc! {"
4166 zero
4167 one
4168 two
4169 three
4170 four
4171 five
4172 "},
4173 patch: indoc! {"
4174 @@ -3,3 +3,3 @@
4175 two
4176 -three
4177 +THREE
4178 four
4179 "},
4180 cursor_offset: Some(6),
4181 expected_variable_edit: indoc! {"
4182 one
4183 two
4184 <|fim_middle|>
4185 TH<|user_cursor|>REE
4186 <|fim_suffix|>
4187 four
4188 five
4189 "},
4190 expected_after_apply: indoc! {"
4191 zero
4192 one
4193 two
4194 TH<|user_cursor|>REE
4195 four
4196 five
4197 "},
4198 },
4199 Case {
4200 name: "expands_context_when_two_lines_not_unique_before_and_after",
4201 old: indoc! {"
4202 one
4203 a
4204 b
4205 c
4206 d
4207 two
4208 a
4209 b
4210 c
4211 d
4212 three
4213 a
4214 b
4215 c
4216 d
4217 four
4218 "},
4219 patch: indoc! {"
4220 @@ -4,5 +4,5 @@
4221 two
4222 a
4223 b
4224 -c
4225 +C
4226 d
4227 three
4228 "},
4229 cursor_offset: None,
4230 expected_variable_edit: indoc! {"
4231 two
4232 a
4233 b
4234 <|fim_middle|>
4235 C
4236 <|fim_suffix|>
4237 d
4238 three
4239 "},
4240 expected_after_apply: indoc! {"
4241 one
4242 a
4243 b
4244 c
4245 d
4246 two
4247 a
4248 b
4249 C
4250 d
4251 three
4252 a
4253 b
4254 c
4255 d
4256 four
4257 "},
4258 },
4259 Case {
4260 name: "expands_context_when_two_lines_not_unique_before_and_after",
4261 old: indoc! {"
4262 {
4263 {
4264 one();
4265 }
4266 }
4267 {
4268 {
4269 two();
4270 }
4271 }
4272 {
4273 {
4274 three();
4275 }
4276 }
4277 {
4278 {
4279 four();
4280 }
4281 }
4282 "},
4283 patch: indoc! {"
4284 @@ -4,5 +4,5 @@
4285 {
4286 - two();
4287 + TWO();
4288 }
4289 "},
4290 cursor_offset: None,
4291 expected_variable_edit: indoc! {"
4292 one();
4293 }
4294 }
4295 {
4296 {
4297 <|fim_middle|>
4298 TWO();
4299 <|fim_suffix|>
4300 }
4301 }
4302 {
4303 {
4304 three();
4305 "},
4306 expected_after_apply: indoc! {"
4307 {
4308 {
4309 one();
4310 }
4311 }
4312 {
4313 {
4314 TWO();
4315 }
4316 }
4317 {
4318 {
4319 three();
4320 }
4321 }
4322 {
4323 {
4324 four();
4325 }
4326 }
4327 "},
4328 },
4329 ];
4330
4331 for case in cases {
4332 let output =
4333 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4334 .unwrap_or_else(|error| {
4335 panic!("failed converting patch for {}: {error}", case.name)
4336 });
4337 assert_eq!(
4338 output, case.expected_variable_edit,
4339 "patch->variable_edit mismatch for {}",
4340 case.name
4341 );
4342
4343 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4344 .unwrap_or_else(|error| {
4345 panic!("failed applying variable_edit for {}: {error}", case.name)
4346 });
4347 let mut edited_by_variable_edit = case.old.to_string();
4348 edited_by_variable_edit.replace_range(edit_range, &replacement);
4349 assert_eq!(
4350 edited_by_variable_edit, case.expected_after_apply,
4351 "variable_edit apply mismatch for {}",
4352 case.name
4353 );
4354
4355 let (expected_edit_range, expected_replacement) =
4356 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4357 |error| {
4358 panic!(
4359 "failed applying expected variable_edit for {}: {error}",
4360 case.name
4361 )
4362 },
4363 );
4364 let mut edited_by_expected_variable_edit = case.old.to_string();
4365 edited_by_expected_variable_edit
4366 .replace_range(expected_edit_range, &expected_replacement);
4367 assert_eq!(
4368 edited_by_expected_variable_edit, case.expected_after_apply,
4369 "expected variable_edit apply mismatch for {}",
4370 case.name
4371 );
4372 }
4373 }
4374
4375 #[test]
4376 fn test_write_cursor_excerpt_section() {
4377 let path = Path::new("test.rs");
4378 let context = "fn main() {\n hello();\n}\n";
4379 let cursor_offset = 17;
4380 let mut prompt = String::new();
4381 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4382 assert_eq!(
4383 prompt,
4384 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4385 );
4386 }
4387 }
4388}
4389
4390/// The zeta1 prompt format
4391pub mod zeta1 {
4392 use super::*;
4393 use std::fmt::Write;
4394
    /// Cursor marker used by the zeta1 format. It differs from the parent
    /// module's `CURSOR_MARKER` (`<|user_cursor|>`).
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written at the top of the excerpt when it starts at the beginning of
    /// the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Opens the editable region of the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Closes the editable region of the excerpt.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed text at the start of every zeta1 prompt, up to and including the
    /// "User Edits" heading.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the edit events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt; the model's response follows it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4409
4410 /// Formats a complete zeta1 prompt from the input events and excerpt.
4411 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4412 let mut prompt = String::with_capacity(
4413 INSTRUCTION_HEADER.len()
4414 + input_events.len()
4415 + EXCERPT_HEADER.len()
4416 + input_excerpt.len()
4417 + RESPONSE_HEADER.len(),
4418 );
4419 prompt.push_str(INSTRUCTION_HEADER);
4420 prompt.push_str(input_events);
4421 prompt.push_str(EXCERPT_HEADER);
4422 prompt.push_str(input_excerpt);
4423 prompt.push_str(RESPONSE_HEADER);
4424 prompt
4425 }
4426
4427 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4428 /// editable and context byte-offset ranges within `cursor_excerpt`.
4429 pub fn format_zeta1_from_input(
4430 input: &ZetaPromptInput,
4431 editable_range: Range<usize>,
4432 context_range: Range<usize>,
4433 ) -> String {
4434 let events = format_zeta1_events(&input.events);
4435 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4436 format_zeta1_prompt(&events, &excerpt)
4437 }
4438
4439 /// Formats events in zeta1 style (oldest first).
4440 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4441 let mut result = String::new();
4442 for event in
4443 events
4444 .iter()
4445 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4446 &ZetaFormat::V0114180EditableRegion,
4447 )))
4448 {
4449 let event_string = format_zeta1_event(event);
4450 if event_string.is_empty() {
4451 continue;
4452 }
4453 if !result.is_empty() {
4454 result.push_str("\n\n");
4455 }
4456 result.push_str(&event_string);
4457 }
4458 result
4459 }
4460
4461 fn format_zeta1_event(event: &Event) -> String {
4462 match event {
4463 Event::BufferChange {
4464 path,
4465 old_path,
4466 diff,
4467 ..
4468 } => {
4469 let mut prompt = String::new();
4470 if old_path != path {
4471 writeln!(
4472 prompt,
4473 "User renamed {} to {}\n",
4474 old_path.display(),
4475 path.display()
4476 )
4477 .ok();
4478 }
4479 if !diff.is_empty() {
4480 write!(
4481 prompt,
4482 "User edited {}:\n```diff\n{}\n```",
4483 path.display(),
4484 diff
4485 )
4486 .ok();
4487 }
4488 prompt
4489 }
4490 }
4491 }
4492
4493 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4494 /// within `cursor_excerpt`.
4495 fn format_zeta1_excerpt(
4496 input: &ZetaPromptInput,
4497 editable_range: Range<usize>,
4498 context_range: Range<usize>,
4499 ) -> String {
4500 let path_str = input.cursor_path.to_string_lossy();
4501 let excerpt = &*input.cursor_excerpt;
4502 let cursor_offset = input.cursor_offset_in_excerpt;
4503
4504 let mut prompt = String::new();
4505 writeln!(&mut prompt, "```{path_str}").ok();
4506
4507 let starts_at_file_beginning =
4508 input.excerpt_start_row == Some(0) && context_range.start == 0;
4509 if starts_at_file_beginning {
4510 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4511 }
4512
4513 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4514
4515 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4516 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4517 prompt.push_str(CURSOR_MARKER);
4518 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4519 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4520
4521 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4522 write!(prompt, "\n```").ok();
4523
4524 prompt
4525 }
4526
4527 /// Cleans zeta1 model output by extracting content between editable region
4528 /// markers and converting the zeta1 cursor marker to the universal one.
4529 /// Returns `None` if the output doesn't contain the expected markers.
4530 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4531 let content = output.replace(CURSOR_MARKER, "");
4532
4533 let content_start = content
4534 .find(EDITABLE_REGION_START_MARKER)
4535 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4536 .map(|pos| {
4537 if content.as_bytes().get(pos) == Some(&b'\n') {
4538 pos + 1
4539 } else {
4540 pos
4541 }
4542 })
4543 .unwrap_or(0);
4544
4545 let content_end = content
4546 .find(EDITABLE_REGION_END_MARKER)
4547 .map(|pos| {
4548 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4549 pos - 1
4550 } else {
4551 pos
4552 }
4553 })
4554 .unwrap_or(content.len());
4555
4556 if content_start > content_end {
4557 return Some(String::new());
4558 }
4559
4560 let extracted = &content[content_start..content_end];
4561
4562 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4563 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4564 let text_before_cursor = text_before_cursor
4565 .find(EDITABLE_REGION_START_MARKER)
4566 .map(|pos| {
4567 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4568 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4569 after_marker + 1
4570 } else {
4571 after_marker
4572 }
4573 })
4574 .unwrap_or(0);
4575 let offset_in_extracted = zeta1_cursor_pos
4576 .saturating_sub(text_before_cursor)
4577 .min(extracted.len());
4578 offset_in_extracted
4579 });
4580
4581 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4582 if let Some(offset) = cursor_offset {
4583 result.push_str(&extracted[..offset]);
4584 result.push_str(super::CURSOR_MARKER);
4585 result.push_str(&extracted[offset..]);
4586 } else {
4587 result.push_str(extracted);
4588 }
4589
4590 Some(result)
4591 }
4592}
4593
4594#[cfg(test)]
4595mod tests {
4596 use super::*;
4597 use indoc::indoc;
4598
4599 fn make_input(
4600 cursor_excerpt: &str,
4601 editable_range: Range<usize>,
4602 cursor_offset: usize,
4603 events: Vec<Event>,
4604 related_files: Vec<RelatedFile>,
4605 ) -> ZetaPromptInput {
4606 let context_range = 0..cursor_excerpt.len();
4607 ZetaPromptInput {
4608 cursor_path: Path::new("test.rs").into(),
4609 cursor_excerpt: cursor_excerpt.into(),
4610 cursor_offset_in_excerpt: cursor_offset,
4611 excerpt_start_row: None,
4612 events: events.into_iter().map(Arc::new).collect(),
4613 related_files: Some(related_files),
4614 active_buffer_diagnostics: vec![],
4615 excerpt_ranges: ExcerptRanges {
4616 editable_150: editable_range.clone(),
4617 editable_180: editable_range.clone(),
4618 editable_350: editable_range,
4619 editable_150_context_350: context_range.clone(),
4620 editable_180_context_350: context_range.clone(),
4621 editable_350_context_150: context_range,
4622 ..Default::default()
4623 },
4624 syntax_ranges: None,
4625 in_open_source_repo: false,
4626 can_collect_data: false,
4627 repo_url: None,
4628 }
4629 }
4630
4631 fn make_input_with_context_range(
4632 excerpt: &str,
4633 editable_range: Range<usize>,
4634 context_range: Range<usize>,
4635 cursor_offset: usize,
4636 ) -> ZetaPromptInput {
4637 ZetaPromptInput {
4638 cursor_path: Path::new("test.rs").into(),
4639 cursor_excerpt: excerpt.into(),
4640 cursor_offset_in_excerpt: cursor_offset,
4641 excerpt_start_row: None,
4642 events: vec![],
4643 related_files: Some(vec![]),
4644 active_buffer_diagnostics: vec![],
4645 excerpt_ranges: ExcerptRanges {
4646 editable_150: editable_range.clone(),
4647 editable_180: editable_range.clone(),
4648 editable_350: editable_range,
4649 editable_150_context_350: context_range.clone(),
4650 editable_180_context_350: context_range.clone(),
4651 editable_350_context_150: context_range,
4652 ..Default::default()
4653 },
4654 syntax_ranges: None,
4655 in_open_source_repo: false,
4656 can_collect_data: false,
4657 repo_url: None,
4658 }
4659 }
4660
4661 fn make_event(path: &str, diff: &str) -> Event {
4662 Event::BufferChange {
4663 path: Path::new(path).into(),
4664 old_path: Path::new(path).into(),
4665 diff: diff.to_string(),
4666 predicted: false,
4667 in_open_source_repo: false,
4668 }
4669 }
4670
4671 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4672 RelatedFile {
4673 path: Path::new(path).into(),
4674 max_row: content.lines().count() as u32,
4675 excerpts: vec![RelatedExcerpt {
4676 row_range: 0..content.lines().count() as u32,
4677 text: content.into(),
4678 order: 0,
4679 }],
4680 in_open_source_repo: false,
4681 }
4682 }
4683
4684 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4685 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4686 }
4687
4688 fn budget_with_margin(requested_tokens: usize) -> usize {
4689 ((requested_tokens as f64) / 0.9).ceil() as usize
4690 }
4691
    // With a generous budget the prompt contains every section: the related
    // file, the edit history, and the cursor excerpt.
    #[test]
    fn test_no_truncation_when_within_budget() {
        // Editable range 7..15 covers "editable"; the cursor sits after "edi".
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
4725
    // Compares the prompt produced with a generous budget against the one
    // produced with a tight budget for the same input.
    #[test]
    fn test_truncation_drops_edit_history_when_budget_tight() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![
                make_related_file("r1.rs", "aaaaaaa\n"),
                make_related_file("r2.rs", "bbbbbbb\n"),
            ],
        );

        // Generous budget: related files, edit history and cursor excerpt are
        // all present.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>r1.rs
                aaaaaaa
                <|file_sep|>r2.rs
                bbbbbbb
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // Tight budget: the expected prompt below no longer contains the
        // related files, while the edit history and cursor excerpt remain.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(55)),
            Some(
                indoc! {r#"
                    <|file_sep|>edit history
                    --- a/a.rs
                    +++ b/a.rs
                    -x
                    +y
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    co<|user_cursor|>de
                    <|fim_suffix|>
                    <|fim_middle|>updated
                "#}
                .to_string()
            )
        );
    }
4781
    // A related file with several excerpts can be included partially: under a
    // tight budget only its first excerpt is kept.
    #[test]
    fn test_truncation_includes_partial_excerpts() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("big.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..10,
                        text: "first excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 10..20,
                        text: "second excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 20..30,
                        text: "third excerpt\n".into(),
                        order: 0,
                    },
                ],
            }],
        );

        // Generous budget: all three excerpts appear, separated by "..." lines.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                second excerpt
                ...
                third excerpt
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // Tight budget: only the first excerpt appears, followed by "...".
        assert_eq!(
            format_with_budget(&input, budget_with_margin(50)).unwrap(),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
4848
    // Excerpts with a lower `order` value win when the budget cannot fit all
    // related files.
    #[test]
    fn test_truncation_prioritizes_lower_order_excerpts() {
        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
        // With tight budget, only the lower-order excerpt from file_b should be included.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("file_a.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "low priority content\n".into(),
                        order: 5,
                    }],
                },
                RelatedFile {
                    path: Path::new("file_b.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "high priority content\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both files included; rendered in stable lexicographic order.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>file_a.rs
                low priority content
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // With tight budget, only file_b (lower order) fits.
        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
        // file_a would need another 14 tokens, which doesn't fit.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(52)).unwrap(),
            indoc! {r#"
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
4919
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // One file, three excerpts: two at order 1 and one at order 3. Under a
    // tight budget the order-1 excerpts stay while the order-3 excerpt is
    // dropped, even though all of them belong to the same file. This also
    // preserves the parent invariant: parent outline items have order <= their
    // best child, so they're always included when any child is.
    let mod_header = RelatedExcerpt {
        row_range: 0..5,
        text: "mod header\n".into(),
        order: 1,
    };
    let important_fn = RelatedExcerpt {
        row_range: 5..15,
        text: "important fn\n".into(),
        order: 1,
    };
    let less_important_fn = RelatedExcerpt {
        row_range: 15..30,
        text: "less important fn\n".into(),
        order: 3,
    };
    let related_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![mod_header, important_fn, less_important_fn],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![related_file]);

    // Generous budget: all three excerpts are rendered.
    let expected_with_large_budget = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        expected_with_large_budget.to_string()
    );

    // Tight budget: only the order<=1 excerpts remain (header + important fn);
    // a trailing `...` stands in for the dropped excerpt.
    let expected_with_tight_budget = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)).unwrap(),
        expected_with_tight_budget.to_string()
    );
}
4995
#[test]
fn test_truncation_drops_older_events_first() {
    // Two edit events and no related files; `old.rs` precedes `new.rs` in the
    // event list.
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Generous budget: both events appear in the edit history section.
    let expected_with_large_budget = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        expected_with_large_budget.to_string()
    );

    // Budget of 60: the `old.rs` event is dropped and only `new.rs` remains.
    let expected_with_tight_budget = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(
        format_with_budget(&input, 60).unwrap(),
        expected_with_tight_budget.to_string()
    );
}
5043
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    // The input has one edit event and one related file next to the cursor
    // excerpt.
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    // NOTE(review): despite the test name, what is asserted here is that a
    // budget of 30 produces no prompt at all.
    let prompt = format_with_budget(&input, 30);
    assert!(prompt.is_none());
}
5056
5057 #[track_caller]
5058 fn format_seed_coder(input: &ZetaPromptInput) -> String {
5059 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5060 .expect("seed coder prompt formatting should succeed")
5061 }
5062
5063 #[track_caller]
5064 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5065 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5066 .expect("seed coder prompt formatting should succeed")
5067 }
5068
#[test]
fn test_seed_coder_basic_format() {
    // Editable region 7..15 covers "editable"; the cursor (offset 10) sits
    // after "edi".
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // Expected layout: suffix first, then related files, the edit history and
    // finally the cursor file with the editable region behind the CURRENT
    // marker. The prompt ends right after `<[fim-middle]>` without a newline.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected);
}
5101
#[test]
fn test_v0317_formats_prompt_with_many_related_files() {
    // Build 900 related files that all share the same small body.
    let related_files = (0..900)
        .map(|file_index| {
            let path = format!("related_{file_index}.rs");
            make_related_file(&path, "fn helper() {\n    let value = 1;\n}\n")
        })
        .collect();
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    let formatted =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);

    // A prompt must still be produced, and it must mention the cursor file and
    // contain the cursor marker.
    assert!(formatted.is_some());
    let formatted =
        formatted.expect("v0317 should produce a prompt under high related-file count");
    assert!(formatted.contains("test.rs"));
    assert!(formatted.contains(CURSOR_MARKER));
}
5129
#[test]
fn test_v0327_formats_single_file_prompt_without_related_files() {
    // Twenty newline-terminated lines: "line01\n" through "line20\n".
    let excerpt: String = (1..=20)
        .map(|line_number| format!("line{line_number:02}\n"))
        .collect();
    let cursor_offset = excerpt.find("line10").expect("cursor line exists");
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input(
        &excerpt,
        0..excerpt.len(),
        cursor_offset,
        events,
        related_files,
    );

    let prompt = format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
        .expect("v0327 prompt should fit");

    // The whole excerpt, the edit history, the cursor file header and the
    // cursor marker must all be present.
    assert!(prompt.contains("line01"));
    assert!(prompt.contains("line20"));
    assert!(prompt.contains("<filename>edit_history"));
    assert!(prompt.contains("<filename>test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));

    // Nothing from the related file may appear, even though one was supplied.
    assert!(!prompt.contains("related.rs"));
    assert!(!prompt.contains("fn helper() {}"));
}
5175
#[test]
fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
    // Eighty short lines, "l00\n" through "l79\n", with the cursor at the start
    // of "l40".
    let mut excerpt = String::new();
    for line_number in 0..80 {
        excerpt.push_str(&format!("l{line_number:02}\n"));
    }
    let cursor_offset = excerpt.find("l40").expect("cursor line exists");
    let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);

    let (context, editable_range, context_range, adjusted_cursor) =
        resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);

    // The context must be the full excerpt, leaving the cursor offset untouched.
    assert_eq!(context, excerpt);
    assert_eq!(context_range, 0..excerpt.len());
    assert_eq!(adjusted_cursor, cursor_offset);

    // The editable range strictly surrounds the cursor and stops short of the
    // end of the excerpt.
    assert!(editable_range.start < adjusted_cursor);
    assert!(editable_range.end > adjusted_cursor);
    assert!(editable_range.end < excerpt.len());
}
5194
#[test]
fn test_seed_coder_no_context() {
    // No events and no related files: only the cursor file is rendered.
    // Editable region 7..13 covers "middle"; the cursor (offset 10) sits after
    // "mid".
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected);
}
5212
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Default (large) budget: related file, edit history and cursor file are
    // all rendered.
    let expected_full = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_full);

    // Budget of 24: no prompt is produced at all.
    let too_small = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24);
    assert_eq!(too_small, None);

    // Budget of 40: related file and edit history are dropped, leaving only
    // the cursor section.
    let expected_cursor_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_seed_coder_with_budget(&input, 40),
        expected_cursor_only
    );
}
5261
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // Two related files with one excerpt each: `low_prio.rs` (order 10) is
    // supplied first, `high_prio.rs` (order 1) second.
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // With large budget, both files are included. They are rendered in the
    // order they were supplied: `low_prio.rs` comes before `high_prio.rs`, so
    // the output is neither sorted by path nor by `order`.
    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
            <[fim-suffix]>
            <[fim-prefix]><filename>low_prio.rs
            low prio
            <filename>high_prio.rs
            high prio

            <filename>test.rs
            <<<<<<< CURRENT
            co<|user_cursor|>de
            =======
            <[fim-middle]>"#}
    );

    // With tight budget under the generic heuristic, context is dropped but the
    // minimal cursor section still fits. Note that at this budget neither
    // related file survives, not even the lower-order one.
    assert_eq!(
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
        Some(
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
            .to_string()
        )
    );
}
5326
#[test]
fn test_format_zeta1_from_input_basic() {
    // The excerpt starts at row 0, so `<|start_of_file|>` is expected in the
    // prompt. Bytes 15..41 cover `fn foo() {` and the `let` line; offset 30 is
    // right before `let`.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable = 15..41;
    let context = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable.clone(),
            editable_180: editable.clone(),
            editable_350: editable.clone(),
            editable_150_context_350: context.clone(),
            editable_180_context_350: context.clone(),
            editable_350_context_150: context.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable, context);

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
5390
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The excerpt starts at row 10, and the expected prompt carries no
    // `<|start_of_file|>` marker. Editable and context ranges are both 0..28.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let full_range = 0..28;

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: full_range.clone(),
            editable_180: full_range.clone(),
            editable_350: full_range.clone(),
            editable_150_context_350: full_range.clone(),
            editable_180_context_350: full_range.clone(),
            editable_350_context_150: full_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, full_range.clone(), full_range);

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
5446
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The editable range (10..37) is a strict sub-range of the context range,
    // which spans the whole excerpt; the prefix and suffix comments therefore
    // land outside the editable region markers.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable = 10..37;
    let context = 0..excerpt.len();

    let ranges = ExcerptRanges {
        editable_150: editable.clone(),
        editable_180: editable.clone(),
        editable_350: editable.clone(),
        editable_150_context_350: context.clone(),
        editable_180_context_350: context.clone(),
        editable_350_context_150: context.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ranges,
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable, context);

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```test.rs\n",
        "<|start_of_file|>\n",
        "// prefix\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "<|editable_region_end|>\n",
        "// suffix\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
5508
#[test]
fn test_max_event_count() {
    // Builds the event for `event-{index}.rs` replacing `old-{index}` with
    // `new-{index}`.
    fn make_numbered_event(index: usize) -> Event {
        let path = format!("event-{index}.rs");
        let diff = format!("-old-{index}\n+new-{index}\n");
        make_event(&path, &diff)
    }

    let events = (0..3).map(make_numbered_event).collect();
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Renders the edit history with an unlimited (`usize::MAX`) fourth
    // argument, varying only the final argument.
    let render_history = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // A limit of 5 is above the number of events (3): all of them are rendered.
    let edit_history_section = render_history(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // A limit of 2 keeps only the last two events.
    let edit_history_section = render_history(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // A limit of 0 yields an empty section, without even the header.
    let edit_history_section = render_history(0);
    assert_eq!(&edit_history_section, "");
}
5589
#[test]
fn test_clean_zeta1_model_output_basic() {
    // Model output wrapped in editable-region markers.
    let model_output = indoc! {r#"
        <|editable_region_start|>
        fn main() {
            println!("hello");
        }
        <|editable_region_end|>
    "#};

    // Only the text between the markers is expected back, without a trailing
    // newline.
    let expected = "fn main() {\n    println!(\"hello\");\n}";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5603
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // Model output that carries the zeta1 cursor marker inside the region.
    let model_output = indoc! {r#"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!("hello");
        }
        <|editable_region_end|>
    "#};

    // `<|user_cursor_is_here|>` is expected to come back as `<|user_cursor|>`.
    let expected = "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5620
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    // Output without any region markers is expected back unchanged, trailing
    // newline included.
    let unmarked_output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(unmarked_output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
5627
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    // Start and end markers with nothing between them clean to an empty string.
    let empty_region = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(empty_region).unwrap();
    assert_eq!(cleaned, "");
}
5634
5635 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5636 let mut result = excerpt.to_string();
5637 result.replace_range(
5638 parsed_output.range_in_excerpt.clone(),
5639 &parsed_output.new_editable_region,
5640 );
5641 result
5642 }
5643
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Context spans "ctx start" up to (not including) "after ctx"; the
    // editable region is exactly the "editable old\n" line.
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Applying the parsed output swaps only the editable line.
    let expected = "before ctx\nctx start\neditable new\nctx end\nafter ctx\n";
    assert_eq!(apply_edit(excerpt, &parsed), expected);
}
5670
#[test]
fn test_parse_zeta2_model_output_identity() {
    // The editable region covers "bbb\nccc\n"; the context is the whole excerpt.
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_range = editable_start..excerpt.find("ddd").unwrap();
    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        editable_start,
    );

    // The model echoes the editable region unchanged.
    let unchanged_output = "bbb\nccc\n>>>>>>> UPDATED\n";
    let parsed = parse_zeta2_model_output(
        unchanged_output,
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Applying an identical region must leave the excerpt as it was.
    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5689
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    // Editable and context ranges both span the whole excerpt.
    let excerpt = "hello\nworld\n";
    let whole = 0..excerpt.len();
    let input = make_input_with_context_range(excerpt, whole.clone(), whole, 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    // Output with and without the trailing end marker must produce the same
    // edit, and that edit must not contain the marker.
    let edited_with_marker = apply_edit(excerpt, &with_marker);
    let edited_without_marker = apply_edit(excerpt, &without_marker);
    assert_eq!(edited_with_marker, edited_without_marker);
    assert_eq!(edited_with_marker, "new content\n");
}
5703
#[test]
fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Context spans "ctx start" up to (not including) "after ctx"; the
    // editable region is exactly the "editable old\n" line.
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Compute the directly-applied result first: `parsed` is moved into
    // `parsed_output_to_patch` below.
    let directly_applied = apply_edit(excerpt, &parsed);
    let patch = parsed_output_to_patch(&input, parsed).unwrap();
    let applied_via_patch = udiff::apply_diff_to_string(&patch, excerpt).unwrap();

    // Both routes must yield the same text.
    assert_eq!(applied_via_patch, directly_applied);
}
5730
#[test]
fn test_special_tokens_not_triggered_by_comment_separator() {
    // Regression test for https://github.com/zed-industries/zed/issues/52489
    // The excerpt contains `=======` inside a line comment.
    let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
    let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);

    let has_special_tokens =
        prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix);
    assert!(
        !has_special_tokens,
        "comment containing ======= should not trigger special token detection"
    );
}
5741}