1pub mod excerpt_ranges;
2pub mod multi_region;
3pub mod udiff;
4
5use anyhow::{Result, anyhow};
6use serde::{Deserialize, Serialize};
7use std::fmt::Write;
8use std::ops::Range;
9use std::path::Path;
10use std::sync::Arc;
11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
12
13pub use crate::excerpt_ranges::{
14 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
15};
16
/// Literal marker text representing the user's cursor. It is handed to the multi-region
/// excerpt writers in this file and is listed among the special tokens of those formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
18
/// Fraction of the editable region that may be used as prefill.
///
/// Larger values may result in more robust generation, but the prefilled
/// part of the region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for a byte count: three bytes are counted as one token,
/// using integer division (so the result rounds down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Shrinks a token budget to 90% of its value (rounded down), leaving some
/// slack so the assembled prompt does not overflow the real limit.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const USABLE_FRACTION: f64 = 0.9;
    let scaled = max_tokens as f64 * USABLE_FRACTION;
    scaled.floor() as usize
}
32
33/// Ensure text fits into the tokens budget; trim by line boundaries if needed.
34pub fn clamp_text_to_token_count(text: &str, max_tokens: usize) -> &str {
35 if estimate_tokens(text.len()) <= max_tokens {
36 return text;
37 }
38
39 let mut end_byte_offset = 0;
40
41 for line in text.split_inclusive('\n') {
42 if estimate_tokens(line.len() + end_byte_offset) > max_tokens {
43 break;
44 }
45
46 end_byte_offset += line.len();
47 }
48
49 &text[..end_byte_offset]
50}
51
/// Everything needed to build a prompt for one prediction request: the excerpt
/// around the cursor, recent edit events, related files and diagnostics.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor. It is written into the cursor excerpt
    /// section and compared against related-file paths when filtering redundant excerpts.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt surrounding the cursor.
    pub cursor_excerpt: Arc<str>,
    /// Position of the cursor inside `cursor_excerpt`.
    // NOTE(review): presumably a byte offset, like `syntax_ranges` — confirm.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which `cursor_excerpt` starts, when known. It is added to
    /// excerpt-relative rows to obtain rows comparable with related excerpts.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events that are rendered into the edit-history section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files; `None` is treated like an empty list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer. When formatting, these are only forwarded
    /// for `ZetaFormat::V0420Diagnostics`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    // NOTE(review): the three fields below are not read in the visible part of this
    // file; their meaning is inferred from their names only.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
79
/// Identifies a prompt/output format version.
///
/// Variant names double as the textual names of the formats: they are exposed through
/// `IntoStaticStr` (used by `Display`, `parse` and `options_as_string`), so renaming a
/// variant changes those strings.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    V0331SeedCoderModelPy,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
    /// V0318-style prompt with buffer diagnostics
    V0420Diagnostics,
}
120
121impl std::fmt::Display for ZetaFormat {
122 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
123 write!(f, "{}", <&'static str>::from(self))
124 }
125}
126
127impl ZetaFormat {
128 pub fn parse(format_name: &str) -> Result<Self> {
129 let lower = format_name.to_lowercase();
130
131 // Exact case-insensitive match takes priority, bypassing ambiguity checks.
132 for variant in ZetaFormat::iter() {
133 if <&'static str>::from(&variant).to_lowercase() == lower {
134 return Ok(variant);
135 }
136 }
137
138 let mut results = ZetaFormat::iter().filter(|version| {
139 <&'static str>::from(version)
140 .to_lowercase()
141 .contains(&lower)
142 });
143 let Some(result) = results.next() else {
144 anyhow::bail!(
145 "`{format_name}` did not match any of:\n{}",
146 Self::options_as_string()
147 );
148 };
149 if results.next().is_some() {
150 anyhow::bail!(
151 "`{format_name}` matched more than one of:\n{}",
152 Self::options_as_string()
153 );
154 }
155 Ok(result)
156 }
157
158 pub fn options_as_string() -> String {
159 ZetaFormat::iter()
160 .map(|format| format!("- {}\n", <&'static str>::from(format)))
161 .collect::<Vec<_>>()
162 .concat()
163 }
164}
165
/// An event that can be rendered into the edit-history part of a prompt.
/// Serialized with an internal `"event"` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
177
178impl Event {
179 pub fn in_open_source_repo(&self) -> bool {
180 match self {
181 Event::BufferChange {
182 in_open_source_repo,
183 ..
184 } => *in_open_source_repo,
185 }
186 }
187}
188
189pub fn write_event(prompt: &mut String, event: &Event) {
190 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
191 for component in path.components() {
192 prompt.push('/');
193 write!(prompt, "{}", component.as_os_str().display()).ok();
194 }
195 }
196 match event {
197 Event::BufferChange {
198 path,
199 old_path,
200 diff,
201 predicted,
202 in_open_source_repo: _,
203 } => {
204 if *predicted {
205 prompt.push_str("// User accepted prediction:\n");
206 }
207 prompt.push_str("--- a");
208 write_path_as_unix_str(prompt, old_path.as_ref());
209 prompt.push_str("\n+++ b");
210 write_path_as_unix_str(prompt, path.as_ref());
211 prompt.push('\n');
212 prompt.push_str(diff);
213 }
214 }
215}
216
/// A diagnostic from the active buffer together with the code snippet it refers to.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    // NOTE(review): numeric severity encoding is not visible here — confirm against the producer.
    pub severity: Option<i32>,
    /// Diagnostic message; rendered between `*…*` in the diagnostics section.
    pub message: String,
    /// Source text shown in a fenced block under the message (clamped to a token limit).
    pub snippet: String,
    /// Buffer rows covered by `snippet`; used to order diagnostics by distance from the cursor row.
    pub snippet_buffer_row_range: Range<u32>,
    // NOTE(review): presumably the diagnostic's byte range within `snippet` — confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
225
/// A file other than (or in addition to) the cursor excerpt, with selected excerpts
/// that may be included in the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path when dropping redundant excerpts.
    pub path: Arc<Path>,
    // NOTE(review): presumably the last row of the file — not used in the visible code.
    pub max_row: u32,
    /// Excerpts taken from this file. Files left with no excerpts are removed by
    /// `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
234
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt; compared against the cursor excerpt's
    /// row range to detect excerpts that lie entirely inside it.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    // NOTE(review): ordering/priority semantics are not visible here — confirm with the consumer.
    #[serde(default)]
    pub order: usize,
}
242
243pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
244 special_tokens_for_format(format).iter().any(|token| {
245 if let Some(line_token) = token.strip_suffix('\n') {
246 input.cursor_excerpt.lines().any(|line| line == line_token)
247 } else {
248 input.cursor_excerpt.contains(token)
249 }
250 })
251}
252
253pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
254 let max_prompt_tokens = match format {
255 ZetaFormat::V0112MiddleAtEnd
256 | ZetaFormat::V0113Ordered
257 | ZetaFormat::V0114180EditableRegion
258 | ZetaFormat::V0120GitMergeMarkers
259 | ZetaFormat::V0131GitMergeMarkersPrefix
260 | ZetaFormat::V0211Prefill
261 | ZetaFormat::V0211SeedCoder
262 | ZetaFormat::v0226Hashline
263 | ZetaFormat::V0304VariableEdit
264 | ZetaFormat::V0304SeedNoEdits
265 | ZetaFormat::V0306SeedMultiRegions
266 | ZetaFormat::V0316SeedMultiRegions
267 | ZetaFormat::V0317SeedMultiRegions
268 | ZetaFormat::V0331SeedCoderModelPy
269 | ZetaFormat::V0318SeedMultiRegions => 4096,
270 ZetaFormat::V0420Diagnostics => 8192,
271 ZetaFormat::V0327SingleFile => 16384,
272 };
273
274 format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
275}
276
277pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
278 match format {
279 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
280 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
281 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
282 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
283 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
284 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
285 ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
286 seed_coder::special_tokens()
287 }
288 ZetaFormat::v0226Hashline => hashline::special_tokens(),
289 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
290 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
291 ZetaFormat::V0316SeedMultiRegions => {
292 static TOKENS: &[&str] = &[
293 seed_coder::FIM_SUFFIX,
294 seed_coder::FIM_PREFIX,
295 seed_coder::FIM_MIDDLE,
296 seed_coder::FILE_MARKER,
297 multi_region::V0316_END_MARKER,
298 CURSOR_MARKER,
299 multi_region::MARKER_TAG_PREFIX,
300 ];
301 TOKENS
302 }
303 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
304 static TOKENS: &[&str] = &[
305 seed_coder::FIM_SUFFIX,
306 seed_coder::FIM_PREFIX,
307 seed_coder::FIM_MIDDLE,
308 seed_coder::FILE_MARKER,
309 multi_region::V0318_END_MARKER,
310 CURSOR_MARKER,
311 multi_region::MARKER_TAG_PREFIX,
312 ];
313 TOKENS
314 }
315 ZetaFormat::V0317SeedMultiRegions => {
316 static TOKENS: &[&str] = &[
317 seed_coder::FIM_SUFFIX,
318 seed_coder::FIM_PREFIX,
319 seed_coder::FIM_MIDDLE,
320 seed_coder::FILE_MARKER,
321 multi_region::V0317_END_MARKER,
322 CURSOR_MARKER,
323 multi_region::RELATIVE_MARKER_TAG_PREFIX,
324 ];
325 TOKENS
326 }
327 ZetaFormat::V0327SingleFile => {
328 static TOKENS: &[&str] = &[
329 seed_coder::FIM_SUFFIX,
330 seed_coder::FIM_PREFIX,
331 seed_coder::FIM_MIDDLE,
332 seed_coder::FILE_MARKER,
333 multi_region::V0327_END_MARKER,
334 CURSOR_MARKER,
335 multi_region::MARKER_TAG_PREFIX,
336 ];
337 TOKENS
338 }
339 ZetaFormat::V0306SeedMultiRegions => {
340 static TOKENS: &[&str] = &[
341 seed_coder::FIM_SUFFIX,
342 seed_coder::FIM_PREFIX,
343 seed_coder::FIM_MIDDLE,
344 seed_coder::FILE_MARKER,
345 seed_coder::START_MARKER,
346 seed_coder::SEPARATOR,
347 seed_coder::END_MARKER,
348 CURSOR_MARKER,
349 multi_region::MARKER_TAG_PREFIX,
350 ];
351 TOKENS
352 }
353 }
354}
355
356/// Returns the (editable_token_limit, context_token_limit) for a given format.
357pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
358 match format {
359 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
360 ZetaFormat::V0114180EditableRegion => (180, 350),
361 ZetaFormat::V0120GitMergeMarkers
362 | ZetaFormat::V0131GitMergeMarkersPrefix
363 | ZetaFormat::V0211Prefill
364 | ZetaFormat::V0211SeedCoder
365 | ZetaFormat::V0331SeedCoderModelPy
366 | ZetaFormat::v0226Hashline
367 | ZetaFormat::V0306SeedMultiRegions
368 | ZetaFormat::V0316SeedMultiRegions
369 | ZetaFormat::V0318SeedMultiRegions
370 | ZetaFormat::V0420Diagnostics
371 | ZetaFormat::V0317SeedMultiRegions
372 | ZetaFormat::V0327SingleFile
373 | ZetaFormat::V0304SeedNoEdits => (350, 150),
374
375 ZetaFormat::V0304VariableEdit => (1024, 0),
376 }
377}
378
379pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
380 match format {
381 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
382 ZetaFormat::V0112MiddleAtEnd
383 | ZetaFormat::V0113Ordered
384 | ZetaFormat::V0114180EditableRegion
385 | ZetaFormat::V0120GitMergeMarkers
386 | ZetaFormat::V0131GitMergeMarkersPrefix
387 | ZetaFormat::V0211Prefill
388 | ZetaFormat::V0211SeedCoder
389 | ZetaFormat::V0331SeedCoderModelPy
390 | ZetaFormat::V0304VariableEdit
391 | ZetaFormat::V0306SeedMultiRegions
392 | ZetaFormat::V0304SeedNoEdits => &[],
393 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
394 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
395 &[multi_region::V0318_END_MARKER]
396 }
397 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
398 ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
399 }
400}
401
402/// Return (editable_range, context_range) for the prompt format
403pub fn excerpt_ranges_for_format(
404 format: ZetaFormat,
405 ranges: &ExcerptRanges,
406) -> (Range<usize>, Range<usize>) {
407 match format {
408 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
409 ranges.editable_150.clone(),
410 ranges.editable_150_context_350.clone(),
411 ),
412 ZetaFormat::V0114180EditableRegion => (
413 ranges.editable_180.clone(),
414 ranges.editable_180_context_350.clone(),
415 ),
416 ZetaFormat::V0120GitMergeMarkers
417 | ZetaFormat::V0131GitMergeMarkersPrefix
418 | ZetaFormat::V0211Prefill
419 | ZetaFormat::V0211SeedCoder
420 | ZetaFormat::V0331SeedCoderModelPy
421 | ZetaFormat::v0226Hashline
422 | ZetaFormat::V0304SeedNoEdits
423 | ZetaFormat::V0306SeedMultiRegions
424 | ZetaFormat::V0316SeedMultiRegions
425 | ZetaFormat::V0318SeedMultiRegions
426 | ZetaFormat::V0317SeedMultiRegions
427 | ZetaFormat::V0420Diagnostics => (
428 ranges.editable_350.clone(),
429 ranges.editable_350_context_150.clone(),
430 ),
431 ZetaFormat::V0327SingleFile => (
432 ranges.editable_350_context_150.clone(),
433 ranges.context_8192.clone().unwrap_or(
434 // shouldn't be used, only for compat with old data/clients
435 ranges.editable_350_context_150.clone(),
436 ),
437 ),
438
439 ZetaFormat::V0304VariableEdit => {
440 let context = ranges
441 .editable_350_context_1024
442 .clone()
443 .or(ranges.editable_350_context_512.clone())
444 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
445 (context.clone(), context)
446 }
447 }
448}
449
450pub fn write_cursor_excerpt_section_for_format(
451 format: ZetaFormat,
452 prompt: &mut String,
453 path: &Path,
454 context: &str,
455 editable_range: &Range<usize>,
456 cursor_offset: usize,
457) {
458 match format {
459 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
460 prompt,
461 path,
462 context,
463 editable_range,
464 cursor_offset,
465 ),
466 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
467 v0113_ordered::write_cursor_excerpt_section(
468 prompt,
469 path,
470 context,
471 editable_range,
472 cursor_offset,
473 )
474 }
475 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
476 prompt,
477 path,
478 context,
479 editable_range,
480 cursor_offset,
481 ),
482 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
483 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
484 prompt,
485 path,
486 context,
487 editable_range,
488 cursor_offset,
489 )
490 }
491 ZetaFormat::V0211SeedCoder
492 | ZetaFormat::V0331SeedCoderModelPy
493 | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
494 prompt,
495 path,
496 context,
497 editable_range,
498 cursor_offset,
499 ),
500 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
501 prompt,
502 path,
503 context,
504 editable_range,
505 cursor_offset,
506 ),
507 ZetaFormat::V0304VariableEdit => {
508 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
509 }
510 ZetaFormat::V0306SeedMultiRegions => {
511 prompt.push_str(&build_v0306_cursor_prefix(
512 path,
513 context,
514 editable_range,
515 cursor_offset,
516 ));
517 }
518 ZetaFormat::V0316SeedMultiRegions => {
519 prompt.push_str(&build_v0316_cursor_prefix(
520 path,
521 context,
522 editable_range,
523 cursor_offset,
524 ));
525 }
526 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
527 prompt.push_str(&build_v0318_cursor_prefix(
528 path,
529 context,
530 editable_range,
531 cursor_offset,
532 ));
533 }
534 ZetaFormat::V0317SeedMultiRegions => {
535 prompt.push_str(&build_v0317_cursor_prefix(
536 path,
537 context,
538 editable_range,
539 cursor_offset,
540 ));
541 }
542 ZetaFormat::V0327SingleFile => {
543 prompt.push_str(&build_v0318_cursor_prefix(
544 path,
545 context,
546 editable_range,
547 cursor_offset,
548 ));
549 }
550 }
551}
552
553fn build_v0306_cursor_prefix(
554 path: &Path,
555 context: &str,
556 editable_range: &Range<usize>,
557 cursor_offset: usize,
558) -> String {
559 let mut section = String::new();
560 let path_str = path.to_string_lossy();
561 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
562
563 section.push_str(&context[..editable_range.start]);
564 section.push_str(seed_coder::START_MARKER);
565
566 let editable_text = &context[editable_range.clone()];
567 let cursor_in_editable = cursor_offset - editable_range.start;
568 multi_region::write_editable_with_markers(
569 &mut section,
570 editable_text,
571 cursor_in_editable,
572 CURSOR_MARKER,
573 );
574
575 if !section.ends_with('\n') {
576 section.push('\n');
577 }
578 section.push_str(seed_coder::SEPARATOR);
579 section
580}
581
582fn build_v0316_cursor_prefix(
583 path: &Path,
584 context: &str,
585 editable_range: &Range<usize>,
586 cursor_offset: usize,
587) -> String {
588 let mut section = String::new();
589 let path_str = path.to_string_lossy();
590 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
591
592 section.push_str(&context[..editable_range.start]);
593
594 let editable_text = &context[editable_range.clone()];
595 let cursor_in_editable = cursor_offset - editable_range.start;
596 multi_region::write_editable_with_markers_v0316(
597 &mut section,
598 editable_text,
599 cursor_in_editable,
600 CURSOR_MARKER,
601 );
602
603 if !section.ends_with('\n') {
604 section.push('\n');
605 }
606 section
607}
608
609fn build_v0318_cursor_prefix(
610 path: &Path,
611 context: &str,
612 editable_range: &Range<usize>,
613 cursor_offset: usize,
614) -> String {
615 let mut section = String::new();
616 let path_str = path.to_string_lossy();
617 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
618
619 section.push_str(&context[..editable_range.start]);
620
621 let editable_text = &context[editable_range.clone()];
622 let cursor_in_editable = cursor_offset - editable_range.start;
623 multi_region::write_editable_with_markers_v0318(
624 &mut section,
625 editable_text,
626 cursor_in_editable,
627 CURSOR_MARKER,
628 );
629
630 if !section.ends_with('\n') {
631 section.push('\n');
632 }
633 section
634}
635
636fn build_v0317_cursor_prefix(
637 path: &Path,
638 context: &str,
639 editable_range: &Range<usize>,
640 cursor_offset: usize,
641) -> String {
642 let mut section = String::new();
643 let path_str = path.to_string_lossy();
644 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
645
646 section.push_str(&context[..editable_range.start]);
647
648 let editable_text = &context[editable_range.clone()];
649 let cursor_in_editable = cursor_offset - editable_range.start;
650 multi_region::write_editable_with_markers_v0317(
651 &mut section,
652 editable_text,
653 cursor_in_editable,
654 CURSOR_MARKER,
655 );
656
657 if !section.ends_with('\n') {
658 section.push('\n');
659 }
660 section
661}
662
/// Converts a byte range within `text` into a half-open range of zero-based rows.
///
/// The start row is the number of newlines before `range.start`. The end row adds the
/// newlines inside the range, plus one more when the text up to `range.end` does not
/// end with a newline (so a trailing partial line is counted).
///
/// Panics if the range is out of bounds or not on character boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |slice: &str| slice.bytes().filter(|byte| *byte == b'\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let rows_spanned = newline_count(&text[range.start..range.end]);
    let ends_mid_line = !text[..range.end].ends_with('\n');

    let mut last_row = first_row + rows_spanned;
    if ends_mid_line {
        last_row += 1;
    }
    first_row..last_row
}
671
672fn assemble_single_file_fim_prompt(
673 context: &str,
674 editable_range: &Range<usize>,
675 cursor_prefix_section: &str,
676 events: &[Arc<Event>],
677 max_tokens: usize,
678) -> String {
679 let suffix_section = seed_coder::build_suffix_section(context, editable_range);
680
681 let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
682 let cursor_prefix_tokens =
683 estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
684 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
685
686 let edit_history_section = format_edit_history_within_budget(
687 events,
688 seed_coder::FILE_MARKER,
689 "edit_history",
690 budget_after_cursor,
691 max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
692 );
693
694 let mut prompt = String::new();
695 prompt.push_str(&suffix_section);
696 prompt.push_str(seed_coder::FIM_PREFIX);
697 prompt.push_str(&edit_history_section);
698 if !edit_history_section.is_empty() {
699 prompt.push('\n');
700 }
701 prompt.push_str(cursor_prefix_section);
702 prompt.push_str(seed_coder::FIM_MIDDLE);
703 prompt
704}
705
706pub fn format_prompt_with_budget_for_format(
707 input: &ZetaPromptInput,
708 format: ZetaFormat,
709 max_tokens: usize,
710) -> Option<String> {
711 let (context, editable_range, context_range, cursor_offset) =
712 resolve_cursor_region(input, format);
713 let path = &*input.cursor_path;
714
715 let empty_files = Vec::new();
716 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
717 let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
718 let relative_row_range =
719 offset_range_to_row_range(&input.cursor_excerpt, context_range.clone());
720 let row_range = relative_row_range.start + cursor_excerpt_start_row
721 ..relative_row_range.end + cursor_excerpt_start_row;
722 filter_redundant_excerpts(
723 input_related_files.to_vec(),
724 input.cursor_path.as_ref(),
725 row_range,
726 )
727 } else {
728 input_related_files.to_vec()
729 };
730 let related_files = filtered_related_files.as_slice();
731
732 let prompt = match format {
733 ZetaFormat::V0211SeedCoder
734 | ZetaFormat::V0331SeedCoderModelPy
735 | ZetaFormat::V0304SeedNoEdits
736 | ZetaFormat::V0306SeedMultiRegions
737 | ZetaFormat::V0316SeedMultiRegions
738 | ZetaFormat::V0318SeedMultiRegions
739 | ZetaFormat::V0317SeedMultiRegions
740 | ZetaFormat::V0420Diagnostics => {
741 let mut cursor_section = String::new();
742
743 write_cursor_excerpt_section_for_format(
744 format,
745 &mut cursor_section,
746 path,
747 context,
748 &editable_range,
749 cursor_offset,
750 );
751
752 let cursor_buffer_row = input.excerpt_start_row.map(|excerpt_start_row| {
753 excerpt_start_row
754 + input.cursor_excerpt[..context_range.start + cursor_offset]
755 .bytes()
756 .filter(|byte| *byte == b'\n')
757 .count() as u32
758 });
759
760 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
761 seed_coder::assemble_fim_prompt(
762 context,
763 &editable_range,
764 &cursor_section,
765 &input.events,
766 related_files,
767 if format == ZetaFormat::V0420Diagnostics {
768 &input.active_buffer_diagnostics
769 } else {
770 &[]
771 },
772 cursor_buffer_row,
773 budget_with_margin,
774 )
775 }
776 ZetaFormat::V0327SingleFile => {
777 let mut cursor_section = String::new();
778 write_cursor_excerpt_section_for_format(
779 format,
780 &mut cursor_section,
781 path,
782 context,
783 &editable_range,
784 cursor_offset,
785 );
786
787 assemble_single_file_fim_prompt(
788 context,
789 &editable_range,
790 &cursor_section,
791 &input.events,
792 apply_prompt_budget_margin(max_tokens),
793 )
794 }
795 _ => {
796 let mut cursor_section = String::new();
797 write_cursor_excerpt_section_for_format(
798 format,
799 &mut cursor_section,
800 path,
801 context,
802 &editable_range,
803 cursor_offset,
804 );
805
806 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
807 let cursor_tokens = estimate_tokens(cursor_section.len());
808 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
809
810 let edit_history_section = format_edit_history_within_budget(
811 &input.events,
812 "<|file_sep|>",
813 "edit history",
814 remaining_budget,
815 max_edit_event_count_for_format(&format),
816 );
817 let edit_history_tokens = estimate_tokens(edit_history_section.len());
818 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
819
820 let related_files_section = format_related_files_within_budget(
821 &related_files,
822 "<|file_sep|>",
823 "",
824 remaining_budget,
825 );
826
827 let mut prompt = String::new();
828 prompt.push_str(&related_files_section);
829 prompt.push_str(&edit_history_section);
830 prompt.push_str(&cursor_section);
831 prompt
832 }
833 };
834 let prompt_tokens = estimate_tokens(prompt.len());
835 if prompt_tokens > max_tokens {
836 return None;
837 }
838 return Some(prompt);
839}
840
841fn format_active_buffer_diagnostics_with_budget(
842 diagnostics: &[ActiveBufferDiagnostic],
843 cursor_buffer_row: Option<u32>,
844 budget: usize,
845) -> String {
846 if diagnostics.is_empty() || budget == 0 {
847 return String::new();
848 }
849
850 let mut diagnostic_indices = (0..diagnostics.len()).collect::<Vec<_>>();
851 if let Some(cursor_buffer_row) = cursor_buffer_row {
852 diagnostic_indices.sort_by_key(|index| {
853 let range = &diagnostics[*index].snippet_buffer_row_range;
854 u32::abs_diff(cursor_buffer_row, range.start)
855 + u32::abs_diff(cursor_buffer_row, range.end)
856 });
857 }
858
859 let mut output = format!("{}diagnostics\n", seed_coder::FILE_MARKER);
860 let header_tokens = estimate_tokens(output.len());
861 if header_tokens > budget {
862 return String::new();
863 }
864
865 let mut used_tokens = header_tokens;
866 let mut included_diagnostics = 0;
867 for diagnostic_index in diagnostic_indices.into_iter().take(10) {
868 let diagnostic = &diagnostics[diagnostic_index];
869 let snippet = clamp_text_to_token_count(&diagnostic.snippet, 256);
870
871 let diagnostic_section = format!(
872 "*{}*:\n```\n{}{}\n```\n",
873 diagnostic.message,
874 snippet,
875 if snippet.len() < diagnostic.snippet.len() {
876 "..."
877 } else {
878 ""
879 }
880 );
881 let diagnostic_tokens = estimate_tokens(diagnostic_section.len());
882 if used_tokens + diagnostic_tokens > budget {
883 break;
884 }
885 output.push_str(&diagnostic_section);
886 used_tokens += diagnostic_tokens;
887 included_diagnostics += 1;
888 }
889
890 if included_diagnostics == 0 {
891 String::new()
892 } else {
893 output
894 }
895}
896
897pub fn filter_redundant_excerpts(
898 mut related_files: Vec<RelatedFile>,
899 cursor_path: &Path,
900 cursor_row_range: Range<u32>,
901) -> Vec<RelatedFile> {
902 for file in &mut related_files {
903 if file.path.as_ref() == cursor_path {
904 file.excerpts.retain(|excerpt| {
905 excerpt.row_range.start < cursor_row_range.start
906 || excerpt.row_range.end > cursor_row_range.end
907 });
908 }
909 }
910 related_files.retain(|file| !file.excerpts.is_empty());
911 related_files
912}
913
914pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
915 match format {
916 ZetaFormat::V0112MiddleAtEnd
917 | ZetaFormat::V0113Ordered
918 | ZetaFormat::V0114180EditableRegion
919 | ZetaFormat::V0120GitMergeMarkers
920 | ZetaFormat::V0131GitMergeMarkersPrefix
921 | ZetaFormat::V0211Prefill
922 | ZetaFormat::V0211SeedCoder
923 | ZetaFormat::V0331SeedCoderModelPy
924 | ZetaFormat::v0226Hashline
925 | ZetaFormat::V0304SeedNoEdits
926 | ZetaFormat::V0304VariableEdit
927 | ZetaFormat::V0306SeedMultiRegions
928 | ZetaFormat::V0316SeedMultiRegions
929 | ZetaFormat::V0318SeedMultiRegions
930 | ZetaFormat::V0317SeedMultiRegions
931 | ZetaFormat::V0420Diagnostics
932 | ZetaFormat::V0327SingleFile => 6,
933 }
934}
935
936pub fn get_prefill_for_format(
937 format: ZetaFormat,
938 context: &str,
939 editable_range: &Range<usize>,
940) -> String {
941 match format {
942 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
943 ZetaFormat::V0112MiddleAtEnd
944 | ZetaFormat::V0113Ordered
945 | ZetaFormat::V0114180EditableRegion
946 | ZetaFormat::V0120GitMergeMarkers
947 | ZetaFormat::V0131GitMergeMarkersPrefix
948 | ZetaFormat::V0211SeedCoder
949 | ZetaFormat::V0331SeedCoderModelPy
950 | ZetaFormat::v0226Hashline
951 | ZetaFormat::V0304VariableEdit => String::new(),
952 ZetaFormat::V0304SeedNoEdits
953 | ZetaFormat::V0306SeedMultiRegions
954 | ZetaFormat::V0316SeedMultiRegions
955 | ZetaFormat::V0318SeedMultiRegions
956 | ZetaFormat::V0317SeedMultiRegions
957 | ZetaFormat::V0420Diagnostics
958 | ZetaFormat::V0327SingleFile => String::new(),
959 }
960}
961
962pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
963 match format {
964 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
965 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
966 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
967 ZetaFormat::V0211SeedCoder
968 | ZetaFormat::V0331SeedCoderModelPy
969 | ZetaFormat::V0304SeedNoEdits
970 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
971 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
972 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
973 ZetaFormat::V0420Diagnostics => Some(multi_region::V0318_END_MARKER),
974 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
975 ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
976
977 ZetaFormat::V0112MiddleAtEnd
978 | ZetaFormat::V0113Ordered
979 | ZetaFormat::V0114180EditableRegion
980 | ZetaFormat::v0226Hashline
981 | ZetaFormat::V0304VariableEdit => None,
982 }
983}
984
985pub fn encode_patch_as_output_for_format(
986 format: ZetaFormat,
987 old_editable_region: &str,
988 patch: &str,
989 cursor_offset: Option<usize>,
990) -> Result<Option<String>> {
991 match format {
992 ZetaFormat::v0226Hashline => {
993 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
994 }
995 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
996 old_editable_region,
997 patch,
998 cursor_offset,
999 )
1000 .map(Some),
1001 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
1002 Ok(seed_coder::no_edits(patch))
1003 }
1004 ZetaFormat::V0316SeedMultiRegions => {
1005 let empty_patch = patch.lines().count() <= 3;
1006 if empty_patch {
1007 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
1008 let marker_num =
1009 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1010 let tag = multi_region::marker_tag(marker_num);
1011 Ok(Some(format!(
1012 "{tag}{tag}{}",
1013 multi_region::V0316_END_MARKER
1014 )))
1015 } else {
1016 Ok(None)
1017 }
1018 }
1019 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1020 let empty_patch = patch.lines().count() <= 3;
1021 if empty_patch {
1022 let marker_offsets =
1023 multi_region::compute_marker_offsets_v0318(old_editable_region);
1024 let marker_num =
1025 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1026 let tag = multi_region::marker_tag(marker_num);
1027 Ok(Some(format!(
1028 "{tag}{tag}{}",
1029 multi_region::V0318_END_MARKER
1030 )))
1031 } else {
1032 Ok(None)
1033 }
1034 }
1035 ZetaFormat::V0317SeedMultiRegions => {
1036 let empty_patch = patch.lines().count() <= 3;
1037 if empty_patch {
1038 let tag = multi_region::marker_tag_relative(0);
1039 Ok(Some(format!(
1040 "{tag}{tag}{}",
1041 multi_region::V0317_END_MARKER
1042 )))
1043 } else {
1044 Ok(None)
1045 }
1046 }
1047 ZetaFormat::V0327SingleFile => {
1048 let empty_patch = patch.lines().count() <= 3;
1049 if empty_patch {
1050 let marker_offsets =
1051 multi_region::compute_marker_offsets_v0318(old_editable_region);
1052 let marker_num =
1053 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1054 let tag = multi_region::marker_tag(marker_num);
1055 Ok(Some(format!(
1056 "{tag}{tag}{}",
1057 multi_region::V0327_END_MARKER
1058 )))
1059 } else {
1060 Ok(None)
1061 }
1062 }
1063 _ => Ok(None),
1064 }
1065}
1066
1067/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
1068/// extracted), produce the expected model output string for training.
1069pub fn format_expected_output(
1070 input: &ZetaPromptInput,
1071 format: ZetaFormat,
1072 patch: &str,
1073 cursor_offset: Option<usize>,
1074) -> Result<String> {
1075 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1076 let mut old_editable = context[editable_range].to_string();
1077 if !old_editable.is_empty() && !old_editable.ends_with('\n') {
1078 old_editable.push('\n');
1079 }
1080
1081 // Formats with their own output encoding (hashline, variable-edit,
1082 // multi-region empty patches) are handled here.
1083 if let Some(output) =
1084 encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
1085 {
1086 return Ok(output);
1087 }
1088
1089 let empty_patch = patch.lines().count() <= 3;
1090
1091 match format {
1092 // Multi-region formats: non-empty patches need diff application
1093 // then marker-span encoding.
1094 ZetaFormat::V0316SeedMultiRegions => {
1095 let (new_editable, first_hunk_offset) =
1096 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1097 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1098 multi_region::encode_from_old_and_new_v0316(
1099 &old_editable,
1100 &new_editable,
1101 cursor_in_new,
1102 CURSOR_MARKER,
1103 multi_region::V0316_END_MARKER,
1104 )
1105 }
1106 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1107 let (new_editable, first_hunk_offset) =
1108 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1109 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1110 multi_region::encode_from_old_and_new_v0318(
1111 &old_editable,
1112 &new_editable,
1113 cursor_in_new,
1114 CURSOR_MARKER,
1115 multi_region::V0318_END_MARKER,
1116 )
1117 }
1118 ZetaFormat::V0327SingleFile => {
1119 let (new_editable, first_hunk_offset) =
1120 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1121 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1122 multi_region::encode_from_old_and_new_v0318(
1123 &old_editable,
1124 &new_editable,
1125 cursor_in_new,
1126 CURSOR_MARKER,
1127 multi_region::V0327_END_MARKER,
1128 )
1129 }
1130 ZetaFormat::V0317SeedMultiRegions => {
1131 let (new_editable, first_hunk_offset) =
1132 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1133 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1134 multi_region::encode_from_old_and_new_v0317(
1135 &old_editable,
1136 &new_editable,
1137 cursor_in_new,
1138 CURSOR_MARKER,
1139 multi_region::V0317_END_MARKER,
1140 )
1141 }
1142 // V0131-style formats and fallback: produce new editable text with
1143 // cursor marker inserted, followed by the end marker.
1144 ZetaFormat::V0112MiddleAtEnd
1145 | ZetaFormat::V0113Ordered
1146 | ZetaFormat::V0114180EditableRegion
1147 | ZetaFormat::V0120GitMergeMarkers
1148 | ZetaFormat::V0131GitMergeMarkersPrefix
1149 | ZetaFormat::V0211Prefill
1150 | ZetaFormat::V0211SeedCoder
1151 | ZetaFormat::v0226Hashline
1152 | ZetaFormat::V0304VariableEdit
1153 | ZetaFormat::V0304SeedNoEdits
1154 | ZetaFormat::V0331SeedCoderModelPy
1155 | ZetaFormat::V0306SeedMultiRegions => {
1156 let (mut result, first_hunk_offset) = if empty_patch {
1157 (old_editable.clone(), None)
1158 } else {
1159 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1160 };
1161
1162 if let Some(cursor) = cursor_offset {
1163 let hunk_start = if !empty_patch {
1164 first_hunk_offset.unwrap_or(0)
1165 } else {
1166 0
1167 };
1168 let offset = (hunk_start + cursor).min(result.len());
1169 result.insert_str(offset, CURSOR_MARKER);
1170 }
1171
1172 if !result.is_empty() && !result.ends_with('\n') {
1173 result.push('\n');
1174 }
1175
1176 if let Some(end_marker) = output_end_marker_for_format(format) {
1177 result.push_str(end_marker);
1178 }
1179
1180 Ok(result)
1181 }
1182 }
1183}
1184
/// Locate the cursor inside `new_text` after a diff has been applied.
///
/// The cursor offset is shifted by the first hunk's offset (zero when absent)
/// and clamped to the length of `new_text`. Returns `None` when there is no
/// cursor.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let shifted = first_hunk_offset.unwrap_or(0) + cursor;
    Some(shifted.min(new_text.len()))
}
1196
/// Decoded model output: the replacement text for the editable region, where
/// it applies in the cursor excerpt, and where the cursor marker was found.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1206
/// Predicted cursor location, as built by `cursor_position_from_parsed_output`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Path of the file holding the cursor (lossy string form of `cursor_path`).
    pub path: String,
    /// Number of newlines before the cursor: those in the excerpt ahead of the
    /// editable region plus those in the new editable text ahead of the cursor.
    pub row: usize,
    /// Byte distance from the preceding newline (or from the start of the
    /// excerpt when there is none) to the cursor.
    pub column: usize,
    /// Start of the editable region in the excerpt plus the cursor offset
    /// within the new editable text.
    pub offset: usize,
    /// Cursor byte offset within the new editable region.
    pub editable_region_offset: usize,
}
1215
1216pub fn parsed_output_from_editable_region(
1217 range_in_excerpt: Range<usize>,
1218 mut new_editable_region: String,
1219) -> ParsedOutput {
1220 let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1221 if let Some(offset) = cursor_offset_in_new_editable_region {
1222 new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1223 }
1224
1225 ParsedOutput {
1226 new_editable_region,
1227 range_in_excerpt,
1228 cursor_offset_in_new_editable_region,
1229 }
1230}
1231
1232/// Parse model output for the given zeta format
1233pub fn parse_zeta2_model_output(
1234 output: &str,
1235 format: ZetaFormat,
1236 prompt_inputs: &ZetaPromptInput,
1237) -> Result<ParsedOutput> {
1238 let output = match output_end_marker_for_format(format) {
1239 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1240 None => output,
1241 };
1242
1243 let (context, editable_range_in_context, context_range, cursor_offset) =
1244 resolve_cursor_region(prompt_inputs, format);
1245 let context_start = context_range.start;
1246 let old_editable_region = &context[editable_range_in_context.clone()];
1247 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1248
1249 let (range_in_context, output) = match format {
1250 ZetaFormat::v0226Hashline => (
1251 editable_range_in_context,
1252 if hashline::output_has_edit_commands(output) {
1253 hashline::apply_edit_commands(old_editable_region, output)
1254 } else {
1255 output.to_string()
1256 },
1257 ),
1258 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1259 ZetaFormat::V0304SeedNoEdits => (
1260 editable_range_in_context,
1261 if output.starts_with(seed_coder::NO_EDITS) {
1262 old_editable_region.to_string()
1263 } else {
1264 output.to_string()
1265 },
1266 ),
1267 ZetaFormat::V0306SeedMultiRegions => (
1268 editable_range_in_context,
1269 if output.starts_with(seed_coder::NO_EDITS) {
1270 old_editable_region.to_string()
1271 } else {
1272 multi_region::apply_marker_span(old_editable_region, output)?
1273 },
1274 ),
1275 ZetaFormat::V0316SeedMultiRegions => (
1276 editable_range_in_context,
1277 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1278 ),
1279 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => (
1280 editable_range_in_context,
1281 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1282 ),
1283 ZetaFormat::V0317SeedMultiRegions => (
1284 editable_range_in_context,
1285 multi_region::apply_marker_span_v0317(
1286 old_editable_region,
1287 output,
1288 Some(cursor_offset_in_editable),
1289 )?,
1290 ),
1291 ZetaFormat::V0327SingleFile => (
1292 editable_range_in_context,
1293 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1294 ),
1295 _ => (editable_range_in_context, output.to_string()),
1296 };
1297
1298 let range_in_excerpt =
1299 range_in_context.start + context_start..range_in_context.end + context_start;
1300
1301 Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1302}
1303
1304pub fn parse_zeta2_model_output_as_patch(
1305 output: &str,
1306 format: ZetaFormat,
1307 prompt_inputs: &ZetaPromptInput,
1308) -> Result<String> {
1309 let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1310 parsed_output_to_patch(prompt_inputs, parsed)
1311}
1312
1313pub fn cursor_position_from_parsed_output(
1314 prompt_inputs: &ZetaPromptInput,
1315 parsed: &ParsedOutput,
1316) -> Option<CursorPosition> {
1317 let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1318 let editable_region_offset = parsed.range_in_excerpt.start;
1319 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1320
1321 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1322
1323 let new_editable_region = &parsed.new_editable_region;
1324 let prefix_end = cursor_offset.min(new_editable_region.len());
1325 let new_region_prefix = &new_editable_region[..prefix_end];
1326
1327 let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1328
1329 let column = match new_region_prefix.rfind('\n') {
1330 Some(last_newline) => cursor_offset - last_newline - 1,
1331 None => {
1332 let content_prefix = &excerpt[..editable_region_offset];
1333 let content_column = match content_prefix.rfind('\n') {
1334 Some(last_newline) => editable_region_offset - last_newline - 1,
1335 None => editable_region_offset,
1336 };
1337 content_column + cursor_offset
1338 }
1339 };
1340
1341 Some(CursorPosition {
1342 path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1343 row,
1344 column,
1345 offset: editable_region_offset + cursor_offset,
1346 editable_region_offset: cursor_offset,
1347 })
1348}
1349
1350pub fn parsed_output_to_patch(
1351 prompt_inputs: &ZetaPromptInput,
1352 parsed: ParsedOutput,
1353) -> Result<String> {
1354 let range_in_excerpt = parsed.range_in_excerpt;
1355 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1356 let old_text = excerpt[range_in_excerpt.clone()].to_string();
1357 let mut new_text = parsed.new_editable_region;
1358
1359 let mut old_text_normalized = old_text;
1360 if !new_text.is_empty() && !new_text.ends_with('\n') {
1361 new_text.push('\n');
1362 }
1363 if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1364 old_text_normalized.push('\n');
1365 }
1366
1367 let editable_region_offset = range_in_excerpt.start;
1368 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1369 let editable_region_lines = old_text_normalized.lines().count() as u32;
1370
1371 let diff = udiff::unified_diff_with_context(
1372 &old_text_normalized,
1373 &new_text,
1374 editable_region_start_line,
1375 editable_region_start_line,
1376 editable_region_lines,
1377 );
1378
1379 let path = prompt_inputs
1380 .cursor_path
1381 .to_string_lossy()
1382 .trim_start_matches('/')
1383 .to_string();
1384 let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1385
1386 Ok(udiff::encode_cursor_in_patch(
1387 &formatted_diff,
1388 parsed.cursor_offset_in_new_editable_region,
1389 ))
1390}
1391
/// Public entry point that forwards to `excerpt_ranges_for_format`.
///
/// `resolve_cursor_region` destructures the returned pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1398
1399pub fn resolve_cursor_region(
1400 input: &ZetaPromptInput,
1401 format: ZetaFormat,
1402) -> (&str, Range<usize>, Range<usize>, usize) {
1403 let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1404 let (editable_tokens, _) = token_limits_for_format(format);
1405 let context_range = 0..input.cursor_excerpt.len();
1406 let editable_range = multi_region::compute_v0327_editable_range(
1407 &input.cursor_excerpt,
1408 input.cursor_offset_in_excerpt,
1409 editable_tokens,
1410 );
1411 (editable_range, context_range)
1412 } else if let Some(syntax_ranges) = &input.syntax_ranges {
1413 let (editable_tokens, context_tokens) = token_limits_for_format(format);
1414 compute_editable_and_context_ranges(
1415 &input.cursor_excerpt,
1416 input.cursor_offset_in_excerpt,
1417 syntax_ranges,
1418 editable_tokens,
1419 context_tokens,
1420 )
1421 } else {
1422 excerpt_range_for_format(format, &input.excerpt_ranges)
1423 };
1424
1425 let context_start = context_range.start;
1426 let context_text = &input.cursor_excerpt[context_range.clone()];
1427 let adjusted_editable =
1428 (editable_range.start - context_start)..(editable_range.end - context_start);
1429 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1430
1431 (
1432 context_text,
1433 adjusted_editable,
1434 context_range,
1435 adjusted_cursor,
1436 )
1437}
1438
/// Compute the prefill string for `input` under `format`.
///
/// Resolves the context and editable range with `resolve_cursor_region` and
/// delegates to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
1443
1444fn format_edit_history_within_budget(
1445 events: &[Arc<Event>],
1446 file_marker: &str,
1447 edit_history_name: &str,
1448 max_tokens: usize,
1449 max_edit_event_count: usize,
1450) -> String {
1451 let header = format!("{}{}\n", file_marker, edit_history_name);
1452 let header_tokens = estimate_tokens(header.len());
1453 if header_tokens >= max_tokens {
1454 return String::new();
1455 }
1456
1457 let mut event_strings: Vec<String> = Vec::new();
1458 let mut total_tokens = header_tokens;
1459
1460 for event in events.iter().rev().take(max_edit_event_count) {
1461 let mut event_str = String::new();
1462 write_event(&mut event_str, event);
1463 let event_tokens = estimate_tokens(event_str.len());
1464
1465 if total_tokens + event_tokens > max_tokens {
1466 break;
1467 }
1468 total_tokens += event_tokens;
1469 event_strings.push(event_str);
1470 }
1471
1472 if event_strings.is_empty() {
1473 return String::new();
1474 }
1475
1476 let mut result = header;
1477 for event_str in event_strings.iter().rev() {
1478 result.push_str(event_str);
1479 }
1480 result
1481}
1482
1483fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1484 let needs_newline = !excerpt.text.ends_with('\n');
1485 let needs_ellipsis = excerpt.row_range.end < file_max_row;
1486 let len = excerpt.text.len()
1487 + if needs_newline { "\n".len() } else { 0 }
1488 + if needs_ellipsis { "...\n".len() } else { 0 };
1489 estimate_tokens(len)
1490}
1491
/// Render as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index, then
/// excerpt index) until the first one that would exceed the budget; nothing
/// after that one is considered. The admitted excerpts are then rendered
/// grouped by file, in the original file and excerpt order. Each file starts
/// with `{file_prefix}{path}` on its own line, and `file_suffix` is written
/// between consecutive files (not after the last one).
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus the suffix) is charged once, with the first
        // excerpt admitted from each file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Close the previous file before opening the next one.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark excerpts that stop before the file's last row.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
1580
1581pub fn write_related_files(
1582 prompt: &mut String,
1583 related_files: &[RelatedFile],
1584) -> Vec<Range<usize>> {
1585 let mut ranges = Vec::new();
1586 for file in related_files {
1587 let start = prompt.len();
1588 let path_str = file.path.to_string_lossy();
1589 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1590 for excerpt in &file.excerpts {
1591 prompt.push_str(&excerpt.text);
1592 if !prompt.ends_with('\n') {
1593 prompt.push('\n');
1594 }
1595 if excerpt.row_range.end < file.max_row {
1596 prompt.push_str("...\n");
1597 }
1598 }
1599 let end = prompt.len();
1600 ranges.push(start..end);
1601 }
1602 ranges
1603}
1604
1605mod v0112_middle_at_end {
1606 use super::*;
1607
1608 pub fn special_tokens() -> &'static [&'static str] {
1609 &[
1610 "<|fim_prefix|>",
1611 "<|fim_suffix|>",
1612 "<|fim_middle|>",
1613 "<|file_sep|>",
1614 CURSOR_MARKER,
1615 ]
1616 }
1617
1618 pub fn write_cursor_excerpt_section(
1619 prompt: &mut String,
1620 path: &Path,
1621 context: &str,
1622 editable_range: &Range<usize>,
1623 cursor_offset: usize,
1624 ) {
1625 let path_str = path.to_string_lossy();
1626 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1627
1628 prompt.push_str("<|fim_prefix|>\n");
1629 prompt.push_str(&context[..editable_range.start]);
1630
1631 prompt.push_str("<|fim_suffix|>\n");
1632 prompt.push_str(&context[editable_range.end..]);
1633 if !prompt.ends_with('\n') {
1634 prompt.push('\n');
1635 }
1636
1637 prompt.push_str("<|fim_middle|>current\n");
1638 prompt.push_str(&context[editable_range.start..cursor_offset]);
1639 prompt.push_str(CURSOR_MARKER);
1640 prompt.push_str(&context[cursor_offset..editable_range.end]);
1641 if !prompt.ends_with('\n') {
1642 prompt.push('\n');
1643 }
1644
1645 prompt.push_str("<|fim_middle|>updated\n");
1646 }
1647}
1648
1649mod v0113_ordered {
1650 use super::*;
1651
1652 pub fn special_tokens() -> &'static [&'static str] {
1653 &[
1654 "<|fim_prefix|>",
1655 "<|fim_suffix|>",
1656 "<|fim_middle|>",
1657 "<|file_sep|>",
1658 CURSOR_MARKER,
1659 ]
1660 }
1661
1662 pub fn write_cursor_excerpt_section(
1663 prompt: &mut String,
1664 path: &Path,
1665 context: &str,
1666 editable_range: &Range<usize>,
1667 cursor_offset: usize,
1668 ) {
1669 let path_str = path.to_string_lossy();
1670 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1671
1672 prompt.push_str("<|fim_prefix|>\n");
1673 prompt.push_str(&context[..editable_range.start]);
1674 if !prompt.ends_with('\n') {
1675 prompt.push('\n');
1676 }
1677
1678 prompt.push_str("<|fim_middle|>current\n");
1679 prompt.push_str(&context[editable_range.start..cursor_offset]);
1680 prompt.push_str(CURSOR_MARKER);
1681 prompt.push_str(&context[cursor_offset..editable_range.end]);
1682 if !prompt.ends_with('\n') {
1683 prompt.push('\n');
1684 }
1685
1686 prompt.push_str("<|fim_suffix|>\n");
1687 prompt.push_str(&context[editable_range.end..]);
1688 if !prompt.ends_with('\n') {
1689 prompt.push('\n');
1690 }
1691
1692 prompt.push_str("<|fim_middle|>updated\n");
1693 }
1694}
1695
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; identical to those of `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1703
1704pub mod v0120_git_merge_markers {
1705 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1706 //!
1707 //! Example prompt:
1708 //!
1709 //! <|file_sep|>path/to/target_file.py
1710 //! <|fim_prefix|>
1711 //! code before editable region
1712 //! <|fim_suffix|>
1713 //! code after editable region
1714 //! <|fim_middle|>
1715 //! <<<<<<< CURRENT
1716 //! code that
1717 //! needs to<|user_cursor|>
1718 //! be rewritten
1719 //! =======
1720 //!
1721 //! Expected output (should be generated by the model):
1722 //!
1723 //! updated
1724 //! code with
1725 //! changes applied
1726 //! >>>>>>> UPDATED
1727
1728 use super::*;
1729
1730 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1731 pub const SEPARATOR: &str = "=======\n";
1732 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1733
1734 pub fn special_tokens() -> &'static [&'static str] {
1735 &[
1736 "<|fim_prefix|>",
1737 "<|fim_suffix|>",
1738 "<|fim_middle|>",
1739 "<|file_sep|>",
1740 START_MARKER,
1741 SEPARATOR,
1742 END_MARKER,
1743 CURSOR_MARKER,
1744 ]
1745 }
1746
1747 pub fn write_cursor_excerpt_section(
1748 prompt: &mut String,
1749 path: &Path,
1750 context: &str,
1751 editable_range: &Range<usize>,
1752 cursor_offset: usize,
1753 ) {
1754 let path_str = path.to_string_lossy();
1755 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1756
1757 prompt.push_str("<|fim_prefix|>");
1758 prompt.push_str(&context[..editable_range.start]);
1759
1760 prompt.push_str("<|fim_suffix|>");
1761 prompt.push_str(&context[editable_range.end..]);
1762 if !prompt.ends_with('\n') {
1763 prompt.push('\n');
1764 }
1765
1766 prompt.push_str("<|fim_middle|>");
1767 prompt.push_str(START_MARKER);
1768 prompt.push_str(&context[editable_range.start..cursor_offset]);
1769 prompt.push_str(CURSOR_MARKER);
1770 prompt.push_str(&context[cursor_offset..editable_range.end]);
1771 if !prompt.ends_with('\n') {
1772 prompt.push('\n');
1773 }
1774 prompt.push_str(SEPARATOR);
1775 }
1776}
1777
1778pub mod v0131_git_merge_markers_prefix {
1779 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1780 //!
1781 //! Example prompt:
1782 //!
1783 //! <|file_sep|>path/to/target_file.py
1784 //! <|fim_prefix|>
1785 //! code before editable region
1786 //! <<<<<<< CURRENT
1787 //! code that
1788 //! needs to<|user_cursor|>
1789 //! be rewritten
1790 //! =======
1791 //! <|fim_suffix|>
1792 //! code after editable region
1793 //! <|fim_middle|>
1794 //!
1795 //! Expected output (should be generated by the model):
1796 //!
1797 //! updated
1798 //! code with
1799 //! changes applied
1800 //! >>>>>>> UPDATED
1801
1802 use super::*;
1803
1804 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1805 pub const SEPARATOR: &str = "=======\n";
1806 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1807
1808 pub fn special_tokens() -> &'static [&'static str] {
1809 &[
1810 "<|fim_prefix|>",
1811 "<|fim_suffix|>",
1812 "<|fim_middle|>",
1813 "<|file_sep|>",
1814 START_MARKER,
1815 SEPARATOR,
1816 END_MARKER,
1817 CURSOR_MARKER,
1818 ]
1819 }
1820
1821 pub fn write_cursor_excerpt_section(
1822 prompt: &mut String,
1823 path: &Path,
1824 context: &str,
1825 editable_range: &Range<usize>,
1826 cursor_offset: usize,
1827 ) {
1828 let path_str = path.to_string_lossy();
1829 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1830
1831 prompt.push_str("<|fim_prefix|>");
1832 prompt.push_str(&context[..editable_range.start]);
1833 prompt.push_str(START_MARKER);
1834 prompt.push_str(&context[editable_range.start..cursor_offset]);
1835 prompt.push_str(CURSOR_MARKER);
1836 prompt.push_str(&context[cursor_offset..editable_range.end]);
1837 if !prompt.ends_with('\n') {
1838 prompt.push('\n');
1839 }
1840 prompt.push_str(SEPARATOR);
1841
1842 prompt.push_str("<|fim_suffix|>");
1843 prompt.push_str(&context[editable_range.end..]);
1844 if !prompt.ends_with('\n') {
1845 prompt.push('\n');
1846 }
1847
1848 prompt.push_str("<|fim_middle|>");
1849 }
1850}
1851
1852pub mod v0211_prefill {
1853 use super::*;
1854
1855 pub fn special_tokens() -> &'static [&'static str] {
1856 v0131_git_merge_markers_prefix::special_tokens()
1857 }
1858
1859 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1860 let editable_region = &context[editable_range.start..editable_range.end];
1861
1862 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1863 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1864
1865 // Find a token boundary to avoid splitting tokens in the prefill.
1866 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1867 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1868 // the \n and consume any consecutive \n characters after it.
1869 let prefill = &editable_region[..prefill_len];
1870 match prefill.rfind('\n') {
1871 Some(pos) => {
1872 let mut end = pos + 1;
1873 while end < editable_region.len()
1874 && editable_region.as_bytes().get(end) == Some(&b'\n')
1875 {
1876 end += 1;
1877 }
1878 editable_region[..end].to_string()
1879 }
1880 // No newline found. Fall back to splitting before the last space
1881 // (word-level boundary)
1882 None => match prefill.rfind(' ') {
1883 Some(pos) => prefill[..pos].to_string(),
1884 None => prefill.to_string(),
1885 },
1886 }
1887 }
1888}
1889
1890pub mod hashline {
1891
1892 use std::fmt::Display;
1893
1894 pub const END_MARKER: &str = "<|fim_middle|>updated";
1895 pub const START_MARKER: &str = "<|fim_middle|>current";
1896
1897 use super::*;
1898
1899 const SET_COMMAND_MARKER: &str = "<|set|>";
1900 const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1901 pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1902
1903 pub fn special_tokens() -> &'static [&'static str] {
1904 return &[
1905 SET_COMMAND_MARKER,
1906 "<|set_range|>",
1907 INSERT_COMMAND_MARKER,
1908 NO_EDITS_COMMAND_MARKER,
1909 CURSOR_MARKER,
1910 "<|file_sep|>",
1911 "<|fim_prefix|>",
1912 "<|fim_suffix|>",
1913 "<|fim_middle|>",
1914 ];
1915 }
1916
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Position of the line within the editable region, counted from zero
        /// (the prompt writer numbers lines with `enumerate`).
        index: usize,
        /// One-byte hash of the line's bytes, as produced by `hash_line`.
        hash: u8,
    }
1923
    impl Display for LineRef {
        /// Formats as `{index}:{hash}`, with the hash written as two lowercase
        /// hex digits (zero-padded).
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}:{:02x}", self.index, self.hash)
        }
    }
1929
1930 pub fn hash_line(line: &[u8]) -> u8 {
1931 let mut h: u8 = 0;
1932 for &byte in line {
1933 h = h.wrapping_add(byte);
1934 }
1935 return h;
1936 }
1937
1938 /// Write the hashline-encoded editable region into `out`. Each line of
1939 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1940 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1941 /// to the start of `editable_text`).
1942 pub fn write_hashline_editable_region(
1943 out: &mut String,
1944 editable_text: &str,
1945 cursor_offset_in_editable: usize,
1946 ) {
1947 let mut offset = 0;
1948 for (i, line) in editable_text.lines().enumerate() {
1949 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1950 && cursor_offset_in_editable < offset + line.len()
1951 {
1952 (
1953 &line[..cursor_offset_in_editable - offset],
1954 CURSOR_MARKER,
1955 &line[cursor_offset_in_editable - offset..],
1956 )
1957 } else {
1958 (line, "", "")
1959 };
1960 write!(
1961 out,
1962 "\n{}|{head}{cursor}{tail}",
1963 LineRef {
1964 index: i,
1965 hash: hash_line(line.as_bytes())
1966 }
1967 )
1968 .unwrap();
1969 offset += line.len() + 1;
1970 }
1971 }
1972
1973 pub fn write_cursor_excerpt_section(
1974 prompt: &mut String,
1975 path: &Path,
1976 context: &str,
1977 editable_range: &Range<usize>,
1978 cursor_offset: usize,
1979 ) {
1980 let path_str = path.to_string_lossy();
1981 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1982
1983 prompt.push_str("<|fim_prefix|>\n");
1984 prompt.push_str(&context[..editable_range.start]);
1985 prompt.push_str(START_MARKER);
1986
1987 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1988 let editable_region = &context[editable_range.clone()];
1989 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1990
1991 if !prompt.ends_with('\n') {
1992 prompt.push('\n');
1993 }
1994
1995 prompt.push_str("<|fim_suffix|>\n");
1996 prompt.push_str(&context[editable_range.end..]);
1997 if !prompt.ends_with('\n') {
1998 prompt.push('\n');
1999 }
2000
2001 prompt.push_str(END_MARKER);
2002 prompt.push('\n');
2003 }
2004
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows the command's body lines directly from the model
    /// output, including their line terminators.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
2022
2023 /// Parse a line reference like `3:c3` into a `LineRef`.
2024 fn parse_line_ref(s: &str) -> Option<LineRef> {
2025 let (idx_str, hash_str) = s.split_once(':')?;
2026 let index = idx_str.parse::<usize>().ok()?;
2027 let hash = u8::from_str_radix(hash_str, 16).ok()?;
2028 Some(LineRef { index, hash })
2029 }
2030
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with the set or insert marker opens a command; the rest of that line is
    /// its specifier, and every following line up to the next command line (or
    /// the end of the output) is its content. Lines outside any command are
    /// skipped. A set command whose specifier cannot be parsed is dropped; an
    /// insert command whose specifier cannot be parsed is kept with
    /// `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Isolate the current line and the offset just past its newline.
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command line: skip it.
                offset = line_end;
                continue;
            };

            // Collect body lines until the next command line or end of output.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                // `start-end` addresses a range; a lone reference addresses one line.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the line that ended this command's body.
            offset = scan;
        }

        commands
    }
2110
/// Decode a hashline-encoded editable region back into plain text.
///
/// Each line loses everything up to and including its first `|` (the
/// `{line_num}:{hash}|` prefix); a line containing no `|` is kept as-is.
/// The decoded lines are joined with `\n`, and a trailing `\n` is appended
/// when `region` itself ends with one.
pub fn strip_hashline_prefixes(region: &str) -> String {
    let mut plain = String::new();
    for (row, line) in region.lines().enumerate() {
        if row > 0 {
            plain.push('\n');
        }
        let text = match line.split_once('|') {
            Some((_prefix, rest)) => rest,
            None => line,
        };
        plain.push_str(text);
    }
    // `lines()` swallows the final line terminator, so restore it here.
    if region.ends_with('\n') {
        plain.push('\n');
    }
    plain
}
2126
2127 pub fn output_has_edit_commands(model_output: &str) -> bool {
2128 model_output.contains(SET_COMMAND_MARKER)
2129 || model_output.contains(INSERT_COMMAND_MARKER)
2130 || model_output.contains(NO_EDITS_COMMAND_MARKER)
2131 }
2132
2133 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2134 /// original editable region text.
2135 ///
2136 /// `editable_region` is the original text of the editable region (without hash
2137 /// prefixes). `model_output` is the raw model response containing edit commands.
2138 ///
2139 /// Returns the full replacement text for the editable region.
2140 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2141 if model_output
2142 .trim_start()
2143 .starts_with(NO_EDITS_COMMAND_MARKER)
2144 {
2145 return editable_region.to_string();
2146 }
2147
2148 let original_lines: Vec<&str> = editable_region.lines().collect();
2149 let old_hashes: Vec<u8> = original_lines
2150 .iter()
2151 .map(|line| hash_line(line.as_bytes()))
2152 .collect();
2153
2154 let commands = parse_edit_commands(model_output);
2155
2156 // For set operations: indexed by start line → Some((end line index, content))
2157 // For insert operations: indexed by line index → vec of content to insert after
2158 // Insert-before-first is tracked separately.
2159 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2160 let mut insert_before_first: Vec<&str> = Vec::new();
2161 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2162
2163 for command in &commands {
2164 match command {
2165 EditCommand::Set {
2166 start,
2167 end,
2168 content,
2169 } => {
2170 if start.index < old_hashes.len()
2171 && end.index < old_hashes.len()
2172 && start.index <= end.index
2173 && old_hashes[start.index] == start.hash
2174 && old_hashes[end.index] == end.hash
2175 {
2176 set_ops[start.index] = Some((end.index, *content));
2177 }
2178 }
2179 EditCommand::Insert { after, content } => match after {
2180 None => insert_before_first.push(*content),
2181 Some(line_ref) => {
2182 if line_ref.index < old_hashes.len()
2183 && old_hashes[line_ref.index] == line_ref.hash
2184 {
2185 insert_after[line_ref.index].push(*content);
2186 }
2187 }
2188 },
2189 }
2190 }
2191
2192 let mut result = String::new();
2193
2194 // Emit any insertions before the first line
2195 for content in &insert_before_first {
2196 result.push_str(content);
2197 if !content.ends_with('\n') {
2198 result.push('\n');
2199 }
2200 }
2201
2202 let mut i = 0;
2203 while i < original_lines.len() {
2204 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2205 // Replace lines i..=end_index with the replacement content
2206 result.push_str(replacement);
2207 if !replacement.is_empty() && !replacement.ends_with('\n') {
2208 result.push('\n');
2209 }
2210 // Emit any insertions after the end of this set range
2211 if *end_index < insert_after.len() {
2212 for content in &insert_after[*end_index] {
2213 result.push_str(content);
2214 if !content.ends_with('\n') {
2215 result.push('\n');
2216 }
2217 }
2218 }
2219 i = end_index + 1;
2220 } else {
2221 // Keep the original line
2222 result.push_str(original_lines[i]);
2223 result.push('\n');
2224 // Emit any insertions after this line
2225 for content in &insert_after[i] {
2226 result.push_str(content);
2227 if !content.ends_with('\n') {
2228 result.push('\n');
2229 }
2230 }
2231 i += 1;
2232 }
2233 }
2234
2235 // Preserve trailing newline behavior: if the original ended with a
2236 // newline the result already has one; if it didn't, trim the extra one
2237 // we added.
2238 if !editable_region.ends_with('\n') && result.ends_with('\n') {
2239 result.pop();
2240 }
2241
2242 result
2243 }
2244
2245 /// Convert a unified diff patch into hashline edit commands.
2246 ///
2247 /// Parses the unified diff `patch` directly to determine which lines of
2248 /// `old_text` are deleted/replaced and what new lines are added, then emits
2249 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2250 /// `{index}:{hash}` identifiers.
2251 ///
2252 /// `cursor_offset` is an optional byte offset into the first hunk's new
2253 /// text (context + additions) where the cursor marker should be placed.
2254 pub fn patch_to_edit_commands(
2255 old_text: &str,
2256 patch: &str,
2257 cursor_offset: Option<usize>,
2258 ) -> Result<String> {
2259 let old_lines: Vec<&str> = old_text.lines().collect();
2260 let old_hashes: Vec<u8> = old_lines
2261 .iter()
2262 .map(|line| hash_line(line.as_bytes()))
2263 .collect();
2264
2265 let mut result = String::new();
2266 let mut first_hunk = true;
2267
2268 struct Hunk<'a> {
2269 line_range: Range<usize>,
2270 new_text_lines: Vec<&'a str>,
2271 cursor_line_offset_in_new_text: Option<(usize, usize)>,
2272 }
2273
2274 // Parse the patch line by line. We only care about hunk headers,
2275 // context, deletions, and additions.
2276 let mut old_line_index: usize = 0;
2277 let mut current_hunk: Option<Hunk> = None;
2278 // Byte offset tracking within the hunk's new text for cursor placement.
2279 let mut new_text_byte_offset: usize = 0;
2280 // The line index of the last old line seen before/in the current hunk
2281 // (used for insert-after reference).
2282 let mut last_old_line_before_hunk: Option<usize> = None;
2283
2284 fn flush_hunk(
2285 hunk: Hunk,
2286 last_old_line: Option<usize>,
2287 result: &mut String,
2288 old_hashes: &[u8],
2289 ) {
2290 if hunk.line_range.is_empty() {
2291 // Pure insertion — reference the old line to insert after when in bounds.
2292 if let Some(after) = last_old_line
2293 && let Some(&hash) = old_hashes.get(after)
2294 {
2295 write!(
2296 result,
2297 "{INSERT_COMMAND_MARKER}{}\n",
2298 LineRef { index: after, hash }
2299 )
2300 .unwrap();
2301 } else {
2302 result.push_str(INSERT_COMMAND_MARKER);
2303 result.push('\n');
2304 }
2305 } else {
2306 let start = hunk.line_range.start;
2307 let end_exclusive = hunk.line_range.end;
2308 let deleted_line_count = end_exclusive.saturating_sub(start);
2309
2310 if deleted_line_count == 1 {
2311 if let Some(&hash) = old_hashes.get(start) {
2312 write!(
2313 result,
2314 "{SET_COMMAND_MARKER}{}\n",
2315 LineRef { index: start, hash }
2316 )
2317 .unwrap();
2318 } else {
2319 result.push_str(SET_COMMAND_MARKER);
2320 result.push('\n');
2321 }
2322 } else {
2323 let end_inclusive = end_exclusive - 1;
2324 match (
2325 old_hashes.get(start).copied(),
2326 old_hashes.get(end_inclusive).copied(),
2327 ) {
2328 (Some(start_hash), Some(end_hash)) => {
2329 write!(
2330 result,
2331 "{SET_COMMAND_MARKER}{}-{}\n",
2332 LineRef {
2333 index: start,
2334 hash: start_hash
2335 },
2336 LineRef {
2337 index: end_inclusive,
2338 hash: end_hash
2339 }
2340 )
2341 .unwrap();
2342 }
2343 _ => {
2344 result.push_str(SET_COMMAND_MARKER);
2345 result.push('\n');
2346 }
2347 }
2348 }
2349 }
2350 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2351 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2352 && line_offset == cursor_line_offset
2353 {
2354 result.push_str(&line[..char_offset]);
2355 result.push_str(CURSOR_MARKER);
2356 result.push_str(&line[char_offset..]);
2357 continue;
2358 }
2359
2360 result.push_str(line);
2361 }
2362 }
2363
2364 for raw_line in patch.split_inclusive('\n') {
2365 if raw_line.starts_with("@@") {
2366 // Flush any pending change hunk from a previous patch hunk.
2367 if let Some(hunk) = current_hunk.take() {
2368 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2369 }
2370
2371 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2372 // We intentionally do not trust old_start as a direct local index into `old_text`,
2373 // because some patches are produced against a larger file region and carry
2374 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2375 if first_hunk {
2376 new_text_byte_offset = 0;
2377 first_hunk = false;
2378 }
2379 continue;
2380 }
2381
2382 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2383 continue;
2384 }
2385 if raw_line.starts_with("\\ No newline") {
2386 continue;
2387 }
2388
2389 if raw_line.starts_with('-') {
2390 // Extend or start a change hunk with this deleted old line.
2391 match &mut current_hunk {
2392 Some(Hunk {
2393 line_range: range, ..
2394 }) => range.end = old_line_index + 1,
2395 None => {
2396 current_hunk = Some(Hunk {
2397 line_range: old_line_index..old_line_index + 1,
2398 new_text_lines: Vec::new(),
2399 cursor_line_offset_in_new_text: None,
2400 });
2401 }
2402 }
2403 old_line_index += 1;
2404 } else if let Some(added_content) = raw_line.strip_prefix('+') {
2405 // Place cursor marker if cursor_offset falls within this line.
2406 let mut cursor_line_offset = None;
2407 if let Some(cursor_off) = cursor_offset
2408 && (first_hunk
2409 || cursor_off >= new_text_byte_offset
2410 && cursor_off <= new_text_byte_offset + added_content.len())
2411 {
2412 let line_offset = added_content.floor_char_boundary(
2413 cursor_off
2414 .saturating_sub(new_text_byte_offset)
2415 .min(added_content.len()),
2416 );
2417 cursor_line_offset = Some(line_offset);
2418 }
2419
2420 new_text_byte_offset += added_content.len();
2421
2422 let hunk = current_hunk.get_or_insert(Hunk {
2423 line_range: old_line_index..old_line_index,
2424 new_text_lines: vec![],
2425 cursor_line_offset_in_new_text: None,
2426 });
2427 hunk.new_text_lines.push(added_content);
2428 hunk.cursor_line_offset_in_new_text = cursor_line_offset
2429 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2430 } else {
2431 // Context line (starts with ' ' or is empty).
2432 if let Some(hunk) = current_hunk.take() {
2433 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2434 }
2435 last_old_line_before_hunk = Some(old_line_index);
2436 old_line_index += 1;
2437 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2438 new_text_byte_offset += content.len();
2439 }
2440 }
2441
2442 // Flush final group.
2443 if let Some(hunk) = current_hunk.take() {
2444 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2445 }
2446
2447 // Trim a single trailing newline.
2448 if result.ends_with('\n') {
2449 result.pop();
2450 }
2451
2452 if result.is_empty() {
2453 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2454 }
2455
2456 Ok(result)
2457 }
2458
2459 #[cfg(test)]
2460 mod tests {
2461 use super::*;
2462 use indoc::indoc;
2463
2464 #[test]
2465 fn test_format_cursor_region() {
2466 struct Case {
2467 name: &'static str,
2468 context: &'static str,
2469 editable_range: Range<usize>,
2470 cursor_offset: usize,
2471 expected: &'static str,
2472 }
2473
2474 let cases = [
2475 Case {
2476 name: "basic_cursor_placement",
2477 context: "hello world\n",
2478 editable_range: 0..12,
2479 cursor_offset: 5,
2480 expected: indoc! {"
2481 <|file_sep|>test.rs
2482 <|fim_prefix|>
2483 <|fim_middle|>current
2484 0:5c|hello<|user_cursor|> world
2485 <|fim_suffix|>
2486 <|fim_middle|>updated
2487 "},
2488 },
2489 Case {
2490 name: "multiline_cursor_on_second_line",
2491 context: "aaa\nbbb\nccc\n",
2492 editable_range: 0..12,
2493 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2494 expected: indoc! {"
2495 <|file_sep|>test.rs
2496 <|fim_prefix|>
2497 <|fim_middle|>current
2498 0:23|aaa
2499 1:26|b<|user_cursor|>bb
2500 2:29|ccc
2501 <|fim_suffix|>
2502 <|fim_middle|>updated
2503 "},
2504 },
2505 Case {
2506 name: "no_trailing_newline_in_context",
2507 context: "line1\nline2",
2508 editable_range: 0..11,
2509 cursor_offset: 3,
2510 expected: indoc! {"
2511 <|file_sep|>test.rs
2512 <|fim_prefix|>
2513 <|fim_middle|>current
2514 0:d9|lin<|user_cursor|>e1
2515 1:da|line2
2516 <|fim_suffix|>
2517 <|fim_middle|>updated
2518 "},
2519 },
2520 Case {
2521 name: "leading_newline_in_editable_region",
2522 context: "\nabc\n",
2523 editable_range: 0..5,
2524 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2525 expected: indoc! {"
2526 <|file_sep|>test.rs
2527 <|fim_prefix|>
2528 <|fim_middle|>current
2529 0:00|
2530 1:26|a<|user_cursor|>bc
2531 <|fim_suffix|>
2532 <|fim_middle|>updated
2533 "},
2534 },
2535 Case {
2536 name: "with_suffix",
2537 context: "abc\ndef",
2538 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2539 cursor_offset: 2,
2540 expected: indoc! {"
2541 <|file_sep|>test.rs
2542 <|fim_prefix|>
2543 <|fim_middle|>current
2544 0:26|ab<|user_cursor|>c
2545 <|fim_suffix|>
2546 def
2547 <|fim_middle|>updated
2548 "},
2549 },
2550 Case {
2551 name: "unicode_two_byte_chars",
2552 context: "héllo\n",
2553 editable_range: 0..7,
2554 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2555 expected: indoc! {"
2556 <|file_sep|>test.rs
2557 <|fim_prefix|>
2558 <|fim_middle|>current
2559 0:1b|hé<|user_cursor|>llo
2560 <|fim_suffix|>
2561 <|fim_middle|>updated
2562 "},
2563 },
2564 Case {
2565 name: "unicode_three_byte_chars",
2566 context: "日本語\n",
2567 editable_range: 0..10,
2568 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2569 expected: indoc! {"
2570 <|file_sep|>test.rs
2571 <|fim_prefix|>
2572 <|fim_middle|>current
2573 0:80|日本<|user_cursor|>語
2574 <|fim_suffix|>
2575 <|fim_middle|>updated
2576 "},
2577 },
2578 Case {
2579 name: "unicode_four_byte_chars",
2580 context: "a🌍b\n",
2581 editable_range: 0..7,
2582 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2583 expected: indoc! {"
2584 <|file_sep|>test.rs
2585 <|fim_prefix|>
2586 <|fim_middle|>current
2587 0:6b|a🌍<|user_cursor|>b
2588 <|fim_suffix|>
2589 <|fim_middle|>updated
2590 "},
2591 },
2592 Case {
2593 name: "cursor_at_start_of_region_not_placed",
2594 context: "abc\n",
2595 editable_range: 0..4,
2596 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2597 expected: indoc! {"
2598 <|file_sep|>test.rs
2599 <|fim_prefix|>
2600 <|fim_middle|>current
2601 0:26|abc
2602 <|fim_suffix|>
2603 <|fim_middle|>updated
2604 "},
2605 },
2606 Case {
2607 name: "cursor_at_end_of_line_not_placed",
2608 context: "abc\ndef\n",
2609 editable_range: 0..8,
2610 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2611 expected: indoc! {"
2612 <|file_sep|>test.rs
2613 <|fim_prefix|>
2614 <|fim_middle|>current
2615 0:26|abc
2616 1:2f|def
2617 <|fim_suffix|>
2618 <|fim_middle|>updated
2619 "},
2620 },
2621 Case {
2622 name: "cursor_offset_relative_to_context_not_editable_region",
2623 // cursor_offset is relative to `context`, so when editable_range.start > 0,
2624 // write_cursor_excerpt_section must subtract it before comparing against
2625 // per-line offsets within the editable region.
2626 context: "pre\naaa\nbbb\nsuf\n",
2627 editable_range: 4..12, // editable region = "aaa\nbbb\n"
2628 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
2629 expected: indoc! {"
2630 <|file_sep|>test.rs
2631 <|fim_prefix|>
2632 pre
2633 <|fim_middle|>current
2634 0:23|aaa
2635 1:26|b<|user_cursor|>bb
2636 <|fim_suffix|>
2637 suf
2638 <|fim_middle|>updated
2639 "},
2640 },
2641 ];
2642
2643 for case in &cases {
2644 let mut prompt = String::new();
2645 hashline::write_cursor_excerpt_section(
2646 &mut prompt,
2647 Path::new("test.rs"),
2648 case.context,
2649 &case.editable_range,
2650 case.cursor_offset,
2651 );
2652 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2653 }
2654 }
2655
2656 #[test]
2657 fn test_apply_edit_commands() {
2658 struct Case {
2659 name: &'static str,
2660 original: &'static str,
2661 model_output: &'static str,
2662 expected: &'static str,
2663 }
2664
2665 let cases = vec![
2666 Case {
2667 name: "set_single_line",
2668 original: indoc! {"
2669 let mut total = 0;
2670 for product in products {
2671 total += ;
2672 }
2673 total
2674 "},
2675 model_output: indoc! {"
2676 <|set|>2:87
2677 total += product.price;
2678 "},
2679 expected: indoc! {"
2680 let mut total = 0;
2681 for product in products {
2682 total += product.price;
2683 }
2684 total
2685 "},
2686 },
2687 Case {
2688 name: "set_range",
2689 original: indoc! {"
2690 fn foo() {
2691 let x = 1;
2692 let y = 2;
2693 let z = 3;
2694 }
2695 "},
2696 model_output: indoc! {"
2697 <|set|>1:46-3:4a
2698 let sum = 6;
2699 "},
2700 expected: indoc! {"
2701 fn foo() {
2702 let sum = 6;
2703 }
2704 "},
2705 },
2706 Case {
2707 name: "insert_after_line",
2708 original: indoc! {"
2709 fn main() {
2710 let x = 1;
2711 }
2712 "},
2713 model_output: indoc! {"
2714 <|insert|>1:46
2715 let y = 2;
2716 "},
2717 expected: indoc! {"
2718 fn main() {
2719 let x = 1;
2720 let y = 2;
2721 }
2722 "},
2723 },
2724 Case {
2725 name: "insert_before_first",
2726 original: indoc! {"
2727 let x = 1;
2728 let y = 2;
2729 "},
2730 model_output: indoc! {"
2731 <|insert|>
2732 use std::io;
2733 "},
2734 expected: indoc! {"
2735 use std::io;
2736 let x = 1;
2737 let y = 2;
2738 "},
2739 },
2740 Case {
2741 name: "set_with_cursor_marker",
2742 original: indoc! {"
2743 fn main() {
2744 println!();
2745 }
2746 "},
2747 model_output: indoc! {"
2748 <|set|>1:34
2749 eprintln!(\"<|user_cursor|>\");
2750 "},
2751 expected: indoc! {"
2752 fn main() {
2753 eprintln!(\"<|user_cursor|>\");
2754 }
2755 "},
2756 },
2757 Case {
2758 name: "multiple_set_commands",
2759 original: indoc! {"
2760 aaa
2761 bbb
2762 ccc
2763 ddd
2764 "},
2765 model_output: indoc! {"
2766 <|set|>0:23
2767 AAA
2768 <|set|>2:29
2769 CCC
2770 "},
2771 expected: indoc! {"
2772 AAA
2773 bbb
2774 CCC
2775 ddd
2776 "},
2777 },
2778 Case {
2779 name: "set_range_multiline_replacement",
2780 original: indoc! {"
2781 fn handle_submit() {
2782 }
2783
2784 fn handle_keystroke() {
2785 "},
2786 model_output: indoc! {"
2787 <|set|>0:3f-1:7d
2788 fn handle_submit(modal_state: &mut ModalState) {
2789 <|user_cursor|>
2790 }
2791 "},
2792 expected: indoc! {"
2793 fn handle_submit(modal_state: &mut ModalState) {
2794 <|user_cursor|>
2795 }
2796
2797 fn handle_keystroke() {
2798 "},
2799 },
2800 Case {
2801 name: "no_edit_commands_returns_original",
2802 original: indoc! {"
2803 hello
2804 world
2805 "},
2806 model_output: "some random text with no commands",
2807 expected: indoc! {"
2808 hello
2809 world
2810 "},
2811 },
2812 Case {
2813 name: "no_edits_command_returns_original",
2814 original: indoc! {"
2815 hello
2816 world
2817 "},
2818 model_output: "<|no_edits|>",
2819 expected: indoc! {"
2820 hello
2821 world
2822 "},
2823 },
2824 Case {
2825 name: "wrong_hash_set_ignored",
2826 original: indoc! {"
2827 aaa
2828 bbb
2829 "},
2830 model_output: indoc! {"
2831 <|set|>0:ff
2832 ZZZ
2833 "},
2834 expected: indoc! {"
2835 aaa
2836 bbb
2837 "},
2838 },
2839 Case {
2840 name: "insert_and_set_combined",
2841 original: indoc! {"
2842 alpha
2843 beta
2844 gamma
2845 "},
2846 model_output: indoc! {"
2847 <|set|>0:06
2848 ALPHA
2849 <|insert|>1:9c
2850 beta_extra
2851 "},
2852 expected: indoc! {"
2853 ALPHA
2854 beta
2855 beta_extra
2856 gamma
2857 "},
2858 },
2859 Case {
2860 name: "no_trailing_newline_preserved",
2861 original: "hello\nworld",
2862 model_output: indoc! {"
2863 <|set|>0:14
2864 HELLO
2865 "},
2866 expected: "HELLO\nworld",
2867 },
2868 Case {
2869 name: "set_range_hash_mismatch_in_end_bound",
2870 original: indoc! {"
2871 one
2872 two
2873 three
2874 "},
2875 model_output: indoc! {"
2876 <|set|>0:42-2:ff
2877 ONE_TWO_THREE
2878 "},
2879 expected: indoc! {"
2880 one
2881 two
2882 three
2883 "},
2884 },
2885 Case {
2886 name: "set_range_start_greater_than_end_ignored",
2887 original: indoc! {"
2888 a
2889 b
2890 c
2891 "},
2892 model_output: indoc! {"
2893 <|set|>2:63-1:62
2894 X
2895 "},
2896 expected: indoc! {"
2897 a
2898 b
2899 c
2900 "},
2901 },
2902 Case {
2903 name: "insert_out_of_bounds_ignored",
2904 original: indoc! {"
2905 x
2906 y
2907 "},
2908 model_output: indoc! {"
2909 <|insert|>99:aa
2910 z
2911 "},
2912 expected: indoc! {"
2913 x
2914 y
2915 "},
2916 },
2917 Case {
2918 name: "set_out_of_bounds_ignored",
2919 original: indoc! {"
2920 x
2921 y
2922 "},
2923 model_output: indoc! {"
2924 <|set|>99:aa
2925 z
2926 "},
2927 expected: indoc! {"
2928 x
2929 y
2930 "},
2931 },
2932 Case {
2933 name: "malformed_set_command_ignored",
2934 original: indoc! {"
2935 alpha
2936 beta
2937 "},
2938 model_output: indoc! {"
2939 <|set|>not-a-line-ref
2940 UPDATED
2941 "},
2942 expected: indoc! {"
2943 alpha
2944 beta
2945 "},
2946 },
2947 Case {
2948 name: "malformed_insert_hash_treated_as_before_first",
2949 original: indoc! {"
2950 alpha
2951 beta
2952 "},
2953 model_output: indoc! {"
2954 <|insert|>1:nothex
2955 preamble
2956 "},
2957 expected: indoc! {"
2958 preamble
2959 alpha
2960 beta
2961 "},
2962 },
2963 Case {
2964 name: "set_then_insert_same_target_orders_insert_after_replacement",
2965 original: indoc! {"
2966 cat
2967 dog
2968 "},
2969 model_output: indoc! {"
2970 <|set|>0:38
2971 CAT
2972 <|insert|>0:38
2973 TAIL
2974 "},
2975 expected: indoc! {"
2976 CAT
2977 TAIL
2978 dog
2979 "},
2980 },
2981 Case {
2982 name: "overlapping_set_ranges_last_wins",
2983 original: indoc! {"
2984 a
2985 b
2986 c
2987 d
2988 "},
2989 model_output: indoc! {"
2990 <|set|>0:61-2:63
2991 FIRST
2992 <|set|>1:62-3:64
2993 SECOND
2994 "},
2995 expected: indoc! {"
2996 FIRST
2997 d
2998 "},
2999 },
3000 Case {
3001 name: "insert_before_first_and_after_line",
3002 original: indoc! {"
3003 a
3004 b
3005 "},
3006 model_output: indoc! {"
3007 <|insert|>
3008 HEAD
3009 <|insert|>0:61
3010 MID
3011 "},
3012 expected: indoc! {"
3013 HEAD
3014 a
3015 MID
3016 b
3017 "},
3018 },
3019 ];
3020
3021 for case in &cases {
3022 let result = hashline::apply_edit_commands(case.original, &case.model_output);
3023 assert_eq!(result, case.expected, "failed case: {}", case.name);
3024 }
3025 }
3026
#[test]
fn test_output_has_edit_commands() {
    // Outputs that contain at least one command marker.
    let set_at_start = format!("{}0:ab\nnew", SET_COMMAND_MARKER);
    let insert_at_start = format!("{}0:ab\nnew", INSERT_COMMAND_MARKER);
    let set_after_text = format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER);
    let with_commands = [
        set_at_start.as_str(),
        insert_at_start.as_str(),
        set_after_text.as_str(),
        "<|no_edits|>",
    ];
    for output in with_commands.iter() {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Outputs with no marker at all; the bare word NO_EDITS is not a marker.
    let without_commands = ["just plain text", "NO_EDITS"];
    for output in without_commands.iter() {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
3045
3046 // ---- hashline::patch_to_edit_commands round-trip tests ----
3047
3048 #[test]
3049 fn test_patch_to_edit_commands() {
3050 struct Case {
3051 name: &'static str,
3052 old: &'static str,
3053 patch: &'static str,
3054 expected_new: &'static str,
3055 }
3056
3057 let cases = [
3058 Case {
3059 name: "single_line_replacement",
3060 old: indoc! {"
3061 let mut total = 0;
3062 for product in products {
3063 total += ;
3064 }
3065 total
3066 "},
3067 patch: indoc! {"
3068 @@ -1,5 +1,5 @@
3069 let mut total = 0;
3070 for product in products {
3071 - total += ;
3072 + total += product.price;
3073 }
3074 total
3075 "},
3076 expected_new: indoc! {"
3077 let mut total = 0;
3078 for product in products {
3079 total += product.price;
3080 }
3081 total
3082 "},
3083 },
3084 Case {
3085 name: "multiline_replacement",
3086 old: indoc! {"
3087 fn foo() {
3088 let x = 1;
3089 let y = 2;
3090 let z = 3;
3091 }
3092 "},
3093 patch: indoc! {"
3094 @@ -1,5 +1,3 @@
3095 fn foo() {
3096 - let x = 1;
3097 - let y = 2;
3098 - let z = 3;
3099 + let sum = 1 + 2 + 3;
3100 }
3101 "},
3102 expected_new: indoc! {"
3103 fn foo() {
3104 let sum = 1 + 2 + 3;
3105 }
3106 "},
3107 },
3108 Case {
3109 name: "insertion",
3110 old: indoc! {"
3111 fn main() {
3112 let x = 1;
3113 }
3114 "},
3115 patch: indoc! {"
3116 @@ -1,3 +1,4 @@
3117 fn main() {
3118 let x = 1;
3119 + let y = 2;
3120 }
3121 "},
3122 expected_new: indoc! {"
3123 fn main() {
3124 let x = 1;
3125 let y = 2;
3126 }
3127 "},
3128 },
3129 Case {
3130 name: "insertion_before_first",
3131 old: indoc! {"
3132 let x = 1;
3133 let y = 2;
3134 "},
3135 patch: indoc! {"
3136 @@ -1,2 +1,3 @@
3137 +use std::io;
3138 let x = 1;
3139 let y = 2;
3140 "},
3141 expected_new: indoc! {"
3142 use std::io;
3143 let x = 1;
3144 let y = 2;
3145 "},
3146 },
3147 Case {
3148 name: "deletion",
3149 old: indoc! {"
3150 aaa
3151 bbb
3152 ccc
3153 ddd
3154 "},
3155 patch: indoc! {"
3156 @@ -1,4 +1,2 @@
3157 aaa
3158 -bbb
3159 -ccc
3160 ddd
3161 "},
3162 expected_new: indoc! {"
3163 aaa
3164 ddd
3165 "},
3166 },
3167 Case {
3168 name: "multiple_changes",
3169 old: indoc! {"
3170 alpha
3171 beta
3172 gamma
3173 delta
3174 epsilon
3175 "},
3176 patch: indoc! {"
3177 @@ -1,5 +1,5 @@
3178 -alpha
3179 +ALPHA
3180 beta
3181 gamma
3182 -delta
3183 +DELTA
3184 epsilon
3185 "},
3186 expected_new: indoc! {"
3187 ALPHA
3188 beta
3189 gamma
3190 DELTA
3191 epsilon
3192 "},
3193 },
3194 Case {
3195 name: "replace_with_insertion",
3196 old: indoc! {r#"
3197 fn handle() {
3198 modal_state.close();
3199 modal_state.dismiss();
3200 "#},
3201 patch: indoc! {r#"
3202 @@ -1,3 +1,4 @@
3203 fn handle() {
3204 modal_state.close();
3205 + eprintln!("");
3206 modal_state.dismiss();
3207 "#},
3208 expected_new: indoc! {r#"
3209 fn handle() {
3210 modal_state.close();
3211 eprintln!("");
3212 modal_state.dismiss();
3213 "#},
3214 },
3215 Case {
3216 name: "complete_replacement",
3217 old: indoc! {"
3218 aaa
3219 bbb
3220 ccc
3221 "},
3222 patch: indoc! {"
3223 @@ -1,3 +1,3 @@
3224 -aaa
3225 -bbb
3226 -ccc
3227 +xxx
3228 +yyy
3229 +zzz
3230 "},
3231 expected_new: indoc! {"
3232 xxx
3233 yyy
3234 zzz
3235 "},
3236 },
3237 Case {
3238 name: "add_function_body",
3239 old: indoc! {"
3240 fn foo() {
3241 modal_state.dismiss();
3242 }
3243
3244 fn
3245
3246 fn handle_keystroke() {
3247 "},
3248 patch: indoc! {"
3249 @@ -1,6 +1,8 @@
3250 fn foo() {
3251 modal_state.dismiss();
3252 }
3253
3254 -fn
3255 +fn handle_submit() {
3256 + todo()
3257 +}
3258
3259 fn handle_keystroke() {
3260 "},
3261 expected_new: indoc! {"
3262 fn foo() {
3263 modal_state.dismiss();
3264 }
3265
3266 fn handle_submit() {
3267 todo()
3268 }
3269
3270 fn handle_keystroke() {
3271 "},
3272 },
3273 Case {
3274 name: "with_cursor_offset",
3275 old: indoc! {r#"
3276 fn main() {
3277 println!();
3278 }
3279 "#},
3280 patch: indoc! {r#"
3281 @@ -1,3 +1,3 @@
3282 fn main() {
3283 - println!();
3284 + eprintln!("");
3285 }
3286 "#},
3287 expected_new: indoc! {r#"
3288 fn main() {
3289 eprintln!("<|user_cursor|>");
3290 }
3291 "#},
3292 },
3293 Case {
3294 name: "non_local_hunk_header_pure_insertion_repro",
3295 old: indoc! {"
3296 aaa
3297 bbb
3298 "},
3299 patch: indoc! {"
3300 @@ -20,2 +20,3 @@
3301 aaa
3302 +xxx
3303 bbb
3304 "},
3305 expected_new: indoc! {"
3306 aaa
3307 xxx
3308 bbb
3309 "},
3310 },
3311 Case {
3312 name: "empty_patch_produces_no_edits_marker",
3313 old: indoc! {"
3314 aaa
3315 bbb
3316 "},
3317 patch: "@@ -20,2 +20,3 @@\n",
3318 expected_new: indoc! {"
3319 aaa
3320 bbb
3321 "},
3322 },
3323 ];
3324
3325 for case in &cases {
3326 // The cursor_offset for patch_to_edit_commands is relative to
3327 // the first hunk's new text (context + additions). We compute
3328 // it by finding where the marker sits in the expected output
3329 // (which mirrors the new text of the hunk).
3330 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3331
3332 let commands =
3333 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3334 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3335
3336 assert!(
3337 hashline::output_has_edit_commands(&commands),
3338 "case {}: expected edit commands, got: {commands:?}",
3339 case.name,
3340 );
3341
3342 let applied = hashline::apply_edit_commands(case.old, &commands);
3343 assert_eq!(applied, case.expected_new, "case {}", case.name);
3344 }
3345 }
3346 }
3347}
3348
3349pub mod seed_coder {
3350 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3351 //!
3352 //! Seed-Coder uses different FIM tokens and order than Qwen:
3353 //! - SPM order: suffix comes FIRST, then prefix, then middle
3354 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3355 //! - File markers: StarCoder-style `<filename>path` (single token + path)
3356 //!
3357 //! All context (related files, edit history) goes in the PREFIX section.
3358 //! The suffix contains only code after the editable region.
3359 //!
3360 //! Example prompt:
3361 //!
3362 //! <[fim-suffix]>
3363 //! code after editable region
3364 //! <[fim-prefix]><filename>related/file.py
3365 //! related file content
3366 //!
3367 //! <filename>edit_history
3368 //! --- a/some_file.py
3369 //! +++ b/some_file.py
3370 //! -old
3371 //! +new
3372 //!
3373 //! <filename>path/to/target_file.py
3374 //! code before editable region
3375 //! <<<<<<< CURRENT
3376 //! code that
3377 //! needs to<|user_cursor|>
3378 //! be rewritten
3379 //! =======
3380 //! <[fim-middle]>
3381 //!
3382 //! Expected output (model generates):
3383 //!
3384 //! updated
3385 //! code with
3386 //! changes applied
3387 //! >>>>>>> UPDATED
3388
3389 use super::*;
3390
/// Token that opens the suffix section; in SPM order the suffix comes first.
pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
/// Token that opens the prefix section, which follows the suffix.
pub const FIM_PREFIX: &str = "<[fim-prefix]>";
/// Token that ends the prompt; the model's output follows it.
pub const FIM_MIDDLE: &str = "<[fim-middle]>";
/// StarCoder-style file marker, written immediately before a path.
pub const FILE_MARKER: &str = "<filename>";

/// Line that opens the current (to-be-rewritten) editable region in the prompt.
pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
/// Line that closes the current editable region in the prompt.
pub const SEPARATOR: &str = "=======\n";
/// Line that terminates the updated text in the expected model output.
pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

/// NOTE(review): presumably the output text used when no edit is needed —
/// its use is not visible in this section; confirm against callers.
pub const NO_EDITS: &str = "NO_EDITS\n";
3401
3402 pub fn special_tokens() -> &'static [&'static str] {
3403 &[
3404 FIM_SUFFIX,
3405 FIM_PREFIX,
3406 FIM_MIDDLE,
3407 FILE_MARKER,
3408 START_MARKER,
3409 SEPARATOR,
3410 END_MARKER,
3411 CURSOR_MARKER,
3412 ]
3413 }
3414
3415 pub fn write_cursor_excerpt_section(
3416 prompt: &mut String,
3417 path: &Path,
3418 context: &str,
3419 editable_range: &Range<usize>,
3420 cursor_offset: usize,
3421 ) {
3422 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3423 prompt.push_str(§ion);
3424 }
3425
3426 pub fn format_prompt_with_budget(
3427 path: &Path,
3428 context: &str,
3429 editable_range: &Range<usize>,
3430 cursor_offset: usize,
3431 events: &[Arc<Event>],
3432 related_files: &[RelatedFile],
3433 diagnostics: &[ActiveBufferDiagnostic],
3434 max_tokens: usize,
3435 ) -> String {
3436 let cursor_prefix_section =
3437 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3438 assemble_fim_prompt(
3439 context,
3440 editable_range,
3441 &cursor_prefix_section,
3442 events,
3443 related_files,
3444 diagnostics,
3445 None,
3446 max_tokens,
3447 )
3448 }
3449
3450 pub fn assemble_fim_prompt(
3451 context: &str,
3452 editable_range: &Range<usize>,
3453 cursor_prefix_section: &str,
3454 events: &[Arc<Event>],
3455 related_files: &[RelatedFile],
3456 diagnostics: &[ActiveBufferDiagnostic],
3457 cursor_buffer_row: Option<u32>,
3458 max_tokens: usize,
3459 ) -> String {
3460 let suffix_section = build_suffix_section(context, editable_range);
3461
3462 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3463 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3464 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3465
3466 let edit_history_section = super::format_edit_history_within_budget(
3467 events,
3468 FILE_MARKER,
3469 "edit_history",
3470 budget_after_cursor,
3471 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3472 );
3473 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3474 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
3475
3476 let diagnostics_section = super::format_active_buffer_diagnostics_with_budget(
3477 diagnostics,
3478 cursor_buffer_row,
3479 budget_after_edit_history,
3480 );
3481 let diagnostics_tokens = estimate_tokens(diagnostics_section.len() + "\n".len());
3482 let budget_after_diagnostics = budget_after_edit_history.saturating_sub(diagnostics_tokens);
3483
3484 let related_files_section = super::format_related_files_within_budget(
3485 related_files,
3486 FILE_MARKER,
3487 "",
3488 budget_after_diagnostics,
3489 );
3490
3491 let mut prompt = String::new();
3492 prompt.push_str(&suffix_section);
3493 prompt.push_str(FIM_PREFIX);
3494 prompt.push_str(&diagnostics_section);
3495 if !diagnostics_section.is_empty() {
3496 prompt.push('\n');
3497 }
3498 prompt.push_str(&related_files_section);
3499 if !related_files_section.is_empty() {
3500 prompt.push('\n');
3501 }
3502 prompt.push_str(&edit_history_section);
3503 if !edit_history_section.is_empty() {
3504 prompt.push('\n');
3505 }
3506 prompt.push_str(cursor_prefix_section);
3507 prompt.push_str(FIM_MIDDLE);
3508
3509 prompt
3510 }
3511
3512 pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3513 let mut section = String::new();
3514 section.push_str(FIM_SUFFIX);
3515 section.push_str(&context[editable_range.end..]);
3516 if !section.ends_with('\n') {
3517 section.push('\n');
3518 }
3519 section
3520 }
3521
3522 fn build_cursor_prefix_section(
3523 path: &Path,
3524 context: &str,
3525 editable_range: &Range<usize>,
3526 cursor_offset: usize,
3527 ) -> String {
3528 let mut section = String::new();
3529 let path_str = path.to_string_lossy();
3530 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3531
3532 section.push_str(&context[..editable_range.start]);
3533 section.push_str(START_MARKER);
3534 section.push_str(&context[editable_range.start..cursor_offset]);
3535 section.push_str(CURSOR_MARKER);
3536 section.push_str(&context[cursor_offset..editable_range.end]);
3537 if !section.ends_with('\n') {
3538 section.push('\n');
3539 }
3540 section.push_str(SEPARATOR);
3541 section
3542 }
3543
3544 /// Format patch as containing no changes if it's empty; otherwise return None.
3545 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3546 // Count lines in the patch
3547 let empty_patch = patch.lines().count() <= 3;
3548 if empty_patch {
3549 Some(format!("{NO_EDITS}{END_MARKER}"))
3550 } else {
3551 None
3552 }
3553 }
3554}
3555
3556pub mod v0304_variable_edit {
3557 //! A prompt format with no fixed editable region. The entire context is shown
3558 //! to the model, and it chooses which text to replace by outputting surrounding
3559 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3560 //! text.
3561 //!
3562 //! Example prompt:
3563 //!
3564 //! <|file_sep|>path/to/file.py
3565 //! zero
3566 //! one
3567 //! two
3568 //! three<|user_cursor|>
3569 //! four
3570 //! five
3571 //! <|fim_prefix|>
//!
3573 //! Expected output (model generates):
3574 //!
3575 //! two
3576 //! <|fim_middle|>
3577 //! THREE
3578 //! <|fim_suffix|>
3579 //! four
3580 //!
3581 //! The output means: find "two\n...\nfour" in the context, and replace
3582 //! everything between "two\n" and "four" with "THREE\n".
3583
3584 use super::*;
3585
3586 pub fn special_tokens() -> &'static [&'static str] {
3587 &[
3588 "<|fim_prefix|>",
3589 "<|fim_suffix|>",
3590 "<|fim_middle|>",
3591 "<|file_sep|>",
3592 CURSOR_MARKER,
3593 ]
3594 }
3595
3596 pub fn write_cursor_excerpt_section(
3597 prompt: &mut String,
3598 path: &Path,
3599 context: &str,
3600 cursor_offset: usize,
3601 ) {
3602 let path_str = path.to_string_lossy();
3603 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3604
3605 prompt.push_str(&context[..cursor_offset]);
3606 prompt.push_str(CURSOR_MARKER);
3607 prompt.push_str(&context[cursor_offset..]);
3608 if !prompt.ends_with('\n') {
3609 prompt.push('\n');
3610 }
3611 prompt.push_str("<|fim_prefix|>\n")
3612 }
3613
3614 /// Apply a variable-edit model output to the original context text.
3615 ///
3616 /// The model output has the form:
3617 ///
3618 /// - prefix context lines
3619 /// - `<|fim_middle|>`
3620 /// - new text
3621 /// - `<|fim_suffix|>`
3622 /// - suffix context lines
3623 ///
3624 /// We locate the prefix/suffix context lines in the original text and replace
3625 /// everything between them with the new text.
3626 pub fn apply_variable_edit(
3627 context: &str,
3628 model_output: &str,
3629 ) -> Result<(Range<usize>, String)> {
3630 let (prefix_context, rest) = model_output
3631 .split_once("<|fim_middle|>\n")
3632 .or_else(|| model_output.split_once("<|fim_middle|>"))
3633 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3634
3635 let (new_text, suffix_context) = rest
3636 .split_once("<|fim_suffix|>\n")
3637 .or_else(|| rest.split_once("<|fim_suffix|>"))
3638 .unwrap_or((rest, ""));
3639
3640 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3641 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3642 } else {
3643 suffix_context
3644 };
3645
3646 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3647 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3648 + prefix_context.len();
3649 let suffix_offset = if suffix_context.is_empty() {
3650 context.len()
3651 } else {
3652 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3653 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3654 + prefix_offset
3655 };
3656
3657 let edit_range = prefix_offset..suffix_offset;
3658 return Ok((edit_range, new_text.to_string()));
3659 }
3660
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that begins at the start of a line (offset 0 or directly after a `\n`).
///
/// An empty `needle` matches at offset 0. Returns `None` when no occurrence
/// starts at a line boundary.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Test every line start directly. Scanning with `match_indices(needle)`
    // only yields non-overlapping matches, so a line-start match that overlaps
    // an earlier mid-line match (common with repeated lines such as `}`) would
    // be skipped.
    std::iter::once(0)
        .chain(haystack.match_indices('\n').map(|(newline, _)| newline + 1))
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
3671
3672 /// Convert a unified diff patch into the variable-edit output format.
3673 ///
3674 /// Parses `patch` as a unified diff against `old_text` and produces model
3675 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3676 /// delimiters. The diff is resolved by content matching rather than line
3677 /// numbers.
3678 pub fn patch_to_variable_edit_output(
3679 old_text: &str,
3680 patch: &str,
3681 cursor_offset: Option<usize>,
3682 ) -> Result<String> {
3683 // Parse the unified diff into hunks. Each hunk has an `old_context`
3684 // string (context + deleted lines interleaved in order) and a list of
3685 // edits expressed as byte ranges within that context plus replacement
3686 // text.
3687 let hunks = parse_hunks(patch);
3688 if hunks.is_empty() {
3689 return Ok(String::new());
3690 }
3691
3692 // Apply each hunk by finding its old_context in the text and
3693 // performing the edits. We search forward from where the previous
3694 // hunk ended so that hunks are applied in order.
3695 let mut new_text = old_text.to_string();
3696 let mut search_from: usize = 0;
3697 let mut first_hunk_pos: Option<usize> = None;
3698
3699 for hunk in &hunks {
3700 let context_pos = new_text[search_from..]
3701 .find(&hunk.old_context)
3702 .map(|pos| pos + search_from)
3703 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3704
3705 if first_hunk_pos.is_none() {
3706 first_hunk_pos = Some(context_pos);
3707 }
3708
3709 // Apply edits in reverse order so byte offsets remain valid.
3710 for edit in hunk.edits.iter().rev() {
3711 let abs_start = context_pos + edit.range.start;
3712 let abs_end = context_pos + edit.range.end;
3713 new_text.replace_range(abs_start..abs_end, &edit.text);
3714 }
3715
3716 // Advance past this hunk's region in the (now modified) text.
3717 let new_region_len: usize =
3718 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3719 len + edit.text.len() - (edit.range.end - edit.range.start)
3720 });
3721 search_from = context_pos + new_region_len;
3722 }
3723
3724 // Now we have old_text and new_text. Find the changed line range by
3725 // comparing them.
3726 let old_lines: Vec<&str> = old_text.lines().collect();
3727 let new_lines: Vec<&str> = new_text.lines().collect();
3728
3729 // Find first differing line.
3730 let first_changed_row = old_lines
3731 .iter()
3732 .zip(new_lines.iter())
3733 .position(|(a, b)| a != b)
3734 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3735
3736 // Find last differing line (from the end).
3737 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3738 let common_suffix = old_lines
3739 .iter()
3740 .rev()
3741 .zip(new_lines.iter().rev())
3742 .take(max_suffix)
3743 .take_while(|(a, b)| a == b)
3744 .count();
3745
3746 let old_end = old_lines.len() - common_suffix;
3747 let new_end = new_lines.len() - common_suffix;
3748
3749 if first_changed_row == old_end && first_changed_row == new_end {
3750 return Ok(String::new());
3751 }
3752
3753 // Build the replacement text from new_lines[first_diff..new_end].
3754 let mut merged_new_text = String::new();
3755 for line in &new_lines[first_changed_row..new_end] {
3756 merged_new_text.push_str(line);
3757 merged_new_text.push('\n');
3758 }
3759
3760 // cursor_offset is relative to the first hunk's new content in
3761 // new_text. Translate it to an offset within merged_new_text, which
3762 // only contains lines first_diff..new_end of new_text.
3763 if let Some(hunk_offset) = cursor_offset {
3764 let hunk_start = first_hunk_pos.unwrap_or(0);
3765 let absolute_pos = hunk_start + hunk_offset;
3766
3767 // Byte offset where first_diff starts in new_text.
3768 let merged_start: usize = new_lines[..first_changed_row]
3769 .iter()
3770 .map(|line| line.len() + 1)
3771 .sum();
3772
3773 if absolute_pos >= merged_start {
3774 let relative_offset = absolute_pos - merged_start;
3775 if relative_offset <= merged_new_text.len() {
3776 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3777 }
3778 }
3779 }
3780
3781 // Build output with 2 lines of context above and below.
3782 let context_lines_count = 2;
3783 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3784 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3785
3786 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3787 let pattern = &lines[line_range];
3788 let pattern_len = pattern.len();
3789
3790 let mut count = 0;
3791 for offset in 0..=lines.len() - pattern_len {
3792 if &lines[offset..offset + pattern_len] == pattern {
3793 count += 1;
3794 }
3795 }
3796 count
3797 }
3798
3799 // Expand prefix and suffix until they are unique
3800 while prefix_start > 0 {
3801 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3802 prefix_start -= 1;
3803 } else {
3804 break;
3805 }
3806 }
3807 while suffix_end < old_lines.len() {
3808 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3809 suffix_end += 1;
3810 } else {
3811 break;
3812 }
3813 }
3814
3815 let mut output = String::new();
3816 for line in &old_lines[prefix_start..first_changed_row] {
3817 output.push_str(line);
3818 output.push('\n');
3819 }
3820 output.push_str("<|fim_middle|>\n");
3821 output.push_str(&merged_new_text);
3822 output.push_str("<|fim_suffix|>\n");
3823 for line in &old_lines[old_end..suffix_end] {
3824 output.push_str(line);
3825 output.push('\n');
3826 }
3827
3828 Ok(output)
3829 }
3830
/// One `@@` hunk of a unified diff, described by content instead of line numbers.
struct ParsedHunk {
    /// Context and removed lines in patch order, each terminated by `\n`.
    old_context: String,
    /// Edits as byte ranges into `old_context`, in increasing order.
    edits: Vec<ParsedEdit>,
}

/// A single replacement inside a `ParsedHunk`.
struct ParsedEdit {
    /// Byte range of `old_context` being replaced (empty for a pure insertion).
    range: Range<usize>,
    /// Replacement text (empty for a pure deletion).
    text: String,
}

/// Parse a unified diff into content-based hunks. Each hunk contains an
/// `old_context` string (context lines + deleted lines, which together
/// form the text that should be found in the original) and a list of edits
/// expressed as byte ranges within that context.
///
/// Lines before the first `@@` are ignored. A `---` line is treated as a file
/// header only when the next line starts with `+++`, and a `+++` line only
/// when it directly follows such a header; otherwise they are ordinary
/// removed/added lines (e.g. a removed `-- comment` or an added `++i;`).
/// Hunks with neither context nor edits are dropped.
fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
    let mut hunks = Vec::new();
    let mut current: Option<ParsedHunk> = None;
    let mut previous_was_old_file_header = false;

    let mut lines = patch.lines().peekable();
    while let Some(line) = lines.next() {
        let follows_old_file_header = previous_was_old_file_header;
        previous_was_old_file_header = false;

        if line.starts_with("@@") {
            if let Some(hunk) = current.take() {
                if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
                    hunks.push(hunk);
                }
            }
            current = Some(ParsedHunk {
                old_context: String::new(),
                edits: Vec::new(),
            });
        } else if follows_old_file_header && line.starts_with("+++") {
            // Second half of a `---` / `+++` file header pair.
            continue;
        } else if line.starts_with("---")
            && matches!(lines.peek(), Some(next) if next.starts_with("+++"))
        {
            // First half of a file header pair.
            previous_was_old_file_header = true;
            continue;
        } else if let Some(hunk) = &mut current {
            if let Some(added) = line.strip_prefix('+') {
                let pos = hunk.old_context.len();
                // Extend the previous edit when this addition touches it.
                if let Some(last_edit) = hunk.edits.last_mut() {
                    if last_edit.range.end == pos {
                        writeln!(&mut last_edit.text, "{added}").ok();
                        continue;
                    }
                }
                hunk.edits.push(ParsedEdit {
                    range: pos..pos,
                    text: format!("{added}\n"),
                });
            } else if let Some(removed) = line.strip_prefix('-') {
                let start = hunk.old_context.len();
                writeln!(&mut hunk.old_context, "{removed}").ok();
                let end = hunk.old_context.len();
                // Extend the previous edit when this removal touches it.
                if let Some(last_edit) = hunk.edits.last_mut() {
                    if last_edit.range.end == start {
                        last_edit.range.end = end;
                        continue;
                    }
                }
                hunk.edits.push(ParsedEdit {
                    range: start..end,
                    text: String::new(),
                });
            } else {
                // Context line; the leading space is optional.
                let ctx = line.strip_prefix(' ').unwrap_or(line);
                writeln!(&mut hunk.old_context, "{ctx}").ok();
            }
        }
    }

    if let Some(hunk) = current {
        if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
            hunks.push(hunk);
        }
    }

    hunks
}
3904
3905 #[cfg(test)]
3906 mod tests {
3907 use super::*;
3908 use indoc::indoc;
3909
/// Table-driven check that `apply_variable_edit` turns each model output into
/// the expected edited text.
#[test]
fn test_apply_variable_edit() {
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for Case {
        name,
        original,
        model_output,
        expected,
    } in cases
    {
        let (edit_range, replacement) = apply_variable_edit(original, model_output).unwrap();
        let mut edited = String::from(original);
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, expected, "{}", name);
    }
}
4126
/// Table-driven round trip: each patch is converted to variable-edit output,
/// compared against the expected output, and then both the produced and the
/// expected output are applied back to `old` and checked against
/// `expected_after_apply`. Case names are unique so a failure message
/// identifies exactly one case.
#[test]
fn test_patch_to_variable_edit() {
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        cursor_offset: Option<usize>,
        expected_variable_edit: &'static str,
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                 b
                +X
                 c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                 b
                -c
                 d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                 two
                 a
                 b
                -c
                +C
                 d
                 three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            name: "expands_context_when_nested_braces_repeat_before_and_after",
            old: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        two();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                     {
                -        two();
                +        TWO();
                     }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                        one();
                    }
                }
                {
                    {
                <|fim_middle|>
                        TWO();
                <|fim_suffix|>
                    }
                }
                {
                    {
                        three();
            "},
            expected_after_apply: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        TWO();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
        },
    ];

    for case in cases {
        // Patch -> variable-edit output.
        let output = patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
            .unwrap_or_else(|error| {
                panic!("failed converting patch for {}: {error}", case.name)
            });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // Applying the produced output must give the expected text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // The hand-written expected output must apply to the same result.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
4543
/// Checks the exact prompt text written for a cursor placed inside a line.
#[test]
fn test_write_cursor_excerpt_section() {
    let context = "fn main() {\n    hello();\n}\n";
    let mut prompt = String::new();

    write_cursor_excerpt_section(&mut prompt, Path::new("test.rs"), context, 17);

    let expected =
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n";
    assert_eq!(prompt, expected);
}
4556 }
4557}
4558
4559/// The zeta1 prompt format
4560pub mod zeta1 {
4561 use super::*;
4562 use std::fmt::Write;
4563
/// Cursor marker used by the zeta1 format; distinct from the crate-level
/// `CURSOR_MARKER`, to which `clean_zeta1_model_output` converts it.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Written at the top of the excerpt when it starts at the beginning of the file.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Marks where the editable region of the excerpt begins.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Marks where the editable region of the excerpt ends.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

/// Opening of every zeta1 prompt: the instruction text followed by the
/// heading under which the user's edit events are listed.
const INSTRUCTION_HEADER: &str = concat!(
    "### Instruction:\n",
    "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
    "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
    "into account the cursor location.\n\n",
    "### User Edits:\n\n"
);
/// Heading placed between the edit events and the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
/// Heading that closes the prompt, after the excerpt.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4578
4579 /// Formats a complete zeta1 prompt from the input events and excerpt.
4580 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4581 let mut prompt = String::with_capacity(
4582 INSTRUCTION_HEADER.len()
4583 + input_events.len()
4584 + EXCERPT_HEADER.len()
4585 + input_excerpt.len()
4586 + RESPONSE_HEADER.len(),
4587 );
4588 prompt.push_str(INSTRUCTION_HEADER);
4589 prompt.push_str(input_events);
4590 prompt.push_str(EXCERPT_HEADER);
4591 prompt.push_str(input_excerpt);
4592 prompt.push_str(RESPONSE_HEADER);
4593 prompt
4594 }
4595
4596 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4597 /// editable and context byte-offset ranges within `cursor_excerpt`.
4598 pub fn format_zeta1_from_input(
4599 input: &ZetaPromptInput,
4600 editable_range: Range<usize>,
4601 context_range: Range<usize>,
4602 ) -> String {
4603 let events = format_zeta1_events(&input.events);
4604 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4605 format_zeta1_prompt(&events, &excerpt)
4606 }
4607
4608 /// Formats events in zeta1 style (oldest first).
4609 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4610 let mut result = String::new();
4611 for event in
4612 events
4613 .iter()
4614 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4615 &ZetaFormat::V0114180EditableRegion,
4616 )))
4617 {
4618 let event_string = format_zeta1_event(event);
4619 if event_string.is_empty() {
4620 continue;
4621 }
4622 if !result.is_empty() {
4623 result.push_str("\n\n");
4624 }
4625 result.push_str(&event_string);
4626 }
4627 result
4628 }
4629
4630 fn format_zeta1_event(event: &Event) -> String {
4631 match event {
4632 Event::BufferChange {
4633 path,
4634 old_path,
4635 diff,
4636 ..
4637 } => {
4638 let mut prompt = String::new();
4639 if old_path != path {
4640 writeln!(
4641 prompt,
4642 "User renamed {} to {}\n",
4643 old_path.display(),
4644 path.display()
4645 )
4646 .ok();
4647 }
4648 if !diff.is_empty() {
4649 write!(
4650 prompt,
4651 "User edited {}:\n```diff\n{}\n```",
4652 path.display(),
4653 diff
4654 )
4655 .ok();
4656 }
4657 prompt
4658 }
4659 }
4660 }
4661
4662 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4663 /// within `cursor_excerpt`.
4664 fn format_zeta1_excerpt(
4665 input: &ZetaPromptInput,
4666 editable_range: Range<usize>,
4667 context_range: Range<usize>,
4668 ) -> String {
4669 let path_str = input.cursor_path.to_string_lossy();
4670 let excerpt = &*input.cursor_excerpt;
4671 let cursor_offset = input.cursor_offset_in_excerpt;
4672
4673 let mut prompt = String::new();
4674 writeln!(&mut prompt, "```{path_str}").ok();
4675
4676 let starts_at_file_beginning =
4677 input.excerpt_start_row == Some(0) && context_range.start == 0;
4678 if starts_at_file_beginning {
4679 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4680 }
4681
4682 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4683
4684 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4685 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4686 prompt.push_str(CURSOR_MARKER);
4687 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4688 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4689
4690 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4691 write!(prompt, "\n```").ok();
4692
4693 prompt
4694 }
4695
4696 /// Cleans zeta1 model output by extracting content between editable region
4697 /// markers and converting the zeta1 cursor marker to the universal one.
4698 /// Returns `None` if the output doesn't contain the expected markers.
4699 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4700 let content = output.replace(CURSOR_MARKER, "");
4701
4702 let content_start = content
4703 .find(EDITABLE_REGION_START_MARKER)
4704 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4705 .map(|pos| {
4706 if content.as_bytes().get(pos) == Some(&b'\n') {
4707 pos + 1
4708 } else {
4709 pos
4710 }
4711 })
4712 .unwrap_or(0);
4713
4714 let content_end = content
4715 .find(EDITABLE_REGION_END_MARKER)
4716 .map(|pos| {
4717 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4718 pos - 1
4719 } else {
4720 pos
4721 }
4722 })
4723 .unwrap_or(content.len());
4724
4725 if content_start > content_end {
4726 return Some(String::new());
4727 }
4728
4729 let extracted = &content[content_start..content_end];
4730
4731 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4732 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4733 let text_before_cursor = text_before_cursor
4734 .find(EDITABLE_REGION_START_MARKER)
4735 .map(|pos| {
4736 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4737 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4738 after_marker + 1
4739 } else {
4740 after_marker
4741 }
4742 })
4743 .unwrap_or(0);
4744 let offset_in_extracted = zeta1_cursor_pos
4745 .saturating_sub(text_before_cursor)
4746 .min(extracted.len());
4747 offset_in_extracted
4748 });
4749
4750 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4751 if let Some(offset) = cursor_offset {
4752 result.push_str(&extracted[..offset]);
4753 result.push_str(super::CURSOR_MARKER);
4754 result.push_str(&extracted[offset..]);
4755 } else {
4756 result.push_str(extracted);
4757 }
4758
4759 Some(result)
4760 }
4761}
4762
4763#[cfg(test)]
4764mod tests {
4765 use super::*;
4766 use indoc::indoc;
4767
4768 fn make_input(
4769 cursor_excerpt: &str,
4770 editable_range: Range<usize>,
4771 cursor_offset: usize,
4772 events: Vec<Event>,
4773 related_files: Vec<RelatedFile>,
4774 ) -> ZetaPromptInput {
4775 let context_range = 0..cursor_excerpt.len();
4776 ZetaPromptInput {
4777 cursor_path: Path::new("test.rs").into(),
4778 cursor_excerpt: cursor_excerpt.into(),
4779 cursor_offset_in_excerpt: cursor_offset,
4780 excerpt_start_row: None,
4781 events: events.into_iter().map(Arc::new).collect(),
4782 related_files: Some(related_files),
4783 active_buffer_diagnostics: vec![],
4784 excerpt_ranges: ExcerptRanges {
4785 editable_150: editable_range.clone(),
4786 editable_180: editable_range.clone(),
4787 editable_350: editable_range,
4788 editable_150_context_350: context_range.clone(),
4789 editable_180_context_350: context_range.clone(),
4790 editable_350_context_150: context_range,
4791 ..Default::default()
4792 },
4793 syntax_ranges: None,
4794 in_open_source_repo: false,
4795 can_collect_data: false,
4796 repo_url: None,
4797 }
4798 }
4799
4800 fn make_input_with_context_range(
4801 excerpt: &str,
4802 editable_range: Range<usize>,
4803 context_range: Range<usize>,
4804 cursor_offset: usize,
4805 ) -> ZetaPromptInput {
4806 ZetaPromptInput {
4807 cursor_path: Path::new("test.rs").into(),
4808 cursor_excerpt: excerpt.into(),
4809 cursor_offset_in_excerpt: cursor_offset,
4810 excerpt_start_row: None,
4811 events: vec![],
4812 related_files: Some(vec![]),
4813 active_buffer_diagnostics: vec![],
4814 excerpt_ranges: ExcerptRanges {
4815 editable_150: editable_range.clone(),
4816 editable_180: editable_range.clone(),
4817 editable_350: editable_range,
4818 editable_150_context_350: context_range.clone(),
4819 editable_180_context_350: context_range.clone(),
4820 editable_350_context_150: context_range,
4821 ..Default::default()
4822 },
4823 syntax_ranges: None,
4824 in_open_source_repo: false,
4825 can_collect_data: false,
4826 repo_url: None,
4827 }
4828 }
4829
4830 fn make_event(path: &str, diff: &str) -> Event {
4831 Event::BufferChange {
4832 path: Path::new(path).into(),
4833 old_path: Path::new(path).into(),
4834 diff: diff.to_string(),
4835 predicted: false,
4836 in_open_source_repo: false,
4837 }
4838 }
4839
4840 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4841 RelatedFile {
4842 path: Path::new(path).into(),
4843 max_row: content.lines().count() as u32,
4844 excerpts: vec![RelatedExcerpt {
4845 row_range: 0..content.lines().count() as u32,
4846 text: content.into(),
4847 order: 0,
4848 }],
4849 in_open_source_repo: false,
4850 }
4851 }
4852
4853 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4854 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4855 }
4856
4857 fn budget_with_margin(requested_tokens: usize) -> usize {
4858 ((requested_tokens as f64) / 0.9).ceil() as usize
4859 }
4860
4861 #[test]
4862 fn test_no_truncation_when_within_budget() {
4863 let input = make_input(
4864 "prefix\neditable\nsuffix",
4865 7..15,
4866 10,
4867 vec![make_event("a.rs", "-old\n+new\n")],
4868 vec![make_related_file("related.rs", "fn helper() {}\n")],
4869 );
4870
4871 assert_eq!(
4872 format_with_budget(&input, 10000).unwrap(),
4873 indoc! {r#"
4874 <|file_sep|>related.rs
4875 fn helper() {}
4876 <|file_sep|>edit history
4877 --- a/a.rs
4878 +++ b/a.rs
4879 -old
4880 +new
4881 <|file_sep|>test.rs
4882 <|fim_prefix|>
4883 prefix
4884 <|fim_middle|>current
4885 edi<|user_cursor|>table
4886 <|fim_suffix|>
4887
4888 suffix
4889 <|fim_middle|>updated
4890 "#}
4891 .to_string()
4892 );
4893 }
4894
4895 #[test]
4896 fn test_truncation_drops_edit_history_when_budget_tight() {
4897 let input = make_input(
4898 "code",
4899 0..4,
4900 2,
4901 vec![make_event("a.rs", "-x\n+y\n")],
4902 vec![
4903 make_related_file("r1.rs", "aaaaaaa\n"),
4904 make_related_file("r2.rs", "bbbbbbb\n"),
4905 ],
4906 );
4907
4908 assert_eq!(
4909 format_with_budget(&input, 10000).unwrap(),
4910 indoc! {r#"
4911 <|file_sep|>r1.rs
4912 aaaaaaa
4913 <|file_sep|>r2.rs
4914 bbbbbbb
4915 <|file_sep|>edit history
4916 --- a/a.rs
4917 +++ b/a.rs
4918 -x
4919 +y
4920 <|file_sep|>test.rs
4921 <|fim_prefix|>
4922 <|fim_middle|>current
4923 co<|user_cursor|>de
4924 <|fim_suffix|>
4925 <|fim_middle|>updated
4926 "#}
4927 .to_string()
4928 );
4929
4930 assert_eq!(
4931 format_with_budget(&input, budget_with_margin(55)),
4932 Some(
4933 indoc! {r#"
4934 <|file_sep|>edit history
4935 --- a/a.rs
4936 +++ b/a.rs
4937 -x
4938 +y
4939 <|file_sep|>test.rs
4940 <|fim_prefix|>
4941 <|fim_middle|>current
4942 co<|user_cursor|>de
4943 <|fim_suffix|>
4944 <|fim_middle|>updated
4945 "#}
4946 .to_string()
4947 )
4948 );
4949 }
4950
4951 #[test]
4952 fn test_truncation_includes_partial_excerpts() {
4953 let input = make_input(
4954 "x",
4955 0..1,
4956 0,
4957 vec![],
4958 vec![RelatedFile {
4959 path: Path::new("big.rs").into(),
4960 max_row: 30,
4961 in_open_source_repo: false,
4962 excerpts: vec![
4963 RelatedExcerpt {
4964 row_range: 0..10,
4965 text: "first excerpt\n".into(),
4966 order: 0,
4967 },
4968 RelatedExcerpt {
4969 row_range: 10..20,
4970 text: "second excerpt\n".into(),
4971 order: 0,
4972 },
4973 RelatedExcerpt {
4974 row_range: 20..30,
4975 text: "third excerpt\n".into(),
4976 order: 0,
4977 },
4978 ],
4979 }],
4980 );
4981
4982 assert_eq!(
4983 format_with_budget(&input, 10000).unwrap(),
4984 indoc! {r#"
4985 <|file_sep|>big.rs
4986 first excerpt
4987 ...
4988 second excerpt
4989 ...
4990 third excerpt
4991 <|file_sep|>test.rs
4992 <|fim_prefix|>
4993 <|fim_middle|>current
4994 <|user_cursor|>x
4995 <|fim_suffix|>
4996 <|fim_middle|>updated
4997 "#}
4998 .to_string()
4999 );
5000
5001 assert_eq!(
5002 format_with_budget(&input, budget_with_margin(50)).unwrap(),
5003 indoc! {r#"
5004 <|file_sep|>big.rs
5005 first excerpt
5006 ...
5007 <|file_sep|>test.rs
5008 <|fim_prefix|>
5009 <|fim_middle|>current
5010 <|user_cursor|>x
5011 <|fim_suffix|>
5012 <|fim_middle|>updated
5013 "#}
5014 .to_string()
5015 );
5016 }
5017
5018 #[test]
5019 fn test_truncation_prioritizes_lower_order_excerpts() {
5020 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
5021 // With tight budget, only the lower-order excerpt from file_b should be included.
5022 let input = make_input(
5023 "x",
5024 0..1,
5025 0,
5026 vec![],
5027 vec![
5028 RelatedFile {
5029 path: Path::new("file_a.rs").into(),
5030 max_row: 10,
5031 in_open_source_repo: false,
5032 excerpts: vec![RelatedExcerpt {
5033 row_range: 0..10,
5034 text: "low priority content\n".into(),
5035 order: 5,
5036 }],
5037 },
5038 RelatedFile {
5039 path: Path::new("file_b.rs").into(),
5040 max_row: 10,
5041 in_open_source_repo: false,
5042 excerpts: vec![RelatedExcerpt {
5043 row_range: 0..10,
5044 text: "high priority content\n".into(),
5045 order: 1,
5046 }],
5047 },
5048 ],
5049 );
5050
5051 // With large budget, both files included; rendered in stable lexicographic order.
5052 assert_eq!(
5053 format_with_budget(&input, 10000).unwrap(),
5054 indoc! {r#"
5055 <|file_sep|>file_a.rs
5056 low priority content
5057 <|file_sep|>file_b.rs
5058 high priority content
5059 <|file_sep|>test.rs
5060 <|fim_prefix|>
5061 <|fim_middle|>current
5062 <|user_cursor|>x
5063 <|fim_suffix|>
5064 <|fim_middle|>updated
5065 "#}
5066 .to_string()
5067 );
5068
5069 // With tight budget, only file_b (lower order) fits.
5070 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
5071 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
5072 // file_a would need another 14 tokens, which doesn't fit.
5073 assert_eq!(
5074 format_with_budget(&input, budget_with_margin(52)).unwrap(),
5075 indoc! {r#"
5076 <|file_sep|>file_b.rs
5077 high priority content
5078 <|file_sep|>test.rs
5079 <|fim_prefix|>
5080 <|fim_middle|>current
5081 <|user_cursor|>x
5082 <|fim_suffix|>
5083 <|fim_middle|>updated
5084 "#}
5085 .to_string()
5086 );
5087 }
5088
5089 #[test]
5090 fn test_truncation_drops_high_order_excerpts_within_file() {
5091 // A single file has excerpts at order 1 and order 3. With a tight budget,
5092 // only the order-1 excerpts are included while the order-3 excerpt is
5093 // dropped — even though they belong to the same file. This also preserves
5094 // the parent invariant: parent outline items have order ≤ their best
5095 // child, so they're always included when any child is.
5096 let input = make_input(
5097 "x",
5098 0..1,
5099 0,
5100 vec![],
5101 vec![RelatedFile {
5102 path: Path::new("mod.rs").into(),
5103 max_row: 30,
5104 in_open_source_repo: false,
5105 excerpts: vec![
5106 RelatedExcerpt {
5107 row_range: 0..5,
5108 text: "mod header\n".into(),
5109 order: 1,
5110 },
5111 RelatedExcerpt {
5112 row_range: 5..15,
5113 text: "important fn\n".into(),
5114 order: 1,
5115 },
5116 RelatedExcerpt {
5117 row_range: 15..30,
5118 text: "less important fn\n".into(),
5119 order: 3,
5120 },
5121 ],
5122 }],
5123 );
5124
5125 // With large budget, all three excerpts included.
5126 assert_eq!(
5127 format_with_budget(&input, 10000).unwrap(),
5128 indoc! {r#"
5129 <|file_sep|>mod.rs
5130 mod header
5131 ...
5132 important fn
5133 ...
5134 less important fn
5135 <|file_sep|>test.rs
5136 <|fim_prefix|>
5137 <|fim_middle|>current
5138 <|user_cursor|>x
5139 <|fim_suffix|>
5140 <|fim_middle|>updated
5141 "#}
5142 .to_string()
5143 );
5144
5145 // With tight budget, only order<=1 excerpts included (header + important fn).
5146 assert_eq!(
5147 format_with_budget(&input, budget_with_margin(55)).unwrap(),
5148 indoc! {r#"
5149 <|file_sep|>mod.rs
5150 mod header
5151 ...
5152 important fn
5153 ...
5154 <|file_sep|>test.rs
5155 <|fim_prefix|>
5156 <|fim_middle|>current
5157 <|user_cursor|>x
5158 <|fim_suffix|>
5159 <|fim_middle|>updated
5160 "#}
5161 .to_string()
5162 );
5163 }
5164
5165 #[test]
5166 fn test_truncation_drops_older_events_first() {
5167 let input = make_input(
5168 "x",
5169 0..1,
5170 0,
5171 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
5172 vec![],
5173 );
5174
5175 assert_eq!(
5176 format_with_budget(&input, 10000).unwrap(),
5177 indoc! {r#"
5178 <|file_sep|>edit history
5179 --- a/old.rs
5180 +++ b/old.rs
5181 -1
5182 --- a/new.rs
5183 +++ b/new.rs
5184 -2
5185 <|file_sep|>test.rs
5186 <|fim_prefix|>
5187 <|fim_middle|>current
5188 <|user_cursor|>x
5189 <|fim_suffix|>
5190 <|fim_middle|>updated
5191 "#}
5192 .to_string()
5193 );
5194
5195 assert_eq!(
5196 format_with_budget(&input, 60).unwrap(),
5197 indoc! {r#"
5198 <|file_sep|>edit history
5199 --- a/new.rs
5200 +++ b/new.rs
5201 -2
5202 <|file_sep|>test.rs
5203 <|fim_prefix|>
5204 <|fim_middle|>current
5205 <|user_cursor|>x
5206 <|fim_suffix|>
5207 <|fim_middle|>updated
5208 "#}
5209 .to_string()
5210 );
5211 }
5212
5213 #[test]
5214 fn test_cursor_excerpt_always_included_with_minimal_budget() {
5215 let input = make_input(
5216 "fn main() {}",
5217 0..12,
5218 3,
5219 vec![make_event("a.rs", "-old\n+new\n")],
5220 vec![make_related_file("related.rs", "helper\n")],
5221 );
5222
5223 assert!(format_with_budget(&input, 30).is_none())
5224 }
5225
5226 #[track_caller]
5227 fn format_seed_coder(input: &ZetaPromptInput) -> String {
5228 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5229 .expect("seed coder prompt formatting should succeed")
5230 }
5231
5232 #[track_caller]
5233 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5234 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5235 .expect("seed coder prompt formatting should succeed")
5236 }
5237
5238 #[test]
5239 fn test_seed_coder_alias_matches_v0211_seed_coder() {
5240 let input = make_input(
5241 "prefix\neditable\nsuffix",
5242 7..15,
5243 10,
5244 vec![make_event("a.rs", "-old\n+new\n")],
5245 vec![make_related_file("related.rs", "fn helper() {}\n")],
5246 );
5247
5248 assert_eq!(
5249 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000),
5250 format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000)
5251 );
5252 assert_eq!(
5253 ZetaFormat::parse("V0331SeedCoderModelPy").unwrap(),
5254 ZetaFormat::V0331SeedCoderModelPy
5255 );
5256 }
5257
5258 #[test]
5259 fn test_seed_coder_basic_format() {
5260 let input = make_input(
5261 "prefix\neditable\nsuffix",
5262 7..15,
5263 10,
5264 vec![make_event("a.rs", "-old\n+new\n")],
5265 vec![make_related_file("related.rs", "fn helper() {}\n")],
5266 );
5267
5268 assert_eq!(
5269 format_seed_coder(&input),
5270 indoc! {r#"
5271 <[fim-suffix]>
5272 suffix
5273 <[fim-prefix]><filename>related.rs
5274 fn helper() {}
5275
5276 <filename>edit_history
5277 --- a/a.rs
5278 +++ b/a.rs
5279 -old
5280 +new
5281
5282 <filename>test.rs
5283 prefix
5284 <<<<<<< CURRENT
5285 edi<|user_cursor|>table
5286 =======
5287 <[fim-middle]>"#}
5288 );
5289 }
5290
5291 #[test]
5292 fn test_v0420_formats_diagnostics_before_related_files() {
5293 let mut input = make_input(
5294 "prefix\neditable\nsuffix",
5295 7..15,
5296 10,
5297 vec![],
5298 vec![make_related_file("related.rs", "fn helper() {}\n")],
5299 );
5300 input.active_buffer_diagnostics = vec![ActiveBufferDiagnostic {
5301 severity: Some(1),
5302 message: "missing semicolon".to_string(),
5303 snippet: "let value = 1".to_string(),
5304 snippet_buffer_row_range: 1..2,
5305 diagnostic_range_in_snippet: 12..13,
5306 }];
5307
5308 let prompt =
5309 format_prompt_with_budget_for_format(&input, ZetaFormat::V0420Diagnostics, 10000)
5310 .expect("v0420 prompt formatting should succeed");
5311
5312 assert_eq!(
5313 prompt,
5314 indoc! {r#"
5315 <[fim-suffix]>
5316 suffix
5317 <[fim-prefix]><filename>diagnostics
5318 *missing semicolon*:
5319 ```
5320 let value = 1
5321 ```
5322
5323 <filename>related.rs
5324 fn helper() {}
5325
5326 <filename>test.rs
5327 prefix
5328 <|marker_1|>edi<|user_cursor|>table<|marker_2|>
5329 <[fim-middle]>"#}
5330 );
5331 }
5332
5333 #[test]
5334 fn test_v0317_formats_prompt_with_many_related_files() {
5335 let related_files = (0..900)
5336 .map(|index| {
5337 make_related_file(
5338 &format!("related_{index}.rs"),
5339 "fn helper() {\n let value = 1;\n}\n",
5340 )
5341 })
5342 .collect();
5343
5344 let input = make_input(
5345 "code",
5346 0..4,
5347 2,
5348 vec![make_event("a.rs", "-x\n+y\n")],
5349 related_files,
5350 );
5351
5352 let prompt =
5353 format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5354
5355 assert!(prompt.is_some());
5356 let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5357 assert!(prompt.contains("test.rs"));
5358 assert!(prompt.contains(CURSOR_MARKER));
5359 }
5360
5361 #[test]
5362 fn test_v0327_formats_single_file_prompt_without_related_files() {
5363 let excerpt = indoc! {"
5364 line01
5365 line02
5366 line03
5367 line04
5368 line05
5369 line06
5370 line07
5371 line08
5372 line09
5373 line10
5374 line11
5375 line12
5376 line13
5377 line14
5378 line15
5379 line16
5380 line17
5381 line18
5382 line19
5383 line20
5384 "};
5385 let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5386 let input = make_input(
5387 excerpt,
5388 0..excerpt.len(),
5389 cursor_offset,
5390 vec![make_event("a.rs", "-x\n+y\n")],
5391 vec![make_related_file("related.rs", "fn helper() {}\n")],
5392 );
5393
5394 let prompt =
5395 format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5396 .expect("v0327 prompt should fit");
5397
5398 assert!(prompt.contains("line01"));
5399 assert!(prompt.contains("line20"));
5400 assert!(prompt.contains("<filename>edit_history"));
5401 assert!(prompt.contains("<filename>test.rs"));
5402 assert!(prompt.contains(CURSOR_MARKER));
5403 assert!(!prompt.contains("related.rs"));
5404 assert!(!prompt.contains("fn helper() {}"));
5405 }
5406
5407 #[test]
5408 fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5409 let excerpt = (0..80)
5410 .map(|index| format!("l{index:02}\n"))
5411 .collect::<String>();
5412 let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5413 let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5414
5415 let (context, editable_range, context_range, adjusted_cursor) =
5416 resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5417
5418 assert_eq!(context, excerpt);
5419 assert_eq!(context_range, 0..excerpt.len());
5420 assert_eq!(adjusted_cursor, cursor_offset);
5421 assert!(editable_range.start < adjusted_cursor);
5422 assert!(editable_range.end > adjusted_cursor);
5423 assert!(editable_range.end < excerpt.len());
5424 }
5425
5426 #[test]
5427 fn test_seed_coder_no_context() {
5428 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5429
5430 assert_eq!(
5431 format_seed_coder(&input),
5432 indoc! {r#"
5433 <[fim-suffix]>
5434 after
5435 <[fim-prefix]><filename>test.rs
5436 before
5437 <<<<<<< CURRENT
5438 mid<|user_cursor|>dle
5439 =======
5440 <[fim-middle]>"#}
5441 );
5442 }
5443
5444 #[test]
5445 fn test_seed_coder_truncation_drops_context() {
5446 let input = make_input(
5447 "code",
5448 0..4,
5449 2,
5450 vec![make_event("a.rs", "-x\n+y\n")],
5451 vec![make_related_file("r1.rs", "content\n")],
5452 );
5453
5454 // With large budget, everything is included
5455 assert_eq!(
5456 format_seed_coder(&input),
5457 indoc! {r#"
5458 <[fim-suffix]>
5459 <[fim-prefix]><filename>r1.rs
5460 content
5461
5462 <filename>edit_history
5463 --- a/a.rs
5464 +++ b/a.rs
5465 -x
5466 +y
5467
5468 <filename>test.rs
5469 <<<<<<< CURRENT
5470 co<|user_cursor|>de
5471 =======
5472 <[fim-middle]>"#}
5473 );
5474
5475 assert_eq!(
5476 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5477 None
5478 );
5479
5480 assert_eq!(
5481 format_seed_coder_with_budget(&input, 40),
5482 indoc! {r#"
5483 <[fim-suffix]>
5484 <[fim-prefix]><filename>test.rs
5485 <<<<<<< CURRENT
5486 co<|user_cursor|>de
5487 =======
5488 <[fim-middle]>"#
5489 }
5490 )
5491 }
5492
5493 #[test]
5494 fn test_seed_coder_truncation_prioritizes_lower_order() {
5495 let input = make_input(
5496 "code",
5497 0..4,
5498 2,
5499 vec![],
5500 vec![
5501 RelatedFile {
5502 path: Path::new("low_prio.rs").into(),
5503 max_row: 5,
5504 in_open_source_repo: false,
5505 excerpts: vec![RelatedExcerpt {
5506 row_range: 0..5,
5507 text: "low prio\n".into(),
5508 order: 10,
5509 }],
5510 },
5511 RelatedFile {
5512 path: Path::new("high_prio.rs").into(),
5513 max_row: 5,
5514 in_open_source_repo: false,
5515 excerpts: vec![RelatedExcerpt {
5516 row_range: 0..5,
5517 text: "high prio\n".into(),
5518 order: 1,
5519 }],
5520 },
5521 ],
5522 );
5523
5524 // With large budget, both included; rendered in stable lexicographic order.
5525 assert_eq!(
5526 format_seed_coder(&input),
5527 indoc! {r#"
5528 <[fim-suffix]>
5529 <[fim-prefix]><filename>low_prio.rs
5530 low prio
5531 <filename>high_prio.rs
5532 high prio
5533
5534 <filename>test.rs
5535 <<<<<<< CURRENT
5536 co<|user_cursor|>de
5537 =======
5538 <[fim-middle]>"#}
5539 );
5540
5541 // With tight budget under the generic heuristic, context is dropped but the
5542 // minimal cursor section still fits.
5543 assert_eq!(
5544 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5545 Some(
5546 indoc! {r#"
5547 <[fim-suffix]>
5548 <[fim-prefix]><filename>test.rs
5549 <<<<<<< CURRENT
5550 co<|user_cursor|>de
5551 =======
5552 <[fim-middle]>"#}
5553 .to_string()
5554 )
5555 );
5556 }
5557
5558 #[test]
5559 fn test_format_zeta1_from_input_basic() {
5560 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
5561 let input = ZetaPromptInput {
5562 cursor_path: Path::new("src/main.rs").into(),
5563 cursor_excerpt: excerpt.into(),
5564 cursor_offset_in_excerpt: 30,
5565 excerpt_start_row: Some(0),
5566 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5567 related_files: Some(vec![]),
5568 active_buffer_diagnostics: vec![],
5569 excerpt_ranges: ExcerptRanges {
5570 editable_150: 15..41,
5571 editable_180: 15..41,
5572 editable_350: 15..41,
5573 editable_150_context_350: 0..excerpt.len(),
5574 editable_180_context_350: 0..excerpt.len(),
5575 editable_350_context_150: 0..excerpt.len(),
5576 ..Default::default()
5577 },
5578 syntax_ranges: None,
5579 in_open_source_repo: false,
5580 can_collect_data: false,
5581 repo_url: None,
5582 };
5583
5584 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5585
5586 assert_eq!(
5587 prompt,
5588 concat!(
5589 "### Instruction:\n",
5590 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5591 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5592 "into account the cursor location.\n",
5593 "\n",
5594 "### User Edits:\n",
5595 "\n",
5596 "User edited other.rs:\n",
5597 "```diff\n",
5598 "-old\n",
5599 "+new\n",
5600 "\n",
5601 "```\n",
5602 "\n",
5603 "### User Excerpt:\n",
5604 "\n",
5605 "```src/main.rs\n",
5606 "<|start_of_file|>\n",
5607 "fn before() {}\n",
5608 "<|editable_region_start|>\n",
5609 "fn foo() {\n",
5610 " <|user_cursor_is_here|>let x = 1;\n",
5611 "\n",
5612 "<|editable_region_end|>}\n",
5613 "fn after() {}\n",
5614 "\n",
5615 "```\n",
5616 "\n",
5617 "### Response:\n",
5618 ),
5619 );
5620 }
5621
5622 #[test]
5623 fn test_format_zeta1_from_input_no_start_of_file() {
5624 let excerpt = "fn foo() {\n let x = 1;\n}\n";
5625 let input = ZetaPromptInput {
5626 cursor_path: Path::new("src/main.rs").into(),
5627 cursor_excerpt: excerpt.into(),
5628 cursor_offset_in_excerpt: 15,
5629 excerpt_start_row: Some(10),
5630 events: vec![],
5631 related_files: Some(vec![]),
5632 active_buffer_diagnostics: vec![],
5633 excerpt_ranges: ExcerptRanges {
5634 editable_150: 0..28,
5635 editable_180: 0..28,
5636 editable_350: 0..28,
5637 editable_150_context_350: 0..28,
5638 editable_180_context_350: 0..28,
5639 editable_350_context_150: 0..28,
5640 ..Default::default()
5641 },
5642 syntax_ranges: None,
5643 in_open_source_repo: false,
5644 can_collect_data: false,
5645 repo_url: None,
5646 };
5647
5648 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5649
5650 assert_eq!(
5651 prompt,
5652 concat!(
5653 "### Instruction:\n",
5654 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5655 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5656 "into account the cursor location.\n",
5657 "\n",
5658 "### User Edits:\n",
5659 "\n",
5660 "\n",
5661 "\n",
5662 "### User Excerpt:\n",
5663 "\n",
5664 "```src/main.rs\n",
5665 "<|editable_region_start|>\n",
5666 "fn foo() {\n",
5667 " <|user_cursor_is_here|>let x = 1;\n",
5668 "}\n",
5669 "\n",
5670 "<|editable_region_end|>\n",
5671 "```\n",
5672 "\n",
5673 "### Response:\n",
5674 ),
5675 );
5676 }
5677
5678 #[test]
5679 fn test_format_zeta1_from_input_with_sub_ranges() {
5680 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
5681 let editable_range = 10..37;
5682 let context_range = 0..excerpt.len();
5683
5684 let input = ZetaPromptInput {
5685 cursor_path: Path::new("test.rs").into(),
5686 cursor_excerpt: excerpt.into(),
5687 cursor_offset_in_excerpt: 25,
5688 excerpt_start_row: Some(0),
5689 events: vec![],
5690 related_files: Some(vec![]),
5691 active_buffer_diagnostics: vec![],
5692 excerpt_ranges: ExcerptRanges {
5693 editable_150: editable_range.clone(),
5694 editable_180: editable_range.clone(),
5695 editable_350: editable_range.clone(),
5696 editable_150_context_350: context_range.clone(),
5697 editable_180_context_350: context_range.clone(),
5698 editable_350_context_150: context_range.clone(),
5699 ..Default::default()
5700 },
5701 syntax_ranges: None,
5702 in_open_source_repo: false,
5703 can_collect_data: false,
5704 repo_url: None,
5705 };
5706
5707 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5708
5709 assert_eq!(
5710 prompt,
5711 concat!(
5712 "### Instruction:\n",
5713 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5714 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5715 "into account the cursor location.\n",
5716 "\n",
5717 "### User Edits:\n",
5718 "\n",
5719 "\n",
5720 "\n",
5721 "### User Excerpt:\n",
5722 "\n",
5723 "```test.rs\n",
5724 "<|start_of_file|>\n",
5725 "// prefix\n",
5726 "<|editable_region_start|>\n",
5727 "fn foo() {\n",
5728 " <|user_cursor_is_here|>let x = 1;\n",
5729 "}\n",
5730 "<|editable_region_end|>\n",
5731 "// suffix\n",
5732 "\n",
5733 "```\n",
5734 "\n",
5735 "### Response:\n",
5736 ),
5737 );
5738 }
5739
5740 #[test]
5741 fn test_max_event_count() {
5742 fn make_numbered_event(index: usize) -> Event {
5743 return make_event(
5744 &format!("event-{index}.rs"),
5745 &format!("-old-{index}\n+new-{index}\n"),
5746 );
5747 }
5748 let input = make_input(
5749 "x",
5750 0..1,
5751 0,
5752 (0..3).map(make_numbered_event).collect(),
5753 vec![],
5754 );
5755
5756 let edit_history_section = format_edit_history_within_budget(
5757 &input.events,
5758 "<|file_sep|>",
5759 "edit history",
5760 usize::MAX,
5761 5,
5762 );
5763
5764 assert_eq!(
5765 &edit_history_section,
5766 indoc!(
5767 "
5768 <|file_sep|>edit history
5769 --- a/event-0.rs
5770 +++ b/event-0.rs
5771 -old-0
5772 +new-0
5773 --- a/event-1.rs
5774 +++ b/event-1.rs
5775 -old-1
5776 +new-1
5777 --- a/event-2.rs
5778 +++ b/event-2.rs
5779 -old-2
5780 +new-2
5781 "
5782 )
5783 );
5784
5785 let edit_history_section = format_edit_history_within_budget(
5786 &input.events,
5787 "<|file_sep|>",
5788 "edit history",
5789 usize::MAX,
5790 2,
5791 );
5792
5793 assert_eq!(
5794 &edit_history_section,
5795 indoc!(
5796 "
5797 <|file_sep|>edit history
5798 --- a/event-1.rs
5799 +++ b/event-1.rs
5800 -old-1
5801 +new-1
5802 --- a/event-2.rs
5803 +++ b/event-2.rs
5804 -old-2
5805 +new-2
5806 "
5807 )
5808 );
5809
5810 let edit_history_section = format_edit_history_within_budget(
5811 &input.events,
5812 "<|file_sep|>",
5813 "edit history",
5814 usize::MAX,
5815 0,
5816 );
5817
5818 assert_eq!(&edit_history_section, "");
5819 }
5820
5821 #[test]
5822 fn test_clean_zeta1_model_output_basic() {
5823 let output = indoc! {"
5824 <|editable_region_start|>
5825 fn main() {
5826 println!(\"hello\");
5827 }
5828 <|editable_region_end|>
5829 "};
5830
5831 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5832 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
5833 }
5834
5835 #[test]
5836 fn test_clean_zeta1_model_output_with_cursor() {
5837 let output = indoc! {"
5838 <|editable_region_start|>
5839 fn main() {
5840 <|user_cursor_is_here|>println!(\"hello\");
5841 }
5842 <|editable_region_end|>
5843 "};
5844
5845 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5846 assert_eq!(
5847 cleaned,
5848 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
5849 );
5850 }
5851
5852 #[test]
5853 fn test_clean_zeta1_model_output_no_markers() {
5854 let output = "fn main() {}\n";
5855 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5856 assert_eq!(cleaned, "fn main() {}\n");
5857 }
5858
5859 #[test]
5860 fn test_clean_zeta1_model_output_empty_region() {
5861 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5862 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5863 assert_eq!(cleaned, "");
5864 }
5865
5866 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5867 let mut result = excerpt.to_string();
5868 result.replace_range(
5869 parsed_output.range_in_excerpt.clone(),
5870 &parsed_output.new_editable_region,
5871 );
5872 result
5873 }
5874
5875 #[test]
5876 fn test_parse_zeta2_model_output() {
5877 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5878 let context_start = excerpt.find("ctx start").unwrap();
5879 let context_end = excerpt.find("after ctx").unwrap();
5880 let editable_start = excerpt.find("editable old").unwrap();
5881 let editable_end = editable_start + "editable old\n".len();
5882 let input = make_input_with_context_range(
5883 excerpt,
5884 editable_start..editable_end,
5885 context_start..context_end,
5886 editable_start,
5887 );
5888
5889 let output = parse_zeta2_model_output(
5890 "editable new\n>>>>>>> UPDATED\n",
5891 ZetaFormat::V0131GitMergeMarkersPrefix,
5892 &input,
5893 )
5894 .unwrap();
5895
5896 assert_eq!(
5897 apply_edit(excerpt, &output),
5898 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5899 );
5900 }
5901
5902 #[test]
5903 fn test_parse_zeta2_model_output_identity() {
5904 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5905 let editable_start = excerpt.find("bbb").unwrap();
5906 let editable_end = excerpt.find("ddd").unwrap();
5907 let input = make_input_with_context_range(
5908 excerpt,
5909 editable_start..editable_end,
5910 0..excerpt.len(),
5911 editable_start,
5912 );
5913
5914 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5915 let output =
5916 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5917
5918 assert_eq!(apply_edit(excerpt, &output), excerpt);
5919 }
5920
5921 #[test]
5922 fn test_parse_zeta2_model_output_strips_end_marker() {
5923 let excerpt = "hello\nworld\n";
5924 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5925
5926 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5927 let output1 =
5928 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5929 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5930
5931 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5932 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5933 }
5934
5935 #[test]
5936 fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5937 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5938 let context_start = excerpt.find("ctx start").unwrap();
5939 let context_end = excerpt.find("after ctx").unwrap();
5940 let editable_start = excerpt.find("editable old").unwrap();
5941 let editable_end = editable_start + "editable old\n".len();
5942 let input = make_input_with_context_range(
5943 excerpt,
5944 editable_start..editable_end,
5945 context_start..context_end,
5946 editable_start,
5947 );
5948
5949 let parsed = parse_zeta2_model_output(
5950 "editable new\n>>>>>>> UPDATED\n",
5951 ZetaFormat::V0131GitMergeMarkersPrefix,
5952 &input,
5953 )
5954 .unwrap();
5955 let expected = apply_edit(excerpt, &parsed);
5956 let patch = parsed_output_to_patch(&input, parsed).unwrap();
5957 let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5958
5959 assert_eq!(patched, expected);
5960 }
5961
5962 #[test]
5963 fn test_special_tokens_not_triggered_by_comment_separator() {
5964 // Regression test for https://github.com/zed-industries/zed/issues/52489
5965 let excerpt = "fn main() {\n // =======\n println!(\"hello\");\n}\n";
5966 let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5967 assert!(
5968 !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5969 "comment containing ======= should not trigger special token detection"
5970 );
5971 }
5972}