1pub mod excerpt_ranges;
2pub mod multi_region;
3pub mod udiff;
4
5use anyhow::{Result, anyhow};
6use serde::{Deserialize, Serialize};
7use std::fmt::Write;
8use std::ops::Range;
9use std::path::Path;
10use std::sync::Arc;
11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
12
13pub use crate::excerpt_ranges::{
14 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
15};
16
/// Marker text handed to the editable-region writers as the cursor marker and
/// listed among the special tokens of the multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";

/// Upper bound on the share of the editable region that may be used as
/// prefill. A larger share may make generation more robust, but the
/// prefilled part of the region is no longer editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for `bytes` bytes of text: one token per three bytes,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Shrinks a token budget to 90% of `max_tokens` (rounded down), leaving some
/// slack to avoid overflow.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    const USABLE_FRACTION: f64 = 0.9;
    let usable = USABLE_FRACTION * max_tokens as f64;
    usable.floor() as usize
}
32
33/// Ensure text fits into the tokens budget; trim by line boundaries if needed.
34pub fn clamp_text_to_token_count(text: &str, max_tokens: usize) -> &str {
35 if estimate_tokens(text.len()) <= max_tokens {
36 return text;
37 }
38
39 let mut end_byte_offset = 0;
40
41 for line in text.split_inclusive('\n') {
42 if estimate_tokens(line.len() + end_byte_offset) > max_tokens {
43 break;
44 }
45
46 end_byte_offset += line.len();
47 }
48
49 &text[..end_byte_offset]
50}
51
52#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
53pub struct ZetaPromptInput {
54 pub cursor_path: Arc<Path>,
55 pub cursor_excerpt: Arc<str>,
56 pub cursor_offset_in_excerpt: usize,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
58 pub excerpt_start_row: Option<u32>,
59 pub events: Vec<Arc<Event>>,
60 #[serde(default)]
61 pub related_files: Option<Vec<RelatedFile>>,
62 #[serde(default, skip_serializing_if = "Vec::is_empty")]
63 pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
64 /// These ranges let the server select model-appropriate subsets.
65 pub excerpt_ranges: ExcerptRanges,
66 /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
67 /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
68 /// When present, the server uses these to compute editable/context ranges
69 /// instead of `excerpt_ranges`.
70 #[serde(default, skip_serializing_if = "Option::is_none")]
71 pub syntax_ranges: Option<Vec<Range<usize>>>,
72 #[serde(default)]
73 pub in_open_source_repo: bool,
74 #[serde(default)]
75 pub can_collect_data: bool,
76 #[serde(default, skip_serializing_if = "Option::is_none")]
77 pub repo_url: Option<String>,
78}
79
80#[derive(
81 Default,
82 Clone,
83 Copy,
84 Debug,
85 PartialEq,
86 Eq,
87 Hash,
88 EnumIter,
89 IntoStaticStr,
90 Serialize,
91 Deserialize,
92)]
93#[allow(non_camel_case_types)]
94pub enum ZetaFormat {
95 V0112MiddleAtEnd,
96 V0113Ordered,
97 V0114180EditableRegion,
98 V0120GitMergeMarkers,
99 #[default]
100 V0131GitMergeMarkersPrefix,
101 V0211Prefill,
102 #[serde(alias = "Zeta2")]
103 V0211SeedCoder,
104 V0331SeedCoderModelPy,
105 v0226Hashline,
106 V0304VariableEdit,
107 V0304SeedNoEdits,
108 /// Multi-block marker spans with NO_EDITS sentinel.
109 V0306SeedMultiRegions,
110 /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
111 V0316SeedMultiRegions,
112 /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
113 V0317SeedMultiRegions,
114 /// V0316 with larger block sizes.
115 #[serde(alias = "Zeta2.1")]
116 V0318SeedMultiRegions,
117 /// V0318-style markers over the full available current file excerpt with no related files.
118 V0327SingleFile,
119 /// V0318-style prompt with buffer diagnostics
120 V0420Diagnostics,
121}
122
123impl std::fmt::Display for ZetaFormat {
124 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125 write!(f, "{}", <&'static str>::from(self))
126 }
127}
128
129impl ZetaFormat {
130 pub fn parse(format_name: &str) -> Result<Self> {
131 let lower = format_name.to_lowercase();
132
133 // Exact case-insensitive match takes priority, bypassing ambiguity checks.
134 for variant in ZetaFormat::iter() {
135 if <&'static str>::from(&variant).to_lowercase() == lower {
136 return Ok(variant);
137 }
138 }
139
140 let mut results = ZetaFormat::iter().filter(|version| {
141 <&'static str>::from(version)
142 .to_lowercase()
143 .contains(&lower)
144 });
145 let Some(result) = results.next() else {
146 anyhow::bail!(
147 "`{format_name}` did not match any of:\n{}",
148 Self::options_as_string()
149 );
150 };
151 if results.next().is_some() {
152 anyhow::bail!(
153 "`{format_name}` matched more than one of:\n{}",
154 Self::options_as_string()
155 );
156 }
157 Ok(result)
158 }
159
160 pub fn options_as_string() -> String {
161 ZetaFormat::iter()
162 .map(|format| format!("- {}\n", <&'static str>::from(format)))
163 .collect::<Vec<_>>()
164 .concat()
165 }
166}
167
168#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
169#[serde(tag = "event")]
170pub enum Event {
171 BufferChange {
172 path: Arc<Path>,
173 old_path: Arc<Path>,
174 diff: String,
175 predicted: bool,
176 in_open_source_repo: bool,
177 },
178}
179
180impl Event {
181 pub fn in_open_source_repo(&self) -> bool {
182 match self {
183 Event::BufferChange {
184 in_open_source_repo,
185 ..
186 } => *in_open_source_repo,
187 }
188 }
189}
190
191pub fn write_event(prompt: &mut String, event: &Event) {
192 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
193 for component in path.components() {
194 prompt.push('/');
195 write!(prompt, "{}", component.as_os_str().display()).ok();
196 }
197 }
198 match event {
199 Event::BufferChange {
200 path,
201 old_path,
202 diff,
203 predicted,
204 in_open_source_repo: _,
205 } => {
206 if *predicted {
207 prompt.push_str("// User accepted prediction:\n");
208 }
209 prompt.push_str("--- a");
210 write_path_as_unix_str(prompt, old_path.as_ref());
211 prompt.push_str("\n+++ b");
212 write_path_as_unix_str(prompt, path.as_ref());
213 prompt.push('\n');
214 prompt.push_str(diff);
215 }
216 }
217}
218
219#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
220pub struct ActiveBufferDiagnostic {
221 pub severity: Option<i32>,
222 pub message: String,
223 pub snippet: String,
224 pub snippet_buffer_row_range: Range<u32>,
225 pub diagnostic_range_in_snippet: Range<usize>,
226}
227
228#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
229pub struct RelatedFile {
230 pub path: Arc<Path>,
231 pub max_row: u32,
232 pub excerpts: Vec<RelatedExcerpt>,
233 #[serde(default)]
234 pub in_open_source_repo: bool,
235}
236
237#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
238pub struct RelatedExcerpt {
239 pub row_range: Range<u32>,
240 pub text: Arc<str>,
241 #[serde(default)]
242 pub order: usize,
243}
244
245pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
246 special_tokens_for_format(format).iter().any(|token| {
247 if let Some(line_token) = token.strip_suffix('\n') {
248 input.cursor_excerpt.lines().any(|line| line == line_token)
249 } else {
250 input.cursor_excerpt.contains(token)
251 }
252 })
253}
254
255pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
256 let max_prompt_tokens = match format {
257 ZetaFormat::V0112MiddleAtEnd
258 | ZetaFormat::V0113Ordered
259 | ZetaFormat::V0114180EditableRegion
260 | ZetaFormat::V0120GitMergeMarkers
261 | ZetaFormat::V0131GitMergeMarkersPrefix
262 | ZetaFormat::V0211Prefill
263 | ZetaFormat::V0211SeedCoder
264 | ZetaFormat::v0226Hashline
265 | ZetaFormat::V0304VariableEdit
266 | ZetaFormat::V0304SeedNoEdits
267 | ZetaFormat::V0306SeedMultiRegions
268 | ZetaFormat::V0316SeedMultiRegions
269 | ZetaFormat::V0317SeedMultiRegions
270 | ZetaFormat::V0331SeedCoderModelPy
271 | ZetaFormat::V0318SeedMultiRegions => 4096,
272 ZetaFormat::V0420Diagnostics => 8192,
273 ZetaFormat::V0327SingleFile => 16384,
274 };
275
276 format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
277}
278
279pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
280 match format {
281 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
282 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
283 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
284 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
285 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
286 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
287 ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
288 seed_coder::special_tokens()
289 }
290 ZetaFormat::v0226Hashline => hashline::special_tokens(),
291 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
292 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
293 ZetaFormat::V0316SeedMultiRegions => {
294 static TOKENS: &[&str] = &[
295 seed_coder::FIM_SUFFIX,
296 seed_coder::FIM_PREFIX,
297 seed_coder::FIM_MIDDLE,
298 seed_coder::FILE_MARKER,
299 multi_region::V0316_END_MARKER,
300 CURSOR_MARKER,
301 multi_region::MARKER_TAG_PREFIX,
302 ];
303 TOKENS
304 }
305 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
306 static TOKENS: &[&str] = &[
307 seed_coder::FIM_SUFFIX,
308 seed_coder::FIM_PREFIX,
309 seed_coder::FIM_MIDDLE,
310 seed_coder::FILE_MARKER,
311 multi_region::V0318_END_MARKER,
312 CURSOR_MARKER,
313 multi_region::MARKER_TAG_PREFIX,
314 ];
315 TOKENS
316 }
317 ZetaFormat::V0317SeedMultiRegions => {
318 static TOKENS: &[&str] = &[
319 seed_coder::FIM_SUFFIX,
320 seed_coder::FIM_PREFIX,
321 seed_coder::FIM_MIDDLE,
322 seed_coder::FILE_MARKER,
323 multi_region::V0317_END_MARKER,
324 CURSOR_MARKER,
325 multi_region::RELATIVE_MARKER_TAG_PREFIX,
326 ];
327 TOKENS
328 }
329 ZetaFormat::V0327SingleFile => {
330 static TOKENS: &[&str] = &[
331 seed_coder::FIM_SUFFIX,
332 seed_coder::FIM_PREFIX,
333 seed_coder::FIM_MIDDLE,
334 seed_coder::FILE_MARKER,
335 multi_region::V0327_END_MARKER,
336 CURSOR_MARKER,
337 multi_region::MARKER_TAG_PREFIX,
338 ];
339 TOKENS
340 }
341 ZetaFormat::V0306SeedMultiRegions => {
342 static TOKENS: &[&str] = &[
343 seed_coder::FIM_SUFFIX,
344 seed_coder::FIM_PREFIX,
345 seed_coder::FIM_MIDDLE,
346 seed_coder::FILE_MARKER,
347 seed_coder::START_MARKER,
348 seed_coder::SEPARATOR,
349 seed_coder::END_MARKER,
350 CURSOR_MARKER,
351 multi_region::MARKER_TAG_PREFIX,
352 ];
353 TOKENS
354 }
355 }
356}
357
358/// Returns the (editable_token_limit, context_token_limit) for a given format.
359pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
360 match format {
361 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
362 ZetaFormat::V0114180EditableRegion => (180, 350),
363 ZetaFormat::V0120GitMergeMarkers
364 | ZetaFormat::V0131GitMergeMarkersPrefix
365 | ZetaFormat::V0211Prefill
366 | ZetaFormat::V0211SeedCoder
367 | ZetaFormat::V0331SeedCoderModelPy
368 | ZetaFormat::v0226Hashline
369 | ZetaFormat::V0306SeedMultiRegions
370 | ZetaFormat::V0316SeedMultiRegions
371 | ZetaFormat::V0318SeedMultiRegions
372 | ZetaFormat::V0420Diagnostics
373 | ZetaFormat::V0317SeedMultiRegions
374 | ZetaFormat::V0327SingleFile
375 | ZetaFormat::V0304SeedNoEdits => (350, 150),
376
377 ZetaFormat::V0304VariableEdit => (1024, 0),
378 }
379}
380
381pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
382 match format {
383 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
384 ZetaFormat::V0112MiddleAtEnd
385 | ZetaFormat::V0113Ordered
386 | ZetaFormat::V0114180EditableRegion
387 | ZetaFormat::V0120GitMergeMarkers
388 | ZetaFormat::V0131GitMergeMarkersPrefix
389 | ZetaFormat::V0211Prefill
390 | ZetaFormat::V0211SeedCoder
391 | ZetaFormat::V0331SeedCoderModelPy
392 | ZetaFormat::V0304VariableEdit
393 | ZetaFormat::V0306SeedMultiRegions
394 | ZetaFormat::V0304SeedNoEdits => &[],
395 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
396 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
397 &[multi_region::V0318_END_MARKER]
398 }
399 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
400 ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
401 }
402}
403
404/// Return (editable_range, context_range) for the prompt format
405pub fn excerpt_ranges_for_format(
406 format: ZetaFormat,
407 ranges: &ExcerptRanges,
408) -> (Range<usize>, Range<usize>) {
409 match format {
410 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
411 ranges.editable_150.clone(),
412 ranges.editable_150_context_350.clone(),
413 ),
414 ZetaFormat::V0114180EditableRegion => (
415 ranges.editable_180.clone(),
416 ranges.editable_180_context_350.clone(),
417 ),
418 ZetaFormat::V0120GitMergeMarkers
419 | ZetaFormat::V0131GitMergeMarkersPrefix
420 | ZetaFormat::V0211Prefill
421 | ZetaFormat::V0211SeedCoder
422 | ZetaFormat::V0331SeedCoderModelPy
423 | ZetaFormat::v0226Hashline
424 | ZetaFormat::V0304SeedNoEdits
425 | ZetaFormat::V0306SeedMultiRegions
426 | ZetaFormat::V0316SeedMultiRegions
427 | ZetaFormat::V0318SeedMultiRegions
428 | ZetaFormat::V0317SeedMultiRegions
429 | ZetaFormat::V0420Diagnostics => (
430 ranges.editable_350.clone(),
431 ranges.editable_350_context_150.clone(),
432 ),
433 ZetaFormat::V0327SingleFile => (
434 ranges.editable_350_context_150.clone(),
435 ranges.context_8192.clone().unwrap_or(
436 // shouldn't be used, only for compat with old data/clients
437 ranges.editable_350_context_150.clone(),
438 ),
439 ),
440
441 ZetaFormat::V0304VariableEdit => {
442 let context = ranges
443 .editable_350_context_1024
444 .clone()
445 .or(ranges.editable_350_context_512.clone())
446 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
447 (context.clone(), context)
448 }
449 }
450}
451
452pub fn write_cursor_excerpt_section_for_format(
453 format: ZetaFormat,
454 prompt: &mut String,
455 path: &Path,
456 context: &str,
457 editable_range: &Range<usize>,
458 cursor_offset: usize,
459) {
460 match format {
461 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
462 prompt,
463 path,
464 context,
465 editable_range,
466 cursor_offset,
467 ),
468 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
469 v0113_ordered::write_cursor_excerpt_section(
470 prompt,
471 path,
472 context,
473 editable_range,
474 cursor_offset,
475 )
476 }
477 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
478 prompt,
479 path,
480 context,
481 editable_range,
482 cursor_offset,
483 ),
484 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
485 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
486 prompt,
487 path,
488 context,
489 editable_range,
490 cursor_offset,
491 )
492 }
493 ZetaFormat::V0211SeedCoder
494 | ZetaFormat::V0331SeedCoderModelPy
495 | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
496 prompt,
497 path,
498 context,
499 editable_range,
500 cursor_offset,
501 ),
502 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
503 prompt,
504 path,
505 context,
506 editable_range,
507 cursor_offset,
508 ),
509 ZetaFormat::V0304VariableEdit => {
510 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
511 }
512 ZetaFormat::V0306SeedMultiRegions => {
513 prompt.push_str(&build_v0306_cursor_prefix(
514 path,
515 context,
516 editable_range,
517 cursor_offset,
518 ));
519 }
520 ZetaFormat::V0316SeedMultiRegions => {
521 prompt.push_str(&build_v0316_cursor_prefix(
522 path,
523 context,
524 editable_range,
525 cursor_offset,
526 ));
527 }
528 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
529 prompt.push_str(&build_v0318_cursor_prefix(
530 path,
531 context,
532 editable_range,
533 cursor_offset,
534 ));
535 }
536 ZetaFormat::V0317SeedMultiRegions => {
537 prompt.push_str(&build_v0317_cursor_prefix(
538 path,
539 context,
540 editable_range,
541 cursor_offset,
542 ));
543 }
544 ZetaFormat::V0327SingleFile => {
545 prompt.push_str(&build_v0318_cursor_prefix(
546 path,
547 context,
548 editable_range,
549 cursor_offset,
550 ));
551 }
552 }
553}
554
555fn build_v0306_cursor_prefix(
556 path: &Path,
557 context: &str,
558 editable_range: &Range<usize>,
559 cursor_offset: usize,
560) -> String {
561 let mut section = String::new();
562 let path_str = path.to_string_lossy();
563 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
564
565 section.push_str(&context[..editable_range.start]);
566 section.push_str(seed_coder::START_MARKER);
567
568 let editable_text = &context[editable_range.clone()];
569 let cursor_in_editable = cursor_offset - editable_range.start;
570 multi_region::write_editable_with_markers(
571 &mut section,
572 editable_text,
573 cursor_in_editable,
574 CURSOR_MARKER,
575 );
576
577 if !section.ends_with('\n') {
578 section.push('\n');
579 }
580 section.push_str(seed_coder::SEPARATOR);
581 section
582}
583
584fn build_v0316_cursor_prefix(
585 path: &Path,
586 context: &str,
587 editable_range: &Range<usize>,
588 cursor_offset: usize,
589) -> String {
590 let mut section = String::new();
591 let path_str = path.to_string_lossy();
592 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
593
594 section.push_str(&context[..editable_range.start]);
595
596 let editable_text = &context[editable_range.clone()];
597 let cursor_in_editable = cursor_offset - editable_range.start;
598 multi_region::write_editable_with_markers_v0316(
599 &mut section,
600 editable_text,
601 cursor_in_editable,
602 CURSOR_MARKER,
603 );
604
605 if !section.ends_with('\n') {
606 section.push('\n');
607 }
608 section
609}
610
611fn build_v0318_cursor_prefix(
612 path: &Path,
613 context: &str,
614 editable_range: &Range<usize>,
615 cursor_offset: usize,
616) -> String {
617 let mut section = String::new();
618 let path_str = path.to_string_lossy();
619 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
620
621 section.push_str(&context[..editable_range.start]);
622
623 let editable_text = &context[editable_range.clone()];
624 let cursor_in_editable = cursor_offset - editable_range.start;
625 multi_region::write_editable_with_markers_v0318(
626 &mut section,
627 editable_text,
628 cursor_in_editable,
629 CURSOR_MARKER,
630 );
631
632 if !section.ends_with('\n') {
633 section.push('\n');
634 }
635 section
636}
637
638fn build_v0317_cursor_prefix(
639 path: &Path,
640 context: &str,
641 editable_range: &Range<usize>,
642 cursor_offset: usize,
643) -> String {
644 let mut section = String::new();
645 let path_str = path.to_string_lossy();
646 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
647
648 section.push_str(&context[..editable_range.start]);
649
650 let editable_text = &context[editable_range.clone()];
651 let cursor_in_editable = cursor_offset - editable_range.start;
652 multi_region::write_editable_with_markers_v0317(
653 &mut section,
654 editable_text,
655 cursor_in_editable,
656 CURSOR_MARKER,
657 );
658
659 if !section.ends_with('\n') {
660 section.push('\n');
661 }
662 section
663}
664
/// Converts a byte range of `text` into a half-open range of rows.
///
/// The start row is the number of newlines before `range.start`. The end row
/// is exclusive: a final line that is not terminated by a newline at
/// `range.end` still counts as a row.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.bytes().filter(|&byte| byte == b'\n').count() as u32;

    let start_row = newline_count(&text[..range.start]);
    let interior_newlines = newline_count(&text[range.clone()]);
    let ends_on_line_break = text[..range.end].ends_with('\n');

    let end_row = start_row + interior_newlines + u32::from(!ends_on_line_break);
    start_row..end_row
}
673
674fn assemble_single_file_fim_prompt(
675 context: &str,
676 editable_range: &Range<usize>,
677 cursor_prefix_section: &str,
678 events: &[Arc<Event>],
679 max_tokens: usize,
680) -> String {
681 let suffix_section = seed_coder::build_suffix_section(context, editable_range);
682
683 let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
684 let cursor_prefix_tokens =
685 estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
686 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
687
688 let edit_history_section = format_edit_history_within_budget(
689 events,
690 seed_coder::FILE_MARKER,
691 "edit_history",
692 budget_after_cursor,
693 max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
694 );
695
696 let mut prompt = String::new();
697 prompt.push_str(&suffix_section);
698 prompt.push_str(seed_coder::FIM_PREFIX);
699 prompt.push_str(&edit_history_section);
700 if !edit_history_section.is_empty() {
701 prompt.push('\n');
702 }
703 prompt.push_str(cursor_prefix_section);
704 prompt.push_str(seed_coder::FIM_MIDDLE);
705 prompt
706}
707
708pub fn format_prompt_with_budget_for_format(
709 input: &ZetaPromptInput,
710 format: ZetaFormat,
711 max_tokens: usize,
712) -> Option<String> {
713 let (context, editable_range, context_range, cursor_offset) =
714 resolve_cursor_region(input, format);
715 let path = &*input.cursor_path;
716
717 let empty_files = Vec::new();
718 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
719 let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
720 let relative_row_range =
721 offset_range_to_row_range(&input.cursor_excerpt, context_range.clone());
722 let row_range = relative_row_range.start + cursor_excerpt_start_row
723 ..relative_row_range.end + cursor_excerpt_start_row;
724 filter_redundant_excerpts(
725 input_related_files.to_vec(),
726 input.cursor_path.as_ref(),
727 row_range,
728 )
729 } else {
730 input_related_files.to_vec()
731 };
732 let related_files = filtered_related_files.as_slice();
733
734 let prompt = match format {
735 ZetaFormat::V0211SeedCoder
736 | ZetaFormat::V0331SeedCoderModelPy
737 | ZetaFormat::V0304SeedNoEdits
738 | ZetaFormat::V0306SeedMultiRegions
739 | ZetaFormat::V0316SeedMultiRegions
740 | ZetaFormat::V0318SeedMultiRegions
741 | ZetaFormat::V0317SeedMultiRegions
742 | ZetaFormat::V0420Diagnostics => {
743 let mut cursor_section = String::new();
744
745 write_cursor_excerpt_section_for_format(
746 format,
747 &mut cursor_section,
748 path,
749 context,
750 &editable_range,
751 cursor_offset,
752 );
753
754 let cursor_buffer_row = input.excerpt_start_row.map(|excerpt_start_row| {
755 excerpt_start_row
756 + input.cursor_excerpt[..context_range.start + cursor_offset]
757 .bytes()
758 .filter(|byte| *byte == b'\n')
759 .count() as u32
760 });
761
762 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
763 seed_coder::assemble_fim_prompt(
764 context,
765 &editable_range,
766 &cursor_section,
767 &input.events,
768 related_files,
769 if format == ZetaFormat::V0420Diagnostics {
770 &input.active_buffer_diagnostics
771 } else {
772 &[]
773 },
774 cursor_buffer_row,
775 budget_with_margin,
776 )
777 }
778 ZetaFormat::V0327SingleFile => {
779 let mut cursor_section = String::new();
780 write_cursor_excerpt_section_for_format(
781 format,
782 &mut cursor_section,
783 path,
784 context,
785 &editable_range,
786 cursor_offset,
787 );
788
789 assemble_single_file_fim_prompt(
790 context,
791 &editable_range,
792 &cursor_section,
793 &input.events,
794 apply_prompt_budget_margin(max_tokens),
795 )
796 }
797 _ => {
798 let mut cursor_section = String::new();
799 write_cursor_excerpt_section_for_format(
800 format,
801 &mut cursor_section,
802 path,
803 context,
804 &editable_range,
805 cursor_offset,
806 );
807
808 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
809 let cursor_tokens = estimate_tokens(cursor_section.len());
810 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
811
812 let edit_history_section = format_edit_history_within_budget(
813 &input.events,
814 "<|file_sep|>",
815 "edit history",
816 remaining_budget,
817 max_edit_event_count_for_format(&format),
818 );
819 let edit_history_tokens = estimate_tokens(edit_history_section.len());
820 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
821
822 let related_files_section = format_related_files_within_budget(
823 &related_files,
824 "<|file_sep|>",
825 "",
826 remaining_budget,
827 );
828
829 let mut prompt = String::new();
830 prompt.push_str(&related_files_section);
831 prompt.push_str(&edit_history_section);
832 prompt.push_str(&cursor_section);
833 prompt
834 }
835 };
836 let prompt_tokens = estimate_tokens(prompt.len());
837 if prompt_tokens > max_tokens {
838 return None;
839 }
840 return Some(prompt);
841}
842
843fn format_active_buffer_diagnostics_with_budget(
844 diagnostics: &[ActiveBufferDiagnostic],
845 cursor_buffer_row: Option<u32>,
846 budget: usize,
847) -> String {
848 if diagnostics.is_empty() || budget == 0 {
849 return String::new();
850 }
851
852 let mut diagnostic_indices = (0..diagnostics.len()).collect::<Vec<_>>();
853 if let Some(cursor_buffer_row) = cursor_buffer_row {
854 diagnostic_indices.sort_by_key(|index| {
855 let range = &diagnostics[*index].snippet_buffer_row_range;
856 u32::abs_diff(cursor_buffer_row, range.start)
857 + u32::abs_diff(cursor_buffer_row, range.end)
858 });
859 }
860
861 let mut output = format!("{}diagnostics\n", seed_coder::FILE_MARKER);
862 let header_tokens = estimate_tokens(output.len());
863 if header_tokens > budget {
864 return String::new();
865 }
866
867 let mut used_tokens = header_tokens;
868 let mut included_diagnostics = 0;
869 for diagnostic_index in diagnostic_indices.into_iter().take(10) {
870 let diagnostic = &diagnostics[diagnostic_index];
871 let snippet = clamp_text_to_token_count(&diagnostic.snippet, 256);
872
873 let diagnostic_section = format!(
874 "*{}*:\n```\n{}{}\n```\n",
875 diagnostic.message,
876 snippet,
877 if snippet.len() < diagnostic.snippet.len() {
878 "..."
879 } else {
880 ""
881 }
882 );
883 let diagnostic_tokens = estimate_tokens(diagnostic_section.len());
884 if used_tokens + diagnostic_tokens > budget {
885 break;
886 }
887 output.push_str(&diagnostic_section);
888 used_tokens += diagnostic_tokens;
889 included_diagnostics += 1;
890 }
891
892 if included_diagnostics == 0 {
893 String::new()
894 } else {
895 output
896 }
897}
898
899pub fn filter_redundant_excerpts(
900 mut related_files: Vec<RelatedFile>,
901 cursor_path: &Path,
902 cursor_row_range: Range<u32>,
903) -> Vec<RelatedFile> {
904 for file in &mut related_files {
905 if file.path.as_ref() == cursor_path {
906 file.excerpts.retain(|excerpt| {
907 excerpt.row_range.start < cursor_row_range.start
908 || excerpt.row_range.end > cursor_row_range.end
909 });
910 }
911 }
912 related_files.retain(|file| !file.excerpts.is_empty());
913 related_files
914}
915
916pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
917 match format {
918 ZetaFormat::V0112MiddleAtEnd
919 | ZetaFormat::V0113Ordered
920 | ZetaFormat::V0114180EditableRegion
921 | ZetaFormat::V0120GitMergeMarkers
922 | ZetaFormat::V0131GitMergeMarkersPrefix
923 | ZetaFormat::V0211Prefill
924 | ZetaFormat::V0211SeedCoder
925 | ZetaFormat::V0331SeedCoderModelPy
926 | ZetaFormat::v0226Hashline
927 | ZetaFormat::V0304SeedNoEdits
928 | ZetaFormat::V0304VariableEdit
929 | ZetaFormat::V0306SeedMultiRegions
930 | ZetaFormat::V0316SeedMultiRegions
931 | ZetaFormat::V0318SeedMultiRegions
932 | ZetaFormat::V0317SeedMultiRegions
933 | ZetaFormat::V0420Diagnostics
934 | ZetaFormat::V0327SingleFile => 6,
935 }
936}
937
938pub fn get_prefill_for_format(
939 format: ZetaFormat,
940 context: &str,
941 editable_range: &Range<usize>,
942) -> String {
943 match format {
944 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
945 ZetaFormat::V0112MiddleAtEnd
946 | ZetaFormat::V0113Ordered
947 | ZetaFormat::V0114180EditableRegion
948 | ZetaFormat::V0120GitMergeMarkers
949 | ZetaFormat::V0131GitMergeMarkersPrefix
950 | ZetaFormat::V0211SeedCoder
951 | ZetaFormat::V0331SeedCoderModelPy
952 | ZetaFormat::v0226Hashline
953 | ZetaFormat::V0304VariableEdit => String::new(),
954 ZetaFormat::V0304SeedNoEdits
955 | ZetaFormat::V0306SeedMultiRegions
956 | ZetaFormat::V0316SeedMultiRegions
957 | ZetaFormat::V0318SeedMultiRegions
958 | ZetaFormat::V0317SeedMultiRegions
959 | ZetaFormat::V0420Diagnostics
960 | ZetaFormat::V0327SingleFile => String::new(),
961 }
962}
963
964pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
965 match format {
966 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
967 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
968 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
969 ZetaFormat::V0211SeedCoder
970 | ZetaFormat::V0331SeedCoderModelPy
971 | ZetaFormat::V0304SeedNoEdits
972 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
973 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
974 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
975 ZetaFormat::V0420Diagnostics => Some(multi_region::V0318_END_MARKER),
976 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
977 ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
978
979 ZetaFormat::V0112MiddleAtEnd
980 | ZetaFormat::V0113Ordered
981 | ZetaFormat::V0114180EditableRegion
982 | ZetaFormat::v0226Hashline
983 | ZetaFormat::V0304VariableEdit => None,
984 }
985}
986
987pub fn encode_patch_as_output_for_format(
988 format: ZetaFormat,
989 old_editable_region: &str,
990 patch: &str,
991 cursor_offset: Option<usize>,
992) -> Result<Option<String>> {
993 match format {
994 ZetaFormat::v0226Hashline => {
995 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
996 }
997 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
998 old_editable_region,
999 patch,
1000 cursor_offset,
1001 )
1002 .map(Some),
1003 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
1004 Ok(seed_coder::no_edits(patch))
1005 }
1006 ZetaFormat::V0316SeedMultiRegions => {
1007 let empty_patch = patch.lines().count() <= 3;
1008 if empty_patch {
1009 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
1010 let marker_num =
1011 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1012 let tag = multi_region::marker_tag(marker_num);
1013 Ok(Some(format!(
1014 "{tag}{tag}{}",
1015 multi_region::V0316_END_MARKER
1016 )))
1017 } else {
1018 Ok(None)
1019 }
1020 }
1021 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1022 let empty_patch = patch.lines().count() <= 3;
1023 if empty_patch {
1024 let marker_offsets =
1025 multi_region::compute_marker_offsets_v0318(old_editable_region);
1026 let marker_num =
1027 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1028 let tag = multi_region::marker_tag(marker_num);
1029 Ok(Some(format!(
1030 "{tag}{tag}{}",
1031 multi_region::V0318_END_MARKER
1032 )))
1033 } else {
1034 Ok(None)
1035 }
1036 }
1037 ZetaFormat::V0317SeedMultiRegions => {
1038 let empty_patch = patch.lines().count() <= 3;
1039 if empty_patch {
1040 let tag = multi_region::marker_tag_relative(0);
1041 Ok(Some(format!(
1042 "{tag}{tag}{}",
1043 multi_region::V0317_END_MARKER
1044 )))
1045 } else {
1046 Ok(None)
1047 }
1048 }
1049 ZetaFormat::V0327SingleFile => {
1050 let empty_patch = patch.lines().count() <= 3;
1051 if empty_patch {
1052 let marker_offsets =
1053 multi_region::compute_marker_offsets_v0318(old_editable_region);
1054 let marker_num =
1055 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1056 let tag = multi_region::marker_tag(marker_num);
1057 Ok(Some(format!(
1058 "{tag}{tag}{}",
1059 multi_region::V0327_END_MARKER
1060 )))
1061 } else {
1062 Ok(None)
1063 }
1064 }
1065 _ => Ok(None),
1066 }
1067}
1068
1069/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
1070/// extracted), produce the expected model output string for training.
1071pub fn format_expected_output(
1072 input: &ZetaPromptInput,
1073 format: ZetaFormat,
1074 patch: &str,
1075 cursor_offset: Option<usize>,
1076) -> Result<String> {
1077 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1078 let mut old_editable = context[editable_range].to_string();
1079 if !old_editable.is_empty() && !old_editable.ends_with('\n') {
1080 old_editable.push('\n');
1081 }
1082
1083 // Formats with their own output encoding (hashline, variable-edit,
1084 // multi-region empty patches) are handled here.
1085 if let Some(output) =
1086 encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
1087 {
1088 return Ok(output);
1089 }
1090
1091 let empty_patch = patch.lines().count() <= 3;
1092
1093 match format {
1094 // Multi-region formats: non-empty patches need diff application
1095 // then marker-span encoding.
1096 ZetaFormat::V0316SeedMultiRegions => {
1097 let (new_editable, first_hunk_offset) =
1098 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1099 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1100 multi_region::encode_from_old_and_new_v0316(
1101 &old_editable,
1102 &new_editable,
1103 cursor_in_new,
1104 CURSOR_MARKER,
1105 multi_region::V0316_END_MARKER,
1106 )
1107 }
1108 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1109 let (new_editable, first_hunk_offset) =
1110 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1111 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1112 multi_region::encode_from_old_and_new_v0318(
1113 &old_editable,
1114 &new_editable,
1115 cursor_in_new,
1116 CURSOR_MARKER,
1117 multi_region::V0318_END_MARKER,
1118 )
1119 }
1120 ZetaFormat::V0327SingleFile => {
1121 let (new_editable, first_hunk_offset) =
1122 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1123 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1124 multi_region::encode_from_old_and_new_v0318(
1125 &old_editable,
1126 &new_editable,
1127 cursor_in_new,
1128 CURSOR_MARKER,
1129 multi_region::V0327_END_MARKER,
1130 )
1131 }
1132 ZetaFormat::V0317SeedMultiRegions => {
1133 let (new_editable, first_hunk_offset) =
1134 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1135 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1136 multi_region::encode_from_old_and_new_v0317(
1137 &old_editable,
1138 &new_editable,
1139 cursor_in_new,
1140 CURSOR_MARKER,
1141 multi_region::V0317_END_MARKER,
1142 )
1143 }
1144 // V0131-style formats and fallback: produce new editable text with
1145 // cursor marker inserted, followed by the end marker.
1146 ZetaFormat::V0112MiddleAtEnd
1147 | ZetaFormat::V0113Ordered
1148 | ZetaFormat::V0114180EditableRegion
1149 | ZetaFormat::V0120GitMergeMarkers
1150 | ZetaFormat::V0131GitMergeMarkersPrefix
1151 | ZetaFormat::V0211Prefill
1152 | ZetaFormat::V0211SeedCoder
1153 | ZetaFormat::v0226Hashline
1154 | ZetaFormat::V0304VariableEdit
1155 | ZetaFormat::V0304SeedNoEdits
1156 | ZetaFormat::V0331SeedCoderModelPy
1157 | ZetaFormat::V0306SeedMultiRegions => {
1158 let (mut result, first_hunk_offset) = if empty_patch {
1159 (old_editable.clone(), None)
1160 } else {
1161 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1162 };
1163
1164 if let Some(cursor) = cursor_offset {
1165 let hunk_start = if !empty_patch {
1166 first_hunk_offset.unwrap_or(0)
1167 } else {
1168 0
1169 };
1170 let offset = (hunk_start + cursor).min(result.len());
1171 result.insert_str(offset, CURSOR_MARKER);
1172 }
1173
1174 if !result.is_empty() && !result.ends_with('\n') {
1175 result.push('\n');
1176 }
1177
1178 if let Some(end_marker) = output_end_marker_for_format(format) {
1179 result.push_str(end_marker);
1180 }
1181
1182 Ok(result)
1183 }
1184 }
1185}
1186
/// Map a cursor offset into the text produced by applying a diff.
///
/// The cursor is shifted by the offset of the first hunk (treated as `0` when
/// absent) and clamped to the length of `new_text`. Returns `None` when no
/// cursor offset was supplied.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let shifted = first_hunk_offset.unwrap_or(0) + cursor;
    Some(shifted.min(new_text.len()))
}
1198
/// A decoded model response: the replacement text for the editable region,
/// where it applies in the cursor excerpt, and the predicted cursor position.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1208
/// Location of the predicted cursor, as computed by
/// `cursor_position_from_parsed_output`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Lossy string form of the prompt's `cursor_path`.
    pub path: String,
    /// Number of newlines in the excerpt before the editable region plus the
    /// number of newlines in the new editable text before the cursor.
    pub row: usize,
    /// Byte distance from the start of the cursor's line to the cursor.
    pub column: usize,
    /// Start of the editable region within the excerpt plus the cursor's
    /// offset within the new editable text.
    pub offset: usize,
    /// Cursor byte offset within the new editable region.
    pub editable_region_offset: usize,
}
1217
1218pub fn parsed_output_from_editable_region(
1219 range_in_excerpt: Range<usize>,
1220 mut new_editable_region: String,
1221) -> ParsedOutput {
1222 let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1223 if let Some(offset) = cursor_offset_in_new_editable_region {
1224 new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1225 }
1226
1227 ParsedOutput {
1228 new_editable_region,
1229 range_in_excerpt,
1230 cursor_offset_in_new_editable_region,
1231 }
1232}
1233
1234/// Parse model output for the given zeta format
1235pub fn parse_zeta2_model_output(
1236 output: &str,
1237 format: ZetaFormat,
1238 prompt_inputs: &ZetaPromptInput,
1239) -> Result<ParsedOutput> {
1240 let output = match output_end_marker_for_format(format) {
1241 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1242 None => output,
1243 };
1244
1245 let (context, editable_range_in_context, context_range, cursor_offset) =
1246 resolve_cursor_region(prompt_inputs, format);
1247 let context_start = context_range.start;
1248 let old_editable_region = &context[editable_range_in_context.clone()];
1249 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1250
1251 let (range_in_context, output) = match format {
1252 ZetaFormat::v0226Hashline => (
1253 editable_range_in_context,
1254 if hashline::output_has_edit_commands(output) {
1255 hashline::apply_edit_commands(old_editable_region, output)
1256 } else {
1257 output.to_string()
1258 },
1259 ),
1260 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1261 ZetaFormat::V0304SeedNoEdits => (
1262 editable_range_in_context,
1263 if output.starts_with(seed_coder::NO_EDITS) {
1264 old_editable_region.to_string()
1265 } else {
1266 output.to_string()
1267 },
1268 ),
1269 ZetaFormat::V0306SeedMultiRegions => (
1270 editable_range_in_context,
1271 if output.starts_with(seed_coder::NO_EDITS) {
1272 old_editable_region.to_string()
1273 } else {
1274 multi_region::apply_marker_span(old_editable_region, output)?
1275 },
1276 ),
1277 ZetaFormat::V0316SeedMultiRegions => (
1278 editable_range_in_context,
1279 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1280 ),
1281 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => (
1282 editable_range_in_context,
1283 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1284 ),
1285 ZetaFormat::V0317SeedMultiRegions => (
1286 editable_range_in_context,
1287 multi_region::apply_marker_span_v0317(
1288 old_editable_region,
1289 output,
1290 Some(cursor_offset_in_editable),
1291 )?,
1292 ),
1293 ZetaFormat::V0327SingleFile => (
1294 editable_range_in_context,
1295 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1296 ),
1297 _ => (editable_range_in_context, output.to_string()),
1298 };
1299
1300 let range_in_excerpt =
1301 range_in_context.start + context_start..range_in_context.end + context_start;
1302
1303 Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1304}
1305
1306pub fn parse_zeta2_model_output_as_patch(
1307 output: &str,
1308 format: ZetaFormat,
1309 prompt_inputs: &ZetaPromptInput,
1310) -> Result<String> {
1311 let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1312 parsed_output_to_patch(prompt_inputs, parsed)
1313}
1314
1315pub fn cursor_position_from_parsed_output(
1316 prompt_inputs: &ZetaPromptInput,
1317 parsed: &ParsedOutput,
1318) -> Option<CursorPosition> {
1319 let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1320 let editable_region_offset = parsed.range_in_excerpt.start;
1321 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1322
1323 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1324
1325 let new_editable_region = &parsed.new_editable_region;
1326 let prefix_end = cursor_offset.min(new_editable_region.len());
1327 let new_region_prefix = &new_editable_region[..prefix_end];
1328
1329 let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1330
1331 let column = match new_region_prefix.rfind('\n') {
1332 Some(last_newline) => cursor_offset - last_newline - 1,
1333 None => {
1334 let content_prefix = &excerpt[..editable_region_offset];
1335 let content_column = match content_prefix.rfind('\n') {
1336 Some(last_newline) => editable_region_offset - last_newline - 1,
1337 None => editable_region_offset,
1338 };
1339 content_column + cursor_offset
1340 }
1341 };
1342
1343 Some(CursorPosition {
1344 path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1345 row,
1346 column,
1347 offset: editable_region_offset + cursor_offset,
1348 editable_region_offset: cursor_offset,
1349 })
1350}
1351
1352pub fn parsed_output_to_patch(
1353 prompt_inputs: &ZetaPromptInput,
1354 parsed: ParsedOutput,
1355) -> Result<String> {
1356 let range_in_excerpt = parsed.range_in_excerpt;
1357 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1358 let old_text = excerpt[range_in_excerpt.clone()].to_string();
1359 let mut new_text = parsed.new_editable_region;
1360
1361 let mut old_text_normalized = old_text;
1362 if !new_text.is_empty() && !new_text.ends_with('\n') {
1363 new_text.push('\n');
1364 }
1365 if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1366 old_text_normalized.push('\n');
1367 }
1368
1369 let editable_region_offset = range_in_excerpt.start;
1370 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1371 let editable_region_lines = old_text_normalized.lines().count() as u32;
1372
1373 let diff = udiff::unified_diff_with_context(
1374 &old_text_normalized,
1375 &new_text,
1376 editable_region_start_line,
1377 editable_region_start_line,
1378 editable_region_lines,
1379 );
1380
1381 let path = prompt_inputs
1382 .cursor_path
1383 .to_string_lossy()
1384 .trim_start_matches('/')
1385 .to_string();
1386 let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1387
1388 Ok(udiff::encode_cursor_in_patch(
1389 &formatted_diff,
1390 parsed.cursor_offset_in_new_editable_region,
1391 ))
1392}
1393
1394pub fn excerpt_range_for_format(
1395 format: ZetaFormat,
1396 ranges: &ExcerptRanges,
1397) -> (Range<usize>, Range<usize>) {
1398 excerpt_ranges_for_format(format, ranges)
1399}
1400
1401pub fn resolve_cursor_region(
1402 input: &ZetaPromptInput,
1403 format: ZetaFormat,
1404) -> (&str, Range<usize>, Range<usize>, usize) {
1405 let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1406 let (editable_tokens, _) = token_limits_for_format(format);
1407 let context_range = 0..input.cursor_excerpt.len();
1408 let editable_range = multi_region::compute_v0327_editable_range(
1409 &input.cursor_excerpt,
1410 input.cursor_offset_in_excerpt,
1411 editable_tokens,
1412 );
1413 (editable_range, context_range)
1414 } else if let Some(syntax_ranges) = &input.syntax_ranges {
1415 let (editable_tokens, context_tokens) = token_limits_for_format(format);
1416 compute_editable_and_context_ranges(
1417 &input.cursor_excerpt,
1418 input.cursor_offset_in_excerpt,
1419 syntax_ranges,
1420 editable_tokens,
1421 context_tokens,
1422 )
1423 } else {
1424 excerpt_range_for_format(format, &input.excerpt_ranges)
1425 };
1426
1427 let context_start = context_range.start;
1428 let context_text = &input.cursor_excerpt[context_range.clone()];
1429 let adjusted_editable =
1430 (editable_range.start - context_start)..(editable_range.end - context_start);
1431 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1432
1433 (
1434 context_text,
1435 adjusted_editable,
1436 context_range,
1437 adjusted_cursor,
1438 )
1439}
1440
1441pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1442 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1443 get_prefill_for_format(format, context, &editable_range)
1444}
1445
1446fn format_edit_history_within_budget(
1447 events: &[Arc<Event>],
1448 file_marker: &str,
1449 edit_history_name: &str,
1450 max_tokens: usize,
1451 max_edit_event_count: usize,
1452) -> String {
1453 let header = format!("{}{}\n", file_marker, edit_history_name);
1454 let header_tokens = estimate_tokens(header.len());
1455 if header_tokens >= max_tokens {
1456 return String::new();
1457 }
1458
1459 let mut event_strings: Vec<String> = Vec::new();
1460 let mut total_tokens = header_tokens;
1461
1462 for event in events.iter().rev().take(max_edit_event_count) {
1463 let mut event_str = String::new();
1464 write_event(&mut event_str, event);
1465 let event_tokens = estimate_tokens(event_str.len());
1466
1467 if total_tokens + event_tokens > max_tokens {
1468 break;
1469 }
1470 total_tokens += event_tokens;
1471 event_strings.push(event_str);
1472 }
1473
1474 if event_strings.is_empty() {
1475 return String::new();
1476 }
1477
1478 let mut result = header;
1479 for event_str in event_strings.iter().rev() {
1480 result.push_str(event_str);
1481 }
1482 result
1483}
1484
1485fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1486 let needs_newline = !excerpt.text.ends_with('\n');
1487 let needs_ellipsis = excerpt.row_range.end < file_max_row;
1488 let len = excerpt.text.len()
1489 + if needs_newline { "\n".len() } else { 0 }
1490 + if needs_ellipsis { "...\n".len() } else { 0 };
1491 estimate_tokens(len)
1492}
1493
1494pub fn format_related_files_within_budget(
1495 related_files: &[RelatedFile],
1496 file_prefix: &str,
1497 file_suffix: &str,
1498 max_tokens: usize,
1499) -> String {
1500 struct ExcerptCandidate {
1501 file_ix: usize,
1502 excerpt_ix: usize,
1503 order: usize,
1504 }
1505
1506 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1507 .iter()
1508 .enumerate()
1509 .flat_map(|(file_ix, file)| {
1510 file.excerpts
1511 .iter()
1512 .enumerate()
1513 .map(move |(excerpt_ix, e)| ExcerptCandidate {
1514 file_ix,
1515 excerpt_ix,
1516 order: e.order,
1517 })
1518 })
1519 .collect();
1520
1521 // Pre-compute file header strings and their token costs.
1522 let file_headers: Vec<String> = related_files
1523 .iter()
1524 .map(|file| {
1525 let path_str = file.path.to_string_lossy();
1526 format!("{}{}\n", file_prefix, path_str)
1527 })
1528 .collect();
1529
1530 // Sort the excerpts by their order and determine how many fit within the budget.
1531 let mut total_tokens = 0;
1532 let mut included_excerpt_count = 0_usize;
1533 let mut included_file_indices = vec![false; related_files.len()];
1534 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1535 for candidate in &excerpt_candidates {
1536 let file = &related_files[candidate.file_ix];
1537 let excerpt = &file.excerpts[candidate.excerpt_ix];
1538 let file_already_included = included_file_indices[candidate.file_ix];
1539 let header_cost = if file_already_included {
1540 0
1541 } else {
1542 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1543 };
1544 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1545 if total_tokens + header_cost + excerpt_cost > max_tokens {
1546 break;
1547 }
1548 total_tokens += header_cost + excerpt_cost;
1549 if !file_already_included {
1550 included_file_indices[candidate.file_ix] = true;
1551 }
1552 included_excerpt_count += 1;
1553 }
1554
1555 excerpt_candidates.truncate(included_excerpt_count);
1556 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1557
1558 // Render all of the files that fit within the token budget, in the original order.
1559 let mut result = String::new();
1560 let mut last_file_ix = None;
1561 for candidate in &excerpt_candidates {
1562 if last_file_ix != Some(candidate.file_ix) {
1563 if last_file_ix.is_some() {
1564 result.push_str(file_suffix);
1565 }
1566 result.push_str(&file_headers[candidate.file_ix]);
1567 last_file_ix = Some(candidate.file_ix);
1568 }
1569 let file = &related_files[candidate.file_ix];
1570 let excerpt = &file.excerpts[candidate.excerpt_ix];
1571 result.push_str(&excerpt.text);
1572 if !result.ends_with('\n') {
1573 result.push('\n');
1574 }
1575 if excerpt.row_range.end < file.max_row {
1576 result.push_str("...\n");
1577 }
1578 }
1579
1580 result
1581}
1582
1583pub fn write_related_files(
1584 prompt: &mut String,
1585 related_files: &[RelatedFile],
1586) -> Vec<Range<usize>> {
1587 let mut ranges = Vec::new();
1588 for file in related_files {
1589 let start = prompt.len();
1590 let path_str = file.path.to_string_lossy();
1591 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1592 for excerpt in &file.excerpts {
1593 prompt.push_str(&excerpt.text);
1594 if !prompt.ends_with('\n') {
1595 prompt.push('\n');
1596 }
1597 if excerpt.row_range.end < file.max_row {
1598 prompt.push_str("...\n");
1599 }
1600 }
1601 let end = prompt.len();
1602 ranges.push(start..end);
1603 }
1604 ranges
1605}
1606
1607mod v0112_middle_at_end {
1608 use super::*;
1609
1610 pub fn special_tokens() -> &'static [&'static str] {
1611 &[
1612 "<|fim_prefix|>",
1613 "<|fim_suffix|>",
1614 "<|fim_middle|>",
1615 "<|file_sep|>",
1616 CURSOR_MARKER,
1617 ]
1618 }
1619
1620 pub fn write_cursor_excerpt_section(
1621 prompt: &mut String,
1622 path: &Path,
1623 context: &str,
1624 editable_range: &Range<usize>,
1625 cursor_offset: usize,
1626 ) {
1627 let path_str = path.to_string_lossy();
1628 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1629
1630 prompt.push_str("<|fim_prefix|>\n");
1631 prompt.push_str(&context[..editable_range.start]);
1632
1633 prompt.push_str("<|fim_suffix|>\n");
1634 prompt.push_str(&context[editable_range.end..]);
1635 if !prompt.ends_with('\n') {
1636 prompt.push('\n');
1637 }
1638
1639 prompt.push_str("<|fim_middle|>current\n");
1640 prompt.push_str(&context[editable_range.start..cursor_offset]);
1641 prompt.push_str(CURSOR_MARKER);
1642 prompt.push_str(&context[cursor_offset..editable_range.end]);
1643 if !prompt.ends_with('\n') {
1644 prompt.push('\n');
1645 }
1646
1647 prompt.push_str("<|fim_middle|>updated\n");
1648 }
1649}
1650
1651mod v0113_ordered {
1652 use super::*;
1653
1654 pub fn special_tokens() -> &'static [&'static str] {
1655 &[
1656 "<|fim_prefix|>",
1657 "<|fim_suffix|>",
1658 "<|fim_middle|>",
1659 "<|file_sep|>",
1660 CURSOR_MARKER,
1661 ]
1662 }
1663
1664 pub fn write_cursor_excerpt_section(
1665 prompt: &mut String,
1666 path: &Path,
1667 context: &str,
1668 editable_range: &Range<usize>,
1669 cursor_offset: usize,
1670 ) {
1671 let path_str = path.to_string_lossy();
1672 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1673
1674 prompt.push_str("<|fim_prefix|>\n");
1675 prompt.push_str(&context[..editable_range.start]);
1676 if !prompt.ends_with('\n') {
1677 prompt.push('\n');
1678 }
1679
1680 prompt.push_str("<|fim_middle|>current\n");
1681 prompt.push_str(&context[editable_range.start..cursor_offset]);
1682 prompt.push_str(CURSOR_MARKER);
1683 prompt.push_str(&context[cursor_offset..editable_range.end]);
1684 if !prompt.ends_with('\n') {
1685 prompt.push('\n');
1686 }
1687
1688 prompt.push_str("<|fim_suffix|>\n");
1689 prompt.push_str(&context[editable_range.end..]);
1690 if !prompt.ends_with('\n') {
1691 prompt.push('\n');
1692 }
1693
1694 prompt.push_str("<|fim_middle|>updated\n");
1695 }
1696}
1697
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; identical to those of `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1705
1706pub mod v0120_git_merge_markers {
1707 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1708 //!
1709 //! Example prompt:
1710 //!
1711 //! <|file_sep|>path/to/target_file.py
1712 //! <|fim_prefix|>
1713 //! code before editable region
1714 //! <|fim_suffix|>
1715 //! code after editable region
1716 //! <|fim_middle|>
1717 //! <<<<<<< CURRENT
1718 //! code that
1719 //! needs to<|user_cursor|>
1720 //! be rewritten
1721 //! =======
1722 //!
1723 //! Expected output (should be generated by the model):
1724 //!
1725 //! updated
1726 //! code with
1727 //! changes applied
1728 //! >>>>>>> UPDATED
1729
1730 use super::*;
1731
1732 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1733 pub const SEPARATOR: &str = "=======\n";
1734 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1735
1736 pub fn special_tokens() -> &'static [&'static str] {
1737 &[
1738 "<|fim_prefix|>",
1739 "<|fim_suffix|>",
1740 "<|fim_middle|>",
1741 "<|file_sep|>",
1742 START_MARKER,
1743 SEPARATOR,
1744 END_MARKER,
1745 CURSOR_MARKER,
1746 ]
1747 }
1748
1749 pub fn write_cursor_excerpt_section(
1750 prompt: &mut String,
1751 path: &Path,
1752 context: &str,
1753 editable_range: &Range<usize>,
1754 cursor_offset: usize,
1755 ) {
1756 let path_str = path.to_string_lossy();
1757 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1758
1759 prompt.push_str("<|fim_prefix|>");
1760 prompt.push_str(&context[..editable_range.start]);
1761
1762 prompt.push_str("<|fim_suffix|>");
1763 prompt.push_str(&context[editable_range.end..]);
1764 if !prompt.ends_with('\n') {
1765 prompt.push('\n');
1766 }
1767
1768 prompt.push_str("<|fim_middle|>");
1769 prompt.push_str(START_MARKER);
1770 prompt.push_str(&context[editable_range.start..cursor_offset]);
1771 prompt.push_str(CURSOR_MARKER);
1772 prompt.push_str(&context[cursor_offset..editable_range.end]);
1773 if !prompt.ends_with('\n') {
1774 prompt.push('\n');
1775 }
1776 prompt.push_str(SEPARATOR);
1777 }
1778}
1779
1780pub mod v0131_git_merge_markers_prefix {
1781 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1782 //!
1783 //! Example prompt:
1784 //!
1785 //! <|file_sep|>path/to/target_file.py
1786 //! <|fim_prefix|>
1787 //! code before editable region
1788 //! <<<<<<< CURRENT
1789 //! code that
1790 //! needs to<|user_cursor|>
1791 //! be rewritten
1792 //! =======
1793 //! <|fim_suffix|>
1794 //! code after editable region
1795 //! <|fim_middle|>
1796 //!
1797 //! Expected output (should be generated by the model):
1798 //!
1799 //! updated
1800 //! code with
1801 //! changes applied
1802 //! >>>>>>> UPDATED
1803
1804 use super::*;
1805
1806 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1807 pub const SEPARATOR: &str = "=======\n";
1808 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1809
1810 pub fn special_tokens() -> &'static [&'static str] {
1811 &[
1812 "<|fim_prefix|>",
1813 "<|fim_suffix|>",
1814 "<|fim_middle|>",
1815 "<|file_sep|>",
1816 START_MARKER,
1817 SEPARATOR,
1818 END_MARKER,
1819 CURSOR_MARKER,
1820 ]
1821 }
1822
1823 pub fn write_cursor_excerpt_section(
1824 prompt: &mut String,
1825 path: &Path,
1826 context: &str,
1827 editable_range: &Range<usize>,
1828 cursor_offset: usize,
1829 ) {
1830 let path_str = path.to_string_lossy();
1831 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1832
1833 prompt.push_str("<|fim_prefix|>");
1834 prompt.push_str(&context[..editable_range.start]);
1835 prompt.push_str(START_MARKER);
1836 prompt.push_str(&context[editable_range.start..cursor_offset]);
1837 prompt.push_str(CURSOR_MARKER);
1838 prompt.push_str(&context[cursor_offset..editable_range.end]);
1839 if !prompt.ends_with('\n') {
1840 prompt.push('\n');
1841 }
1842 prompt.push_str(SEPARATOR);
1843
1844 prompt.push_str("<|fim_suffix|>");
1845 prompt.push_str(&context[editable_range.end..]);
1846 if !prompt.ends_with('\n') {
1847 prompt.push('\n');
1848 }
1849
1850 prompt.push_str("<|fim_middle|>");
1851 }
1852}
1853
1854pub mod v0211_prefill {
1855 use super::*;
1856
1857 pub fn special_tokens() -> &'static [&'static str] {
1858 v0131_git_merge_markers_prefix::special_tokens()
1859 }
1860
1861 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1862 let editable_region = &context[editable_range.start..editable_range.end];
1863
1864 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1865 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1866
1867 // Find a token boundary to avoid splitting tokens in the prefill.
1868 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1869 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1870 // the \n and consume any consecutive \n characters after it.
1871 let prefill = &editable_region[..prefill_len];
1872 match prefill.rfind('\n') {
1873 Some(pos) => {
1874 let mut end = pos + 1;
1875 while end < editable_region.len()
1876 && editable_region.as_bytes().get(end) == Some(&b'\n')
1877 {
1878 end += 1;
1879 }
1880 editable_region[..end].to_string()
1881 }
1882 // No newline found. Fall back to splitting before the last space
1883 // (word-level boundary)
1884 None => match prefill.rfind(' ') {
1885 Some(pos) => prefill[..pos].to_string(),
1886 None => prefill.to_string(),
1887 },
1888 }
1889 }
1890}
1891
1892pub mod hashline {
1893
1894 use std::fmt::Display;
1895
    /// Written (followed by a newline) at the very end of the hashline prompt.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model command that replaces a line or a range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model command that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Model output starting with this marker leaves the editable region unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1904
1905 pub fn special_tokens() -> &'static [&'static str] {
1906 return &[
1907 SET_COMMAND_MARKER,
1908 "<|set_range|>",
1909 INSERT_COMMAND_MARKER,
1910 NO_EDITS_COMMAND_MARKER,
1911 CURSOR_MARKER,
1912 "<|file_sep|>",
1913 "<|fim_prefix|>",
1914 "<|fim_suffix|>",
1915 "<|fim_middle|>",
1916 ];
1917 }
1918
1919 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1920 #[derive(Debug, Clone, PartialEq, Eq)]
1921 struct LineRef {
1922 index: usize,
1923 hash: u8,
1924 }
1925
1926 impl Display for LineRef {
1927 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1928 write!(f, "{}:{:02x}", self.index, self.hash)
1929 }
1930 }
1931
1932 pub fn hash_line(line: &[u8]) -> u8 {
1933 let mut h: u8 = 0;
1934 for &byte in line {
1935 h = h.wrapping_add(byte);
1936 }
1937 return h;
1938 }
1939
1940 /// Write the hashline-encoded editable region into `out`. Each line of
1941 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1942 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1943 /// to the start of `editable_text`).
1944 pub fn write_hashline_editable_region(
1945 out: &mut String,
1946 editable_text: &str,
1947 cursor_offset_in_editable: usize,
1948 ) {
1949 let mut offset = 0;
1950 for (i, line) in editable_text.lines().enumerate() {
1951 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1952 && cursor_offset_in_editable < offset + line.len()
1953 {
1954 (
1955 &line[..cursor_offset_in_editable - offset],
1956 CURSOR_MARKER,
1957 &line[cursor_offset_in_editable - offset..],
1958 )
1959 } else {
1960 (line, "", "")
1961 };
1962 write!(
1963 out,
1964 "\n{}|{head}{cursor}{tail}",
1965 LineRef {
1966 index: i,
1967 hash: hash_line(line.as_bytes())
1968 }
1969 )
1970 .unwrap();
1971 offset += line.len() + 1;
1972 }
1973 }
1974
1975 pub fn write_cursor_excerpt_section(
1976 prompt: &mut String,
1977 path: &Path,
1978 context: &str,
1979 editable_range: &Range<usize>,
1980 cursor_offset: usize,
1981 ) {
1982 let path_str = path.to_string_lossy();
1983 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1984
1985 prompt.push_str("<|fim_prefix|>\n");
1986 prompt.push_str(&context[..editable_range.start]);
1987 prompt.push_str(START_MARKER);
1988
1989 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1990 let editable_region = &context[editable_range.clone()];
1991 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1992
1993 if !prompt.ends_with('\n') {
1994 prompt.push('\n');
1995 }
1996
1997 prompt.push_str("<|fim_suffix|>\n");
1998 prompt.push_str(&context[editable_range.end..]);
1999 if !prompt.ends_with('\n') {
2000 prompt.push('\n');
2001 }
2002
2003 prompt.push_str(END_MARKER);
2004 prompt.push('\n');
2005 }
2006
    /// A single edit command parsed from the model output. `content` borrows
    /// the command's body lines directly from the model output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
2024
2025 /// Parse a line reference like `3:c3` into a `LineRef`.
2026 fn parse_line_ref(s: &str) -> Option<LineRef> {
2027 let (idx_str, hash_str) = s.split_once(':')?;
2028 let index = idx_str.parse::<usize>().ok()?;
2029 let hash = u8::from_str_radix(hash_str, 16).ok()?;
2030 Some(LineRef { index, hash })
2031 }
2032
2033 /// Parse the model output into a list of `EditCommand`s.
2034 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
2035 let mut commands = Vec::new();
2036 let mut offset = 0usize;
2037
2038 while offset < model_output.len() {
2039 let next_nl = model_output[offset..]
2040 .find('\n')
2041 .map(|i| offset + i)
2042 .unwrap_or(model_output.len());
2043 let line = &model_output[offset..next_nl];
2044 let line_end = if next_nl < model_output.len() {
2045 next_nl + 1
2046 } else {
2047 next_nl
2048 };
2049
2050 let trimmed = line.trim();
2051 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
2052 (true, spec)
2053 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
2054 (false, spec)
2055 } else {
2056 offset = line_end;
2057 continue;
2058 };
2059
2060 let mut content_end = line_end;
2061 let mut scan = line_end;
2062
2063 while scan < model_output.len() {
2064 let body_nl = model_output[scan..]
2065 .find('\n')
2066 .map(|i| scan + i)
2067 .unwrap_or(model_output.len());
2068 let body_line = &model_output[scan..body_nl];
2069 if body_line.trim().starts_with(SET_COMMAND_MARKER)
2070 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
2071 {
2072 break;
2073 }
2074 scan = if body_nl < model_output.len() {
2075 body_nl + 1
2076 } else {
2077 body_nl
2078 };
2079 content_end = scan;
2080 }
2081
2082 let content = &model_output[line_end..content_end];
2083
2084 if is_set {
2085 if let Some((start_str, end_str)) = specifier.split_once('-') {
2086 if let (Some(start), Some(end)) =
2087 (parse_line_ref(start_str), parse_line_ref(end_str))
2088 {
2089 commands.push(EditCommand::Set {
2090 start,
2091 end,
2092 content,
2093 });
2094 }
2095 } else if let Some(target) = parse_line_ref(specifier) {
2096 commands.push(EditCommand::Set {
2097 start: target.clone(),
2098 end: target,
2099 content,
2100 });
2101 }
2102 } else {
2103 let after = parse_line_ref(specifier);
2104 commands.push(EditCommand::Insert { after, content });
2105 }
2106
2107 offset = scan;
2108 }
2109
2110 commands
2111 }
2112
/// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
/// editable region, returning the plain text content.
///
/// Everything up to and including the first `|` of a line is removed; a line
/// that contains no `|` is kept unchanged. Lines are re-joined with `\n`, and
/// the result ends with a newline exactly when `region` does.
pub fn strip_hashline_prefixes(region: &str) -> String {
    // The decoded text is never longer than the encoded region.
    let mut decoded = String::with_capacity(region.len());
    for (row, line) in region.lines().enumerate() {
        if row > 0 {
            decoded.push('\n');
        }
        let content = line.split_once('|').map_or(line, |(_, rest)| rest);
        decoded.push_str(content);
    }
    // `lines()` swallows the final newline, so put it back when it was there.
    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
2128
2129 pub fn output_has_edit_commands(model_output: &str) -> bool {
2130 model_output.contains(SET_COMMAND_MARKER)
2131 || model_output.contains(INSERT_COMMAND_MARKER)
2132 || model_output.contains(NO_EDITS_COMMAND_MARKER)
2133 }
2134
2135 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2136 /// original editable region text.
2137 ///
2138 /// `editable_region` is the original text of the editable region (without hash
2139 /// prefixes). `model_output` is the raw model response containing edit commands.
2140 ///
2141 /// Returns the full replacement text for the editable region.
2142 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2143 if model_output
2144 .trim_start()
2145 .starts_with(NO_EDITS_COMMAND_MARKER)
2146 {
2147 return editable_region.to_string();
2148 }
2149
2150 let original_lines: Vec<&str> = editable_region.lines().collect();
2151 let old_hashes: Vec<u8> = original_lines
2152 .iter()
2153 .map(|line| hash_line(line.as_bytes()))
2154 .collect();
2155
2156 let commands = parse_edit_commands(model_output);
2157
2158 // For set operations: indexed by start line → Some((end line index, content))
2159 // For insert operations: indexed by line index → vec of content to insert after
2160 // Insert-before-first is tracked separately.
2161 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2162 let mut insert_before_first: Vec<&str> = Vec::new();
2163 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2164
2165 for command in &commands {
2166 match command {
2167 EditCommand::Set {
2168 start,
2169 end,
2170 content,
2171 } => {
2172 if start.index < old_hashes.len()
2173 && end.index < old_hashes.len()
2174 && start.index <= end.index
2175 && old_hashes[start.index] == start.hash
2176 && old_hashes[end.index] == end.hash
2177 {
2178 set_ops[start.index] = Some((end.index, *content));
2179 }
2180 }
2181 EditCommand::Insert { after, content } => match after {
2182 None => insert_before_first.push(*content),
2183 Some(line_ref) => {
2184 if line_ref.index < old_hashes.len()
2185 && old_hashes[line_ref.index] == line_ref.hash
2186 {
2187 insert_after[line_ref.index].push(*content);
2188 }
2189 }
2190 },
2191 }
2192 }
2193
2194 let mut result = String::new();
2195
2196 // Emit any insertions before the first line
2197 for content in &insert_before_first {
2198 result.push_str(content);
2199 if !content.ends_with('\n') {
2200 result.push('\n');
2201 }
2202 }
2203
2204 let mut i = 0;
2205 while i < original_lines.len() {
2206 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2207 // Replace lines i..=end_index with the replacement content
2208 result.push_str(replacement);
2209 if !replacement.is_empty() && !replacement.ends_with('\n') {
2210 result.push('\n');
2211 }
2212 // Emit any insertions after the end of this set range
2213 if *end_index < insert_after.len() {
2214 for content in &insert_after[*end_index] {
2215 result.push_str(content);
2216 if !content.ends_with('\n') {
2217 result.push('\n');
2218 }
2219 }
2220 }
2221 i = end_index + 1;
2222 } else {
2223 // Keep the original line
2224 result.push_str(original_lines[i]);
2225 result.push('\n');
2226 // Emit any insertions after this line
2227 for content in &insert_after[i] {
2228 result.push_str(content);
2229 if !content.ends_with('\n') {
2230 result.push('\n');
2231 }
2232 }
2233 i += 1;
2234 }
2235 }
2236
2237 // Preserve trailing newline behavior: if the original ended with a
2238 // newline the result already has one; if it didn't, trim the extra one
2239 // we added.
2240 if !editable_region.ends_with('\n') && result.ends_with('\n') {
2241 result.pop();
2242 }
2243
2244 result
2245 }
2246
2247 /// Convert a unified diff patch into hashline edit commands.
2248 ///
2249 /// Parses the unified diff `patch` directly to determine which lines of
2250 /// `old_text` are deleted/replaced and what new lines are added, then emits
2251 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2252 /// `{index}:{hash}` identifiers.
2253 ///
2254 /// `cursor_offset` is an optional byte offset into the first hunk's new
2255 /// text (context + additions) where the cursor marker should be placed.
2256 pub fn patch_to_edit_commands(
2257 old_text: &str,
2258 patch: &str,
2259 cursor_offset: Option<usize>,
2260 ) -> Result<String> {
2261 let old_lines: Vec<&str> = old_text.lines().collect();
2262 let old_hashes: Vec<u8> = old_lines
2263 .iter()
2264 .map(|line| hash_line(line.as_bytes()))
2265 .collect();
2266
2267 let mut result = String::new();
2268 let mut first_hunk = true;
2269
2270 struct Hunk<'a> {
2271 line_range: Range<usize>,
2272 new_text_lines: Vec<&'a str>,
2273 cursor_line_offset_in_new_text: Option<(usize, usize)>,
2274 }
2275
2276 // Parse the patch line by line. We only care about hunk headers,
2277 // context, deletions, and additions.
2278 let mut old_line_index: usize = 0;
2279 let mut current_hunk: Option<Hunk> = None;
2280 // Byte offset tracking within the hunk's new text for cursor placement.
2281 let mut new_text_byte_offset: usize = 0;
2282 // The line index of the last old line seen before/in the current hunk
2283 // (used for insert-after reference).
2284 let mut last_old_line_before_hunk: Option<usize> = None;
2285
2286 fn flush_hunk(
2287 hunk: Hunk,
2288 last_old_line: Option<usize>,
2289 result: &mut String,
2290 old_hashes: &[u8],
2291 ) {
2292 if hunk.line_range.is_empty() {
2293 // Pure insertion — reference the old line to insert after when in bounds.
2294 if let Some(after) = last_old_line
2295 && let Some(&hash) = old_hashes.get(after)
2296 {
2297 write!(
2298 result,
2299 "{INSERT_COMMAND_MARKER}{}\n",
2300 LineRef { index: after, hash }
2301 )
2302 .unwrap();
2303 } else {
2304 result.push_str(INSERT_COMMAND_MARKER);
2305 result.push('\n');
2306 }
2307 } else {
2308 let start = hunk.line_range.start;
2309 let end_exclusive = hunk.line_range.end;
2310 let deleted_line_count = end_exclusive.saturating_sub(start);
2311
2312 if deleted_line_count == 1 {
2313 if let Some(&hash) = old_hashes.get(start) {
2314 write!(
2315 result,
2316 "{SET_COMMAND_MARKER}{}\n",
2317 LineRef { index: start, hash }
2318 )
2319 .unwrap();
2320 } else {
2321 result.push_str(SET_COMMAND_MARKER);
2322 result.push('\n');
2323 }
2324 } else {
2325 let end_inclusive = end_exclusive - 1;
2326 match (
2327 old_hashes.get(start).copied(),
2328 old_hashes.get(end_inclusive).copied(),
2329 ) {
2330 (Some(start_hash), Some(end_hash)) => {
2331 write!(
2332 result,
2333 "{SET_COMMAND_MARKER}{}-{}\n",
2334 LineRef {
2335 index: start,
2336 hash: start_hash
2337 },
2338 LineRef {
2339 index: end_inclusive,
2340 hash: end_hash
2341 }
2342 )
2343 .unwrap();
2344 }
2345 _ => {
2346 result.push_str(SET_COMMAND_MARKER);
2347 result.push('\n');
2348 }
2349 }
2350 }
2351 }
2352 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2353 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2354 && line_offset == cursor_line_offset
2355 {
2356 result.push_str(&line[..char_offset]);
2357 result.push_str(CURSOR_MARKER);
2358 result.push_str(&line[char_offset..]);
2359 continue;
2360 }
2361
2362 result.push_str(line);
2363 }
2364 }
2365
2366 for raw_line in patch.split_inclusive('\n') {
2367 if raw_line.starts_with("@@") {
2368 // Flush any pending change hunk from a previous patch hunk.
2369 if let Some(hunk) = current_hunk.take() {
2370 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2371 }
2372
2373 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2374 // We intentionally do not trust old_start as a direct local index into `old_text`,
2375 // because some patches are produced against a larger file region and carry
2376 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2377 if first_hunk {
2378 new_text_byte_offset = 0;
2379 first_hunk = false;
2380 }
2381 continue;
2382 }
2383
2384 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2385 continue;
2386 }
2387 if raw_line.starts_with("\\ No newline") {
2388 continue;
2389 }
2390
2391 if raw_line.starts_with('-') {
2392 // Extend or start a change hunk with this deleted old line.
2393 match &mut current_hunk {
2394 Some(Hunk {
2395 line_range: range, ..
2396 }) => range.end = old_line_index + 1,
2397 None => {
2398 current_hunk = Some(Hunk {
2399 line_range: old_line_index..old_line_index + 1,
2400 new_text_lines: Vec::new(),
2401 cursor_line_offset_in_new_text: None,
2402 });
2403 }
2404 }
2405 old_line_index += 1;
2406 } else if let Some(added_content) = raw_line.strip_prefix('+') {
2407 // Place cursor marker if cursor_offset falls within this line.
2408 let mut cursor_line_offset = None;
2409 if let Some(cursor_off) = cursor_offset
2410 && (first_hunk
2411 || cursor_off >= new_text_byte_offset
2412 && cursor_off <= new_text_byte_offset + added_content.len())
2413 {
2414 let line_offset = added_content.floor_char_boundary(
2415 cursor_off
2416 .saturating_sub(new_text_byte_offset)
2417 .min(added_content.len()),
2418 );
2419 cursor_line_offset = Some(line_offset);
2420 }
2421
2422 new_text_byte_offset += added_content.len();
2423
2424 let hunk = current_hunk.get_or_insert(Hunk {
2425 line_range: old_line_index..old_line_index,
2426 new_text_lines: vec![],
2427 cursor_line_offset_in_new_text: None,
2428 });
2429 hunk.new_text_lines.push(added_content);
2430 hunk.cursor_line_offset_in_new_text = cursor_line_offset
2431 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2432 } else {
2433 // Context line (starts with ' ' or is empty).
2434 if let Some(hunk) = current_hunk.take() {
2435 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2436 }
2437 last_old_line_before_hunk = Some(old_line_index);
2438 old_line_index += 1;
2439 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2440 new_text_byte_offset += content.len();
2441 }
2442 }
2443
2444 // Flush final group.
2445 if let Some(hunk) = current_hunk.take() {
2446 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2447 }
2448
2449 // Trim a single trailing newline.
2450 if result.ends_with('\n') {
2451 result.pop();
2452 }
2453
2454 if result.is_empty() {
2455 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2456 }
2457
2458 Ok(result)
2459 }
2460
2461 #[cfg(test)]
2462 mod tests {
2463 use super::*;
2464 use indoc::indoc;
2465
/// Table-driven check of `hashline::write_cursor_excerpt_section`: each case
/// renders one context/editable-range/cursor combination for `test.rs` and
/// compares the whole prompt section against the expected text, including the
/// `{index}:{hash}|` line prefixes and the cursor marker position.
#[test]
fn test_format_cursor_region() {
    // One scenario: the inputs passed to the formatter and the exact output.
    struct Case {
        name: &'static str,
        context: &'static str,
        editable_range: Range<usize>,
        cursor_offset: usize,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "basic_cursor_placement",
            context: "hello world\n",
            editable_range: 0..12,
            cursor_offset: 5,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:5c|hello<|user_cursor|> world
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "multiline_cursor_on_second_line",
            context: "aaa\nbbb\nccc\n",
            editable_range: 0..12,
            cursor_offset: 5, // byte 5 → 1 byte into "bbb"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                2:29|ccc
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "no_trailing_newline_in_context",
            context: "line1\nline2",
            editable_range: 0..11,
            cursor_offset: 3,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:d9|lin<|user_cursor|>e1
                1:da|line2
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "leading_newline_in_editable_region",
            context: "\nabc\n",
            editable_range: 0..5,
            cursor_offset: 2, // byte 2 = just past 'a' in "abc" (after leading \n)
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:00|
                1:26|a<|user_cursor|>bc
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "with_suffix",
            context: "abc\ndef",
            editable_range: 0..4, // editable region = "abc\n", suffix = "def"
            cursor_offset: 2,
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|ab<|user_cursor|>c
                <|fim_suffix|>
                def
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "unicode_two_byte_chars",
            context: "héllo\n",
            editable_range: 0..7,
            cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:1b|hé<|user_cursor|>llo
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "unicode_three_byte_chars",
            context: "日本語\n",
            editable_range: 0..10,
            cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:80|日本<|user_cursor|>語
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "unicode_four_byte_chars",
            context: "a🌍b\n",
            editable_range: 0..7,
            cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:6b|a🌍<|user_cursor|>b
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "cursor_at_start_of_region_not_placed",
            context: "abc\n",
            editable_range: 0..4,
            cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "cursor_at_end_of_line_not_placed",
            context: "abc\ndef\n",
            editable_range: 0..8,
            cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                0:26|abc
                1:2f|def
                <|fim_suffix|>
                <|fim_middle|>updated
            "},
        },
        Case {
            name: "cursor_offset_relative_to_context_not_editable_region",
            // cursor_offset is relative to `context`, so when editable_range.start > 0,
            // write_cursor_excerpt_section must subtract it before comparing against
            // per-line offsets within the editable region.
            context: "pre\naaa\nbbb\nsuf\n",
            editable_range: 4..12, // editable region = "aaa\nbbb\n"
            cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
            expected: indoc! {"
                <|file_sep|>test.rs
                <|fim_prefix|>
                pre
                <|fim_middle|>current
                0:23|aaa
                1:26|b<|user_cursor|>bb
                <|fim_suffix|>
                suf
                <|fim_middle|>updated
            "},
        },
    ];

    // Render every case into a fresh buffer; the case name identifies failures.
    for case in &cases {
        let mut prompt = String::new();
        hashline::write_cursor_excerpt_section(
            &mut prompt,
            Path::new("test.rs"),
            case.context,
            &case.editable_range,
            case.cursor_offset,
        );
        assert_eq!(prompt, case.expected, "failed case: {}", case.name);
    }
}
2657
/// Table-driven check of `hashline::apply_edit_commands`: each case applies a
/// model output to an original region and compares the resulting text. The
/// cases cover valid set/insert commands as well as commands that must be
/// ignored (wrong hash, out-of-bounds index, reversed range, malformed
/// reference).
#[test]
fn test_apply_edit_commands() {
    // One scenario: original region, raw model output, expected result.
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = vec![
        Case {
            name: "set_single_line",
            original: indoc! {"
                let mut total = 0;
                for product in products {
                    total += ;
                }
                total
            "},
            model_output: indoc! {"
                <|set|>2:87
                    total += product.price;
            "},
            expected: indoc! {"
                let mut total = 0;
                for product in products {
                    total += product.price;
                }
                total
            "},
        },
        Case {
            name: "set_range",
            original: indoc! {"
                fn foo() {
                    let x = 1;
                    let y = 2;
                    let z = 3;
                }
            "},
            model_output: indoc! {"
                <|set|>1:46-3:4a
                    let sum = 6;
            "},
            expected: indoc! {"
                fn foo() {
                    let sum = 6;
                }
            "},
        },
        Case {
            name: "insert_after_line",
            original: indoc! {"
                fn main() {
                    let x = 1;
                }
            "},
            model_output: indoc! {"
                <|insert|>1:46
                    let y = 2;
            "},
            expected: indoc! {"
                fn main() {
                    let x = 1;
                    let y = 2;
                }
            "},
        },
        Case {
            name: "insert_before_first",
            original: indoc! {"
                let x = 1;
                let y = 2;
            "},
            model_output: indoc! {"
                <|insert|>
                use std::io;
            "},
            expected: indoc! {"
                use std::io;
                let x = 1;
                let y = 2;
            "},
        },
        Case {
            name: "set_with_cursor_marker",
            original: indoc! {"
                fn main() {
                    println!();
                }
            "},
            model_output: indoc! {"
                <|set|>1:34
                    eprintln!(\"<|user_cursor|>\");
            "},
            expected: indoc! {"
                fn main() {
                    eprintln!(\"<|user_cursor|>\");
                }
            "},
        },
        Case {
            name: "multiple_set_commands",
            original: indoc! {"
                aaa
                bbb
                ccc
                ddd
            "},
            model_output: indoc! {"
                <|set|>0:23
                AAA
                <|set|>2:29
                CCC
            "},
            expected: indoc! {"
                AAA
                bbb
                CCC
                ddd
            "},
        },
        Case {
            name: "set_range_multiline_replacement",
            original: indoc! {"
                fn handle_submit() {
                }

                fn handle_keystroke() {
            "},
            model_output: indoc! {"
                <|set|>0:3f-1:7d
                fn handle_submit(modal_state: &mut ModalState) {
                    <|user_cursor|>
                }
            "},
            expected: indoc! {"
                fn handle_submit(modal_state: &mut ModalState) {
                    <|user_cursor|>
                }

                fn handle_keystroke() {
            "},
        },
        Case {
            name: "no_edit_commands_returns_original",
            original: indoc! {"
                hello
                world
            "},
            model_output: "some random text with no commands",
            expected: indoc! {"
                hello
                world
            "},
        },
        Case {
            name: "no_edits_command_returns_original",
            original: indoc! {"
                hello
                world
            "},
            model_output: "<|no_edits|>",
            expected: indoc! {"
                hello
                world
            "},
        },
        Case {
            name: "wrong_hash_set_ignored",
            original: indoc! {"
                aaa
                bbb
            "},
            model_output: indoc! {"
                <|set|>0:ff
                ZZZ
            "},
            expected: indoc! {"
                aaa
                bbb
            "},
        },
        Case {
            name: "insert_and_set_combined",
            original: indoc! {"
                alpha
                beta
                gamma
            "},
            model_output: indoc! {"
                <|set|>0:06
                ALPHA
                <|insert|>1:9c
                beta_extra
            "},
            expected: indoc! {"
                ALPHA
                beta
                beta_extra
                gamma
            "},
        },
        Case {
            name: "no_trailing_newline_preserved",
            original: "hello\nworld",
            model_output: indoc! {"
                <|set|>0:14
                HELLO
            "},
            expected: "HELLO\nworld",
        },
        Case {
            name: "set_range_hash_mismatch_in_end_bound",
            original: indoc! {"
                one
                two
                three
            "},
            model_output: indoc! {"
                <|set|>0:42-2:ff
                ONE_TWO_THREE
            "},
            expected: indoc! {"
                one
                two
                three
            "},
        },
        Case {
            name: "set_range_start_greater_than_end_ignored",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|set|>2:63-1:62
                X
            "},
            expected: indoc! {"
                a
                b
                c
            "},
        },
        Case {
            name: "insert_out_of_bounds_ignored",
            original: indoc! {"
                x
                y
            "},
            model_output: indoc! {"
                <|insert|>99:aa
                z
            "},
            expected: indoc! {"
                x
                y
            "},
        },
        Case {
            name: "set_out_of_bounds_ignored",
            original: indoc! {"
                x
                y
            "},
            model_output: indoc! {"
                <|set|>99:aa
                z
            "},
            expected: indoc! {"
                x
                y
            "},
        },
        Case {
            name: "malformed_set_command_ignored",
            original: indoc! {"
                alpha
                beta
            "},
            model_output: indoc! {"
                <|set|>not-a-line-ref
                UPDATED
            "},
            expected: indoc! {"
                alpha
                beta
            "},
        },
        Case {
            name: "malformed_insert_hash_treated_as_before_first",
            original: indoc! {"
                alpha
                beta
            "},
            model_output: indoc! {"
                <|insert|>1:nothex
                preamble
            "},
            expected: indoc! {"
                preamble
                alpha
                beta
            "},
        },
        Case {
            name: "set_then_insert_same_target_orders_insert_after_replacement",
            original: indoc! {"
                cat
                dog
            "},
            model_output: indoc! {"
                <|set|>0:38
                CAT
                <|insert|>0:38
                TAIL
            "},
            expected: indoc! {"
                CAT
                TAIL
                dog
            "},
        },
        Case {
            // NOTE(review): despite the name, the expected text keeps the FIRST
            // range's replacement; the second set starts inside the first range
            // and is never reached.
            name: "overlapping_set_ranges_last_wins",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                <|set|>0:61-2:63
                FIRST
                <|set|>1:62-3:64
                SECOND
            "},
            expected: indoc! {"
                FIRST
                d
            "},
        },
        Case {
            name: "insert_before_first_and_after_line",
            original: indoc! {"
                a
                b
            "},
            model_output: indoc! {"
                <|insert|>
                HEAD
                <|insert|>0:61
                MID
            "},
            expected: indoc! {"
                HEAD
                a
                MID
                b
            "},
        },
    ];

    // Apply each model output to its original; the case name identifies failures.
    for case in &cases {
        let result = hashline::apply_edit_commands(case.original, &case.model_output);
        assert_eq!(result, case.expected, "failed case: {}", case.name);
    }
}
3028
/// `hashline::output_has_edit_commands` must detect the set, insert, and
/// no-edits markers anywhere in the output and reject text without them.
#[test]
fn test_output_has_edit_commands() {
    // Outputs that contain a command marker, at the start or mid-text.
    let set_at_start = format!("{}0:ab\nnew", SET_COMMAND_MARKER);
    let insert_at_start = format!("{}0:ab\nnew", INSERT_COMMAND_MARKER);
    let set_after_text = format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER);

    for output in [
        set_at_start.as_str(),
        insert_at_start.as_str(),
        set_after_text.as_str(),
    ] {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Plain text, including the bare word `NO_EDITS`, carries no marker.
    for output in ["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }

    assert!(hashline::output_has_edit_commands("<|no_edits|>"));
}
3047
3048 // ---- hashline::patch_to_edit_commands round-trip tests ----
3049
/// Round-trip check for `hashline::patch_to_edit_commands`: each case converts
/// a unified diff into edit commands, asserts that the result is recognised as
/// edit commands, then applies those commands to `old` with
/// `hashline::apply_edit_commands` and compares against `expected_new`.
#[test]
fn test_patch_to_edit_commands() {
    // One scenario: old text, the patch against it, and the text expected
    // after converting and re-applying the patch.
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        expected_new: &'static str,
    }

    let cases = [
        Case {
            name: "single_line_replacement",
            old: indoc! {"
                let mut total = 0;
                for product in products {
                    total += ;
                }
                total
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                 let mut total = 0;
                 for product in products {
                -    total += ;
                +    total += product.price;
                 }
                 total
            "},
            expected_new: indoc! {"
                let mut total = 0;
                for product in products {
                    total += product.price;
                }
                total
            "},
        },
        Case {
            name: "multiline_replacement",
            old: indoc! {"
                fn foo() {
                    let x = 1;
                    let y = 2;
                    let z = 3;
                }
            "},
            patch: indoc! {"
                @@ -1,5 +1,3 @@
                 fn foo() {
                -    let x = 1;
                -    let y = 2;
                -    let z = 3;
                +    let sum = 1 + 2 + 3;
                 }
            "},
            expected_new: indoc! {"
                fn foo() {
                    let sum = 1 + 2 + 3;
                }
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                fn main() {
                    let x = 1;
                }
            "},
            patch: indoc! {"
                @@ -1,3 +1,4 @@
                 fn main() {
                     let x = 1;
                +    let y = 2;
                 }
            "},
            expected_new: indoc! {"
                fn main() {
                    let x = 1;
                    let y = 2;
                }
            "},
        },
        Case {
            name: "insertion_before_first",
            old: indoc! {"
                let x = 1;
                let y = 2;
            "},
            patch: indoc! {"
                @@ -1,2 +1,3 @@
                +use std::io;
                 let x = 1;
                 let y = 2;
            "},
            expected_new: indoc! {"
                use std::io;
                let x = 1;
                let y = 2;
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                aaa
                bbb
                ccc
                ddd
            "},
            patch: indoc! {"
                @@ -1,4 +1,2 @@
                 aaa
                -bbb
                -ccc
                 ddd
            "},
            expected_new: indoc! {"
                aaa
                ddd
            "},
        },
        Case {
            name: "multiple_changes",
            old: indoc! {"
                alpha
                beta
                gamma
                delta
                epsilon
            "},
            patch: indoc! {"
                @@ -1,5 +1,5 @@
                -alpha
                +ALPHA
                 beta
                 gamma
                -delta
                +DELTA
                 epsilon
            "},
            expected_new: indoc! {"
                ALPHA
                beta
                gamma
                DELTA
                epsilon
            "},
        },
        Case {
            name: "replace_with_insertion",
            old: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    modal_state.dismiss();
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,4 @@
                 fn handle() {
                     modal_state.close();
                +    eprintln!("");
                     modal_state.dismiss();
            "#},
            expected_new: indoc! {r#"
                fn handle() {
                    modal_state.close();
                    eprintln!("");
                    modal_state.dismiss();
            "#},
        },
        Case {
            name: "complete_replacement",
            old: indoc! {"
                aaa
                bbb
                ccc
            "},
            patch: indoc! {"
                @@ -1,3 +1,3 @@
                -aaa
                -bbb
                -ccc
                +xxx
                +yyy
                +zzz
            "},
            expected_new: indoc! {"
                xxx
                yyy
                zzz
            "},
        },
        Case {
            name: "add_function_body",
            old: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn

                fn handle_keystroke() {
            "},
            patch: indoc! {"
                @@ -1,6 +1,8 @@
                 fn foo() {
                     modal_state.dismiss();
                 }

                -fn
                +fn handle_submit() {
                +    todo()
                +}

                 fn handle_keystroke() {
            "},
            expected_new: indoc! {"
                fn foo() {
                    modal_state.dismiss();
                }

                fn handle_submit() {
                    todo()
                }

                fn handle_keystroke() {
            "},
        },
        Case {
            name: "with_cursor_offset",
            old: indoc! {r#"
                fn main() {
                    println!();
                }
            "#},
            patch: indoc! {r#"
                @@ -1,3 +1,3 @@
                 fn main() {
                -    println!();
                +    eprintln!("");
                 }
            "#},
            expected_new: indoc! {r#"
                fn main() {
                    eprintln!("<|user_cursor|>");
                }
            "#},
        },
        Case {
            // The header's line numbers (20) do not match `old`; the patch must
            // still apply because indexing is derived from the patch lines.
            name: "non_local_hunk_header_pure_insertion_repro",
            old: indoc! {"
                aaa
                bbb
            "},
            patch: indoc! {"
                @@ -20,2 +20,3 @@
                 aaa
                +xxx
                 bbb
            "},
            expected_new: indoc! {"
                aaa
                xxx
                bbb
            "},
        },
        Case {
            name: "empty_patch_produces_no_edits_marker",
            old: indoc! {"
                aaa
                bbb
            "},
            patch: "@@ -20,2 +20,3 @@\n",
            expected_new: indoc! {"
                aaa
                bbb
            "},
        },
    ];

    for case in &cases {
        // The cursor_offset for patch_to_edit_commands is relative to
        // the first hunk's new text (context + additions). We compute
        // it by finding where the marker sits in the expected output
        // (which mirrors the new text of the hunk).
        let cursor_offset = case.expected_new.find(CURSOR_MARKER);

        let commands = hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
            .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

        // Even an empty patch must yield something recognised as commands
        // (the no-edits marker counts).
        assert!(
            hashline::output_has_edit_commands(&commands),
            "case {}: expected edit commands, got: {commands:?}",
            case.name,
        );

        // Round trip: applying the generated commands must reproduce the new text.
        let applied = hashline::apply_edit_commands(case.old, &commands);
        assert_eq!(applied, case.expected_new, "case {}", case.name);
    }
}
3348 }
3349}
3350
3351pub mod seed_coder {
3352 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3353 //!
3354 //! Seed-Coder uses different FIM tokens and order than Qwen:
3355 //! - SPM order: suffix comes FIRST, then prefix, then middle
3356 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3357 //! - File markers: StarCoder-style `<filename>path` (single token + path)
3358 //!
3359 //! All context (related files, edit history) goes in the PREFIX section.
3360 //! The suffix contains only code after the editable region.
3361 //!
3362 //! Example prompt:
3363 //!
3364 //! <[fim-suffix]>
3365 //! code after editable region
3366 //! <[fim-prefix]><filename>related/file.py
3367 //! related file content
3368 //!
3369 //! <filename>edit_history
3370 //! --- a/some_file.py
3371 //! +++ b/some_file.py
3372 //! -old
3373 //! +new
3374 //!
3375 //! <filename>path/to/target_file.py
3376 //! code before editable region
3377 //! <<<<<<< CURRENT
3378 //! code that
3379 //! needs to<|user_cursor|>
3380 //! be rewritten
3381 //! =======
3382 //! <[fim-middle]>
3383 //!
3384 //! Expected output (model generates):
3385 //!
3386 //! updated
3387 //! code with
3388 //! changes applied
3389 //! >>>>>>> UPDATED
3390
3391 use super::*;
3392
/// Token that opens the suffix section, which is emitted first in the prompt.
pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
/// Token that follows the suffix section and opens the prefix section.
pub const FIM_PREFIX: &str = "<[fim-prefix]>";
/// Token that ends the prompt; the model's completion follows it.
pub const FIM_MIDDLE: &str = "<[fim-middle]>";
/// Marker written immediately before a file path.
pub const FILE_MARKER: &str = "<filename>";

/// Line that precedes the current text of the editable region (see module docs).
pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
/// Line that follows the current text of the editable region (see module docs).
pub const SEPARATOR: &str = "=======\n";
/// Line that terminates the model's updated text (see module docs).
pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

/// NOTE(review): presumably the model's response when no change is needed;
/// its use is outside this excerpt, so confirm against the caller.
pub const NO_EDITS: &str = "NO_EDITS\n";
3403
3404 pub fn special_tokens() -> &'static [&'static str] {
3405 &[
3406 FIM_SUFFIX,
3407 FIM_PREFIX,
3408 FIM_MIDDLE,
3409 FILE_MARKER,
3410 START_MARKER,
3411 SEPARATOR,
3412 END_MARKER,
3413 CURSOR_MARKER,
3414 ]
3415 }
3416
3417 pub fn write_cursor_excerpt_section(
3418 prompt: &mut String,
3419 path: &Path,
3420 context: &str,
3421 editable_range: &Range<usize>,
3422 cursor_offset: usize,
3423 ) {
3424 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3425 prompt.push_str(§ion);
3426 }
3427
3428 pub fn format_prompt_with_budget(
3429 path: &Path,
3430 context: &str,
3431 editable_range: &Range<usize>,
3432 cursor_offset: usize,
3433 events: &[Arc<Event>],
3434 related_files: &[RelatedFile],
3435 diagnostics: &[ActiveBufferDiagnostic],
3436 max_tokens: usize,
3437 ) -> String {
3438 let cursor_prefix_section =
3439 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3440 assemble_fim_prompt(
3441 context,
3442 editable_range,
3443 &cursor_prefix_section,
3444 events,
3445 related_files,
3446 diagnostics,
3447 None,
3448 max_tokens,
3449 )
3450 }
3451
3452 pub fn assemble_fim_prompt(
3453 context: &str,
3454 editable_range: &Range<usize>,
3455 cursor_prefix_section: &str,
3456 events: &[Arc<Event>],
3457 related_files: &[RelatedFile],
3458 diagnostics: &[ActiveBufferDiagnostic],
3459 cursor_buffer_row: Option<u32>,
3460 max_tokens: usize,
3461 ) -> String {
3462 let suffix_section = build_suffix_section(context, editable_range);
3463
3464 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3465 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3466 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3467
3468 let edit_history_section = super::format_edit_history_within_budget(
3469 events,
3470 FILE_MARKER,
3471 "edit_history",
3472 budget_after_cursor,
3473 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3474 );
3475 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3476 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
3477
3478 let diagnostics_section = super::format_active_buffer_diagnostics_with_budget(
3479 diagnostics,
3480 cursor_buffer_row,
3481 budget_after_edit_history,
3482 );
3483 let diagnostics_tokens = estimate_tokens(diagnostics_section.len() + "\n".len());
3484 let budget_after_diagnostics = budget_after_edit_history.saturating_sub(diagnostics_tokens);
3485
3486 let related_files_section = super::format_related_files_within_budget(
3487 related_files,
3488 FILE_MARKER,
3489 "",
3490 budget_after_diagnostics,
3491 );
3492
3493 let mut prompt = String::new();
3494 prompt.push_str(&suffix_section);
3495 prompt.push_str(FIM_PREFIX);
3496 prompt.push_str(&diagnostics_section);
3497 if !diagnostics_section.is_empty() {
3498 prompt.push('\n');
3499 }
3500 prompt.push_str(&related_files_section);
3501 if !related_files_section.is_empty() {
3502 prompt.push('\n');
3503 }
3504 prompt.push_str(&edit_history_section);
3505 if !edit_history_section.is_empty() {
3506 prompt.push('\n');
3507 }
3508 prompt.push_str(cursor_prefix_section);
3509 prompt.push_str(FIM_MIDDLE);
3510
3511 prompt
3512 }
3513
3514 pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3515 let mut section = String::new();
3516 section.push_str(FIM_SUFFIX);
3517 section.push_str(&context[editable_range.end..]);
3518 if !section.ends_with('\n') {
3519 section.push('\n');
3520 }
3521 section
3522 }
3523
3524 fn build_cursor_prefix_section(
3525 path: &Path,
3526 context: &str,
3527 editable_range: &Range<usize>,
3528 cursor_offset: usize,
3529 ) -> String {
3530 let mut section = String::new();
3531 let path_str = path.to_string_lossy();
3532 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3533
3534 section.push_str(&context[..editable_range.start]);
3535 section.push_str(START_MARKER);
3536 section.push_str(&context[editable_range.start..cursor_offset]);
3537 section.push_str(CURSOR_MARKER);
3538 section.push_str(&context[cursor_offset..editable_range.end]);
3539 if !section.ends_with('\n') {
3540 section.push('\n');
3541 }
3542 section.push_str(SEPARATOR);
3543 section
3544 }
3545
3546 /// Format patch as containing no changes if it's empty; otherwise return None.
3547 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3548 // Count lines in the patch
3549 let empty_patch = patch.lines().count() <= 3;
3550 if empty_patch {
3551 Some(format!("{NO_EDITS}{END_MARKER}"))
3552 } else {
3553 None
3554 }
3555 }
3556}
3557
3558pub mod v0304_variable_edit {
3559 //! A prompt format with no fixed editable region. The entire context is shown
3560 //! to the model, and it chooses which text to replace by outputting surrounding
3561 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3562 //! text.
3563 //!
3564 //! Example prompt:
3565 //!
3566 //! <|file_sep|>path/to/file.py
3567 //! zero
3568 //! one
3569 //! two
3570 //! three<|user_cursor|>
3571 //! four
3572 //! five
3573 //! <|fim_prefix|>
//!
3575 //! Expected output (model generates):
3576 //!
3577 //! two
3578 //! <|fim_middle|>
3579 //! THREE
3580 //! <|fim_suffix|>
3581 //! four
3582 //!
3583 //! The output means: find "two\n...\nfour" in the context, and replace
3584 //! everything between "two\n" and "four" with "THREE\n".
3585
3586 use super::*;
3587
3588 pub fn special_tokens() -> &'static [&'static str] {
3589 &[
3590 "<|fim_prefix|>",
3591 "<|fim_suffix|>",
3592 "<|fim_middle|>",
3593 "<|file_sep|>",
3594 CURSOR_MARKER,
3595 ]
3596 }
3597
3598 pub fn write_cursor_excerpt_section(
3599 prompt: &mut String,
3600 path: &Path,
3601 context: &str,
3602 cursor_offset: usize,
3603 ) {
3604 let path_str = path.to_string_lossy();
3605 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3606
3607 prompt.push_str(&context[..cursor_offset]);
3608 prompt.push_str(CURSOR_MARKER);
3609 prompt.push_str(&context[cursor_offset..]);
3610 if !prompt.ends_with('\n') {
3611 prompt.push('\n');
3612 }
3613 prompt.push_str("<|fim_prefix|>\n")
3614 }
3615
3616 /// Apply a variable-edit model output to the original context text.
3617 ///
3618 /// The model output has the form:
3619 ///
3620 /// - prefix context lines
3621 /// - `<|fim_middle|>`
3622 /// - new text
3623 /// - `<|fim_suffix|>`
3624 /// - suffix context lines
3625 ///
3626 /// We locate the prefix/suffix context lines in the original text and replace
3627 /// everything between them with the new text.
3628 pub fn apply_variable_edit(
3629 context: &str,
3630 model_output: &str,
3631 ) -> Result<(Range<usize>, String)> {
3632 let (prefix_context, rest) = model_output
3633 .split_once("<|fim_middle|>\n")
3634 .or_else(|| model_output.split_once("<|fim_middle|>"))
3635 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3636
3637 let (new_text, suffix_context) = rest
3638 .split_once("<|fim_suffix|>\n")
3639 .or_else(|| rest.split_once("<|fim_suffix|>"))
3640 .unwrap_or((rest, ""));
3641
3642 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3643 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3644 } else {
3645 suffix_context
3646 };
3647
3648 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3649 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3650 + prefix_context.len();
3651 let suffix_offset = if suffix_context.is_empty() {
3652 context.len()
3653 } else {
3654 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3655 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3656 + prefix_offset
3657 };
3658
3659 let edit_range = prefix_offset..suffix_offset;
3660 return Ok((edit_range, new_text.to_string()));
3661 }
3662
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that starts at the beginning of a line, i.e. at offset 0 or directly after
/// a `\n`. An empty `needle` matches at offset 0.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    let bytes = haystack.as_bytes();
    haystack
        .match_indices(needle)
        .map(|(start, _)| start)
        .find(|&start| start == 0 || bytes[start - 1] == b'\n')
}
3673
3674 /// Convert a unified diff patch into the variable-edit output format.
3675 ///
3676 /// Parses `patch` as a unified diff against `old_text` and produces model
3677 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3678 /// delimiters. The diff is resolved by content matching rather than line
3679 /// numbers.
3680 pub fn patch_to_variable_edit_output(
3681 old_text: &str,
3682 patch: &str,
3683 cursor_offset: Option<usize>,
3684 ) -> Result<String> {
3685 // Parse the unified diff into hunks. Each hunk has an `old_context`
3686 // string (context + deleted lines interleaved in order) and a list of
3687 // edits expressed as byte ranges within that context plus replacement
3688 // text.
3689 let hunks = parse_hunks(patch);
3690 if hunks.is_empty() {
3691 return Ok(String::new());
3692 }
3693
3694 // Apply each hunk by finding its old_context in the text and
3695 // performing the edits. We search forward from where the previous
3696 // hunk ended so that hunks are applied in order.
3697 let mut new_text = old_text.to_string();
3698 let mut search_from: usize = 0;
3699 let mut first_hunk_pos: Option<usize> = None;
3700
3701 for hunk in &hunks {
3702 let context_pos = new_text[search_from..]
3703 .find(&hunk.old_context)
3704 .map(|pos| pos + search_from)
3705 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3706
3707 if first_hunk_pos.is_none() {
3708 first_hunk_pos = Some(context_pos);
3709 }
3710
3711 // Apply edits in reverse order so byte offsets remain valid.
3712 for edit in hunk.edits.iter().rev() {
3713 let abs_start = context_pos + edit.range.start;
3714 let abs_end = context_pos + edit.range.end;
3715 new_text.replace_range(abs_start..abs_end, &edit.text);
3716 }
3717
3718 // Advance past this hunk's region in the (now modified) text.
3719 let new_region_len: usize =
3720 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3721 len + edit.text.len() - (edit.range.end - edit.range.start)
3722 });
3723 search_from = context_pos + new_region_len;
3724 }
3725
3726 // Now we have old_text and new_text. Find the changed line range by
3727 // comparing them.
3728 let old_lines: Vec<&str> = old_text.lines().collect();
3729 let new_lines: Vec<&str> = new_text.lines().collect();
3730
3731 // Find first differing line.
3732 let first_changed_row = old_lines
3733 .iter()
3734 .zip(new_lines.iter())
3735 .position(|(a, b)| a != b)
3736 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3737
3738 // Find last differing line (from the end).
3739 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3740 let common_suffix = old_lines
3741 .iter()
3742 .rev()
3743 .zip(new_lines.iter().rev())
3744 .take(max_suffix)
3745 .take_while(|(a, b)| a == b)
3746 .count();
3747
3748 let old_end = old_lines.len() - common_suffix;
3749 let new_end = new_lines.len() - common_suffix;
3750
3751 if first_changed_row == old_end && first_changed_row == new_end {
3752 return Ok(String::new());
3753 }
3754
3755 // Build the replacement text from new_lines[first_diff..new_end].
3756 let mut merged_new_text = String::new();
3757 for line in &new_lines[first_changed_row..new_end] {
3758 merged_new_text.push_str(line);
3759 merged_new_text.push('\n');
3760 }
3761
3762 // cursor_offset is relative to the first hunk's new content in
3763 // new_text. Translate it to an offset within merged_new_text, which
3764 // only contains lines first_diff..new_end of new_text.
3765 if let Some(hunk_offset) = cursor_offset {
3766 let hunk_start = first_hunk_pos.unwrap_or(0);
3767 let absolute_pos = hunk_start + hunk_offset;
3768
3769 // Byte offset where first_diff starts in new_text.
3770 let merged_start: usize = new_lines[..first_changed_row]
3771 .iter()
3772 .map(|line| line.len() + 1)
3773 .sum();
3774
3775 if absolute_pos >= merged_start {
3776 let relative_offset = absolute_pos - merged_start;
3777 if relative_offset <= merged_new_text.len() {
3778 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3779 }
3780 }
3781 }
3782
3783 // Build output with 2 lines of context above and below.
3784 let context_lines_count = 2;
3785 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3786 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3787
3788 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3789 let pattern = &lines[line_range];
3790 let pattern_len = pattern.len();
3791
3792 let mut count = 0;
3793 for offset in 0..=lines.len() - pattern_len {
3794 if &lines[offset..offset + pattern_len] == pattern {
3795 count += 1;
3796 }
3797 }
3798 count
3799 }
3800
3801 // Expand prefix and suffix until they are unique
3802 while prefix_start > 0 {
3803 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3804 prefix_start -= 1;
3805 } else {
3806 break;
3807 }
3808 }
3809 while suffix_end < old_lines.len() {
3810 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3811 suffix_end += 1;
3812 } else {
3813 break;
3814 }
3815 }
3816
3817 let mut output = String::new();
3818 for line in &old_lines[prefix_start..first_changed_row] {
3819 output.push_str(line);
3820 output.push('\n');
3821 }
3822 output.push_str("<|fim_middle|>\n");
3823 output.push_str(&merged_new_text);
3824 output.push_str("<|fim_suffix|>\n");
3825 for line in &old_lines[old_end..suffix_end] {
3826 output.push_str(line);
3827 output.push('\n');
3828 }
3829
3830 Ok(output)
3831 }
3832
3833 struct ParsedHunk {
3834 old_context: String,
3835 edits: Vec<ParsedEdit>,
3836 }
3837
/// A single replacement inside a hunk.
struct ParsedEdit {
    /// Byte range within the owning hunk's `old_context` that is replaced;
    /// empty for a pure insertion.
    range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    text: String,
}
3842
3843 /// Parse a unified diff into content-based hunks. Each hunk contains an
3844 /// `old_context` string (context lines + deleted lines, which together
3845 /// form the text that should be found in the original) and a list of edits
3846 /// expressed as byte ranges within that context.
3847 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3848 let mut hunks = Vec::new();
3849 let mut current: Option<ParsedHunk> = None;
3850
3851 for line in patch.lines() {
3852 if line.starts_with("@@") {
3853 if let Some(hunk) = current.take() {
3854 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3855 hunks.push(hunk);
3856 }
3857 }
3858 current = Some(ParsedHunk {
3859 old_context: String::new(),
3860 edits: Vec::new(),
3861 });
3862 } else if line.starts_with("---") || line.starts_with("+++") {
3863 continue;
3864 } else if let Some(hunk) = &mut current {
3865 if let Some(added) = line.strip_prefix('+') {
3866 let pos = hunk.old_context.len();
3867 if let Some(last_edit) = hunk.edits.last_mut() {
3868 if last_edit.range.end == pos {
3869 writeln!(&mut last_edit.text, "{added}").ok();
3870 continue;
3871 }
3872 }
3873 hunk.edits.push(ParsedEdit {
3874 range: pos..pos,
3875 text: format!("{added}\n"),
3876 });
3877 } else if let Some(removed) = line.strip_prefix('-') {
3878 let start = hunk.old_context.len();
3879 writeln!(&mut hunk.old_context, "{removed}").ok();
3880 let end = hunk.old_context.len();
3881 if let Some(last_edit) = hunk.edits.last_mut() {
3882 if last_edit.range.end == start {
3883 last_edit.range.end = end;
3884 continue;
3885 }
3886 }
3887 hunk.edits.push(ParsedEdit {
3888 range: start..end,
3889 text: String::new(),
3890 });
3891 } else {
3892 let ctx = line.strip_prefix(' ').unwrap_or(line);
3893 writeln!(&mut hunk.old_context, "{ctx}").ok();
3894 }
3895 }
3896 }
3897
3898 if let Some(hunk) = current {
3899 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3900 hunks.push(hunk);
3901 }
3902 }
3903
3904 hunks
3905 }
3906
3907 #[cfg(test)]
3908 mod tests {
3909 use super::*;
3910 use indoc::indoc;
3911
3912 #[test]
3913 fn test_apply_variable_edit() {
3914 struct Case {
3915 name: &'static str,
3916 original: &'static str,
3917 model_output: &'static str,
3918 expected: &'static str,
3919 }
3920
3921 let cases = [
3922 Case {
3923 name: "simple_single_line_replacement",
3924 original: indoc! {"
3925 zero
3926 one
3927 two
3928 three
3929 four
3930 five
3931 "},
3932 model_output: indoc! {"
3933 two
3934 <|fim_middle|>
3935 THREE
3936 <|fim_suffix|>
3937 four
3938 "},
3939 expected: indoc! {"
3940 zero
3941 one
3942 two
3943 THREE
3944 four
3945 five
3946 "},
3947 },
3948 Case {
3949 name: "multi_line_replacement",
3950 original: indoc! {"
3951 a
3952 b
3953 c
3954 d
3955 e
3956 "},
3957 model_output: indoc! {"
3958 a
3959 <|fim_middle|>
3960 B
3961 C
3962 D
3963 <|fim_suffix|>
3964 e
3965 "},
3966 expected: indoc! {"
3967 a
3968 B
3969 C
3970 D
3971 e
3972 "},
3973 },
3974 Case {
3975 name: "insertion_between_existing_lines",
3976 original: indoc! {"
3977 a
3978 b
3979 c
3980 "},
3981 model_output: indoc! {"
3982 a
3983 <|fim_middle|>
3984 X
3985 <|fim_suffix|>
3986 b
3987 "},
3988 expected: indoc! {"
3989 a
3990 X
3991 b
3992 c
3993 "},
3994 },
3995 Case {
3996 name: "deletion",
3997 original: indoc! {"
3998 a
3999 b
4000 c
4001 d
4002 "},
4003 model_output: indoc! {"
4004 a
4005 <|fim_middle|>
4006 <|fim_suffix|>
4007 c
4008 "},
4009 expected: indoc! {"
4010 a
4011 c
4012 d
4013 "},
4014 },
4015 Case {
4016 name: "replacement_at_start_no_prefix_context",
4017 original: indoc! {"
4018 a
4019 b
4020 c
4021 "},
4022 model_output: indoc! {"
4023 <|fim_middle|>
4024 X
4025 <|fim_suffix|>
4026 b
4027 "},
4028 expected: indoc! {"
4029 X
4030 b
4031 c
4032 "},
4033 },
4034 Case {
4035 name: "replacement_at_end_no_suffix_context",
4036 original: indoc! {"
4037 a
4038 b
4039 c
4040 "},
4041 model_output: indoc! {"
4042 b
4043 <|fim_middle|>
4044 Z
4045 <|fim_suffix|>
4046 "},
4047 expected: indoc! {"
4048 a
4049 b
4050 Z
4051 "},
4052 },
4053 Case {
4054 name: "context_with_trailing_newline_is_preserved",
4055 original: indoc! {"
4056 a
4057 b
4058 c
4059 "},
4060 model_output: indoc! {"
4061 a
4062 <|fim_middle|>
4063 B
4064 <|fim_suffix|>
4065 c
4066 "},
4067 expected: indoc! {"
4068 a
4069 B
4070 c
4071 "},
4072 },
4073 Case {
4074 name: "cursor_marker_passes_through_untouched",
4075 original: indoc! {"
4076 a
4077 b
4078 c
4079 "},
4080 model_output: indoc! {"
4081 a
4082 <|fim_middle|>
4083 B<|user_cursor|>B
4084 <|fim_suffix|>
4085 c
4086 "},
4087 expected: indoc! {"
4088 a
4089 B<|user_cursor|>B
4090 c
4091 "},
4092 },
4093 Case {
4094 name: "multiple_prefix_context_lines",
4095 original: indoc! {"
4096 a
4097 b
4098 c
4099 d
4100 e
4101 "},
4102 model_output: indoc! {"
4103 b
4104 c
4105 <|fim_middle|>
4106 D
4107 <|fim_suffix|>
4108 e
4109 "},
4110 expected: indoc! {"
4111 a
4112 b
4113 c
4114 D
4115 e
4116 "},
4117 },
4118 ];
4119
4120 for case in cases {
4121 let (edit_range, replacement) =
4122 apply_variable_edit(case.original, case.model_output).unwrap();
4123 let mut edited = case.original.to_string();
4124 edited.replace_range(edit_range, &replacement);
4125 assert_eq!(edited, case.expected, "{}", case.name);
4126 }
4127 }
4128
4129 #[test]
4130 fn test_patch_to_variable_edit() {
4131 struct Case {
4132 name: &'static str,
4133 old: &'static str,
4134 patch: &'static str,
4135 cursor_offset: Option<usize>,
4136 expected_variable_edit: &'static str,
4137 expected_after_apply: &'static str,
4138 }
4139
4140 let cases = [
4141 Case {
4142 name: "simple_replacement",
4143 old: indoc! {"
4144 zero
4145 one
4146 two
4147 three
4148 four
4149 five
4150 "},
4151 patch: indoc! {"
4152 @@ -3,3 +3,3 @@
4153 two
4154 -three
4155 +THREE
4156 four
4157 "},
4158 cursor_offset: None,
4159 expected_variable_edit: indoc! {"
4160 one
4161 two
4162 <|fim_middle|>
4163 THREE
4164 <|fim_suffix|>
4165 four
4166 five
4167 "},
4168 expected_after_apply: indoc! {"
4169 zero
4170 one
4171 two
4172 THREE
4173 four
4174 five
4175 "},
4176 },
4177 Case {
4178 name: "insertion",
4179 old: indoc! {"
4180 a
4181 b
4182 c
4183 d
4184 e
4185 "},
4186 patch: indoc! {"
4187 @@ -2,0 +3,1 @@
4188 b
4189 +X
4190 c
4191 "},
4192 cursor_offset: None,
4193 expected_variable_edit: indoc! {"
4194 a
4195 b
4196 <|fim_middle|>
4197 X
4198 <|fim_suffix|>
4199 c
4200 d
4201 "},
4202 expected_after_apply: indoc! {"
4203 a
4204 b
4205 X
4206 c
4207 d
4208 e
4209 "},
4210 },
4211 Case {
4212 name: "deletion",
4213 old: indoc! {"
4214 a
4215 b
4216 c
4217 d
4218 e
4219 "},
4220 patch: indoc! {"
4221 @@ -2,3 +2,2 @@
4222 b
4223 -c
4224 d
4225 "},
4226 cursor_offset: None,
4227 expected_variable_edit: indoc! {"
4228 a
4229 b
4230 <|fim_middle|>
4231 <|fim_suffix|>
4232 d
4233 e
4234 "},
4235 expected_after_apply: indoc! {"
4236 a
4237 b
4238 d
4239 e
4240 "},
4241 },
4242 Case {
4243 name: "edit_near_start",
4244 old: indoc! {"
4245 first
4246 second
4247 third
4248 fourth
4249 "},
4250 patch: indoc! {"
4251 @@ -1,1 +1,1 @@
4252 -first
4253 +FIRST
4254 "},
4255 cursor_offset: None,
4256 expected_variable_edit: indoc! {"
4257 <|fim_middle|>
4258 FIRST
4259 <|fim_suffix|>
4260 second
4261 third
4262 "},
4263 expected_after_apply: indoc! {"
4264 FIRST
4265 second
4266 third
4267 fourth
4268 "},
4269 },
4270 Case {
4271 name: "edit_near_end",
4272 old: indoc! {"
4273 first
4274 second
4275 third
4276 fourth
4277 "},
4278 patch: indoc! {"
4279 @@ -4,1 +4,1 @@
4280 -fourth
4281 +FOURTH
4282 "},
4283 cursor_offset: None,
4284 expected_variable_edit: indoc! {"
4285 second
4286 third
4287 <|fim_middle|>
4288 FOURTH
4289 <|fim_suffix|>
4290 "},
4291 expected_after_apply: indoc! {"
4292 first
4293 second
4294 third
4295 FOURTH
4296 "},
4297 },
4298 Case {
4299 name: "cursor_at_start_of_replacement",
4300 old: indoc! {"
4301 zero
4302 one
4303 two
4304 three
4305 four
4306 five
4307 "},
4308 patch: indoc! {"
4309 @@ -3,3 +3,3 @@
4310 two
4311 -three
4312 +THREE
4313 four
4314 "},
4315 cursor_offset: Some(4),
4316 expected_variable_edit: indoc! {"
4317 one
4318 two
4319 <|fim_middle|>
4320 <|user_cursor|>THREE
4321 <|fim_suffix|>
4322 four
4323 five
4324 "},
4325 expected_after_apply: indoc! {"
4326 zero
4327 one
4328 two
4329 <|user_cursor|>THREE
4330 four
4331 five
4332 "},
4333 },
4334 Case {
4335 name: "cursor_in_middle_of_replacement",
4336 old: indoc! {"
4337 zero
4338 one
4339 two
4340 three
4341 four
4342 five
4343 "},
4344 patch: indoc! {"
4345 @@ -3,3 +3,3 @@
4346 two
4347 -three
4348 +THREE
4349 four
4350 "},
4351 cursor_offset: Some(6),
4352 expected_variable_edit: indoc! {"
4353 one
4354 two
4355 <|fim_middle|>
4356 TH<|user_cursor|>REE
4357 <|fim_suffix|>
4358 four
4359 five
4360 "},
4361 expected_after_apply: indoc! {"
4362 zero
4363 one
4364 two
4365 TH<|user_cursor|>REE
4366 four
4367 five
4368 "},
4369 },
4370 Case {
4371 name: "expands_context_when_two_lines_not_unique_before_and_after",
4372 old: indoc! {"
4373 one
4374 a
4375 b
4376 c
4377 d
4378 two
4379 a
4380 b
4381 c
4382 d
4383 three
4384 a
4385 b
4386 c
4387 d
4388 four
4389 "},
4390 patch: indoc! {"
4391 @@ -4,5 +4,5 @@
4392 two
4393 a
4394 b
4395 -c
4396 +C
4397 d
4398 three
4399 "},
4400 cursor_offset: None,
4401 expected_variable_edit: indoc! {"
4402 two
4403 a
4404 b
4405 <|fim_middle|>
4406 C
4407 <|fim_suffix|>
4408 d
4409 three
4410 "},
4411 expected_after_apply: indoc! {"
4412 one
4413 a
4414 b
4415 c
4416 d
4417 two
4418 a
4419 b
4420 C
4421 d
4422 three
4423 a
4424 b
4425 c
4426 d
4427 four
4428 "},
4429 },
4430 Case {
4431 name: "expands_context_when_two_lines_not_unique_before_and_after",
4432 old: indoc! {"
4433 {
4434 {
4435 one();
4436 }
4437 }
4438 {
4439 {
4440 two();
4441 }
4442 }
4443 {
4444 {
4445 three();
4446 }
4447 }
4448 {
4449 {
4450 four();
4451 }
4452 }
4453 "},
4454 patch: indoc! {"
4455 @@ -4,5 +4,5 @@
4456 {
4457 - two();
4458 + TWO();
4459 }
4460 "},
4461 cursor_offset: None,
4462 expected_variable_edit: indoc! {"
4463 one();
4464 }
4465 }
4466 {
4467 {
4468 <|fim_middle|>
4469 TWO();
4470 <|fim_suffix|>
4471 }
4472 }
4473 {
4474 {
4475 three();
4476 "},
4477 expected_after_apply: indoc! {"
4478 {
4479 {
4480 one();
4481 }
4482 }
4483 {
4484 {
4485 TWO();
4486 }
4487 }
4488 {
4489 {
4490 three();
4491 }
4492 }
4493 {
4494 {
4495 four();
4496 }
4497 }
4498 "},
4499 },
4500 ];
4501
4502 for case in cases {
4503 let output =
4504 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4505 .unwrap_or_else(|error| {
4506 panic!("failed converting patch for {}: {error}", case.name)
4507 });
4508 assert_eq!(
4509 output, case.expected_variable_edit,
4510 "patch->variable_edit mismatch for {}",
4511 case.name
4512 );
4513
4514 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4515 .unwrap_or_else(|error| {
4516 panic!("failed applying variable_edit for {}: {error}", case.name)
4517 });
4518 let mut edited_by_variable_edit = case.old.to_string();
4519 edited_by_variable_edit.replace_range(edit_range, &replacement);
4520 assert_eq!(
4521 edited_by_variable_edit, case.expected_after_apply,
4522 "variable_edit apply mismatch for {}",
4523 case.name
4524 );
4525
4526 let (expected_edit_range, expected_replacement) =
4527 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4528 |error| {
4529 panic!(
4530 "failed applying expected variable_edit for {}: {error}",
4531 case.name
4532 )
4533 },
4534 );
4535 let mut edited_by_expected_variable_edit = case.old.to_string();
4536 edited_by_expected_variable_edit
4537 .replace_range(expected_edit_range, &expected_replacement);
4538 assert_eq!(
4539 edited_by_expected_variable_edit, case.expected_after_apply,
4540 "expected variable_edit apply mismatch for {}",
4541 case.name
4542 );
4543 }
4544 }
4545
/// Checks that the cursor marker is inserted at the given byte offset and
/// that the section is framed by the file header and the FIM prefix line.
#[test]
fn test_write_cursor_excerpt_section() {
    let path = Path::new("test.rs");
    // The second line is indented by four spaces, so byte offset 17 sits
    // directly after the `h` of `hello`.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;
    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
4558 }
4559}
4560
4561/// The zeta1 prompt format
4562pub mod zeta1 {
4563 use super::*;
4564 use std::fmt::Write;
4565
4566 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4567 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4568 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4569 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4570
4571 const INSTRUCTION_HEADER: &str = concat!(
4572 "### Instruction:\n",
4573 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4574 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4575 "into account the cursor location.\n\n",
4576 "### User Edits:\n\n"
4577 );
4578 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4579 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4580
4581 /// Formats a complete zeta1 prompt from the input events and excerpt.
4582 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4583 let mut prompt = String::with_capacity(
4584 INSTRUCTION_HEADER.len()
4585 + input_events.len()
4586 + EXCERPT_HEADER.len()
4587 + input_excerpt.len()
4588 + RESPONSE_HEADER.len(),
4589 );
4590 prompt.push_str(INSTRUCTION_HEADER);
4591 prompt.push_str(input_events);
4592 prompt.push_str(EXCERPT_HEADER);
4593 prompt.push_str(input_excerpt);
4594 prompt.push_str(RESPONSE_HEADER);
4595 prompt
4596 }
4597
4598 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4599 /// editable and context byte-offset ranges within `cursor_excerpt`.
4600 pub fn format_zeta1_from_input(
4601 input: &ZetaPromptInput,
4602 editable_range: Range<usize>,
4603 context_range: Range<usize>,
4604 ) -> String {
4605 let events = format_zeta1_events(&input.events);
4606 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4607 format_zeta1_prompt(&events, &excerpt)
4608 }
4609
4610 /// Formats events in zeta1 style (oldest first).
4611 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4612 let mut result = String::new();
4613 for event in
4614 events
4615 .iter()
4616 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4617 &ZetaFormat::V0114180EditableRegion,
4618 )))
4619 {
4620 let event_string = format_zeta1_event(event);
4621 if event_string.is_empty() {
4622 continue;
4623 }
4624 if !result.is_empty() {
4625 result.push_str("\n\n");
4626 }
4627 result.push_str(&event_string);
4628 }
4629 result
4630 }
4631
4632 fn format_zeta1_event(event: &Event) -> String {
4633 match event {
4634 Event::BufferChange {
4635 path,
4636 old_path,
4637 diff,
4638 ..
4639 } => {
4640 let mut prompt = String::new();
4641 if old_path != path {
4642 writeln!(
4643 prompt,
4644 "User renamed {} to {}\n",
4645 old_path.display(),
4646 path.display()
4647 )
4648 .ok();
4649 }
4650 if !diff.is_empty() {
4651 write!(
4652 prompt,
4653 "User edited {}:\n```diff\n{}\n```",
4654 path.display(),
4655 diff
4656 )
4657 .ok();
4658 }
4659 prompt
4660 }
4661 }
4662 }
4663
4664 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4665 /// within `cursor_excerpt`.
4666 fn format_zeta1_excerpt(
4667 input: &ZetaPromptInput,
4668 editable_range: Range<usize>,
4669 context_range: Range<usize>,
4670 ) -> String {
4671 let path_str = input.cursor_path.to_string_lossy();
4672 let excerpt = &*input.cursor_excerpt;
4673 let cursor_offset = input.cursor_offset_in_excerpt;
4674
4675 let mut prompt = String::new();
4676 writeln!(&mut prompt, "```{path_str}").ok();
4677
4678 let starts_at_file_beginning =
4679 input.excerpt_start_row == Some(0) && context_range.start == 0;
4680 if starts_at_file_beginning {
4681 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4682 }
4683
4684 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4685
4686 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4687 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4688 prompt.push_str(CURSOR_MARKER);
4689 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4690 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4691
4692 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4693 write!(prompt, "\n```").ok();
4694
4695 prompt
4696 }
4697
4698 /// Cleans zeta1 model output by extracting content between editable region
4699 /// markers and converting the zeta1 cursor marker to the universal one.
4700 /// Returns `None` if the output doesn't contain the expected markers.
4701 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4702 let content = output.replace(CURSOR_MARKER, "");
4703
4704 let content_start = content
4705 .find(EDITABLE_REGION_START_MARKER)
4706 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4707 .map(|pos| {
4708 if content.as_bytes().get(pos) == Some(&b'\n') {
4709 pos + 1
4710 } else {
4711 pos
4712 }
4713 })
4714 .unwrap_or(0);
4715
4716 let content_end = content
4717 .find(EDITABLE_REGION_END_MARKER)
4718 .map(|pos| {
4719 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4720 pos - 1
4721 } else {
4722 pos
4723 }
4724 })
4725 .unwrap_or(content.len());
4726
4727 if content_start > content_end {
4728 return Some(String::new());
4729 }
4730
4731 let extracted = &content[content_start..content_end];
4732
4733 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4734 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4735 let text_before_cursor = text_before_cursor
4736 .find(EDITABLE_REGION_START_MARKER)
4737 .map(|pos| {
4738 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4739 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4740 after_marker + 1
4741 } else {
4742 after_marker
4743 }
4744 })
4745 .unwrap_or(0);
4746 let offset_in_extracted = zeta1_cursor_pos
4747 .saturating_sub(text_before_cursor)
4748 .min(extracted.len());
4749 offset_in_extracted
4750 });
4751
4752 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4753 if let Some(offset) = cursor_offset {
4754 result.push_str(&extracted[..offset]);
4755 result.push_str(super::CURSOR_MARKER);
4756 result.push_str(&extracted[offset..]);
4757 } else {
4758 result.push_str(extracted);
4759 }
4760
4761 Some(result)
4762 }
4763}
4764
4765#[cfg(test)]
4766mod tests {
4767 use super::*;
4768 use indoc::indoc;
4769
4770 fn make_input(
4771 cursor_excerpt: &str,
4772 editable_range: Range<usize>,
4773 cursor_offset: usize,
4774 events: Vec<Event>,
4775 related_files: Vec<RelatedFile>,
4776 ) -> ZetaPromptInput {
4777 let context_range = 0..cursor_excerpt.len();
4778 ZetaPromptInput {
4779 cursor_path: Path::new("test.rs").into(),
4780 cursor_excerpt: cursor_excerpt.into(),
4781 cursor_offset_in_excerpt: cursor_offset,
4782 excerpt_start_row: None,
4783 events: events.into_iter().map(Arc::new).collect(),
4784 related_files: Some(related_files),
4785 active_buffer_diagnostics: vec![],
4786 excerpt_ranges: ExcerptRanges {
4787 editable_150: editable_range.clone(),
4788 editable_180: editable_range.clone(),
4789 editable_350: editable_range,
4790 editable_150_context_350: context_range.clone(),
4791 editable_180_context_350: context_range.clone(),
4792 editable_350_context_150: context_range,
4793 ..Default::default()
4794 },
4795 syntax_ranges: None,
4796 in_open_source_repo: false,
4797 can_collect_data: false,
4798 repo_url: None,
4799 }
4800 }
4801
4802 fn make_input_with_context_range(
4803 excerpt: &str,
4804 editable_range: Range<usize>,
4805 context_range: Range<usize>,
4806 cursor_offset: usize,
4807 ) -> ZetaPromptInput {
4808 ZetaPromptInput {
4809 cursor_path: Path::new("test.rs").into(),
4810 cursor_excerpt: excerpt.into(),
4811 cursor_offset_in_excerpt: cursor_offset,
4812 excerpt_start_row: None,
4813 events: vec![],
4814 related_files: Some(vec![]),
4815 active_buffer_diagnostics: vec![],
4816 excerpt_ranges: ExcerptRanges {
4817 editable_150: editable_range.clone(),
4818 editable_180: editable_range.clone(),
4819 editable_350: editable_range,
4820 editable_150_context_350: context_range.clone(),
4821 editable_180_context_350: context_range.clone(),
4822 editable_350_context_150: context_range,
4823 ..Default::default()
4824 },
4825 syntax_ranges: None,
4826 in_open_source_repo: false,
4827 can_collect_data: false,
4828 repo_url: None,
4829 }
4830 }
4831
4832 fn make_event(path: &str, diff: &str) -> Event {
4833 Event::BufferChange {
4834 path: Path::new(path).into(),
4835 old_path: Path::new(path).into(),
4836 diff: diff.to_string(),
4837 predicted: false,
4838 in_open_source_repo: false,
4839 }
4840 }
4841
4842 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4843 RelatedFile {
4844 path: Path::new(path).into(),
4845 max_row: content.lines().count() as u32,
4846 excerpts: vec![RelatedExcerpt {
4847 row_range: 0..content.lines().count() as u32,
4848 text: content.into(),
4849 order: 0,
4850 }],
4851 in_open_source_repo: false,
4852 }
4853 }
4854
4855 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4856 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4857 }
4858
4859 fn budget_with_margin(requested_tokens: usize) -> usize {
4860 ((requested_tokens as f64) / 0.9).ceil() as usize
4861 }
4862
    /// With a generous budget nothing is trimmed: the prompt contains the
    /// related file, the edit history, and the cursor file split into prefix,
    /// current editable text and suffix.
    #[test]
    fn test_no_truncation_when_within_budget() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            // The editable region is exactly "editable"; the cursor sits after "edi".
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        // The blank line after `<|fim_suffix|>` comes from the suffix starting
        // with the newline that follows "editable".
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
4896
    /// Compares the prompt under a generous budget with the prompt under a
    /// tight one.
    ///
    /// NOTE(review): despite the test name, the tight-budget expectation below
    /// keeps the edit history and drops the two related files (`r1.rs`,
    /// `r2.rs`).
    #[test]
    fn test_truncation_drops_edit_history_when_budget_tight() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![
                make_related_file("r1.rs", "aaaaaaa\n"),
                make_related_file("r2.rs", "bbbbbbb\n"),
            ],
        );

        // Generous budget: related files, edit history and cursor file all appear.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>r1.rs
                aaaaaaa
                <|file_sep|>r2.rs
                bbbbbbb
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // Tight budget: only the edit history and the cursor file remain.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(55)),
            Some(
                indoc! {r#"
                    <|file_sep|>edit history
                    --- a/a.rs
                    +++ b/a.rs
                    -x
                    +y
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    co<|user_cursor|>de
                    <|fim_suffix|>
                    <|fim_middle|>updated
                "#}
                .to_string()
            )
        );
    }
4952
    /// A related file with three same-order excerpts may be included only in
    /// part: under a tight budget the first excerpt is kept and the rest are
    /// omitted.
    #[test]
    fn test_truncation_includes_partial_excerpts() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("big.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..10,
                        text: "first excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 10..20,
                        text: "second excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 20..30,
                        text: "third excerpt\n".into(),
                        order: 0,
                    },
                ],
            }],
        );

        // Generous budget: all three excerpts, separated by "..." lines.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                second excerpt
                ...
                third excerpt
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // Tight budget: only the first excerpt survives, followed by a "..." line.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(50)).unwrap(),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
5019
    /// When the budget cannot hold every related file, the excerpt with the
    /// lower `order` value is the one that is kept.
    #[test]
    fn test_truncation_prioritizes_lower_order_excerpts() {
        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
        // With tight budget, only the lower-order excerpt from file_b should be included.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("file_a.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "low priority content\n".into(),
                        order: 5,
                    }],
                },
                RelatedFile {
                    path: Path::new("file_b.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "high priority content\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both files included; they are rendered in the order
        // they were supplied (file_a, then file_b), not sorted by `order`.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>file_a.rs
                low priority content
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // With tight budget, only file_b (lower order) fits.
        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
        // file_a would need another 14 tokens, which doesn't fit.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(52)).unwrap(),
            indoc! {r#"
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
5090
    /// Order-based truncation applies per excerpt, not per file: within one
    /// file the order-1 excerpts are kept while the order-3 excerpt is dropped.
    #[test]
    fn test_truncation_drops_high_order_excerpts_within_file() {
        // A single file has excerpts at order 1 and order 3. With a tight budget,
        // only the order-1 excerpts are included while the order-3 excerpt is
        // dropped — even though they belong to the same file. This also preserves
        // the parent invariant: parent outline items have order ≤ their best
        // child, so they're always included when any child is.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("mod.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..5,
                        text: "mod header\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 5..15,
                        text: "important fn\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 15..30,
                        text: "less important fn\n".into(),
                        order: 3,
                    },
                ],
            }],
        );

        // With large budget, all three excerpts included.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                less important fn
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // With tight budget, only order<=1 excerpts included (header + important fn).
        // The trailing "..." line stands where the dropped order-3 excerpt was.
        assert_eq!(
            format_with_budget(&input, budget_with_margin(55)).unwrap(),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
5166
    /// When the edit history does not fit, events earlier in the list are
    /// dropped before later ones.
    #[test]
    fn test_truncation_drops_older_events_first() {
        // Events are listed in input order: "old.rs" first, "new.rs" last.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
            vec![],
        );

        // Generous budget: both events appear, in input order.
        assert_eq!(
            format_with_budget(&input, 10000).unwrap(),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/old.rs
                +++ b/old.rs
                -1
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );

        // Tight budget (passed directly, without `budget_with_margin`): only the
        // last event ("new.rs") is kept.
        assert_eq!(
            format_with_budget(&input, 60).unwrap(),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
            .to_string()
        );
    }
5214
5215 #[test]
5216 fn test_cursor_excerpt_always_included_with_minimal_budget() {
5217 let input = make_input(
5218 "fn main() {}",
5219 0..12,
5220 3,
5221 vec![make_event("a.rs", "-old\n+new\n")],
5222 vec![make_related_file("related.rs", "helper\n")],
5223 );
5224
5225 assert!(format_with_budget(&input, 30).is_none())
5226 }
5227
5228 #[track_caller]
5229 fn format_seed_coder(input: &ZetaPromptInput) -> String {
5230 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5231 .expect("seed coder prompt formatting should succeed")
5232 }
5233
5234 #[track_caller]
5235 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5236 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5237 .expect("seed coder prompt formatting should succeed")
5238 }
5239
5240 #[test]
5241 fn test_seed_coder_alias_matches_v0211_seed_coder() {
5242 let input = make_input(
5243 "prefix\neditable\nsuffix",
5244 7..15,
5245 10,
5246 vec![make_event("a.rs", "-old\n+new\n")],
5247 vec![make_related_file("related.rs", "fn helper() {}\n")],
5248 );
5249
5250 assert_eq!(
5251 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000),
5252 format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000)
5253 );
5254 assert_eq!(
5255 ZetaFormat::parse("V0331SeedCoderModelPy").unwrap(),
5256 ZetaFormat::V0331SeedCoderModelPy
5257 );
5258 }
5259
    /// Pins the full seed-coder layout: the suffix section comes first, then the
    /// prefix section containing related files, edit history and the cursor
    /// file with its `<<<<<<< CURRENT` / `=======` markers.
    #[test]
    fn test_seed_coder_basic_format() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        // The expected prompt ends right after `<[fim-middle]>`, with no trailing newline.
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                suffix
                <[fim-prefix]><filename>related.rs
                fn helper() {}

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new

                <filename>test.rs
                prefix
                <<<<<<< CURRENT
                edi<|user_cursor|>table
                =======
                <[fim-middle]>"#}
        );
    }
5292
    /// In the `V0420Diagnostics` format the diagnostics section is emitted
    /// before the related files, and the editable text is wrapped in
    /// `<|marker_1|>` / `<|marker_2|>`.
    #[test]
    fn test_v0420_formats_diagnostics_before_related_files() {
        let mut input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );
        // A single diagnostic; the expected prompt shows its message and its snippet.
        input.active_buffer_diagnostics = vec![ActiveBufferDiagnostic {
            severity: Some(1),
            message: "missing semicolon".to_string(),
            snippet: "let value = 1".to_string(),
            snippet_buffer_row_range: 1..2,
            diagnostic_range_in_snippet: 12..13,
        }];

        let prompt =
            format_prompt_with_budget_for_format(&input, ZetaFormat::V0420Diagnostics, 10000)
                .expect("v0420 prompt formatting should succeed");

        // The message is rendered between asterisks and the snippet inside a code fence.
        assert_eq!(
            prompt,
            indoc! {r#"
                <[fim-suffix]>
                suffix
                <[fim-prefix]><filename>diagnostics
                *missing semicolon*:
                ```
                let value = 1
                ```

                <filename>related.rs
                fn helper() {}

                <filename>test.rs
                prefix
                <|marker_1|>edi<|user_cursor|>table<|marker_2|>
                <[fim-middle]>"#}
        );
    }
5334
5335 #[test]
5336 fn test_v0317_formats_prompt_with_many_related_files() {
5337 let related_files = (0..900)
5338 .map(|index| {
5339 make_related_file(
5340 &format!("related_{index}.rs"),
5341 "fn helper() {\n let value = 1;\n}\n",
5342 )
5343 })
5344 .collect();
5345
5346 let input = make_input(
5347 "code",
5348 0..4,
5349 2,
5350 vec![make_event("a.rs", "-x\n+y\n")],
5351 related_files,
5352 );
5353
5354 let prompt =
5355 format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5356
5357 assert!(prompt.is_some());
5358 let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5359 assert!(prompt.contains("test.rs"));
5360 assert!(prompt.contains(CURSOR_MARKER));
5361 }
5362
5363 #[test]
5364 fn test_v0327_formats_single_file_prompt_without_related_files() {
5365 let excerpt = indoc! {"
5366 line01
5367 line02
5368 line03
5369 line04
5370 line05
5371 line06
5372 line07
5373 line08
5374 line09
5375 line10
5376 line11
5377 line12
5378 line13
5379 line14
5380 line15
5381 line16
5382 line17
5383 line18
5384 line19
5385 line20
5386 "};
5387 let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5388 let input = make_input(
5389 excerpt,
5390 0..excerpt.len(),
5391 cursor_offset,
5392 vec![make_event("a.rs", "-x\n+y\n")],
5393 vec![make_related_file("related.rs", "fn helper() {}\n")],
5394 );
5395
5396 let prompt =
5397 format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5398 .expect("v0327 prompt should fit");
5399
5400 assert!(prompt.contains("line01"));
5401 assert!(prompt.contains("line20"));
5402 assert!(prompt.contains("<filename>edit_history"));
5403 assert!(prompt.contains("<filename>test.rs"));
5404 assert!(prompt.contains(CURSOR_MARKER));
5405 assert!(!prompt.contains("related.rs"));
5406 assert!(!prompt.contains("fn helper() {}"));
5407 }
5408
5409 #[test]
5410 fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5411 let excerpt = (0..80)
5412 .map(|index| format!("l{index:02}\n"))
5413 .collect::<String>();
5414 let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5415 let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5416
5417 let (context, editable_range, context_range, adjusted_cursor) =
5418 resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5419
5420 assert_eq!(context, excerpt);
5421 assert_eq!(context_range, 0..excerpt.len());
5422 assert_eq!(adjusted_cursor, cursor_offset);
5423 assert!(editable_range.start < adjusted_cursor);
5424 assert!(editable_range.end > adjusted_cursor);
5425 assert!(editable_range.end < excerpt.len());
5426 }
5427
    /// With no events and no related files, the seed-coder prompt consists of
    /// just the suffix section and the cursor file.
    #[test]
    fn test_seed_coder_no_context() {
        // The editable region is "middle"; the cursor sits after "mid".
        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                after
                <[fim-prefix]><filename>test.rs
                before
                <<<<<<< CURRENT
                mid<|user_cursor|>dle
                =======
                <[fim-middle]>"#}
        );
    }
5445
    /// Seed-coder truncation at three budgets: everything fits at the default
    /// budget, nothing is produced at 24, and at 40 only the cursor file is kept.
    #[test]
    fn test_seed_coder_truncation_drops_context() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![make_related_file("r1.rs", "content\n")],
        );

        // With large budget, everything is included
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>r1.rs
                content

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // A budget of 24 yields no prompt at all.
        assert_eq!(
            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
            None
        );

        // A budget of 40 keeps the cursor section and drops both the related
        // file and the edit history.
        assert_eq!(
            format_seed_coder_with_budget(&input, 40),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#
            }
        )
    }
5494
    /// Seed-coder rendering of two related files with different `order` values,
    /// first with a generous budget and then with a budget of 44.
    ///
    /// NOTE(review): the tight-budget assertion only shows that all related-file
    /// context is dropped; it does not exercise a case where the lower-order
    /// file is kept and the higher-order one is dropped.
    #[test]
    fn test_seed_coder_truncation_prioritizes_lower_order() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("low_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "low prio\n".into(),
                        order: 10,
                    }],
                },
                RelatedFile {
                    path: Path::new("high_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "high prio\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both included; rendered in the order they were
        // supplied (low_prio.rs before high_prio.rs), not sorted by name or `order`.
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>low_prio.rs
                low prio
                <filename>high_prio.rs
                high prio

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget under the generic heuristic, context is dropped but the
        // minimal cursor section still fits.
        assert_eq!(
            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
            Some(
                indoc! {r#"
                    <[fim-suffix]>
                    <[fim-prefix]><filename>test.rs
                    <<<<<<< CURRENT
                    co<|user_cursor|>de
                    =======
                    <[fim-middle]>"#}
                .to_string()
            )
        );
    }
5559
5560 #[test]
5561 fn test_format_zeta1_from_input_basic() {
5562 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
5563 let input = ZetaPromptInput {
5564 cursor_path: Path::new("src/main.rs").into(),
5565 cursor_excerpt: excerpt.into(),
5566 cursor_offset_in_excerpt: 30,
5567 excerpt_start_row: Some(0),
5568 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5569 related_files: Some(vec![]),
5570 active_buffer_diagnostics: vec![],
5571 excerpt_ranges: ExcerptRanges {
5572 editable_150: 15..41,
5573 editable_180: 15..41,
5574 editable_350: 15..41,
5575 editable_150_context_350: 0..excerpt.len(),
5576 editable_180_context_350: 0..excerpt.len(),
5577 editable_350_context_150: 0..excerpt.len(),
5578 ..Default::default()
5579 },
5580 syntax_ranges: None,
5581 in_open_source_repo: false,
5582 can_collect_data: false,
5583 repo_url: None,
5584 };
5585
5586 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5587
5588 assert_eq!(
5589 prompt,
5590 concat!(
5591 "### Instruction:\n",
5592 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5593 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5594 "into account the cursor location.\n",
5595 "\n",
5596 "### User Edits:\n",
5597 "\n",
5598 "User edited other.rs:\n",
5599 "```diff\n",
5600 "-old\n",
5601 "+new\n",
5602 "\n",
5603 "```\n",
5604 "\n",
5605 "### User Excerpt:\n",
5606 "\n",
5607 "```src/main.rs\n",
5608 "<|start_of_file|>\n",
5609 "fn before() {}\n",
5610 "<|editable_region_start|>\n",
5611 "fn foo() {\n",
5612 " <|user_cursor_is_here|>let x = 1;\n",
5613 "\n",
5614 "<|editable_region_end|>}\n",
5615 "fn after() {}\n",
5616 "\n",
5617 "```\n",
5618 "\n",
5619 "### Response:\n",
5620 ),
5621 );
5622 }
5623
5624 #[test]
5625 fn test_format_zeta1_from_input_no_start_of_file() {
5626 let excerpt = "fn foo() {\n let x = 1;\n}\n";
5627 let input = ZetaPromptInput {
5628 cursor_path: Path::new("src/main.rs").into(),
5629 cursor_excerpt: excerpt.into(),
5630 cursor_offset_in_excerpt: 15,
5631 excerpt_start_row: Some(10),
5632 events: vec![],
5633 related_files: Some(vec![]),
5634 active_buffer_diagnostics: vec![],
5635 excerpt_ranges: ExcerptRanges {
5636 editable_150: 0..28,
5637 editable_180: 0..28,
5638 editable_350: 0..28,
5639 editable_150_context_350: 0..28,
5640 editable_180_context_350: 0..28,
5641 editable_350_context_150: 0..28,
5642 ..Default::default()
5643 },
5644 syntax_ranges: None,
5645 in_open_source_repo: false,
5646 can_collect_data: false,
5647 repo_url: None,
5648 };
5649
5650 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5651
5652 assert_eq!(
5653 prompt,
5654 concat!(
5655 "### Instruction:\n",
5656 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5657 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5658 "into account the cursor location.\n",
5659 "\n",
5660 "### User Edits:\n",
5661 "\n",
5662 "\n",
5663 "\n",
5664 "### User Excerpt:\n",
5665 "\n",
5666 "```src/main.rs\n",
5667 "<|editable_region_start|>\n",
5668 "fn foo() {\n",
5669 " <|user_cursor_is_here|>let x = 1;\n",
5670 "}\n",
5671 "\n",
5672 "<|editable_region_end|>\n",
5673 "```\n",
5674 "\n",
5675 "### Response:\n",
5676 ),
5677 );
5678 }
5679
5680 #[test]
5681 fn test_format_zeta1_from_input_with_sub_ranges() {
5682 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
5683 let editable_range = 10..37;
5684 let context_range = 0..excerpt.len();
5685
5686 let input = ZetaPromptInput {
5687 cursor_path: Path::new("test.rs").into(),
5688 cursor_excerpt: excerpt.into(),
5689 cursor_offset_in_excerpt: 25,
5690 excerpt_start_row: Some(0),
5691 events: vec![],
5692 related_files: Some(vec![]),
5693 active_buffer_diagnostics: vec![],
5694 excerpt_ranges: ExcerptRanges {
5695 editable_150: editable_range.clone(),
5696 editable_180: editable_range.clone(),
5697 editable_350: editable_range.clone(),
5698 editable_150_context_350: context_range.clone(),
5699 editable_180_context_350: context_range.clone(),
5700 editable_350_context_150: context_range.clone(),
5701 ..Default::default()
5702 },
5703 syntax_ranges: None,
5704 in_open_source_repo: false,
5705 can_collect_data: false,
5706 repo_url: None,
5707 };
5708
5709 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5710
5711 assert_eq!(
5712 prompt,
5713 concat!(
5714 "### Instruction:\n",
5715 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5716 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5717 "into account the cursor location.\n",
5718 "\n",
5719 "### User Edits:\n",
5720 "\n",
5721 "\n",
5722 "\n",
5723 "### User Excerpt:\n",
5724 "\n",
5725 "```test.rs\n",
5726 "<|start_of_file|>\n",
5727 "// prefix\n",
5728 "<|editable_region_start|>\n",
5729 "fn foo() {\n",
5730 " <|user_cursor_is_here|>let x = 1;\n",
5731 "}\n",
5732 "<|editable_region_end|>\n",
5733 "// suffix\n",
5734 "\n",
5735 "```\n",
5736 "\n",
5737 "### Response:\n",
5738 ),
5739 );
5740 }
5741
5742 #[test]
5743 fn test_max_event_count() {
5744 fn make_numbered_event(index: usize) -> Event {
5745 return make_event(
5746 &format!("event-{index}.rs"),
5747 &format!("-old-{index}\n+new-{index}\n"),
5748 );
5749 }
5750 let input = make_input(
5751 "x",
5752 0..1,
5753 0,
5754 (0..3).map(make_numbered_event).collect(),
5755 vec![],
5756 );
5757
5758 let edit_history_section = format_edit_history_within_budget(
5759 &input.events,
5760 "<|file_sep|>",
5761 "edit history",
5762 usize::MAX,
5763 5,
5764 );
5765
5766 assert_eq!(
5767 &edit_history_section,
5768 indoc!(
5769 "
5770 <|file_sep|>edit history
5771 --- a/event-0.rs
5772 +++ b/event-0.rs
5773 -old-0
5774 +new-0
5775 --- a/event-1.rs
5776 +++ b/event-1.rs
5777 -old-1
5778 +new-1
5779 --- a/event-2.rs
5780 +++ b/event-2.rs
5781 -old-2
5782 +new-2
5783 "
5784 )
5785 );
5786
5787 let edit_history_section = format_edit_history_within_budget(
5788 &input.events,
5789 "<|file_sep|>",
5790 "edit history",
5791 usize::MAX,
5792 2,
5793 );
5794
5795 assert_eq!(
5796 &edit_history_section,
5797 indoc!(
5798 "
5799 <|file_sep|>edit history
5800 --- a/event-1.rs
5801 +++ b/event-1.rs
5802 -old-1
5803 +new-1
5804 --- a/event-2.rs
5805 +++ b/event-2.rs
5806 -old-2
5807 +new-2
5808 "
5809 )
5810 );
5811
5812 let edit_history_section = format_edit_history_within_budget(
5813 &input.events,
5814 "<|file_sep|>",
5815 "edit history",
5816 usize::MAX,
5817 0,
5818 );
5819
5820 assert_eq!(&edit_history_section, "");
5821 }
5822
5823 #[test]
5824 fn test_clean_zeta1_model_output_basic() {
5825 let output = indoc! {"
5826 <|editable_region_start|>
5827 fn main() {
5828 println!(\"hello\");
5829 }
5830 <|editable_region_end|>
5831 "};
5832
5833 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5834 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
5835 }
5836
5837 #[test]
5838 fn test_clean_zeta1_model_output_with_cursor() {
5839 let output = indoc! {"
5840 <|editable_region_start|>
5841 fn main() {
5842 <|user_cursor_is_here|>println!(\"hello\");
5843 }
5844 <|editable_region_end|>
5845 "};
5846
5847 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5848 assert_eq!(
5849 cleaned,
5850 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
5851 );
5852 }
5853
5854 #[test]
5855 fn test_clean_zeta1_model_output_no_markers() {
5856 let output = "fn main() {}\n";
5857 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5858 assert_eq!(cleaned, "fn main() {}\n");
5859 }
5860
5861 #[test]
5862 fn test_clean_zeta1_model_output_empty_region() {
5863 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5864 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5865 assert_eq!(cleaned, "");
5866 }
5867
5868 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5869 let mut result = excerpt.to_string();
5870 result.replace_range(
5871 parsed_output.range_in_excerpt.clone(),
5872 &parsed_output.new_editable_region,
5873 );
5874 result
5875 }
5876
5877 #[test]
5878 fn test_parse_zeta2_model_output() {
5879 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5880 let context_start = excerpt.find("ctx start").unwrap();
5881 let context_end = excerpt.find("after ctx").unwrap();
5882 let editable_start = excerpt.find("editable old").unwrap();
5883 let editable_end = editable_start + "editable old\n".len();
5884 let input = make_input_with_context_range(
5885 excerpt,
5886 editable_start..editable_end,
5887 context_start..context_end,
5888 editable_start,
5889 );
5890
5891 let output = parse_zeta2_model_output(
5892 "editable new\n>>>>>>> UPDATED\n",
5893 ZetaFormat::V0131GitMergeMarkersPrefix,
5894 &input,
5895 )
5896 .unwrap();
5897
5898 assert_eq!(
5899 apply_edit(excerpt, &output),
5900 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5901 );
5902 }
5903
5904 #[test]
5905 fn test_parse_zeta2_model_output_identity() {
5906 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5907 let editable_start = excerpt.find("bbb").unwrap();
5908 let editable_end = excerpt.find("ddd").unwrap();
5909 let input = make_input_with_context_range(
5910 excerpt,
5911 editable_start..editable_end,
5912 0..excerpt.len(),
5913 editable_start,
5914 );
5915
5916 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5917 let output =
5918 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5919
5920 assert_eq!(apply_edit(excerpt, &output), excerpt);
5921 }
5922
5923 #[test]
5924 fn test_parse_zeta2_model_output_strips_end_marker() {
5925 let excerpt = "hello\nworld\n";
5926 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5927
5928 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5929 let output1 =
5930 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5931 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5932
5933 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5934 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5935 }
5936
5937 #[test]
5938 fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5939 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5940 let context_start = excerpt.find("ctx start").unwrap();
5941 let context_end = excerpt.find("after ctx").unwrap();
5942 let editable_start = excerpt.find("editable old").unwrap();
5943 let editable_end = editable_start + "editable old\n".len();
5944 let input = make_input_with_context_range(
5945 excerpt,
5946 editable_start..editable_end,
5947 context_start..context_end,
5948 editable_start,
5949 );
5950
5951 let parsed = parse_zeta2_model_output(
5952 "editable new\n>>>>>>> UPDATED\n",
5953 ZetaFormat::V0131GitMergeMarkersPrefix,
5954 &input,
5955 )
5956 .unwrap();
5957 let expected = apply_edit(excerpt, &parsed);
5958 let patch = parsed_output_to_patch(&input, parsed).unwrap();
5959 let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5960
5961 assert_eq!(patched, expected);
5962 }
5963
5964 #[test]
5965 fn test_special_tokens_not_triggered_by_comment_separator() {
5966 // Regression test for https://github.com/zed-industries/zed/issues/52489
5967 let excerpt = "fn main() {\n // =======\n println!(\"hello\");\n}\n";
5968 let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5969 assert!(
5970 !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5971 "comment containing ======= should not trigger special token detection"
5972 );
5973 }
5974}