1pub mod excerpt_ranges;
2pub mod multi_region;
3pub mod udiff;
4
5use anyhow::{Result, anyhow};
6use serde::{Deserialize, Serialize};
7use std::fmt::Write;
8use std::ops::Range;
9use std::path::Path;
10use std::sync::Arc;
11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
12
13pub use crate::excerpt_ranges::{
14 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
15};
16
/// Marker text that denotes the user's cursor position. It is inserted into
/// expected outputs at the cursor offset and stripped again when an editable
/// region is parsed back into a `ParsedOutput`.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for a text of `bytes` bytes, assuming three bytes per
/// token. Uses integer division, so the result rounds down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Shrinks a token budget to 90% of `max_tokens` (rounded down), leaving some
/// slack to avoid overflow.
fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
    let with_margin = max_tokens as f64 * 0.9;
    with_margin.floor() as usize
}
32
/// Everything needed to render a prompt for one prediction request: the
/// excerpt around the cursor, recent edit events, related files, and metadata.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the cursor excerpt section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt surrounding the cursor; the offsets and ranges
    /// below are byte offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to excerpt-relative rows so the cursor excerpt's
    /// row range can be compared against related-file excerpt row ranges.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the edit-history section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files to include as context; `None` is treated as empty.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer (not read by the prompt code visible
    /// in this part of the file).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    // NOTE(review): the three fields below are not read in this part of the
    // file; presumably they gate data collection — confirm against callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
60
/// Identifies a prompt/output format version.
///
/// Variant names are exposed as strings through `IntoStaticStr` and are what
/// `ZetaFormat::parse` matches against, so renaming a variant changes the
/// accepted format names.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    V0331SeedCoderModelPy,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
}
99
100impl std::fmt::Display for ZetaFormat {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 write!(f, "{}", <&'static str>::from(self))
103 }
104}
105
106impl ZetaFormat {
107 pub fn parse(format_name: &str) -> Result<Self> {
108 let lower = format_name.to_lowercase();
109
110 // Exact case-insensitive match takes priority, bypassing ambiguity checks.
111 for variant in ZetaFormat::iter() {
112 if <&'static str>::from(&variant).to_lowercase() == lower {
113 return Ok(variant);
114 }
115 }
116
117 let mut results = ZetaFormat::iter().filter(|version| {
118 <&'static str>::from(version)
119 .to_lowercase()
120 .contains(&lower)
121 });
122 let Some(result) = results.next() else {
123 anyhow::bail!(
124 "`{format_name}` did not match any of:\n{}",
125 Self::options_as_string()
126 );
127 };
128 if results.next().is_some() {
129 anyhow::bail!(
130 "`{format_name}` matched more than one of:\n{}",
131 Self::options_as_string()
132 );
133 }
134 Ok(result)
135 }
136
137 pub fn options_as_string() -> String {
138 ZetaFormat::iter()
139 .map(|format| format!("- {}\n", <&'static str>::from(format)))
140 .collect::<Vec<_>>()
141 .concat()
142 }
143}
144
/// An edit-history event. Serialized with an internal `event` tag naming the
/// variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the path header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
156
157impl Event {
158 pub fn in_open_source_repo(&self) -> bool {
159 match self {
160 Event::BufferChange {
161 in_open_source_repo,
162 ..
163 } => *in_open_source_repo,
164 }
165 }
166}
167
168pub fn write_event(prompt: &mut String, event: &Event) {
169 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
170 for component in path.components() {
171 prompt.push('/');
172 write!(prompt, "{}", component.as_os_str().display()).ok();
173 }
174 }
175 match event {
176 Event::BufferChange {
177 path,
178 old_path,
179 diff,
180 predicted,
181 in_open_source_repo: _,
182 } => {
183 if *predicted {
184 prompt.push_str("// User accepted prediction:\n");
185 }
186 prompt.push_str("--- a");
187 write_path_as_unix_str(prompt, old_path.as_ref());
188 prompt.push_str("\n+++ b");
189 write_path_as_unix_str(prompt, path.as_ref());
190 prompt.push('\n');
191 prompt.push_str(diff);
192 }
193 }
194}
195
/// A diagnostic attached to the active buffer, together with a text snippet
/// around it.
// NOTE(review): nothing in this part of the file reads these fields; the
// per-field notes below are inferred from names and types — confirm against
// the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity (encoding not visible here).
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Snippet of buffer text associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows that `snippet` covers — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's byte range within `snippet` — TODO confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
204
/// A file offered to the model as additional context, as a set of excerpts.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path when filtering
    /// excerpts that the cursor excerpt already covers.
    pub path: Arc<Path>,
    /// Presumably the last row of the file — TODO confirm against the producer.
    pub max_row: u32,
    /// Excerpts taken from this file. Files left with no excerpts are dropped
    /// by `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from serialized data.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
213
/// One excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file this excerpt spans; compared against the cursor
    /// excerpt's row range to detect redundant excerpts.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// Defaults to `0` when absent from serialized data.
    // NOTE(review): presumably a priority/ordering key — confirm against consumers.
    #[serde(default)]
    pub order: usize,
}
221
222pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
223 special_tokens_for_format(format).iter().any(|token| {
224 if let Some(line_token) = token.strip_suffix('\n') {
225 input.cursor_excerpt.lines().any(|line| line == line_token)
226 } else {
227 input.cursor_excerpt.contains(token)
228 }
229 })
230}
231
232pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
233 let max_prompt_tokens = match format {
234 ZetaFormat::V0112MiddleAtEnd
235 | ZetaFormat::V0113Ordered
236 | ZetaFormat::V0114180EditableRegion
237 | ZetaFormat::V0120GitMergeMarkers
238 | ZetaFormat::V0131GitMergeMarkersPrefix
239 | ZetaFormat::V0211Prefill
240 | ZetaFormat::V0211SeedCoder
241 | ZetaFormat::v0226Hashline
242 | ZetaFormat::V0304VariableEdit
243 | ZetaFormat::V0304SeedNoEdits
244 | ZetaFormat::V0306SeedMultiRegions
245 | ZetaFormat::V0316SeedMultiRegions
246 | ZetaFormat::V0317SeedMultiRegions
247 | ZetaFormat::V0331SeedCoderModelPy
248 | ZetaFormat::V0318SeedMultiRegions => 4096,
249 ZetaFormat::V0327SingleFile => 16384,
250 };
251
252 format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
253}
254
255pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
256 match format {
257 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
258 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
259 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
260 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
261 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
262 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
263 ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
264 seed_coder::special_tokens()
265 }
266 ZetaFormat::v0226Hashline => hashline::special_tokens(),
267 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
268 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
269 ZetaFormat::V0316SeedMultiRegions => {
270 static TOKENS: &[&str] = &[
271 seed_coder::FIM_SUFFIX,
272 seed_coder::FIM_PREFIX,
273 seed_coder::FIM_MIDDLE,
274 seed_coder::FILE_MARKER,
275 multi_region::V0316_END_MARKER,
276 CURSOR_MARKER,
277 multi_region::MARKER_TAG_PREFIX,
278 ];
279 TOKENS
280 }
281 ZetaFormat::V0318SeedMultiRegions => {
282 static TOKENS: &[&str] = &[
283 seed_coder::FIM_SUFFIX,
284 seed_coder::FIM_PREFIX,
285 seed_coder::FIM_MIDDLE,
286 seed_coder::FILE_MARKER,
287 multi_region::V0318_END_MARKER,
288 CURSOR_MARKER,
289 multi_region::MARKER_TAG_PREFIX,
290 ];
291 TOKENS
292 }
293 ZetaFormat::V0317SeedMultiRegions => {
294 static TOKENS: &[&str] = &[
295 seed_coder::FIM_SUFFIX,
296 seed_coder::FIM_PREFIX,
297 seed_coder::FIM_MIDDLE,
298 seed_coder::FILE_MARKER,
299 multi_region::V0317_END_MARKER,
300 CURSOR_MARKER,
301 multi_region::RELATIVE_MARKER_TAG_PREFIX,
302 ];
303 TOKENS
304 }
305 ZetaFormat::V0327SingleFile => {
306 static TOKENS: &[&str] = &[
307 seed_coder::FIM_SUFFIX,
308 seed_coder::FIM_PREFIX,
309 seed_coder::FIM_MIDDLE,
310 seed_coder::FILE_MARKER,
311 multi_region::V0327_END_MARKER,
312 CURSOR_MARKER,
313 multi_region::MARKER_TAG_PREFIX,
314 ];
315 TOKENS
316 }
317 ZetaFormat::V0306SeedMultiRegions => {
318 static TOKENS: &[&str] = &[
319 seed_coder::FIM_SUFFIX,
320 seed_coder::FIM_PREFIX,
321 seed_coder::FIM_MIDDLE,
322 seed_coder::FILE_MARKER,
323 seed_coder::START_MARKER,
324 seed_coder::SEPARATOR,
325 seed_coder::END_MARKER,
326 CURSOR_MARKER,
327 multi_region::MARKER_TAG_PREFIX,
328 ];
329 TOKENS
330 }
331 }
332}
333
334/// Returns the (editable_token_limit, context_token_limit) for a given format.
335pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
336 match format {
337 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
338 ZetaFormat::V0114180EditableRegion => (180, 350),
339 ZetaFormat::V0120GitMergeMarkers
340 | ZetaFormat::V0131GitMergeMarkersPrefix
341 | ZetaFormat::V0211Prefill
342 | ZetaFormat::V0211SeedCoder
343 | ZetaFormat::V0331SeedCoderModelPy
344 | ZetaFormat::v0226Hashline
345 | ZetaFormat::V0306SeedMultiRegions
346 | ZetaFormat::V0316SeedMultiRegions
347 | ZetaFormat::V0318SeedMultiRegions
348 | ZetaFormat::V0317SeedMultiRegions
349 | ZetaFormat::V0327SingleFile
350 | ZetaFormat::V0304SeedNoEdits => (350, 150),
351
352 ZetaFormat::V0304VariableEdit => (1024, 0),
353 }
354}
355
356pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
357 match format {
358 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
359 ZetaFormat::V0112MiddleAtEnd
360 | ZetaFormat::V0113Ordered
361 | ZetaFormat::V0114180EditableRegion
362 | ZetaFormat::V0120GitMergeMarkers
363 | ZetaFormat::V0131GitMergeMarkersPrefix
364 | ZetaFormat::V0211Prefill
365 | ZetaFormat::V0211SeedCoder
366 | ZetaFormat::V0331SeedCoderModelPy
367 | ZetaFormat::V0304VariableEdit
368 | ZetaFormat::V0306SeedMultiRegions
369 | ZetaFormat::V0304SeedNoEdits => &[],
370 ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
371 ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
372 ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
373 ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
374 }
375}
376
377/// Return (editable_range, context_range) for the prompt format
378pub fn excerpt_ranges_for_format(
379 format: ZetaFormat,
380 ranges: &ExcerptRanges,
381) -> (Range<usize>, Range<usize>) {
382 match format {
383 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
384 ranges.editable_150.clone(),
385 ranges.editable_150_context_350.clone(),
386 ),
387 ZetaFormat::V0114180EditableRegion => (
388 ranges.editable_180.clone(),
389 ranges.editable_180_context_350.clone(),
390 ),
391 ZetaFormat::V0120GitMergeMarkers
392 | ZetaFormat::V0131GitMergeMarkersPrefix
393 | ZetaFormat::V0211Prefill
394 | ZetaFormat::V0211SeedCoder
395 | ZetaFormat::V0331SeedCoderModelPy
396 | ZetaFormat::v0226Hashline
397 | ZetaFormat::V0304SeedNoEdits
398 | ZetaFormat::V0306SeedMultiRegions
399 | ZetaFormat::V0316SeedMultiRegions
400 | ZetaFormat::V0318SeedMultiRegions
401 | ZetaFormat::V0317SeedMultiRegions => (
402 ranges.editable_350.clone(),
403 ranges.editable_350_context_150.clone(),
404 ),
405 ZetaFormat::V0327SingleFile => (
406 ranges.editable_350_context_150.clone(),
407 ranges.context_8192.clone().unwrap_or(
408 // shouldn't be used, only for compat with old data/clients
409 ranges.editable_350_context_150.clone(),
410 ),
411 ),
412
413 ZetaFormat::V0304VariableEdit => {
414 let context = ranges
415 .editable_350_context_1024
416 .clone()
417 .or(ranges.editable_350_context_512.clone())
418 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
419 (context.clone(), context)
420 }
421 }
422}
423
424pub fn write_cursor_excerpt_section_for_format(
425 format: ZetaFormat,
426 prompt: &mut String,
427 path: &Path,
428 context: &str,
429 editable_range: &Range<usize>,
430 cursor_offset: usize,
431) {
432 match format {
433 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
434 prompt,
435 path,
436 context,
437 editable_range,
438 cursor_offset,
439 ),
440 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
441 v0113_ordered::write_cursor_excerpt_section(
442 prompt,
443 path,
444 context,
445 editable_range,
446 cursor_offset,
447 )
448 }
449 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
450 prompt,
451 path,
452 context,
453 editable_range,
454 cursor_offset,
455 ),
456 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
457 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
458 prompt,
459 path,
460 context,
461 editable_range,
462 cursor_offset,
463 )
464 }
465 ZetaFormat::V0211SeedCoder
466 | ZetaFormat::V0331SeedCoderModelPy
467 | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
468 prompt,
469 path,
470 context,
471 editable_range,
472 cursor_offset,
473 ),
474 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
475 prompt,
476 path,
477 context,
478 editable_range,
479 cursor_offset,
480 ),
481 ZetaFormat::V0304VariableEdit => {
482 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
483 }
484 ZetaFormat::V0306SeedMultiRegions => {
485 prompt.push_str(&build_v0306_cursor_prefix(
486 path,
487 context,
488 editable_range,
489 cursor_offset,
490 ));
491 }
492 ZetaFormat::V0316SeedMultiRegions => {
493 prompt.push_str(&build_v0316_cursor_prefix(
494 path,
495 context,
496 editable_range,
497 cursor_offset,
498 ));
499 }
500 ZetaFormat::V0318SeedMultiRegions => {
501 prompt.push_str(&build_v0318_cursor_prefix(
502 path,
503 context,
504 editable_range,
505 cursor_offset,
506 ));
507 }
508 ZetaFormat::V0317SeedMultiRegions => {
509 prompt.push_str(&build_v0317_cursor_prefix(
510 path,
511 context,
512 editable_range,
513 cursor_offset,
514 ));
515 }
516 ZetaFormat::V0327SingleFile => {
517 prompt.push_str(&build_v0318_cursor_prefix(
518 path,
519 context,
520 editable_range,
521 cursor_offset,
522 ));
523 }
524 }
525}
526
527fn build_v0306_cursor_prefix(
528 path: &Path,
529 context: &str,
530 editable_range: &Range<usize>,
531 cursor_offset: usize,
532) -> String {
533 let mut section = String::new();
534 let path_str = path.to_string_lossy();
535 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
536
537 section.push_str(&context[..editable_range.start]);
538 section.push_str(seed_coder::START_MARKER);
539
540 let editable_text = &context[editable_range.clone()];
541 let cursor_in_editable = cursor_offset - editable_range.start;
542 multi_region::write_editable_with_markers(
543 &mut section,
544 editable_text,
545 cursor_in_editable,
546 CURSOR_MARKER,
547 );
548
549 if !section.ends_with('\n') {
550 section.push('\n');
551 }
552 section.push_str(seed_coder::SEPARATOR);
553 section
554}
555
556fn build_v0316_cursor_prefix(
557 path: &Path,
558 context: &str,
559 editable_range: &Range<usize>,
560 cursor_offset: usize,
561) -> String {
562 let mut section = String::new();
563 let path_str = path.to_string_lossy();
564 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
565
566 section.push_str(&context[..editable_range.start]);
567
568 let editable_text = &context[editable_range.clone()];
569 let cursor_in_editable = cursor_offset - editable_range.start;
570 multi_region::write_editable_with_markers_v0316(
571 &mut section,
572 editable_text,
573 cursor_in_editable,
574 CURSOR_MARKER,
575 );
576
577 if !section.ends_with('\n') {
578 section.push('\n');
579 }
580 section
581}
582
583fn build_v0318_cursor_prefix(
584 path: &Path,
585 context: &str,
586 editable_range: &Range<usize>,
587 cursor_offset: usize,
588) -> String {
589 let mut section = String::new();
590 let path_str = path.to_string_lossy();
591 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
592
593 section.push_str(&context[..editable_range.start]);
594
595 let editable_text = &context[editable_range.clone()];
596 let cursor_in_editable = cursor_offset - editable_range.start;
597 multi_region::write_editable_with_markers_v0318(
598 &mut section,
599 editable_text,
600 cursor_in_editable,
601 CURSOR_MARKER,
602 );
603
604 if !section.ends_with('\n') {
605 section.push('\n');
606 }
607 section
608}
609
610fn build_v0317_cursor_prefix(
611 path: &Path,
612 context: &str,
613 editable_range: &Range<usize>,
614 cursor_offset: usize,
615) -> String {
616 let mut section = String::new();
617 let path_str = path.to_string_lossy();
618 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
619
620 section.push_str(&context[..editable_range.start]);
621
622 let editable_text = &context[editable_range.clone()];
623 let cursor_in_editable = cursor_offset - editable_range.start;
624 multi_region::write_editable_with_markers_v0317(
625 &mut section,
626 editable_text,
627 cursor_in_editable,
628 CURSOR_MARKER,
629 );
630
631 if !section.ends_with('\n') {
632 section.push('\n');
633 }
634 section
635}
636
/// Converts a byte range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline (so a partial last line counts).
///
/// Panics if the range bounds are not valid slice positions in `text`.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |slice: &str| slice.bytes().filter(|&byte| byte == b'\n').count() as u32;

    let before = &text[..range.start];
    let inside = &text[range.start..range.end];

    let first_row = newline_count(before);
    let ends_on_line_break = text[..range.end].ends_with('\n');
    let last_row = first_row + newline_count(inside) + u32::from(!ends_on_line_break);

    first_row..last_row
}
645
646fn assemble_single_file_fim_prompt(
647 context: &str,
648 editable_range: &Range<usize>,
649 cursor_prefix_section: &str,
650 events: &[Arc<Event>],
651 max_tokens: usize,
652) -> String {
653 let suffix_section = seed_coder::build_suffix_section(context, editable_range);
654
655 let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
656 let cursor_prefix_tokens =
657 estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
658 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
659
660 let edit_history_section = format_edit_history_within_budget(
661 events,
662 seed_coder::FILE_MARKER,
663 "edit_history",
664 budget_after_cursor,
665 max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
666 );
667
668 let mut prompt = String::new();
669 prompt.push_str(&suffix_section);
670 prompt.push_str(seed_coder::FIM_PREFIX);
671 prompt.push_str(&edit_history_section);
672 if !edit_history_section.is_empty() {
673 prompt.push('\n');
674 }
675 prompt.push_str(cursor_prefix_section);
676 prompt.push_str(seed_coder::FIM_MIDDLE);
677 prompt
678}
679
680pub fn format_prompt_with_budget_for_format(
681 input: &ZetaPromptInput,
682 format: ZetaFormat,
683 max_tokens: usize,
684) -> Option<String> {
685 let (context, editable_range, context_range, cursor_offset) =
686 resolve_cursor_region(input, format);
687 let path = &*input.cursor_path;
688
689 let empty_files = Vec::new();
690 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
691 let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
692 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
693 let row_range = relative_row_range.start + cursor_excerpt_start_row
694 ..relative_row_range.end + cursor_excerpt_start_row;
695 filter_redundant_excerpts(
696 input_related_files.to_vec(),
697 input.cursor_path.as_ref(),
698 row_range,
699 )
700 } else {
701 input_related_files.to_vec()
702 };
703 let related_files = filtered_related_files.as_slice();
704
705 let prompt = match format {
706 ZetaFormat::V0211SeedCoder
707 | ZetaFormat::V0331SeedCoderModelPy
708 | ZetaFormat::V0304SeedNoEdits
709 | ZetaFormat::V0306SeedMultiRegions
710 | ZetaFormat::V0316SeedMultiRegions
711 | ZetaFormat::V0318SeedMultiRegions
712 | ZetaFormat::V0317SeedMultiRegions => {
713 let mut cursor_section = String::new();
714 write_cursor_excerpt_section_for_format(
715 format,
716 &mut cursor_section,
717 path,
718 context,
719 &editable_range,
720 cursor_offset,
721 );
722
723 let budget_with_margin = apply_prompt_budget_margin(max_tokens);
724 seed_coder::assemble_fim_prompt(
725 context,
726 &editable_range,
727 &cursor_section,
728 &input.events,
729 related_files,
730 budget_with_margin,
731 )
732 }
733 ZetaFormat::V0327SingleFile => {
734 let mut cursor_section = String::new();
735 write_cursor_excerpt_section_for_format(
736 format,
737 &mut cursor_section,
738 path,
739 context,
740 &editable_range,
741 cursor_offset,
742 );
743
744 assemble_single_file_fim_prompt(
745 context,
746 &editable_range,
747 &cursor_section,
748 &input.events,
749 apply_prompt_budget_margin(max_tokens),
750 )
751 }
752 _ => {
753 let mut cursor_section = String::new();
754 write_cursor_excerpt_section_for_format(
755 format,
756 &mut cursor_section,
757 path,
758 context,
759 &editable_range,
760 cursor_offset,
761 );
762
763 let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
764 let cursor_tokens = estimate_tokens(cursor_section.len());
765 remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
766
767 let edit_history_section = format_edit_history_within_budget(
768 &input.events,
769 "<|file_sep|>",
770 "edit history",
771 remaining_budget,
772 max_edit_event_count_for_format(&format),
773 );
774 let edit_history_tokens = estimate_tokens(edit_history_section.len());
775 remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
776
777 let related_files_section = format_related_files_within_budget(
778 &related_files,
779 "<|file_sep|>",
780 "",
781 remaining_budget,
782 );
783
784 let mut prompt = String::new();
785 prompt.push_str(&related_files_section);
786 prompt.push_str(&edit_history_section);
787 prompt.push_str(&cursor_section);
788 prompt
789 }
790 };
791 let prompt_tokens = estimate_tokens(prompt.len());
792 if prompt_tokens > max_tokens {
793 return None;
794 }
795 return Some(prompt);
796}
797
798pub fn filter_redundant_excerpts(
799 mut related_files: Vec<RelatedFile>,
800 cursor_path: &Path,
801 cursor_row_range: Range<u32>,
802) -> Vec<RelatedFile> {
803 for file in &mut related_files {
804 if file.path.as_ref() == cursor_path {
805 file.excerpts.retain(|excerpt| {
806 excerpt.row_range.start < cursor_row_range.start
807 || excerpt.row_range.end > cursor_row_range.end
808 });
809 }
810 }
811 related_files.retain(|file| !file.excerpts.is_empty());
812 related_files
813}
814
815pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
816 match format {
817 ZetaFormat::V0112MiddleAtEnd
818 | ZetaFormat::V0113Ordered
819 | ZetaFormat::V0114180EditableRegion
820 | ZetaFormat::V0120GitMergeMarkers
821 | ZetaFormat::V0131GitMergeMarkersPrefix
822 | ZetaFormat::V0211Prefill
823 | ZetaFormat::V0211SeedCoder
824 | ZetaFormat::V0331SeedCoderModelPy
825 | ZetaFormat::v0226Hashline
826 | ZetaFormat::V0304SeedNoEdits
827 | ZetaFormat::V0304VariableEdit
828 | ZetaFormat::V0306SeedMultiRegions
829 | ZetaFormat::V0316SeedMultiRegions
830 | ZetaFormat::V0318SeedMultiRegions
831 | ZetaFormat::V0317SeedMultiRegions
832 | ZetaFormat::V0327SingleFile => 6,
833 }
834}
835
836pub fn get_prefill_for_format(
837 format: ZetaFormat,
838 context: &str,
839 editable_range: &Range<usize>,
840) -> String {
841 match format {
842 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
843 ZetaFormat::V0112MiddleAtEnd
844 | ZetaFormat::V0113Ordered
845 | ZetaFormat::V0114180EditableRegion
846 | ZetaFormat::V0120GitMergeMarkers
847 | ZetaFormat::V0131GitMergeMarkersPrefix
848 | ZetaFormat::V0211SeedCoder
849 | ZetaFormat::V0331SeedCoderModelPy
850 | ZetaFormat::v0226Hashline
851 | ZetaFormat::V0304VariableEdit => String::new(),
852 ZetaFormat::V0304SeedNoEdits
853 | ZetaFormat::V0306SeedMultiRegions
854 | ZetaFormat::V0316SeedMultiRegions
855 | ZetaFormat::V0318SeedMultiRegions
856 | ZetaFormat::V0317SeedMultiRegions
857 | ZetaFormat::V0327SingleFile => String::new(),
858 }
859}
860
861pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
862 match format {
863 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
864 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
865 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
866 ZetaFormat::V0211SeedCoder
867 | ZetaFormat::V0331SeedCoderModelPy
868 | ZetaFormat::V0304SeedNoEdits
869 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
870 ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
871 ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
872 ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
873 ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
874
875 ZetaFormat::V0112MiddleAtEnd
876 | ZetaFormat::V0113Ordered
877 | ZetaFormat::V0114180EditableRegion
878 | ZetaFormat::v0226Hashline
879 | ZetaFormat::V0304VariableEdit => None,
880 }
881}
882
883pub fn encode_patch_as_output_for_format(
884 format: ZetaFormat,
885 old_editable_region: &str,
886 patch: &str,
887 cursor_offset: Option<usize>,
888) -> Result<Option<String>> {
889 match format {
890 ZetaFormat::v0226Hashline => {
891 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
892 }
893 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
894 old_editable_region,
895 patch,
896 cursor_offset,
897 )
898 .map(Some),
899 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
900 Ok(seed_coder::no_edits(patch))
901 }
902 ZetaFormat::V0316SeedMultiRegions => {
903 let empty_patch = patch.lines().count() <= 3;
904 if empty_patch {
905 let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
906 let marker_num =
907 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
908 let tag = multi_region::marker_tag(marker_num);
909 Ok(Some(format!(
910 "{tag}{tag}{}",
911 multi_region::V0316_END_MARKER
912 )))
913 } else {
914 Ok(None)
915 }
916 }
917 ZetaFormat::V0318SeedMultiRegions => {
918 let empty_patch = patch.lines().count() <= 3;
919 if empty_patch {
920 let marker_offsets =
921 multi_region::compute_marker_offsets_v0318(old_editable_region);
922 let marker_num =
923 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
924 let tag = multi_region::marker_tag(marker_num);
925 Ok(Some(format!(
926 "{tag}{tag}{}",
927 multi_region::V0318_END_MARKER
928 )))
929 } else {
930 Ok(None)
931 }
932 }
933 ZetaFormat::V0317SeedMultiRegions => {
934 let empty_patch = patch.lines().count() <= 3;
935 if empty_patch {
936 let tag = multi_region::marker_tag_relative(0);
937 Ok(Some(format!(
938 "{tag}{tag}{}",
939 multi_region::V0317_END_MARKER
940 )))
941 } else {
942 Ok(None)
943 }
944 }
945 ZetaFormat::V0327SingleFile => {
946 let empty_patch = patch.lines().count() <= 3;
947 if empty_patch {
948 let marker_offsets =
949 multi_region::compute_marker_offsets_v0318(old_editable_region);
950 let marker_num =
951 multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
952 let tag = multi_region::marker_tag(marker_num);
953 Ok(Some(format!(
954 "{tag}{tag}{}",
955 multi_region::V0327_END_MARKER
956 )))
957 } else {
958 Ok(None)
959 }
960 }
961 _ => Ok(None),
962 }
963}
964
965/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
966/// extracted), produce the expected model output string for training.
967pub fn format_expected_output(
968 input: &ZetaPromptInput,
969 format: ZetaFormat,
970 patch: &str,
971 cursor_offset: Option<usize>,
972) -> Result<String> {
973 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
974 let mut old_editable = context[editable_range].to_string();
975 if !old_editable.is_empty() && !old_editable.ends_with('\n') {
976 old_editable.push('\n');
977 }
978
979 // Formats with their own output encoding (hashline, variable-edit,
980 // multi-region empty patches) are handled here.
981 if let Some(output) =
982 encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
983 {
984 return Ok(output);
985 }
986
987 let empty_patch = patch.lines().count() <= 3;
988
989 match format {
990 // Multi-region formats: non-empty patches need diff application
991 // then marker-span encoding.
992 ZetaFormat::V0316SeedMultiRegions => {
993 let (new_editable, first_hunk_offset) =
994 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
995 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
996 multi_region::encode_from_old_and_new_v0316(
997 &old_editable,
998 &new_editable,
999 cursor_in_new,
1000 CURSOR_MARKER,
1001 multi_region::V0316_END_MARKER,
1002 )
1003 }
1004 ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0327SingleFile => {
1005 let (new_editable, first_hunk_offset) =
1006 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1007 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1008 multi_region::encode_from_old_and_new_v0318(
1009 &old_editable,
1010 &new_editable,
1011 cursor_in_new,
1012 CURSOR_MARKER,
1013 multi_region::V0318_END_MARKER,
1014 )
1015 }
1016 ZetaFormat::V0317SeedMultiRegions => {
1017 let (new_editable, first_hunk_offset) =
1018 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1019 let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1020 multi_region::encode_from_old_and_new_v0317(
1021 &old_editable,
1022 &new_editable,
1023 cursor_in_new,
1024 CURSOR_MARKER,
1025 multi_region::V0317_END_MARKER,
1026 )
1027 }
1028 // V0131-style formats and fallback: produce new editable text with
1029 // cursor marker inserted, followed by the end marker.
1030 ZetaFormat::V0112MiddleAtEnd
1031 | ZetaFormat::V0113Ordered
1032 | ZetaFormat::V0114180EditableRegion
1033 | ZetaFormat::V0120GitMergeMarkers
1034 | ZetaFormat::V0131GitMergeMarkersPrefix
1035 | ZetaFormat::V0211Prefill
1036 | ZetaFormat::V0211SeedCoder
1037 | ZetaFormat::v0226Hashline
1038 | ZetaFormat::V0304VariableEdit
1039 | ZetaFormat::V0304SeedNoEdits
1040 | ZetaFormat::V0331SeedCoderModelPy
1041 | ZetaFormat::V0306SeedMultiRegions => {
1042 let (mut result, first_hunk_offset) = if empty_patch {
1043 (old_editable.clone(), None)
1044 } else {
1045 udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1046 };
1047
1048 if let Some(cursor) = cursor_offset {
1049 let hunk_start = if !empty_patch {
1050 first_hunk_offset.unwrap_or(0)
1051 } else {
1052 0
1053 };
1054 let offset = (hunk_start + cursor).min(result.len());
1055 result.insert_str(offset, CURSOR_MARKER);
1056 }
1057
1058 if !result.is_empty() && !result.ends_with('\n') {
1059 result.push('\n');
1060 }
1061
1062 if let Some(end_marker) = output_end_marker_for_format(format) {
1063 result.push_str(end_marker);
1064 }
1065
1066 Ok(result)
1067 }
1068 }
1069}
1070
/// Computes the cursor position within the new text after diff application.
///
/// Returns `None` when `cursor_offset` is `None`. Otherwise the cursor offset
/// is added to `first_hunk_offset` (treated as `0` when absent) and the sum is
/// clamped so it never exceeds `new_text.len()`.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let position = first_hunk_offset.unwrap_or(0) + cursor;
    Some(position.min(new_text.len()))
}
1082
/// The outcome of parsing a model response: the replacement text together with
/// the location in the cursor excerpt that it replaces.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region.
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to.
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present.
    /// `None` means no cursor marker was found.
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1092
/// A cursor location derived from a parsed model output.
///
/// The field descriptions below reflect how `cursor_position_from_parsed_output`
/// populates this struct.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Lossy string form of the prompt's `cursor_path`.
    pub path: String,
    /// Number of `\n` characters preceding the cursor, counted from the start
    /// of the cursor excerpt.
    pub row: usize,
    /// Byte distance from the start of the cursor's line (or from the start of
    /// the excerpt when no newline precedes the cursor) to the cursor.
    pub column: usize,
    /// Start offset of the editable region within the excerpt plus the
    /// cursor's byte offset within the new editable region.
    pub offset: usize,
    /// Byte offset of the cursor within the new editable region.
    pub editable_region_offset: usize,
}
1101
1102pub fn parsed_output_from_editable_region(
1103 range_in_excerpt: Range<usize>,
1104 mut new_editable_region: String,
1105) -> ParsedOutput {
1106 let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1107 if let Some(offset) = cursor_offset_in_new_editable_region {
1108 new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1109 }
1110
1111 ParsedOutput {
1112 new_editable_region,
1113 range_in_excerpt,
1114 cursor_offset_in_new_editable_region,
1115 }
1116}
1117
1118/// Parse model output for the given zeta format
1119pub fn parse_zeta2_model_output(
1120 output: &str,
1121 format: ZetaFormat,
1122 prompt_inputs: &ZetaPromptInput,
1123) -> Result<ParsedOutput> {
1124 let output = match output_end_marker_for_format(format) {
1125 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1126 None => output,
1127 };
1128
1129 let (context, editable_range_in_context, context_range, cursor_offset) =
1130 resolve_cursor_region(prompt_inputs, format);
1131 let context_start = context_range.start;
1132 let old_editable_region = &context[editable_range_in_context.clone()];
1133 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1134
1135 let (range_in_context, output) = match format {
1136 ZetaFormat::v0226Hashline => (
1137 editable_range_in_context,
1138 if hashline::output_has_edit_commands(output) {
1139 hashline::apply_edit_commands(old_editable_region, output)
1140 } else {
1141 output.to_string()
1142 },
1143 ),
1144 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1145 ZetaFormat::V0304SeedNoEdits => (
1146 editable_range_in_context,
1147 if output.starts_with(seed_coder::NO_EDITS) {
1148 old_editable_region.to_string()
1149 } else {
1150 output.to_string()
1151 },
1152 ),
1153 ZetaFormat::V0306SeedMultiRegions => (
1154 editable_range_in_context,
1155 if output.starts_with(seed_coder::NO_EDITS) {
1156 old_editable_region.to_string()
1157 } else {
1158 multi_region::apply_marker_span(old_editable_region, output)?
1159 },
1160 ),
1161 ZetaFormat::V0316SeedMultiRegions => (
1162 editable_range_in_context,
1163 multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1164 ),
1165 ZetaFormat::V0318SeedMultiRegions => (
1166 editable_range_in_context,
1167 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1168 ),
1169 ZetaFormat::V0317SeedMultiRegions => (
1170 editable_range_in_context,
1171 multi_region::apply_marker_span_v0317(
1172 old_editable_region,
1173 output,
1174 Some(cursor_offset_in_editable),
1175 )?,
1176 ),
1177 ZetaFormat::V0327SingleFile => (
1178 editable_range_in_context,
1179 multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1180 ),
1181 _ => (editable_range_in_context, output.to_string()),
1182 };
1183
1184 let range_in_excerpt =
1185 range_in_context.start + context_start..range_in_context.end + context_start;
1186
1187 Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1188}
1189
1190pub fn parse_zeta2_model_output_as_patch(
1191 output: &str,
1192 format: ZetaFormat,
1193 prompt_inputs: &ZetaPromptInput,
1194) -> Result<String> {
1195 let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1196 parsed_output_to_patch(prompt_inputs, parsed)
1197}
1198
1199pub fn cursor_position_from_parsed_output(
1200 prompt_inputs: &ZetaPromptInput,
1201 parsed: &ParsedOutput,
1202) -> Option<CursorPosition> {
1203 let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1204 let editable_region_offset = parsed.range_in_excerpt.start;
1205 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1206
1207 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1208
1209 let new_editable_region = &parsed.new_editable_region;
1210 let prefix_end = cursor_offset.min(new_editable_region.len());
1211 let new_region_prefix = &new_editable_region[..prefix_end];
1212
1213 let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1214
1215 let column = match new_region_prefix.rfind('\n') {
1216 Some(last_newline) => cursor_offset - last_newline - 1,
1217 None => {
1218 let content_prefix = &excerpt[..editable_region_offset];
1219 let content_column = match content_prefix.rfind('\n') {
1220 Some(last_newline) => editable_region_offset - last_newline - 1,
1221 None => editable_region_offset,
1222 };
1223 content_column + cursor_offset
1224 }
1225 };
1226
1227 Some(CursorPosition {
1228 path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1229 row,
1230 column,
1231 offset: editable_region_offset + cursor_offset,
1232 editable_region_offset: cursor_offset,
1233 })
1234}
1235
1236pub fn parsed_output_to_patch(
1237 prompt_inputs: &ZetaPromptInput,
1238 parsed: ParsedOutput,
1239) -> Result<String> {
1240 let range_in_excerpt = parsed.range_in_excerpt;
1241 let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1242 let old_text = excerpt[range_in_excerpt.clone()].to_string();
1243 let mut new_text = parsed.new_editable_region;
1244
1245 let mut old_text_normalized = old_text;
1246 if !new_text.is_empty() && !new_text.ends_with('\n') {
1247 new_text.push('\n');
1248 }
1249 if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1250 old_text_normalized.push('\n');
1251 }
1252
1253 let editable_region_offset = range_in_excerpt.start;
1254 let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1255 let editable_region_lines = old_text_normalized.lines().count() as u32;
1256
1257 let diff = udiff::unified_diff_with_context(
1258 &old_text_normalized,
1259 &new_text,
1260 editable_region_start_line,
1261 editable_region_start_line,
1262 editable_region_lines,
1263 );
1264
1265 let path = prompt_inputs
1266 .cursor_path
1267 .to_string_lossy()
1268 .trim_start_matches('/')
1269 .to_string();
1270 let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1271
1272 Ok(udiff::encode_cursor_in_patch(
1273 &formatted_diff,
1274 parsed.cursor_offset_in_new_editable_region,
1275 ))
1276}
1277
/// Public wrapper that forwards to `excerpt_ranges_for_format`.
///
/// `resolve_cursor_region` destructures the returned pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1284
1285pub fn resolve_cursor_region(
1286 input: &ZetaPromptInput,
1287 format: ZetaFormat,
1288) -> (&str, Range<usize>, Range<usize>, usize) {
1289 let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1290 let (editable_tokens, _) = token_limits_for_format(format);
1291 let context_range = 0..input.cursor_excerpt.len();
1292 let editable_range = multi_region::compute_v0327_editable_range(
1293 &input.cursor_excerpt,
1294 input.cursor_offset_in_excerpt,
1295 editable_tokens,
1296 );
1297 (editable_range, context_range)
1298 } else if let Some(syntax_ranges) = &input.syntax_ranges {
1299 let (editable_tokens, context_tokens) = token_limits_for_format(format);
1300 compute_editable_and_context_ranges(
1301 &input.cursor_excerpt,
1302 input.cursor_offset_in_excerpt,
1303 syntax_ranges,
1304 editable_tokens,
1305 context_tokens,
1306 )
1307 } else {
1308 excerpt_range_for_format(format, &input.excerpt_ranges)
1309 };
1310
1311 let context_start = context_range.start;
1312 let context_text = &input.cursor_excerpt[context_range.clone()];
1313 let adjusted_editable =
1314 (editable_range.start - context_start)..(editable_range.end - context_start);
1315 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1316
1317 (
1318 context_text,
1319 adjusted_editable,
1320 context_range,
1321 adjusted_cursor,
1322 )
1323}
1324
1325pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1326 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1327 get_prefill_for_format(format, context, &editable_range)
1328}
1329
1330fn format_edit_history_within_budget(
1331 events: &[Arc<Event>],
1332 file_marker: &str,
1333 edit_history_name: &str,
1334 max_tokens: usize,
1335 max_edit_event_count: usize,
1336) -> String {
1337 let header = format!("{}{}\n", file_marker, edit_history_name);
1338 let header_tokens = estimate_tokens(header.len());
1339 if header_tokens >= max_tokens {
1340 return String::new();
1341 }
1342
1343 let mut event_strings: Vec<String> = Vec::new();
1344 let mut total_tokens = header_tokens;
1345
1346 for event in events.iter().rev().take(max_edit_event_count) {
1347 let mut event_str = String::new();
1348 write_event(&mut event_str, event);
1349 let event_tokens = estimate_tokens(event_str.len());
1350
1351 if total_tokens + event_tokens > max_tokens {
1352 break;
1353 }
1354 total_tokens += event_tokens;
1355 event_strings.push(event_str);
1356 }
1357
1358 if event_strings.is_empty() {
1359 return String::new();
1360 }
1361
1362 let mut result = header;
1363 for event_str in event_strings.iter().rev() {
1364 result.push_str(event_str);
1365 }
1366 result
1367}
1368
1369fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1370 let needs_newline = !excerpt.text.ends_with('\n');
1371 let needs_ellipsis = excerpt.row_range.end < file_max_row;
1372 let len = excerpt.text.len()
1373 + if needs_newline { "\n".len() } else { 0 }
1374 + if needs_ellipsis { "...\n".len() } else { 0 };
1375 estimate_tokens(len)
1376}
1377
1378pub fn format_related_files_within_budget(
1379 related_files: &[RelatedFile],
1380 file_prefix: &str,
1381 file_suffix: &str,
1382 max_tokens: usize,
1383) -> String {
1384 struct ExcerptCandidate {
1385 file_ix: usize,
1386 excerpt_ix: usize,
1387 order: usize,
1388 }
1389
1390 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1391 .iter()
1392 .enumerate()
1393 .flat_map(|(file_ix, file)| {
1394 file.excerpts
1395 .iter()
1396 .enumerate()
1397 .map(move |(excerpt_ix, e)| ExcerptCandidate {
1398 file_ix,
1399 excerpt_ix,
1400 order: e.order,
1401 })
1402 })
1403 .collect();
1404
1405 // Pre-compute file header strings and their token costs.
1406 let file_headers: Vec<String> = related_files
1407 .iter()
1408 .map(|file| {
1409 let path_str = file.path.to_string_lossy();
1410 format!("{}{}\n", file_prefix, path_str)
1411 })
1412 .collect();
1413
1414 // Sort the excerpts by their order and determine how many fit within the budget.
1415 let mut total_tokens = 0;
1416 let mut included_excerpt_count = 0_usize;
1417 let mut included_file_indices = vec![false; related_files.len()];
1418 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1419 for candidate in &excerpt_candidates {
1420 let file = &related_files[candidate.file_ix];
1421 let excerpt = &file.excerpts[candidate.excerpt_ix];
1422 let file_already_included = included_file_indices[candidate.file_ix];
1423 let header_cost = if file_already_included {
1424 0
1425 } else {
1426 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1427 };
1428 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1429 if total_tokens + header_cost + excerpt_cost > max_tokens {
1430 break;
1431 }
1432 total_tokens += header_cost + excerpt_cost;
1433 if !file_already_included {
1434 included_file_indices[candidate.file_ix] = true;
1435 }
1436 included_excerpt_count += 1;
1437 }
1438
1439 excerpt_candidates.truncate(included_excerpt_count);
1440 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1441
1442 // Render all of the files that fit within the token budget, in the original order.
1443 let mut result = String::new();
1444 let mut last_file_ix = None;
1445 for candidate in &excerpt_candidates {
1446 if last_file_ix != Some(candidate.file_ix) {
1447 if last_file_ix.is_some() {
1448 result.push_str(file_suffix);
1449 }
1450 result.push_str(&file_headers[candidate.file_ix]);
1451 last_file_ix = Some(candidate.file_ix);
1452 }
1453 let file = &related_files[candidate.file_ix];
1454 let excerpt = &file.excerpts[candidate.excerpt_ix];
1455 result.push_str(&excerpt.text);
1456 if !result.ends_with('\n') {
1457 result.push('\n');
1458 }
1459 if excerpt.row_range.end < file.max_row {
1460 result.push_str("...\n");
1461 }
1462 }
1463
1464 result
1465}
1466
1467pub fn write_related_files(
1468 prompt: &mut String,
1469 related_files: &[RelatedFile],
1470) -> Vec<Range<usize>> {
1471 let mut ranges = Vec::new();
1472 for file in related_files {
1473 let start = prompt.len();
1474 let path_str = file.path.to_string_lossy();
1475 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1476 for excerpt in &file.excerpts {
1477 prompt.push_str(&excerpt.text);
1478 if !prompt.ends_with('\n') {
1479 prompt.push('\n');
1480 }
1481 if excerpt.row_range.end < file.max_row {
1482 prompt.push_str("...\n");
1483 }
1484 }
1485 let end = prompt.len();
1486 ranges.push(start..end);
1487 }
1488 ranges
1489}
1490
1491mod v0112_middle_at_end {
1492 use super::*;
1493
1494 pub fn special_tokens() -> &'static [&'static str] {
1495 &[
1496 "<|fim_prefix|>",
1497 "<|fim_suffix|>",
1498 "<|fim_middle|>",
1499 "<|file_sep|>",
1500 CURSOR_MARKER,
1501 ]
1502 }
1503
1504 pub fn write_cursor_excerpt_section(
1505 prompt: &mut String,
1506 path: &Path,
1507 context: &str,
1508 editable_range: &Range<usize>,
1509 cursor_offset: usize,
1510 ) {
1511 let path_str = path.to_string_lossy();
1512 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1513
1514 prompt.push_str("<|fim_prefix|>\n");
1515 prompt.push_str(&context[..editable_range.start]);
1516
1517 prompt.push_str("<|fim_suffix|>\n");
1518 prompt.push_str(&context[editable_range.end..]);
1519 if !prompt.ends_with('\n') {
1520 prompt.push('\n');
1521 }
1522
1523 prompt.push_str("<|fim_middle|>current\n");
1524 prompt.push_str(&context[editable_range.start..cursor_offset]);
1525 prompt.push_str(CURSOR_MARKER);
1526 prompt.push_str(&context[cursor_offset..editable_range.end]);
1527 if !prompt.ends_with('\n') {
1528 prompt.push('\n');
1529 }
1530
1531 prompt.push_str("<|fim_middle|>updated\n");
1532 }
1533}
1534
1535mod v0113_ordered {
1536 use super::*;
1537
1538 pub fn special_tokens() -> &'static [&'static str] {
1539 &[
1540 "<|fim_prefix|>",
1541 "<|fim_suffix|>",
1542 "<|fim_middle|>",
1543 "<|file_sep|>",
1544 CURSOR_MARKER,
1545 ]
1546 }
1547
1548 pub fn write_cursor_excerpt_section(
1549 prompt: &mut String,
1550 path: &Path,
1551 context: &str,
1552 editable_range: &Range<usize>,
1553 cursor_offset: usize,
1554 ) {
1555 let path_str = path.to_string_lossy();
1556 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1557
1558 prompt.push_str("<|fim_prefix|>\n");
1559 prompt.push_str(&context[..editable_range.start]);
1560 if !prompt.ends_with('\n') {
1561 prompt.push('\n');
1562 }
1563
1564 prompt.push_str("<|fim_middle|>current\n");
1565 prompt.push_str(&context[editable_range.start..cursor_offset]);
1566 prompt.push_str(CURSOR_MARKER);
1567 prompt.push_str(&context[cursor_offset..editable_range.end]);
1568 if !prompt.ends_with('\n') {
1569 prompt.push('\n');
1570 }
1571
1572 prompt.push_str("<|fim_suffix|>\n");
1573 prompt.push_str(&context[editable_range.end..]);
1574 if !prompt.ends_with('\n') {
1575 prompt.push('\n');
1576 }
1577
1578 prompt.push_str("<|fim_middle|>updated\n");
1579 }
1580}
1581
mod v0114180_editable_region {
    use super::*;

    /// Returns the special tokens for this format, which are exactly those of
    /// `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1589
1590pub mod v0120_git_merge_markers {
1591 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1592 //!
1593 //! Example prompt:
1594 //!
1595 //! <|file_sep|>path/to/target_file.py
1596 //! <|fim_prefix|>
1597 //! code before editable region
1598 //! <|fim_suffix|>
1599 //! code after editable region
1600 //! <|fim_middle|>
1601 //! <<<<<<< CURRENT
1602 //! code that
1603 //! needs to<|user_cursor|>
1604 //! be rewritten
1605 //! =======
1606 //!
1607 //! Expected output (should be generated by the model):
1608 //!
1609 //! updated
1610 //! code with
1611 //! changes applied
1612 //! >>>>>>> UPDATED
1613
1614 use super::*;
1615
1616 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1617 pub const SEPARATOR: &str = "=======\n";
1618 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1619
1620 pub fn special_tokens() -> &'static [&'static str] {
1621 &[
1622 "<|fim_prefix|>",
1623 "<|fim_suffix|>",
1624 "<|fim_middle|>",
1625 "<|file_sep|>",
1626 START_MARKER,
1627 SEPARATOR,
1628 END_MARKER,
1629 CURSOR_MARKER,
1630 ]
1631 }
1632
1633 pub fn write_cursor_excerpt_section(
1634 prompt: &mut String,
1635 path: &Path,
1636 context: &str,
1637 editable_range: &Range<usize>,
1638 cursor_offset: usize,
1639 ) {
1640 let path_str = path.to_string_lossy();
1641 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1642
1643 prompt.push_str("<|fim_prefix|>");
1644 prompt.push_str(&context[..editable_range.start]);
1645
1646 prompt.push_str("<|fim_suffix|>");
1647 prompt.push_str(&context[editable_range.end..]);
1648 if !prompt.ends_with('\n') {
1649 prompt.push('\n');
1650 }
1651
1652 prompt.push_str("<|fim_middle|>");
1653 prompt.push_str(START_MARKER);
1654 prompt.push_str(&context[editable_range.start..cursor_offset]);
1655 prompt.push_str(CURSOR_MARKER);
1656 prompt.push_str(&context[cursor_offset..editable_range.end]);
1657 if !prompt.ends_with('\n') {
1658 prompt.push('\n');
1659 }
1660 prompt.push_str(SEPARATOR);
1661 }
1662}
1663
1664pub mod v0131_git_merge_markers_prefix {
1665 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1666 //!
1667 //! Example prompt:
1668 //!
1669 //! <|file_sep|>path/to/target_file.py
1670 //! <|fim_prefix|>
1671 //! code before editable region
1672 //! <<<<<<< CURRENT
1673 //! code that
1674 //! needs to<|user_cursor|>
1675 //! be rewritten
1676 //! =======
1677 //! <|fim_suffix|>
1678 //! code after editable region
1679 //! <|fim_middle|>
1680 //!
1681 //! Expected output (should be generated by the model):
1682 //!
1683 //! updated
1684 //! code with
1685 //! changes applied
1686 //! >>>>>>> UPDATED
1687
1688 use super::*;
1689
1690 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1691 pub const SEPARATOR: &str = "=======\n";
1692 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1693
1694 pub fn special_tokens() -> &'static [&'static str] {
1695 &[
1696 "<|fim_prefix|>",
1697 "<|fim_suffix|>",
1698 "<|fim_middle|>",
1699 "<|file_sep|>",
1700 START_MARKER,
1701 SEPARATOR,
1702 END_MARKER,
1703 CURSOR_MARKER,
1704 ]
1705 }
1706
1707 pub fn write_cursor_excerpt_section(
1708 prompt: &mut String,
1709 path: &Path,
1710 context: &str,
1711 editable_range: &Range<usize>,
1712 cursor_offset: usize,
1713 ) {
1714 let path_str = path.to_string_lossy();
1715 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1716
1717 prompt.push_str("<|fim_prefix|>");
1718 prompt.push_str(&context[..editable_range.start]);
1719 prompt.push_str(START_MARKER);
1720 prompt.push_str(&context[editable_range.start..cursor_offset]);
1721 prompt.push_str(CURSOR_MARKER);
1722 prompt.push_str(&context[cursor_offset..editable_range.end]);
1723 if !prompt.ends_with('\n') {
1724 prompt.push('\n');
1725 }
1726 prompt.push_str(SEPARATOR);
1727
1728 prompt.push_str("<|fim_suffix|>");
1729 prompt.push_str(&context[editable_range.end..]);
1730 if !prompt.ends_with('\n') {
1731 prompt.push('\n');
1732 }
1733
1734 prompt.push_str("<|fim_middle|>");
1735 }
1736}
1737
1738pub mod v0211_prefill {
1739 use super::*;
1740
1741 pub fn special_tokens() -> &'static [&'static str] {
1742 v0131_git_merge_markers_prefix::special_tokens()
1743 }
1744
1745 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1746 let editable_region = &context[editable_range.start..editable_range.end];
1747
1748 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1749 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1750
1751 // Find a token boundary to avoid splitting tokens in the prefill.
1752 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1753 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1754 // the \n and consume any consecutive \n characters after it.
1755 let prefill = &editable_region[..prefill_len];
1756 match prefill.rfind('\n') {
1757 Some(pos) => {
1758 let mut end = pos + 1;
1759 while end < editable_region.len()
1760 && editable_region.as_bytes().get(end) == Some(&b'\n')
1761 {
1762 end += 1;
1763 }
1764 editable_region[..end].to_string()
1765 }
1766 // No newline found. Fall back to splitting before the last space
1767 // (word-level boundary)
1768 None => match prefill.rfind(' ') {
1769 Some(pos) => prefill[..pos].to_string(),
1770 None => prefill.to_string(),
1771 },
1772 }
1773 }
1774}
1775
1776pub mod hashline {
1777
1778 use std::fmt::Display;
1779
    /// Line written at the very end of the hashline prompt by
    /// `write_cursor_excerpt_section`.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hash-prefixed editable region by
    /// `write_cursor_excerpt_section`.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a command line that replaces one line or a range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Marker with which the model signals that nothing should change;
    /// `apply_edit_commands` returns the region unchanged when the output
    /// starts with it.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1788
1789 pub fn special_tokens() -> &'static [&'static str] {
1790 return &[
1791 SET_COMMAND_MARKER,
1792 "<|set_range|>",
1793 INSERT_COMMAND_MARKER,
1794 NO_EDITS_COMMAND_MARKER,
1795 CURSOR_MARKER,
1796 "<|file_sep|>",
1797 "<|fim_prefix|>",
1798 "<|fim_suffix|>",
1799 "<|fim_middle|>",
1800 ];
1801 }
1802
1803 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1804 #[derive(Debug, Clone, PartialEq, Eq)]
1805 struct LineRef {
1806 index: usize,
1807 hash: u8,
1808 }
1809
1810 impl Display for LineRef {
1811 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1812 write!(f, "{}:{:02x}", self.index, self.hash)
1813 }
1814 }
1815
1816 pub fn hash_line(line: &[u8]) -> u8 {
1817 let mut h: u8 = 0;
1818 for &byte in line {
1819 h = h.wrapping_add(byte);
1820 }
1821 return h;
1822 }
1823
1824 /// Write the hashline-encoded editable region into `out`. Each line of
1825 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1826 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1827 /// to the start of `editable_text`).
1828 pub fn write_hashline_editable_region(
1829 out: &mut String,
1830 editable_text: &str,
1831 cursor_offset_in_editable: usize,
1832 ) {
1833 let mut offset = 0;
1834 for (i, line) in editable_text.lines().enumerate() {
1835 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1836 && cursor_offset_in_editable < offset + line.len()
1837 {
1838 (
1839 &line[..cursor_offset_in_editable - offset],
1840 CURSOR_MARKER,
1841 &line[cursor_offset_in_editable - offset..],
1842 )
1843 } else {
1844 (line, "", "")
1845 };
1846 write!(
1847 out,
1848 "\n{}|{head}{cursor}{tail}",
1849 LineRef {
1850 index: i,
1851 hash: hash_line(line.as_bytes())
1852 }
1853 )
1854 .unwrap();
1855 offset += line.len() + 1;
1856 }
1857 }
1858
1859 pub fn write_cursor_excerpt_section(
1860 prompt: &mut String,
1861 path: &Path,
1862 context: &str,
1863 editable_range: &Range<usize>,
1864 cursor_offset: usize,
1865 ) {
1866 let path_str = path.to_string_lossy();
1867 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1868
1869 prompt.push_str("<|fim_prefix|>\n");
1870 prompt.push_str(&context[..editable_range.start]);
1871 prompt.push_str(START_MARKER);
1872
1873 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1874 let editable_region = &context[editable_range.clone()];
1875 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1876
1877 if !prompt.ends_with('\n') {
1878 prompt.push('\n');
1879 }
1880
1881 prompt.push_str("<|fim_suffix|>\n");
1882 prompt.push_str(&context[editable_range.end..]);
1883 if !prompt.ends_with('\n') {
1884 prompt.push('\n');
1885 }
1886
1887 prompt.push_str(END_MARKER);
1888 prompt.push('\n');
1889 }
1890
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of a command borrows from the model output and holds the
    /// lines that follow the command line, up to the next command line or the
    /// end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1908
1909 /// Parse a line reference like `3:c3` into a `LineRef`.
1910 fn parse_line_ref(s: &str) -> Option<LineRef> {
1911 let (idx_str, hash_str) = s.split_once(':')?;
1912 let index = idx_str.parse::<usize>().ok()?;
1913 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1914 Some(LineRef { index, hash })
1915 }
1916
1917 /// Parse the model output into a list of `EditCommand`s.
1918 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1919 let mut commands = Vec::new();
1920 let mut offset = 0usize;
1921
1922 while offset < model_output.len() {
1923 let next_nl = model_output[offset..]
1924 .find('\n')
1925 .map(|i| offset + i)
1926 .unwrap_or(model_output.len());
1927 let line = &model_output[offset..next_nl];
1928 let line_end = if next_nl < model_output.len() {
1929 next_nl + 1
1930 } else {
1931 next_nl
1932 };
1933
1934 let trimmed = line.trim();
1935 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1936 (true, spec)
1937 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1938 (false, spec)
1939 } else {
1940 offset = line_end;
1941 continue;
1942 };
1943
1944 let mut content_end = line_end;
1945 let mut scan = line_end;
1946
1947 while scan < model_output.len() {
1948 let body_nl = model_output[scan..]
1949 .find('\n')
1950 .map(|i| scan + i)
1951 .unwrap_or(model_output.len());
1952 let body_line = &model_output[scan..body_nl];
1953 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1954 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1955 {
1956 break;
1957 }
1958 scan = if body_nl < model_output.len() {
1959 body_nl + 1
1960 } else {
1961 body_nl
1962 };
1963 content_end = scan;
1964 }
1965
1966 let content = &model_output[line_end..content_end];
1967
1968 if is_set {
1969 if let Some((start_str, end_str)) = specifier.split_once('-') {
1970 if let (Some(start), Some(end)) =
1971 (parse_line_ref(start_str), parse_line_ref(end_str))
1972 {
1973 commands.push(EditCommand::Set {
1974 start,
1975 end,
1976 content,
1977 });
1978 }
1979 } else if let Some(target) = parse_line_ref(specifier) {
1980 commands.push(EditCommand::Set {
1981 start: target.clone(),
1982 end: target,
1983 content,
1984 });
1985 }
1986 } else {
1987 let after = parse_line_ref(specifier);
1988 commands.push(EditCommand::Insert { after, content });
1989 }
1990
1991 offset = scan;
1992 }
1993
1994 commands
1995 }
1996
1997 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1998 /// (as opposed to being a plain full-replacement output).
1999 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
2000 /// editable region, returning the plain text content.
2001 pub fn strip_hashline_prefixes(region: &str) -> String {
2002 let mut decoded: String = region
2003 .lines()
2004 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
2005 .collect::<Vec<_>>()
2006 .join("\n");
2007 if region.ends_with('\n') {
2008 decoded.push('\n');
2009 }
2010 decoded
2011 }
2012
2013 pub fn output_has_edit_commands(model_output: &str) -> bool {
2014 model_output.contains(SET_COMMAND_MARKER)
2015 || model_output.contains(INSERT_COMMAND_MARKER)
2016 || model_output.contains(NO_EDITS_COMMAND_MARKER)
2017 }
2018
2019 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2020 /// original editable region text.
2021 ///
2022 /// `editable_region` is the original text of the editable region (without hash
2023 /// prefixes). `model_output` is the raw model response containing edit commands.
2024 ///
2025 /// Returns the full replacement text for the editable region.
2026 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2027 if model_output
2028 .trim_start()
2029 .starts_with(NO_EDITS_COMMAND_MARKER)
2030 {
2031 return editable_region.to_string();
2032 }
2033
2034 let original_lines: Vec<&str> = editable_region.lines().collect();
2035 let old_hashes: Vec<u8> = original_lines
2036 .iter()
2037 .map(|line| hash_line(line.as_bytes()))
2038 .collect();
2039
2040 let commands = parse_edit_commands(model_output);
2041
2042 // For set operations: indexed by start line → Some((end line index, content))
2043 // For insert operations: indexed by line index → vec of content to insert after
2044 // Insert-before-first is tracked separately.
2045 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2046 let mut insert_before_first: Vec<&str> = Vec::new();
2047 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2048
2049 for command in &commands {
2050 match command {
2051 EditCommand::Set {
2052 start,
2053 end,
2054 content,
2055 } => {
2056 if start.index < old_hashes.len()
2057 && end.index < old_hashes.len()
2058 && start.index <= end.index
2059 && old_hashes[start.index] == start.hash
2060 && old_hashes[end.index] == end.hash
2061 {
2062 set_ops[start.index] = Some((end.index, *content));
2063 }
2064 }
2065 EditCommand::Insert { after, content } => match after {
2066 None => insert_before_first.push(*content),
2067 Some(line_ref) => {
2068 if line_ref.index < old_hashes.len()
2069 && old_hashes[line_ref.index] == line_ref.hash
2070 {
2071 insert_after[line_ref.index].push(*content);
2072 }
2073 }
2074 },
2075 }
2076 }
2077
2078 let mut result = String::new();
2079
2080 // Emit any insertions before the first line
2081 for content in &insert_before_first {
2082 result.push_str(content);
2083 if !content.ends_with('\n') {
2084 result.push('\n');
2085 }
2086 }
2087
2088 let mut i = 0;
2089 while i < original_lines.len() {
2090 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2091 // Replace lines i..=end_index with the replacement content
2092 result.push_str(replacement);
2093 if !replacement.is_empty() && !replacement.ends_with('\n') {
2094 result.push('\n');
2095 }
2096 // Emit any insertions after the end of this set range
2097 if *end_index < insert_after.len() {
2098 for content in &insert_after[*end_index] {
2099 result.push_str(content);
2100 if !content.ends_with('\n') {
2101 result.push('\n');
2102 }
2103 }
2104 }
2105 i = end_index + 1;
2106 } else {
2107 // Keep the original line
2108 result.push_str(original_lines[i]);
2109 result.push('\n');
2110 // Emit any insertions after this line
2111 for content in &insert_after[i] {
2112 result.push_str(content);
2113 if !content.ends_with('\n') {
2114 result.push('\n');
2115 }
2116 }
2117 i += 1;
2118 }
2119 }
2120
2121 // Preserve trailing newline behavior: if the original ended with a
2122 // newline the result already has one; if it didn't, trim the extra one
2123 // we added.
2124 if !editable_region.ends_with('\n') && result.ends_with('\n') {
2125 result.pop();
2126 }
2127
2128 result
2129 }
2130
2131 /// Convert a unified diff patch into hashline edit commands.
2132 ///
2133 /// Parses the unified diff `patch` directly to determine which lines of
2134 /// `old_text` are deleted/replaced and what new lines are added, then emits
2135 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2136 /// `{index}:{hash}` identifiers.
2137 ///
2138 /// `cursor_offset` is an optional byte offset into the first hunk's new
2139 /// text (context + additions) where the cursor marker should be placed.
2140 pub fn patch_to_edit_commands(
2141 old_text: &str,
2142 patch: &str,
2143 cursor_offset: Option<usize>,
2144 ) -> Result<String> {
2145 let old_lines: Vec<&str> = old_text.lines().collect();
2146 let old_hashes: Vec<u8> = old_lines
2147 .iter()
2148 .map(|line| hash_line(line.as_bytes()))
2149 .collect();
2150
2151 let mut result = String::new();
2152 let mut first_hunk = true;
2153
2154 struct Hunk<'a> {
2155 line_range: Range<usize>,
2156 new_text_lines: Vec<&'a str>,
2157 cursor_line_offset_in_new_text: Option<(usize, usize)>,
2158 }
2159
2160 // Parse the patch line by line. We only care about hunk headers,
2161 // context, deletions, and additions.
2162 let mut old_line_index: usize = 0;
2163 let mut current_hunk: Option<Hunk> = None;
2164 // Byte offset tracking within the hunk's new text for cursor placement.
2165 let mut new_text_byte_offset: usize = 0;
2166 // The line index of the last old line seen before/in the current hunk
2167 // (used for insert-after reference).
2168 let mut last_old_line_before_hunk: Option<usize> = None;
2169
2170 fn flush_hunk(
2171 hunk: Hunk,
2172 last_old_line: Option<usize>,
2173 result: &mut String,
2174 old_hashes: &[u8],
2175 ) {
2176 if hunk.line_range.is_empty() {
2177 // Pure insertion — reference the old line to insert after when in bounds.
2178 if let Some(after) = last_old_line
2179 && let Some(&hash) = old_hashes.get(after)
2180 {
2181 write!(
2182 result,
2183 "{INSERT_COMMAND_MARKER}{}\n",
2184 LineRef { index: after, hash }
2185 )
2186 .unwrap();
2187 } else {
2188 result.push_str(INSERT_COMMAND_MARKER);
2189 result.push('\n');
2190 }
2191 } else {
2192 let start = hunk.line_range.start;
2193 let end_exclusive = hunk.line_range.end;
2194 let deleted_line_count = end_exclusive.saturating_sub(start);
2195
2196 if deleted_line_count == 1 {
2197 if let Some(&hash) = old_hashes.get(start) {
2198 write!(
2199 result,
2200 "{SET_COMMAND_MARKER}{}\n",
2201 LineRef { index: start, hash }
2202 )
2203 .unwrap();
2204 } else {
2205 result.push_str(SET_COMMAND_MARKER);
2206 result.push('\n');
2207 }
2208 } else {
2209 let end_inclusive = end_exclusive - 1;
2210 match (
2211 old_hashes.get(start).copied(),
2212 old_hashes.get(end_inclusive).copied(),
2213 ) {
2214 (Some(start_hash), Some(end_hash)) => {
2215 write!(
2216 result,
2217 "{SET_COMMAND_MARKER}{}-{}\n",
2218 LineRef {
2219 index: start,
2220 hash: start_hash
2221 },
2222 LineRef {
2223 index: end_inclusive,
2224 hash: end_hash
2225 }
2226 )
2227 .unwrap();
2228 }
2229 _ => {
2230 result.push_str(SET_COMMAND_MARKER);
2231 result.push('\n');
2232 }
2233 }
2234 }
2235 }
2236 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2237 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2238 && line_offset == cursor_line_offset
2239 {
2240 result.push_str(&line[..char_offset]);
2241 result.push_str(CURSOR_MARKER);
2242 result.push_str(&line[char_offset..]);
2243 continue;
2244 }
2245
2246 result.push_str(line);
2247 }
2248 }
2249
2250 for raw_line in patch.split_inclusive('\n') {
2251 if raw_line.starts_with("@@") {
2252 // Flush any pending change hunk from a previous patch hunk.
2253 if let Some(hunk) = current_hunk.take() {
2254 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2255 }
2256
2257 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2258 // We intentionally do not trust old_start as a direct local index into `old_text`,
2259 // because some patches are produced against a larger file region and carry
2260 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2261 if first_hunk {
2262 new_text_byte_offset = 0;
2263 first_hunk = false;
2264 }
2265 continue;
2266 }
2267
2268 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2269 continue;
2270 }
2271 if raw_line.starts_with("\\ No newline") {
2272 continue;
2273 }
2274
2275 if raw_line.starts_with('-') {
2276 // Extend or start a change hunk with this deleted old line.
2277 match &mut current_hunk {
2278 Some(Hunk {
2279 line_range: range, ..
2280 }) => range.end = old_line_index + 1,
2281 None => {
2282 current_hunk = Some(Hunk {
2283 line_range: old_line_index..old_line_index + 1,
2284 new_text_lines: Vec::new(),
2285 cursor_line_offset_in_new_text: None,
2286 });
2287 }
2288 }
2289 old_line_index += 1;
2290 } else if let Some(added_content) = raw_line.strip_prefix('+') {
2291 // Place cursor marker if cursor_offset falls within this line.
2292 let mut cursor_line_offset = None;
2293 if let Some(cursor_off) = cursor_offset
2294 && (first_hunk
2295 || cursor_off >= new_text_byte_offset
2296 && cursor_off <= new_text_byte_offset + added_content.len())
2297 {
2298 let line_offset = added_content.floor_char_boundary(
2299 cursor_off
2300 .saturating_sub(new_text_byte_offset)
2301 .min(added_content.len()),
2302 );
2303 cursor_line_offset = Some(line_offset);
2304 }
2305
2306 new_text_byte_offset += added_content.len();
2307
2308 let hunk = current_hunk.get_or_insert(Hunk {
2309 line_range: old_line_index..old_line_index,
2310 new_text_lines: vec![],
2311 cursor_line_offset_in_new_text: None,
2312 });
2313 hunk.new_text_lines.push(added_content);
2314 hunk.cursor_line_offset_in_new_text = cursor_line_offset
2315 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2316 } else {
2317 // Context line (starts with ' ' or is empty).
2318 if let Some(hunk) = current_hunk.take() {
2319 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2320 }
2321 last_old_line_before_hunk = Some(old_line_index);
2322 old_line_index += 1;
2323 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2324 new_text_byte_offset += content.len();
2325 }
2326 }
2327
2328 // Flush final group.
2329 if let Some(hunk) = current_hunk.take() {
2330 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2331 }
2332
2333 // Trim a single trailing newline.
2334 if result.ends_with('\n') {
2335 result.pop();
2336 }
2337
2338 if result.is_empty() {
2339 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2340 }
2341
2342 Ok(result)
2343 }
2344
2345 #[cfg(test)]
2346 mod tests {
2347 use super::*;
2348 use indoc::indoc;
2349
        /// Table-driven test for `hashline::write_cursor_excerpt_section`:
        /// each case renders `context` with the given editable range and
        /// cursor offset and compares the whole prompt section verbatim.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label reported when the case fails.
                name: &'static str,
                // Full excerpt text handed to the formatter.
                context: &'static str,
                // Byte range of `context` rendered as hashline-prefixed lines.
                editable_range: Range<usize>,
                // Cursor position as a byte offset into `context`.
                cursor_offset: usize,
                // Exact prompt text the formatter must produce.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:5c|hello<|user_cursor|> world
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        2:29|ccc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:d9|lin<|user_cursor|>e1
                        1:da|line2
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = between 'a' and 'b' in "abc" (after leading \n)
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:00|
                        1:26|a<|user_cursor|>bc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|ab<|user_cursor|>c
                        <|fim_suffix|>
                        def
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:1b|hé<|user_cursor|>llo
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:80|日本<|user_cursor|>語
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:6b|a🌍<|user_cursor|>b
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        <|fim_middle|>current
                        0:26|abc
                        1:2f|def
                        <|fim_suffix|>
                        <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                        <|file_sep|>test.rs
                        <|fim_prefix|>
                        pre
                        <|fim_middle|>current
                        0:23|aaa
                        1:26|b<|user_cursor|>bb
                        <|fim_suffix|>
                        suf
                        <|fim_middle|>updated
                    "},
                },
            ];

            // Render every case into a fresh prompt and compare it verbatim.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
2541
        /// Table-driven test for `hashline::apply_edit_commands`: each case
        /// applies `model_output` to `original` and compares the resulting
        /// text with `expected`. Line references use the `{index}:{hash}` form.
        #[test]
        fn test_apply_edit_commands() {
            struct Case {
                // Label reported when the case fails.
                name: &'static str,
                // Text of the editable region before the edit.
                original: &'static str,
                // Raw command text to apply.
                model_output: &'static str,
                // Text expected after applying the commands.
                expected: &'static str,
            }

            let cases = vec![
                Case {
                    name: "set_single_line",
                    original: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    model_output: indoc! {"
                        <|set|>2:87
                            total += product.price;
                    "},
                    expected: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "set_range",
                    original: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:46-3:4a
                            let sum = 6;
                    "},
                    expected: indoc! {"
                        fn foo() {
                            let sum = 6;
                        }
                    "},
                },
                Case {
                    name: "insert_after_line",
                    original: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    model_output: indoc! {"
                        <|insert|>1:46
                            let y = 2;
                    "},
                    expected: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insert_before_first",
                    original: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        use std::io;
                    "},
                    expected: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "set_with_cursor_marker",
                    original: indoc! {"
                        fn main() {
                            println!();
                        }
                    "},
                    model_output: indoc! {"
                        <|set|>1:34
                            eprintln!(\"<|user_cursor|>\");
                    "},
                    expected: indoc! {"
                        fn main() {
                            eprintln!(\"<|user_cursor|>\");
                        }
                    "},
                },
                Case {
                    name: "multiple_set_commands",
                    original: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    model_output: indoc! {"
                        <|set|>0:23
                        AAA
                        <|set|>2:29
                        CCC
                    "},
                    expected: indoc! {"
                        AAA
                        bbb
                        CCC
                        ddd
                    "},
                },
                Case {
                    name: "set_range_multiline_replacement",
                    original: indoc! {"
                        fn handle_submit() {
                        }

                        fn handle_keystroke() {
                    "},
                    model_output: indoc! {"
                        <|set|>0:3f-1:7d
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }
                    "},
                    expected: indoc! {"
                        fn handle_submit(modal_state: &mut ModalState) {
                            <|user_cursor|>
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "no_edit_commands_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "some random text with no commands",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "no_edits_command_returns_original",
                    original: indoc! {"
                        hello
                        world
                    "},
                    model_output: "<|no_edits|>",
                    expected: indoc! {"
                        hello
                        world
                    "},
                },
                Case {
                    name: "wrong_hash_set_ignored",
                    original: indoc! {"
                        aaa
                        bbb
                    "},
                    model_output: indoc! {"
                        <|set|>0:ff
                        ZZZ
                    "},
                    expected: indoc! {"
                        aaa
                        bbb
                    "},
                },
                Case {
                    name: "insert_and_set_combined",
                    original: indoc! {"
                        alpha
                        beta
                        gamma
                    "},
                    model_output: indoc! {"
                        <|set|>0:06
                        ALPHA
                        <|insert|>1:9c
                        beta_extra
                    "},
                    expected: indoc! {"
                        ALPHA
                        beta
                        beta_extra
                        gamma
                    "},
                },
                Case {
                    name: "no_trailing_newline_preserved",
                    original: "hello\nworld",
                    model_output: indoc! {"
                        <|set|>0:14
                        HELLO
                    "},
                    expected: "HELLO\nworld",
                },
                Case {
                    name: "set_range_hash_mismatch_in_end_bound",
                    original: indoc! {"
                        one
                        two
                        three
                    "},
                    model_output: indoc! {"
                        <|set|>0:42-2:ff
                        ONE_TWO_THREE
                    "},
                    expected: indoc! {"
                        one
                        two
                        three
                    "},
                },
                Case {
                    name: "set_range_start_greater_than_end_ignored",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|set|>2:63-1:62
                        X
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                    "},
                },
                Case {
                    name: "insert_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|insert|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "set_out_of_bounds_ignored",
                    original: indoc! {"
                        x
                        y
                    "},
                    model_output: indoc! {"
                        <|set|>99:aa
                        z
                    "},
                    expected: indoc! {"
                        x
                        y
                    "},
                },
                Case {
                    name: "malformed_set_command_ignored",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|set|>not-a-line-ref
                        UPDATED
                    "},
                    expected: indoc! {"
                        alpha
                        beta
                    "},
                },
                Case {
                    name: "malformed_insert_hash_treated_as_before_first",
                    original: indoc! {"
                        alpha
                        beta
                    "},
                    model_output: indoc! {"
                        <|insert|>1:nothex
                        preamble
                    "},
                    expected: indoc! {"
                        preamble
                        alpha
                        beta
                    "},
                },
                Case {
                    name: "set_then_insert_same_target_orders_insert_after_replacement",
                    original: indoc! {"
                        cat
                        dog
                    "},
                    model_output: indoc! {"
                        <|set|>0:38
                        CAT
                        <|insert|>0:38
                        TAIL
                    "},
                    expected: indoc! {"
                        CAT
                        TAIL
                        dog
                    "},
                },
                Case {
                    // Expected output shows only the range starting at line 0
                    // takes effect; the overlapping second range is not applied.
                    name: "overlapping_set_ranges_last_wins",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        <|set|>0:61-2:63
                        FIRST
                        <|set|>1:62-3:64
                        SECOND
                    "},
                    expected: indoc! {"
                        FIRST
                        d
                    "},
                },
                Case {
                    name: "insert_before_first_and_after_line",
                    original: indoc! {"
                        a
                        b
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        HEAD
                        <|insert|>0:61
                        MID
                    "},
                    expected: indoc! {"
                        HEAD
                        a
                        MID
                        b
                    "},
                },
            ];

            // Apply each command list to its original text and compare verbatim.
            for case in &cases {
                let result = hashline::apply_edit_commands(case.original, &case.model_output);
                assert_eq!(result, case.expected, "failed case: {}", case.name);
            }
        }
2912
2913 #[test]
2914 fn test_output_has_edit_commands() {
2915 assert!(hashline::output_has_edit_commands(&format!(
2916 "{}0:ab\nnew",
2917 SET_COMMAND_MARKER
2918 )));
2919 assert!(hashline::output_has_edit_commands(&format!(
2920 "{}0:ab\nnew",
2921 INSERT_COMMAND_MARKER
2922 )));
2923 assert!(hashline::output_has_edit_commands(&format!(
2924 "some text\n{}1:cd\nstuff",
2925 SET_COMMAND_MARKER
2926 )));
2927 assert!(!hashline::output_has_edit_commands("just plain text"));
2928 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2929 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2930 }
2931
2932 // ---- hashline::patch_to_edit_commands round-trip tests ----
2933
        /// Round-trip test: convert each unified diff `patch` into edit
        /// commands with `hashline::patch_to_edit_commands`, apply them to
        /// `old` with `hashline::apply_edit_commands`, and expect
        /// `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label reported when the case fails.
                name: &'static str,
                // Text the patch is applied to.
                old: &'static str,
                // Unified diff to convert into edit commands.
                patch: &'static str,
                // Text expected after applying the generated commands; may
                // contain the cursor marker.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    // Despite the name, this patch only inserts a line.
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    // The patch contains blank context lines, which the
                    // converter treats as context.
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    // The only case whose expected text contains the cursor
                    // marker, so the only one that passes a cursor offset.
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    // Hunk header line numbers do not match `old`.
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Even the empty patch must yield a recognizable command.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Applying the generated commands must reproduce the new text.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
3232 }
3233}
3234
3235pub mod seed_coder {
3236 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3237 //!
3238 //! Seed-Coder uses different FIM tokens and order than Qwen:
3239 //! - SPM order: suffix comes FIRST, then prefix, then middle
3240 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3241 //! - File markers: StarCoder-style `<filename>path` (single token + path)
3242 //!
3243 //! All context (related files, edit history) goes in the PREFIX section.
3244 //! The suffix contains only code after the editable region.
3245 //!
3246 //! Example prompt:
3247 //!
3248 //! <[fim-suffix]>
3249 //! code after editable region
3250 //! <[fim-prefix]><filename>related/file.py
3251 //! related file content
3252 //!
3253 //! <filename>edit_history
3254 //! --- a/some_file.py
3255 //! +++ b/some_file.py
3256 //! -old
3257 //! +new
3258 //!
3259 //! <filename>path/to/target_file.py
3260 //! code before editable region
3261 //! <<<<<<< CURRENT
3262 //! code that
3263 //! needs to<|user_cursor|>
3264 //! be rewritten
3265 //! =======
3266 //! <[fim-middle]>
3267 //!
3268 //! Expected output (model generates):
3269 //!
3270 //! updated
3271 //! code with
3272 //! changes applied
3273 //! >>>>>>> UPDATED
3274
3275 use super::*;
3276
    /// Token that opens the suffix section; in SPM order it starts the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, target file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written immediately before a file path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written before the editable region in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written after the editable region in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output text used when a patch contains no changes.
    pub const NO_EDITS: &str = "NO_EDITS\n";
3287
3288 pub fn special_tokens() -> &'static [&'static str] {
3289 &[
3290 FIM_SUFFIX,
3291 FIM_PREFIX,
3292 FIM_MIDDLE,
3293 FILE_MARKER,
3294 START_MARKER,
3295 SEPARATOR,
3296 END_MARKER,
3297 CURSOR_MARKER,
3298 ]
3299 }
3300
3301 pub fn write_cursor_excerpt_section(
3302 prompt: &mut String,
3303 path: &Path,
3304 context: &str,
3305 editable_range: &Range<usize>,
3306 cursor_offset: usize,
3307 ) {
3308 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3309 prompt.push_str(§ion);
3310 }
3311
3312 pub fn format_prompt_with_budget(
3313 path: &Path,
3314 context: &str,
3315 editable_range: &Range<usize>,
3316 cursor_offset: usize,
3317 events: &[Arc<Event>],
3318 related_files: &[RelatedFile],
3319 max_tokens: usize,
3320 ) -> String {
3321 let cursor_prefix_section =
3322 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3323 assemble_fim_prompt(
3324 context,
3325 editable_range,
3326 &cursor_prefix_section,
3327 events,
3328 related_files,
3329 max_tokens,
3330 )
3331 }
3332
3333 pub fn assemble_fim_prompt(
3334 context: &str,
3335 editable_range: &Range<usize>,
3336 cursor_prefix_section: &str,
3337 events: &[Arc<Event>],
3338 related_files: &[RelatedFile],
3339 max_tokens: usize,
3340 ) -> String {
3341 let suffix_section = build_suffix_section(context, editable_range);
3342
3343 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3344 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3345 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3346
3347 let edit_history_section = super::format_edit_history_within_budget(
3348 events,
3349 FILE_MARKER,
3350 "edit_history",
3351 budget_after_cursor,
3352 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3353 );
3354 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3355 let budget_after_edit_history =
3356 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3357
3358 let related_files_section = super::format_related_files_within_budget(
3359 related_files,
3360 FILE_MARKER,
3361 "",
3362 budget_after_edit_history,
3363 );
3364
3365 let mut prompt = String::new();
3366 prompt.push_str(&suffix_section);
3367 prompt.push_str(FIM_PREFIX);
3368 prompt.push_str(&related_files_section);
3369 if !related_files_section.is_empty() {
3370 prompt.push('\n');
3371 }
3372 prompt.push_str(&edit_history_section);
3373 if !edit_history_section.is_empty() {
3374 prompt.push('\n');
3375 }
3376 prompt.push_str(cursor_prefix_section);
3377 prompt.push_str(FIM_MIDDLE);
3378
3379 prompt
3380 }
3381
3382 pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3383 let mut section = String::new();
3384 section.push_str(FIM_SUFFIX);
3385 section.push_str(&context[editable_range.end..]);
3386 if !section.ends_with('\n') {
3387 section.push('\n');
3388 }
3389 section
3390 }
3391
3392 fn build_cursor_prefix_section(
3393 path: &Path,
3394 context: &str,
3395 editable_range: &Range<usize>,
3396 cursor_offset: usize,
3397 ) -> String {
3398 let mut section = String::new();
3399 let path_str = path.to_string_lossy();
3400 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3401
3402 section.push_str(&context[..editable_range.start]);
3403 section.push_str(START_MARKER);
3404 section.push_str(&context[editable_range.start..cursor_offset]);
3405 section.push_str(CURSOR_MARKER);
3406 section.push_str(&context[cursor_offset..editable_range.end]);
3407 if !section.ends_with('\n') {
3408 section.push('\n');
3409 }
3410 section.push_str(SEPARATOR);
3411 section
3412 }
3413
3414 /// Format patch as containing no changes if it's empty; otherwise return None.
3415 pub(crate) fn no_edits(patch: &str) -> Option<String> {
3416 // Count lines in the patch
3417 let empty_patch = patch.lines().count() <= 3;
3418 if empty_patch {
3419 Some(format!("{NO_EDITS}{END_MARKER}"))
3420 } else {
3421 None
3422 }
3423 }
3424}
3425
3426pub mod v0304_variable_edit {
3427 //! A prompt format with no fixed editable region. The entire context is shown
3428 //! to the model, and it chooses which text to replace by outputting surrounding
3429 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3430 //! text.
3431 //!
3432 //! Example prompt:
3433 //!
3434 //! <|file_sep|>path/to/file.py
3435 //! zero
3436 //! one
3437 //! two
3438 //! three<|user_cursor|>
3439 //! four
3440 //! five
3441 //! <|fim_prefix|>
    //!
3443 //! Expected output (model generates):
3444 //!
3445 //! two
3446 //! <|fim_middle|>
3447 //! THREE
3448 //! <|fim_suffix|>
3449 //! four
3450 //!
3451 //! The output means: find "two\n...\nfour" in the context, and replace
3452 //! everything between "two\n" and "four" with "THREE\n".
3453
3454 use super::*;
3455
3456 pub fn special_tokens() -> &'static [&'static str] {
3457 &[
3458 "<|fim_prefix|>",
3459 "<|fim_suffix|>",
3460 "<|fim_middle|>",
3461 "<|file_sep|>",
3462 CURSOR_MARKER,
3463 ]
3464 }
3465
3466 pub fn write_cursor_excerpt_section(
3467 prompt: &mut String,
3468 path: &Path,
3469 context: &str,
3470 cursor_offset: usize,
3471 ) {
3472 let path_str = path.to_string_lossy();
3473 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3474
3475 prompt.push_str(&context[..cursor_offset]);
3476 prompt.push_str(CURSOR_MARKER);
3477 prompt.push_str(&context[cursor_offset..]);
3478 if !prompt.ends_with('\n') {
3479 prompt.push('\n');
3480 }
3481 prompt.push_str("<|fim_prefix|>\n")
3482 }
3483
3484 /// Apply a variable-edit model output to the original context text.
3485 ///
3486 /// The model output has the form:
3487 ///
3488 /// - prefix context lines
3489 /// - `<|fim_middle|>`
3490 /// - new text
3491 /// - `<|fim_suffix|>`
3492 /// - suffix context lines
3493 ///
3494 /// We locate the prefix/suffix context lines in the original text and replace
3495 /// everything between them with the new text.
3496 pub fn apply_variable_edit(
3497 context: &str,
3498 model_output: &str,
3499 ) -> Result<(Range<usize>, String)> {
3500 let (prefix_context, rest) = model_output
3501 .split_once("<|fim_middle|>\n")
3502 .or_else(|| model_output.split_once("<|fim_middle|>"))
3503 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3504
3505 let (new_text, suffix_context) = rest
3506 .split_once("<|fim_suffix|>\n")
3507 .or_else(|| rest.split_once("<|fim_suffix|>"))
3508 .unwrap_or((rest, ""));
3509
3510 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3511 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3512 } else {
3513 suffix_context
3514 };
3515
3516 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3517 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3518 + prefix_context.len();
3519 let suffix_offset = if suffix_context.is_empty() {
3520 context.len()
3521 } else {
3522 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3523 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3524 + prefix_offset
3525 };
3526
3527 let edit_range = prefix_offset..suffix_offset;
3528 return Ok((edit_range, new_text.to_string()));
3529 }
3530
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that begins at the start of a line (offset 0 or directly after a `\n`).
///
/// An empty `needle` matches at offset 0. Returns `None` when no occurrence
/// starts at a line boundary.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Probe every line start directly. Scanning with `match_indices` would
    // only report non-overlapping matches, so a line-start match overlapping
    // an earlier mid-line match would be missed.
    std::iter::once(0)
        .chain(
            haystack
                .match_indices('\n')
                .map(|(newline_pos, _)| newline_pos + 1),
        )
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
3541
3542 /// Convert a unified diff patch into the variable-edit output format.
3543 ///
3544 /// Parses `patch` as a unified diff against `old_text` and produces model
3545 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3546 /// delimiters. The diff is resolved by content matching rather than line
3547 /// numbers.
3548 pub fn patch_to_variable_edit_output(
3549 old_text: &str,
3550 patch: &str,
3551 cursor_offset: Option<usize>,
3552 ) -> Result<String> {
3553 // Parse the unified diff into hunks. Each hunk has an `old_context`
3554 // string (context + deleted lines interleaved in order) and a list of
3555 // edits expressed as byte ranges within that context plus replacement
3556 // text.
3557 let hunks = parse_hunks(patch);
3558 if hunks.is_empty() {
3559 return Ok(String::new());
3560 }
3561
3562 // Apply each hunk by finding its old_context in the text and
3563 // performing the edits. We search forward from where the previous
3564 // hunk ended so that hunks are applied in order.
3565 let mut new_text = old_text.to_string();
3566 let mut search_from: usize = 0;
3567 let mut first_hunk_pos: Option<usize> = None;
3568
3569 for hunk in &hunks {
3570 let context_pos = new_text[search_from..]
3571 .find(&hunk.old_context)
3572 .map(|pos| pos + search_from)
3573 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3574
3575 if first_hunk_pos.is_none() {
3576 first_hunk_pos = Some(context_pos);
3577 }
3578
3579 // Apply edits in reverse order so byte offsets remain valid.
3580 for edit in hunk.edits.iter().rev() {
3581 let abs_start = context_pos + edit.range.start;
3582 let abs_end = context_pos + edit.range.end;
3583 new_text.replace_range(abs_start..abs_end, &edit.text);
3584 }
3585
3586 // Advance past this hunk's region in the (now modified) text.
3587 let new_region_len: usize =
3588 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3589 len + edit.text.len() - (edit.range.end - edit.range.start)
3590 });
3591 search_from = context_pos + new_region_len;
3592 }
3593
3594 // Now we have old_text and new_text. Find the changed line range by
3595 // comparing them.
3596 let old_lines: Vec<&str> = old_text.lines().collect();
3597 let new_lines: Vec<&str> = new_text.lines().collect();
3598
3599 // Find first differing line.
3600 let first_changed_row = old_lines
3601 .iter()
3602 .zip(new_lines.iter())
3603 .position(|(a, b)| a != b)
3604 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3605
3606 // Find last differing line (from the end).
3607 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3608 let common_suffix = old_lines
3609 .iter()
3610 .rev()
3611 .zip(new_lines.iter().rev())
3612 .take(max_suffix)
3613 .take_while(|(a, b)| a == b)
3614 .count();
3615
3616 let old_end = old_lines.len() - common_suffix;
3617 let new_end = new_lines.len() - common_suffix;
3618
3619 if first_changed_row == old_end && first_changed_row == new_end {
3620 return Ok(String::new());
3621 }
3622
3623 // Build the replacement text from new_lines[first_diff..new_end].
3624 let mut merged_new_text = String::new();
3625 for line in &new_lines[first_changed_row..new_end] {
3626 merged_new_text.push_str(line);
3627 merged_new_text.push('\n');
3628 }
3629
3630 // cursor_offset is relative to the first hunk's new content in
3631 // new_text. Translate it to an offset within merged_new_text, which
3632 // only contains lines first_diff..new_end of new_text.
3633 if let Some(hunk_offset) = cursor_offset {
3634 let hunk_start = first_hunk_pos.unwrap_or(0);
3635 let absolute_pos = hunk_start + hunk_offset;
3636
3637 // Byte offset where first_diff starts in new_text.
3638 let merged_start: usize = new_lines[..first_changed_row]
3639 .iter()
3640 .map(|line| line.len() + 1)
3641 .sum();
3642
3643 if absolute_pos >= merged_start {
3644 let relative_offset = absolute_pos - merged_start;
3645 if relative_offset <= merged_new_text.len() {
3646 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3647 }
3648 }
3649 }
3650
3651 // Build output with 2 lines of context above and below.
3652 let context_lines_count = 2;
3653 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3654 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3655
3656 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3657 let pattern = &lines[line_range];
3658 let pattern_len = pattern.len();
3659
3660 let mut count = 0;
3661 for offset in 0..=lines.len() - pattern_len {
3662 if &lines[offset..offset + pattern_len] == pattern {
3663 count += 1;
3664 }
3665 }
3666 count
3667 }
3668
3669 // Expand prefix and suffix until they are unique
3670 while prefix_start > 0 {
3671 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3672 prefix_start -= 1;
3673 } else {
3674 break;
3675 }
3676 }
3677 while suffix_end < old_lines.len() {
3678 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3679 suffix_end += 1;
3680 } else {
3681 break;
3682 }
3683 }
3684
3685 let mut output = String::new();
3686 for line in &old_lines[prefix_start..first_changed_row] {
3687 output.push_str(line);
3688 output.push('\n');
3689 }
3690 output.push_str("<|fim_middle|>\n");
3691 output.push_str(&merged_new_text);
3692 output.push_str("<|fim_suffix|>\n");
3693 for line in &old_lines[old_end..suffix_end] {
3694 output.push_str(line);
3695 output.push('\n');
3696 }
3697
3698 Ok(output)
3699 }
3700
/// One hunk of a unified diff, resolved by content rather than line numbers.
struct ParsedHunk {
    /// The hunk's context and deleted lines in patch order, each terminated
    /// by `\n`. This is the text expected to be found in the original.
    old_context: String,
    /// Edits to apply, as byte ranges within `old_context`, in ascending
    /// order.
    edits: Vec<ParsedEdit>,
}

/// A single replacement inside a [`ParsedHunk`].
struct ParsedEdit {
    /// Byte range within the hunk's `old_context` that is replaced; empty for
    /// a pure insertion.
    range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    text: String,
}

/// Parse a unified diff into content-based hunks. Each hunk contains an
/// `old_context` string (context lines + deleted lines, which together
/// form the text that should be found in the original) and a list of edits
/// expressed as byte ranges within that context.
///
/// Consecutive `-` / `+` lines are merged into a single edit. Lines before
/// the first `@@` header are ignored, and the line numbers in `@@` headers
/// are not interpreted. Any line starting with `---` or `+++` is skipped,
/// including inside a hunk body. The `\ No newline at end of file` marker is
/// skipped as well. A context line without its leading space is still
/// accepted as context.
fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
    let mut hunks = Vec::new();
    let mut current: Option<ParsedHunk> = None;

    for line in patch.lines() {
        if line.starts_with("@@") {
            // A new hunk header closes the hunk in progress, if any.
            if let Some(hunk) = current.take() {
                if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
                    hunks.push(hunk);
                }
            }
            current = Some(ParsedHunk {
                old_context: String::new(),
                edits: Vec::new(),
            });
        } else if line.starts_with("---") || line.starts_with("+++") {
            continue;
        } else if line.starts_with("\\ No newline") {
            // Diff metadata, not file content. Treating it as a context line
            // would add text to `old_context` that the original never
            // contains.
            continue;
        } else if let Some(hunk) = &mut current {
            if let Some(added) = line.strip_prefix('+') {
                let pos = hunk.old_context.len();
                // Extend the previous edit when this addition directly
                // follows it.
                if let Some(last_edit) = hunk.edits.last_mut() {
                    if last_edit.range.end == pos {
                        writeln!(&mut last_edit.text, "{added}").ok();
                        continue;
                    }
                }
                hunk.edits.push(ParsedEdit {
                    range: pos..pos,
                    text: format!("{added}\n"),
                });
            } else if let Some(removed) = line.strip_prefix('-') {
                let start = hunk.old_context.len();
                writeln!(&mut hunk.old_context, "{removed}").ok();
                let end = hunk.old_context.len();
                // Extend the previous edit when this deletion directly
                // follows it.
                if let Some(last_edit) = hunk.edits.last_mut() {
                    if last_edit.range.end == start {
                        last_edit.range.end = end;
                        continue;
                    }
                }
                hunk.edits.push(ParsedEdit {
                    range: start..end,
                    text: String::new(),
                });
            } else {
                let ctx = line.strip_prefix(' ').unwrap_or(line);
                writeln!(&mut hunk.old_context, "{ctx}").ok();
            }
        }
    }

    if let Some(hunk) = current {
        if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
            hunks.push(hunk);
        }
    }

    hunks
}
3774
3775 #[cfg(test)]
3776 mod tests {
3777 use super::*;
3778 use indoc::indoc;
3779
/// Table-driven test for `apply_variable_edit`: each case applies a model
/// output to `original` and compares the edited text with `expected`.
#[test]
fn test_apply_variable_edit() {
    // One scenario: the text before the edit, the model output to apply and
    // the text expected afterwards. `name` only labels assertion failures.
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            // The cursor marker in the new text is kept as ordinary text.
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for case in cases {
        // Apply the returned (range, replacement) pair to a copy of the
        // original and compare the whole text.
        let (edit_range, replacement) =
            apply_variable_edit(case.original, case.model_output).unwrap();
        let mut edited = case.original.to_string();
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, case.expected, "{}", case.name);
    }
}
3996
/// Table-driven test for `patch_to_variable_edit_output`.
///
/// For every case the patch is converted to the variable-edit format and
/// compared with `expected_variable_edit`. Both the produced output and the
/// expected output are then applied to `old` with `apply_variable_edit` and
/// compared with `expected_after_apply`.
#[test]
fn test_patch_to_variable_edit() {
    // One scenario. `name` labels assertion failures, so it must be unique.
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        cursor_offset: Option<usize>,
        expected_variable_edit: &'static str,
        expected_after_apply: &'static str,
    }

    // In the patches below, context lines carry the unified-diff leading
    // space, so they sit one column to the right of the `-` / `+` lines.
    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                 b
                +X
                 c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                 b
                -c
                 d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            // The hunk starts at "two\n" (4 bytes), so offset 4 is the
            // start of the replaced line.
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                 two
                -three
                +THREE
                 four
            "},
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                 two
                 a
                 b
                -c
                +C
                 d
                 three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            // Repeated nested blocks: the indentation is part of every line,
            // so the fixtures must keep it for the hunk to be found.
            name: "expands_context_for_repeated_nested_blocks",
            old: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        two();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                     {
                -        two();
                +        TWO();
                     }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                        one();
                    }
                }
                {
                    {
                <|fim_middle|>
                        TWO();
                <|fim_suffix|>
                    }
                }
                {
                    {
                        three();
            "},
            expected_after_apply: indoc! {"
                {
                    {
                        one();
                    }
                }
                {
                    {
                        TWO();
                    }
                }
                {
                    {
                        three();
                    }
                }
                {
                    {
                        four();
                    }
                }
            "},
        },
    ];

    for case in cases {
        // 1. The patch converts to the expected variable-edit output.
        let output =
            patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
                .unwrap_or_else(|error| {
                    panic!("failed converting patch for {}: {error}", case.name)
                });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // 2. Applying the produced output yields the expected text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // 3. The hand-written expected output applies to the same text,
        //    which guards against mistakes in the fixture itself.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
4413
/// Checks the exact text `write_cursor_excerpt_section` appends: the file
/// header, the context with the cursor marker inserted at the given byte
/// offset, and the closing `<|fim_prefix|>` line.
#[test]
fn test_write_cursor_excerpt_section() {
    let path = Path::new("test.rs");
    // "fn main() {\n" is 12 bytes and the indentation is 4 bytes, so byte 17
    // is directly after the `h` of `hello`.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;
    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
4426 }
4427}
4428
4429/// The zeta1 prompt format
4430pub mod zeta1 {
4431 use super::*;
4432 use std::fmt::Write;
4433
/// Marker the zeta1 format uses for the cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Marker written at the top of the excerpt when it starts at the beginning
/// of the file.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Marker that opens the editable region of the excerpt.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Marker that closes the editable region of the excerpt.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

// Opening of the prompt: the instruction text followed by the heading under
// which the user's edits are listed.
const INSTRUCTION_HEADER: &str = concat!(
    "### Instruction:\n",
    "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
    "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
    "into account the cursor location.\n\n",
    "### User Edits:\n\n"
);
// Heading that separates the edits from the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
// Heading that ends the prompt.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4448
4449 /// Formats a complete zeta1 prompt from the input events and excerpt.
4450 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4451 let mut prompt = String::with_capacity(
4452 INSTRUCTION_HEADER.len()
4453 + input_events.len()
4454 + EXCERPT_HEADER.len()
4455 + input_excerpt.len()
4456 + RESPONSE_HEADER.len(),
4457 );
4458 prompt.push_str(INSTRUCTION_HEADER);
4459 prompt.push_str(input_events);
4460 prompt.push_str(EXCERPT_HEADER);
4461 prompt.push_str(input_excerpt);
4462 prompt.push_str(RESPONSE_HEADER);
4463 prompt
4464 }
4465
4466 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4467 /// editable and context byte-offset ranges within `cursor_excerpt`.
4468 pub fn format_zeta1_from_input(
4469 input: &ZetaPromptInput,
4470 editable_range: Range<usize>,
4471 context_range: Range<usize>,
4472 ) -> String {
4473 let events = format_zeta1_events(&input.events);
4474 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4475 format_zeta1_prompt(&events, &excerpt)
4476 }
4477
4478 /// Formats events in zeta1 style (oldest first).
4479 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4480 let mut result = String::new();
4481 for event in
4482 events
4483 .iter()
4484 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4485 &ZetaFormat::V0114180EditableRegion,
4486 )))
4487 {
4488 let event_string = format_zeta1_event(event);
4489 if event_string.is_empty() {
4490 continue;
4491 }
4492 if !result.is_empty() {
4493 result.push_str("\n\n");
4494 }
4495 result.push_str(&event_string);
4496 }
4497 result
4498 }
4499
4500 fn format_zeta1_event(event: &Event) -> String {
4501 match event {
4502 Event::BufferChange {
4503 path,
4504 old_path,
4505 diff,
4506 ..
4507 } => {
4508 let mut prompt = String::new();
4509 if old_path != path {
4510 writeln!(
4511 prompt,
4512 "User renamed {} to {}\n",
4513 old_path.display(),
4514 path.display()
4515 )
4516 .ok();
4517 }
4518 if !diff.is_empty() {
4519 write!(
4520 prompt,
4521 "User edited {}:\n```diff\n{}\n```",
4522 path.display(),
4523 diff
4524 )
4525 .ok();
4526 }
4527 prompt
4528 }
4529 }
4530 }
4531
4532 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4533 /// within `cursor_excerpt`.
4534 fn format_zeta1_excerpt(
4535 input: &ZetaPromptInput,
4536 editable_range: Range<usize>,
4537 context_range: Range<usize>,
4538 ) -> String {
4539 let path_str = input.cursor_path.to_string_lossy();
4540 let excerpt = &*input.cursor_excerpt;
4541 let cursor_offset = input.cursor_offset_in_excerpt;
4542
4543 let mut prompt = String::new();
4544 writeln!(&mut prompt, "```{path_str}").ok();
4545
4546 let starts_at_file_beginning =
4547 input.excerpt_start_row == Some(0) && context_range.start == 0;
4548 if starts_at_file_beginning {
4549 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4550 }
4551
4552 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4553
4554 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4555 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4556 prompt.push_str(CURSOR_MARKER);
4557 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4558 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4559
4560 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4561 write!(prompt, "\n```").ok();
4562
4563 prompt
4564 }
4565
4566 /// Cleans zeta1 model output by extracting content between editable region
4567 /// markers and converting the zeta1 cursor marker to the universal one.
4568 /// Returns `None` if the output doesn't contain the expected markers.
4569 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4570 let content = output.replace(CURSOR_MARKER, "");
4571
4572 let content_start = content
4573 .find(EDITABLE_REGION_START_MARKER)
4574 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4575 .map(|pos| {
4576 if content.as_bytes().get(pos) == Some(&b'\n') {
4577 pos + 1
4578 } else {
4579 pos
4580 }
4581 })
4582 .unwrap_or(0);
4583
4584 let content_end = content
4585 .find(EDITABLE_REGION_END_MARKER)
4586 .map(|pos| {
4587 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4588 pos - 1
4589 } else {
4590 pos
4591 }
4592 })
4593 .unwrap_or(content.len());
4594
4595 if content_start > content_end {
4596 return Some(String::new());
4597 }
4598
4599 let extracted = &content[content_start..content_end];
4600
4601 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4602 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4603 let text_before_cursor = text_before_cursor
4604 .find(EDITABLE_REGION_START_MARKER)
4605 .map(|pos| {
4606 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4607 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4608 after_marker + 1
4609 } else {
4610 after_marker
4611 }
4612 })
4613 .unwrap_or(0);
4614 let offset_in_extracted = zeta1_cursor_pos
4615 .saturating_sub(text_before_cursor)
4616 .min(extracted.len());
4617 offset_in_extracted
4618 });
4619
4620 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4621 if let Some(offset) = cursor_offset {
4622 result.push_str(&extracted[..offset]);
4623 result.push_str(super::CURSOR_MARKER);
4624 result.push_str(&extracted[offset..]);
4625 } else {
4626 result.push_str(extracted);
4627 }
4628
4629 Some(result)
4630 }
4631}
4632
4633#[cfg(test)]
4634mod tests {
4635 use super::*;
4636 use indoc::indoc;
4637
4638 fn make_input(
4639 cursor_excerpt: &str,
4640 editable_range: Range<usize>,
4641 cursor_offset: usize,
4642 events: Vec<Event>,
4643 related_files: Vec<RelatedFile>,
4644 ) -> ZetaPromptInput {
4645 let context_range = 0..cursor_excerpt.len();
4646 ZetaPromptInput {
4647 cursor_path: Path::new("test.rs").into(),
4648 cursor_excerpt: cursor_excerpt.into(),
4649 cursor_offset_in_excerpt: cursor_offset,
4650 excerpt_start_row: None,
4651 events: events.into_iter().map(Arc::new).collect(),
4652 related_files: Some(related_files),
4653 active_buffer_diagnostics: vec![],
4654 excerpt_ranges: ExcerptRanges {
4655 editable_150: editable_range.clone(),
4656 editable_180: editable_range.clone(),
4657 editable_350: editable_range,
4658 editable_150_context_350: context_range.clone(),
4659 editable_180_context_350: context_range.clone(),
4660 editable_350_context_150: context_range,
4661 ..Default::default()
4662 },
4663 syntax_ranges: None,
4664 in_open_source_repo: false,
4665 can_collect_data: false,
4666 repo_url: None,
4667 }
4668 }
4669
4670 fn make_input_with_context_range(
4671 excerpt: &str,
4672 editable_range: Range<usize>,
4673 context_range: Range<usize>,
4674 cursor_offset: usize,
4675 ) -> ZetaPromptInput {
4676 ZetaPromptInput {
4677 cursor_path: Path::new("test.rs").into(),
4678 cursor_excerpt: excerpt.into(),
4679 cursor_offset_in_excerpt: cursor_offset,
4680 excerpt_start_row: None,
4681 events: vec![],
4682 related_files: Some(vec![]),
4683 active_buffer_diagnostics: vec![],
4684 excerpt_ranges: ExcerptRanges {
4685 editable_150: editable_range.clone(),
4686 editable_180: editable_range.clone(),
4687 editable_350: editable_range,
4688 editable_150_context_350: context_range.clone(),
4689 editable_180_context_350: context_range.clone(),
4690 editable_350_context_150: context_range,
4691 ..Default::default()
4692 },
4693 syntax_ranges: None,
4694 in_open_source_repo: false,
4695 can_collect_data: false,
4696 repo_url: None,
4697 }
4698 }
4699
4700 fn make_event(path: &str, diff: &str) -> Event {
4701 Event::BufferChange {
4702 path: Path::new(path).into(),
4703 old_path: Path::new(path).into(),
4704 diff: diff.to_string(),
4705 predicted: false,
4706 in_open_source_repo: false,
4707 }
4708 }
4709
4710 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4711 RelatedFile {
4712 path: Path::new(path).into(),
4713 max_row: content.lines().count() as u32,
4714 excerpts: vec![RelatedExcerpt {
4715 row_range: 0..content.lines().count() as u32,
4716 text: content.into(),
4717 order: 0,
4718 }],
4719 in_open_source_repo: false,
4720 }
4721 }
4722
4723 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4724 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4725 }
4726
/// Scales `requested_tokens` up by a factor of `1 / 0.9`, rounding up.
fn budget_with_margin(requested_tokens: usize) -> usize {
    let scaled = requested_tokens as f64 / 0.9;
    scaled.ceil() as usize
}
4730
/// With a generous budget the prompt contains every section: the related
/// file, the edit history and the cursor file.
#[test]
fn test_no_truncation_when_within_budget() {
    // Editable range 7..15 is the word "editable"; the cursor sits at byte
    // 10, i.e. after "edi".
    let input = make_input(
        "prefix\neditable\nsuffix",
        7..15,
        10,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "fn helper() {}\n")],
    );

    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
            <|file_sep|>related.rs
            fn helper() {}
            <|file_sep|>edit history
            --- a/a.rs
            +++ b/a.rs
            -old
            +new
            <|file_sep|>test.rs
            <|fim_prefix|>
            prefix
            <|fim_middle|>current
            edi<|user_cursor|>table
            <|fim_suffix|>

            suffix
            <|fim_middle|>updated
        "#}
        .to_string()
    );
}
4764
/// Compares the prompt under a generous budget with the prompt under a
/// tight one for an input that has two related files and one edit event.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![
            make_related_file("r1.rs", "aaaaaaa\n"),
            make_related_file("r2.rs", "bbbbbbb\n"),
        ],
    );

    // Generous budget: related files, edit history and cursor file all fit.
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
            <|file_sep|>r1.rs
            aaaaaaa
            <|file_sep|>r2.rs
            bbbbbbb
            <|file_sep|>edit history
            --- a/a.rs
            +++ b/a.rs
            -x
            +y
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            co<|user_cursor|>de
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );

    // Budget derived from 55 tokens: both related files are gone, while the
    // edit history and the cursor file remain.
    assert_eq!(
        format_with_budget(&input, budget_with_margin(55)),
        Some(
            indoc! {r#"
            <|file_sep|>edit history
            --- a/a.rs
            +++ b/a.rs
            -x
            +y
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            co<|user_cursor|>de
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
            .to_string()
        )
    );
}
4820
/// A related file with several excerpts may be included only partially when
/// the budget is tight.
#[test]
fn test_truncation_includes_partial_excerpts() {
    // One related file with three excerpts of equal order.
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![RelatedFile {
            path: Path::new("big.rs").into(),
            max_row: 30,
            in_open_source_repo: false,
            excerpts: vec![
                RelatedExcerpt {
                    row_range: 0..10,
                    text: "first excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 10..20,
                    text: "second excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 20..30,
                    text: "third excerpt\n".into(),
                    order: 0,
                },
            ],
        }],
    );

    // Generous budget: all three excerpts appear, separated by `...` lines.
    assert_eq!(
        format_with_budget(&input, 10000).unwrap(),
        indoc! {r#"
            <|file_sep|>big.rs
            first excerpt
            ...
            second excerpt
            ...
            third excerpt
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );

    // Budget derived from 50 tokens: only the first excerpt remains,
    // followed by a `...` line.
    assert_eq!(
        format_with_budget(&input, budget_with_margin(50)).unwrap(),
        indoc! {r#"
            <|file_sep|>big.rs
            first excerpt
            ...
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );
}
4887
/// Two files compete for a tight budget: `file_a.rs` carries an order-5
/// excerpt and `file_b.rs` an order-1 excerpt. Only the lower-order excerpt
/// from `file_b.rs` is expected to survive.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Each file holds exactly one excerpt covering rows 0..10.
    let single_excerpt_file = |path: &str, text: &'static str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("file_a.rs", "low priority content\n", 5),
        single_excerpt_file("file_b.rs", "high priority content\n", 1),
    ];
    let input = make_input("x", 0..1, 0, vec![], related_files);

    // Generous budget: both files appear, file_a.rs before file_b.rs.
    let untruncated = format_with_budget(&input, 10000).unwrap();
    assert_eq!(
        untruncated,
        indoc! {r#"
            <|file_sep|>file_a.rs
            low priority content
            <|file_sep|>file_b.rs
            high priority content
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );

    // Tight budget: only file_b (lower order) fits.
    // The cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits;
    // file_a would need another 14 tokens, which does not.
    let truncated = format_with_budget(&input, budget_with_margin(52)).unwrap();
    assert_eq!(
        truncated,
        indoc! {r#"
            <|file_sep|>file_b.rs
            high priority content
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );
}
4958
/// Within a single file, excerpts are dropped by order, not by file.
///
/// `mod.rs` has two order-1 excerpts and one order-3 excerpt. With a tight
/// budget the order-1 excerpts stay and the order-3 excerpt is dropped, even
/// though all three belong to the same file. This also preserves the parent
/// invariant: parent outline items have order ≤ their best child, so they
/// are always included when any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let excerpt = |row_range, text: &'static str, order| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..5, "mod header\n", 1),
            excerpt(5..15, "important fn\n", 1),
            excerpt(15..30, "less important fn\n", 3),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Generous budget: all three excerpts are rendered.
    let untruncated = format_with_budget(&input, 10000).unwrap();
    assert_eq!(
        untruncated,
        indoc! {r#"
            <|file_sep|>mod.rs
            mod header
            ...
            important fn
            ...
            less important fn
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );

    // Tight budget: only the order<=1 excerpts (header + important fn) remain.
    let truncated = format_with_budget(&input, budget_with_margin(55)).unwrap();
    assert_eq!(
        truncated,
        indoc! {r#"
            <|file_sep|>mod.rs
            mod header
            ...
            important fn
            ...
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );
}
5034
/// When the edit history does not fit, the event listed first (`old.rs`) is
/// the one left out; the later event (`new.rs`) is kept.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Generous budget: both events are rendered in list order.
    let untruncated = format_with_budget(&input, 10000).unwrap();
    assert_eq!(
        untruncated,
        indoc! {r#"
            <|file_sep|>edit history
            --- a/old.rs
            +++ b/old.rs
            -1
            --- a/new.rs
            +++ b/new.rs
            -2
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );

    // Budget of 60: only the later event remains.
    let truncated = format_with_budget(&input, 60).unwrap();
    assert_eq!(
        truncated,
        indoc! {r#"
            <|file_sep|>edit history
            --- a/new.rs
            +++ b/new.rs
            -2
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
        .to_string()
    );
}
5082
/// With a budget of 30 the formatter is expected to return `None` for this
/// input rather than emit a prompt.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let prompt = format_with_budget(&input, 30);
    assert!(prompt.is_none());
}
5095
5096 #[track_caller]
5097 fn format_seed_coder(input: &ZetaPromptInput) -> String {
5098 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5099 .expect("seed coder prompt formatting should succeed")
5100 }
5101
5102 #[track_caller]
5103 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5104 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5105 .expect("seed coder prompt formatting should succeed")
5106 }
5107
/// `V0331SeedCoderModelPy` must format identically to `V0211SeedCoder`, and
/// its name must parse back to the same variant.
#[test]
fn test_seed_coder_alias_matches_v0211_seed_coder() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let v0211_prompt =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000);
    let v0331_prompt =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000);
    assert_eq!(v0211_prompt, v0331_prompt);

    let parsed_format = ZetaFormat::parse("V0331SeedCoderModelPy").unwrap();
    assert_eq!(parsed_format, ZetaFormat::V0331SeedCoderModelPy);
}
5127
/// Pins the full seed-coder layout: suffix first, then related files, edit
/// history, and the cursor file with its `CURRENT` marker section.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The expected prompt ends right after `<[fim-middle]>`, with no newline.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
5160
/// Formatting with `V0317SeedMultiRegions` must still produce a prompt when
/// the input carries 900 related files and the budget is 4096.
#[test]
fn test_v0317_formats_prompt_with_many_related_files() {
    let related_files = (0..900)
        .map(|index| {
            make_related_file(
                &format!("related_{index}.rs"),
                "fn helper() {\n let value = 1;\n}\n",
            )
        })
        .collect();

    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        related_files,
    );

    // `expect` already fails the test on `None`, so a separate `is_some`
    // assertion would be redundant.
    let prompt =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096)
            .expect("v0317 should produce a prompt under high related-file count");

    // The cursor file and the cursor marker must be present.
    assert!(prompt.contains("test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));
}
5188
/// `V0327SingleFile` renders the whole cursor excerpt and the edit history
/// but is expected to omit related files entirely.
#[test]
fn test_v0327_formats_single_file_prompt_without_related_files() {
    // Twenty lines, "line01\n" through "line20\n".
    let excerpt = (1..=20)
        .map(|line| format!("line{line:02}\n"))
        .collect::<String>();
    let cursor_offset = excerpt.find("line10").expect("cursor line exists");

    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input(
        &excerpt,
        0..excerpt.len(),
        cursor_offset,
        events,
        related_files,
    );

    let prompt = format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
        .expect("v0327 prompt should fit");

    // First and last excerpt lines, edit history, cursor file and marker are present.
    assert!(prompt.contains("line01"));
    assert!(prompt.contains("line20"));
    assert!(prompt.contains("<filename>edit_history"));
    assert!(prompt.contains("<filename>test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));

    // Neither the related file's name nor its content appears.
    assert!(!prompt.contains("related.rs"));
    assert!(!prompt.contains("fn helper() {}"));
}
5234
/// For `V0327SingleFile`, `resolve_cursor_region` is expected to return the
/// entire excerpt as context while keeping the editable range a strict
/// sub-range around the cursor.
#[test]
fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
    // Eighty short lines, "l00\n" through "l79\n".
    let mut excerpt = String::new();
    for index in 0..80 {
        excerpt.push_str(&format!("l{index:02}\n"));
    }
    let cursor_offset = excerpt.find("l40").expect("cursor line exists");
    let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);

    let (context, editable_range, context_range, adjusted_cursor) =
        resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);

    // The context covers the whole excerpt, so the cursor offset is unchanged.
    assert_eq!(context, excerpt);
    assert_eq!(context_range, 0..excerpt.len());
    assert_eq!(adjusted_cursor, cursor_offset);

    // The editable range strictly surrounds the cursor and ends before the excerpt does.
    assert!(editable_range.start < adjusted_cursor);
    assert!(editable_range.end > adjusted_cursor);
    assert!(editable_range.end < excerpt.len());
}
5253
/// Without events or related files, the seed-coder prompt contains only the
/// suffix and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
5271
/// Seed-coder truncation at three budgets: everything fits, nothing fits
/// (`None`), and only the cursor section fits.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Generous budget: related file, edit history and cursor section.
    let untruncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), untruncated);

    // Budget of 24: no prompt is produced at all.
    let too_small = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24);
    assert_eq!(too_small, None);

    // Budget of 40: only the cursor section remains.
    let cursor_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 40), cursor_only);
}
5320
/// Seed-coder formatting with two related files of different order
/// (`low_prio.rs` at order 10, `high_prio.rs` at order 1).
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // Each file holds exactly one excerpt covering rows 0..5.
    let single_excerpt_file = |path: &str, text: &'static str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("low_prio.rs", "low prio\n", 10),
        single_excerpt_file("high_prio.rs", "high prio\n", 1),
    ];
    let input = make_input("code", 0..4, 2, vec![], related_files);

    // Generous budget: both files are included. The expected output lists them
    // in the order they were supplied (low_prio.rs before high_prio.rs).
    let untruncated = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), untruncated);

    // Tight budget under the generic heuristic: all related-file context is
    // dropped, but the minimal cursor section still fits.
    let truncated = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44);
    assert_eq!(
        truncated,
        Some(
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
            .to_string()
        )
    );
}
5385
/// Formats a zeta1 prompt for an excerpt that starts at row 0, with one edit
/// event, an editable region covering the first two lines of `foo`, and the
/// whole excerpt as context.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The body line of `foo` is indented by four spaces. The byte offsets
    // below depend on that: "fn before() {}\n" is 15 bytes, "fn foo() {\n"
    // ends at 26, the cursor (30) sits right before `let`, and the editable
    // region ends at 41, just after "    let x = 1;\n".
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable_range = 15..41;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5449
/// Formats a zeta1 prompt for an excerpt that starts at row 10; the expected
/// output has no `<|start_of_file|>` marker.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The body line is indented by four spaces, which makes the excerpt
    // exactly 28 bytes long (11 + 15 + 2) and puts the cursor (15) right
    // before `let`. The ranges below cover the whole excerpt.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let whole_excerpt = 0..28;

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: whole_excerpt.clone(),
            editable_180: whole_excerpt.clone(),
            editable_350: whole_excerpt.clone(),
            editable_150_context_350: whole_excerpt.clone(),
            editable_180_context_350: whole_excerpt.clone(),
            editable_350_context_150: whole_excerpt.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt =
        zeta1::format_zeta1_from_input(&input, whole_excerpt.clone(), whole_excerpt);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5505
/// Formats a zeta1 prompt where the editable region is a strict sub-range of
/// the context: the `// prefix` and `// suffix` lines fall outside the
/// editable markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The body line is indented by four spaces. With that, "// prefix\n" is
    // 10 bytes, "fn foo() {\n" ends at 21, the cursor (25) sits right before
    // `let`, and the editable region ends at 37, just after the closing `}`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
5567
/// `format_edit_history_within_budget` honours its final argument as a cap on
/// the number of events: with three events available, a cap of 5 renders all
/// of them, a cap of 2 renders only the last two, and a cap of 0 renders an
/// empty string.
#[test]
fn test_max_event_count() {
    /// Builds an event whose path and diff both embed `index`.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Only the event cap varies between calls; the token budget is unbounded.
    let render_with_cap = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // Cap above the number of events: everything is rendered.
    let edit_history_section = render_with_cap(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of two: the first event is left out.
    let edit_history_section = render_with_cap(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Cap of zero: nothing is rendered, not even the section header.
    let edit_history_section = render_with_cap(0);
    assert_eq!(&edit_history_section, "");
}
5648
/// Output wrapped in editable-region markers is expected to come back as the
/// text between them, without the marker lines.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is nested four spaces deeper than its neighbours;
    // `indoc!` removes only the indentation common to all lines, so those
    // four spaces survive and must match the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
5662
/// The zeta1 cursor marker `<|user_cursor_is_here|>` in the model output is
/// expected to come back as `<|user_cursor|>` in the cleaned text.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is nested four spaces deeper than its neighbours;
    // `indoc!` removes only the indentation common to all lines, so those
    // four spaces survive and must match the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
5679
/// Output without any editable-region markers is expected to come back
/// unchanged, trailing newline included.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw_output = "fn main() {}\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw_output).unwrap();

    assert_eq!(cleaned, "fn main() {}\n");
}
5686
/// An editable region with nothing between its markers is expected to clean
/// to the empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let raw_output = "<|editable_region_start|>\n<|editable_region_end|>\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw_output).unwrap();

    assert_eq!(cleaned, "");
}
5693
5694 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5695 let mut result = excerpt.to_string();
5696 result.replace_range(
5697 parsed_output.range_in_excerpt.clone(),
5698 &parsed_output.new_editable_region,
5699 );
5700 result
5701 }
5702
/// Parsing model output for `V0131GitMergeMarkersPrefix` and applying it to
/// the excerpt replaces only the editable line; text outside the editable
/// range is untouched.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Context runs from "ctx start" up to (not including) "after ctx".
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    // The editable range is exactly the "editable old\n" line.
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();

    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let edited = apply_edit(excerpt, &parsed);
    assert_eq!(
        edited,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
5729
/// When the model echoes the editable region verbatim, applying the parsed
/// output must leave the excerpt unchanged.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    // The editable range covers the "bbb" and "ccc" lines.
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_end = excerpt.find("ddd").unwrap();
    let editable_range = editable_start..editable_end;
    let whole_excerpt = 0..excerpt.len();

    let input =
        make_input_with_context_range(excerpt, editable_range, whole_excerpt, editable_start);

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
5748
/// Model output with and without the trailing `>>>>>>> UPDATED` marker must
/// produce the same edit.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input = make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    assert_eq!(
        apply_edit(excerpt, &with_marker),
        apply_edit(excerpt, &without_marker)
    );
    assert_eq!(apply_edit(excerpt, &with_marker), "new content\n");
}
5762
/// Converting a parsed output to a patch and applying that patch to the
/// excerpt must give the same text as applying the parsed edit directly.
#[test]
fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Context runs from "ctx start" up to (not including) "after ctx".
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    // The editable range is exactly the "editable old\n" line.
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();

    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Compute the direct result first: `parsed` is moved into the patch conversion.
    let expected = apply_edit(excerpt, &parsed);
    let patch = parsed_output_to_patch(&input, parsed).unwrap();
    let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();

    assert_eq!(patched, expected);
}
5789
/// Regression test for https://github.com/zed-industries/zed/issues/52489:
/// a source comment consisting of `=======` must not be reported as a special
/// token for `V0131GitMergeMarkersPrefix`.
#[test]
fn test_special_tokens_not_triggered_by_comment_separator() {
    let excerpt = "fn main() {\n // =======\n println!(\"hello\");\n}\n";
    let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);

    let has_special_tokens =
        prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix);

    assert!(
        !has_special_tokens,
        "comment containing ======= should not trigger special token detection"
    );
}
5800}