1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker written into the prompt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget used by `format_zeta_prompt`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Rough token estimate for a span of `bytes` bytes, assuming three bytes
/// per token (integer division, rounding down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Everything needed to build a prompt for one prediction request.
///
/// All byte offsets and ranges in this struct index into `cursor_excerpt`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's
    /// file header for the cursor excerpt.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor from which the context and editable regions
    /// are sliced.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to row ranges computed from `cursor_excerpt`. When
    /// set, related excerpts of the cursor file that fall inside the cursor
    /// context are filtered out before formatting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered as the prompt's edit history.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from related files to include in the prompt, budget permitting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    // NOTE(review): not read anywhere in the visible part of this file;
    // presumably consumed by the server — confirm.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
58
/// Prompt/output format versions understood by this module.
///
/// The variant name doubles as the format's string name (via
/// `IntoStaticStr`), and `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
}
87
88impl std::fmt::Display for ZetaFormat {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(f, "{}", <&'static str>::from(self))
91 }
92}
93
94impl ZetaFormat {
95 pub fn parse(format_name: &str) -> Result<Self> {
96 let mut results = ZetaFormat::iter().filter(|version| {
97 <&'static str>::from(version)
98 .to_lowercase()
99 .contains(&format_name.to_lowercase())
100 });
101 let Some(result) = results.next() else {
102 anyhow::bail!(
103 "`{format_name}` did not match any of:\n{}",
104 Self::options_as_string()
105 );
106 };
107 if results.next().is_some() {
108 anyhow::bail!(
109 "`{format_name}` matched more than one of:\n{}",
110 Self::options_as_string()
111 );
112 }
113 Ok(result)
114 }
115
116 pub fn options_as_string() -> String {
117 ZetaFormat::iter()
118 .map(|format| format!("- {}\n", <&'static str>::from(format)))
119 .collect::<Vec<_>>()
120 .concat()
121 }
122}
123
/// An edit event that can be rendered into the prompt's edit history.
///
/// Serialized with an `event` tag field identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    BufferChange {
        /// Path written on the `+++ b` line of the rendered diff header.
        path: Arc<Path>,
        /// Path written on the `--- a` line of the rendered diff header.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the header.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// "User accepted prediction" comment line.
        predicted: bool,
        in_open_source_repo: bool,
    },
}
135
136impl Event {
137 pub fn in_open_source_repo(&self) -> bool {
138 match self {
139 Event::BufferChange {
140 in_open_source_repo,
141 ..
142 } => *in_open_source_repo,
143 }
144 }
145}
146
147pub fn write_event(prompt: &mut String, event: &Event) {
148 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
149 for component in path.components() {
150 prompt.push('/');
151 write!(prompt, "{}", component.as_os_str().display()).ok();
152 }
153 }
154 match event {
155 Event::BufferChange {
156 path,
157 old_path,
158 diff,
159 predicted,
160 in_open_source_repo: _,
161 } => {
162 if *predicted {
163 prompt.push_str("// User accepted prediction:\n");
164 }
165 prompt.push_str("--- a");
166 write_path_as_unix_str(prompt, old_path.as_ref());
167 prompt.push_str("\n+++ b");
168 write_path_as_unix_str(prompt, path.as_ref());
169 prompt.push('\n');
170 prompt.push_str(diff);
171 }
172 }
173}
174
/// A diagnostic attached to the active buffer, carried in
/// `ZetaPromptInput::active_buffer_diagnostics`.
// NOTE(review): no visible code in this part of the file reads these
// fields; the meanings below are inferred from names and types — confirm.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    pub severity: Option<i32>,
    pub message: String,
    pub snippet: String,
    // Presumably the buffer rows covered by `snippet`.
    pub snippet_buffer_row_range: Range<u32>,
    // Presumably a byte range within `snippet`.
    pub diagnostic_range_in_snippet: Range<usize>,
}
183
/// A file with excerpts that may be added to the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line.
    pub path: Arc<Path>,
    /// Excerpts whose row range ends before this row are followed by a
    /// `...` line when rendered.
    pub max_row: u32,
    /// Excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
192
/// A span of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim.
    pub text: Arc<str>,
    /// Priority when the token budget is limited: excerpts with a lower
    /// `order` are included first.
    #[serde(default)]
    pub order: usize,
}
200
201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
202 special_tokens_for_format(format)
203 .iter()
204 .any(|token| input.cursor_excerpt.contains(token))
205}
206
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
210
211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
212 match format {
213 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
214 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
215 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
216 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
217 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
218 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
219 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
220 ZetaFormat::v0226Hashline => hashline::special_tokens(),
221 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
222 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
223 ZetaFormat::V0306SeedMultiRegions => {
224 static TOKENS: &[&str] = &[
225 seed_coder::FIM_SUFFIX,
226 seed_coder::FIM_PREFIX,
227 seed_coder::FIM_MIDDLE,
228 seed_coder::FILE_MARKER,
229 seed_coder::START_MARKER,
230 seed_coder::SEPARATOR,
231 seed_coder::END_MARKER,
232 CURSOR_MARKER,
233 multi_region::MARKER_TAG_PREFIX,
234 ];
235 TOKENS
236 }
237 }
238}
239
240/// Returns the (editable_token_limit, context_token_limit) for a given format.
241pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
242 match format {
243 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
244 ZetaFormat::V0114180EditableRegion => (180, 350),
245 ZetaFormat::V0120GitMergeMarkers
246 | ZetaFormat::V0131GitMergeMarkersPrefix
247 | ZetaFormat::V0211Prefill
248 | ZetaFormat::V0211SeedCoder
249 | ZetaFormat::v0226Hashline
250 | ZetaFormat::V0306SeedMultiRegions
251 | ZetaFormat::V0304SeedNoEdits => (350, 150),
252 ZetaFormat::V0304VariableEdit => (1024, 0),
253 }
254}
255
256pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
257 match format {
258 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
259 ZetaFormat::V0112MiddleAtEnd
260 | ZetaFormat::V0113Ordered
261 | ZetaFormat::V0114180EditableRegion
262 | ZetaFormat::V0120GitMergeMarkers
263 | ZetaFormat::V0131GitMergeMarkersPrefix
264 | ZetaFormat::V0211Prefill
265 | ZetaFormat::V0211SeedCoder
266 | ZetaFormat::V0304VariableEdit
267 | ZetaFormat::V0306SeedMultiRegions
268 | ZetaFormat::V0304SeedNoEdits => &[],
269 }
270}
271
272pub fn excerpt_ranges_for_format(
273 format: ZetaFormat,
274 ranges: &ExcerptRanges,
275) -> (Range<usize>, Range<usize>) {
276 match format {
277 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
278 ranges.editable_150.clone(),
279 ranges.editable_150_context_350.clone(),
280 ),
281 ZetaFormat::V0114180EditableRegion => (
282 ranges.editable_180.clone(),
283 ranges.editable_180_context_350.clone(),
284 ),
285 ZetaFormat::V0120GitMergeMarkers
286 | ZetaFormat::V0131GitMergeMarkersPrefix
287 | ZetaFormat::V0211Prefill
288 | ZetaFormat::V0211SeedCoder
289 | ZetaFormat::v0226Hashline
290 | ZetaFormat::V0304SeedNoEdits
291 | ZetaFormat::V0306SeedMultiRegions => (
292 ranges.editable_350.clone(),
293 ranges.editable_350_context_150.clone(),
294 ),
295 ZetaFormat::V0304VariableEdit => {
296 let context = ranges
297 .editable_350_context_1024
298 .clone()
299 .or(ranges.editable_350_context_512.clone())
300 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
301 (context.clone(), context)
302 }
303 }
304}
305
306pub fn write_cursor_excerpt_section_for_format(
307 format: ZetaFormat,
308 prompt: &mut String,
309 path: &Path,
310 context: &str,
311 editable_range: &Range<usize>,
312 cursor_offset: usize,
313) {
314 match format {
315 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
316 prompt,
317 path,
318 context,
319 editable_range,
320 cursor_offset,
321 ),
322 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
323 v0113_ordered::write_cursor_excerpt_section(
324 prompt,
325 path,
326 context,
327 editable_range,
328 cursor_offset,
329 )
330 }
331 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
332 prompt,
333 path,
334 context,
335 editable_range,
336 cursor_offset,
337 ),
338 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
339 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
340 prompt,
341 path,
342 context,
343 editable_range,
344 cursor_offset,
345 )
346 }
347 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
348 seed_coder::write_cursor_excerpt_section(
349 prompt,
350 path,
351 context,
352 editable_range,
353 cursor_offset,
354 )
355 }
356 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
357 prompt,
358 path,
359 context,
360 editable_range,
361 cursor_offset,
362 ),
363 ZetaFormat::V0304VariableEdit => {
364 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
365 }
366 ZetaFormat::V0306SeedMultiRegions => {
367 prompt.push_str(&build_v0306_cursor_prefix(
368 path,
369 context,
370 editable_range,
371 cursor_offset,
372 ));
373 }
374 }
375}
376
377fn build_v0306_cursor_prefix(
378 path: &Path,
379 context: &str,
380 editable_range: &Range<usize>,
381 cursor_offset: usize,
382) -> String {
383 let mut section = String::new();
384 let path_str = path.to_string_lossy();
385 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
386
387 section.push_str(&context[..editable_range.start]);
388 section.push_str(seed_coder::START_MARKER);
389
390 let editable_text = &context[editable_range.clone()];
391 let cursor_in_editable = cursor_offset - editable_range.start;
392 multi_region::write_editable_with_markers(
393 &mut section,
394 editable_text,
395 cursor_in_editable,
396 CURSOR_MARKER,
397 );
398
399 if !section.ends_with('\n') {
400 section.push('\n');
401 }
402 section.push_str(seed_coder::SEPARATOR);
403 section
404}
405
/// Converts a byte `range` within `text` into a half-open row range.
///
/// The start row is the number of newlines before `range.start`. The end
/// row adds the newlines inside the range, plus one more when the range
/// does not end right after a newline (so a partial last line counts).
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.matches('\n').count() as u32;

    let start_row = newline_count(&text[..range.start]);
    let rows_spanned = newline_count(&text[range.clone()]);
    let ends_mid_line = !text[..range.end].ends_with('\n');

    start_row..start_row + rows_spanned + u32::from(ends_mid_line)
}
414
415pub fn format_prompt_with_budget_for_format(
416 input: &ZetaPromptInput,
417 format: ZetaFormat,
418 max_tokens: usize,
419) -> String {
420 let (context, editable_range, context_range, cursor_offset) =
421 resolve_cursor_region(input, format);
422 let path = &*input.cursor_path;
423
424 let empty_files = Vec::new();
425 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
426 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
427 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
428 let row_range = relative_row_range.start + cursor_excerpt_start_row
429 ..relative_row_range.end + cursor_excerpt_start_row;
430 &filter_redundant_excerpts(
431 input_related_files.to_vec(),
432 input.cursor_path.as_ref(),
433 row_range,
434 )
435 } else {
436 input_related_files
437 };
438
439 match format {
440 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
441 seed_coder::format_prompt_with_budget(
442 path,
443 context,
444 &editable_range,
445 cursor_offset,
446 &input.events,
447 related_files,
448 max_tokens,
449 )
450 }
451 ZetaFormat::V0306SeedMultiRegions => {
452 let cursor_prefix =
453 build_v0306_cursor_prefix(path, context, &editable_range, cursor_offset);
454 seed_coder::assemble_fim_prompt(
455 context,
456 &editable_range,
457 &cursor_prefix,
458 &input.events,
459 related_files,
460 max_tokens,
461 )
462 }
463 _ => {
464 let mut cursor_section = String::new();
465 write_cursor_excerpt_section_for_format(
466 format,
467 &mut cursor_section,
468 path,
469 context,
470 &editable_range,
471 cursor_offset,
472 );
473
474 let cursor_tokens = estimate_tokens(cursor_section.len());
475 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
476
477 let edit_history_section = format_edit_history_within_budget(
478 &input.events,
479 "<|file_sep|>",
480 "edit history",
481 budget_after_cursor,
482 max_edit_event_count_for_format(&format),
483 );
484 let edit_history_tokens = estimate_tokens(edit_history_section.len());
485 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
486
487 let related_files_section = format_related_files_within_budget(
488 &related_files,
489 "<|file_sep|>",
490 "",
491 budget_after_edit_history,
492 );
493
494 let mut prompt = String::new();
495 prompt.push_str(&related_files_section);
496 prompt.push_str(&edit_history_section);
497 prompt.push_str(&cursor_section);
498 prompt
499 }
500 }
501}
502
503pub fn filter_redundant_excerpts(
504 mut related_files: Vec<RelatedFile>,
505 cursor_path: &Path,
506 cursor_row_range: Range<u32>,
507) -> Vec<RelatedFile> {
508 for file in &mut related_files {
509 if file.path.as_ref() == cursor_path {
510 file.excerpts.retain(|excerpt| {
511 excerpt.row_range.start < cursor_row_range.start
512 || excerpt.row_range.end > cursor_row_range.end
513 });
514 }
515 }
516 related_files.retain(|file| !file.excerpts.is_empty());
517 related_files
518}
519
520pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
521 match format {
522 ZetaFormat::V0112MiddleAtEnd
523 | ZetaFormat::V0113Ordered
524 | ZetaFormat::V0114180EditableRegion
525 | ZetaFormat::V0120GitMergeMarkers
526 | ZetaFormat::V0131GitMergeMarkersPrefix
527 | ZetaFormat::V0211Prefill
528 | ZetaFormat::V0211SeedCoder
529 | ZetaFormat::v0226Hashline
530 | ZetaFormat::V0304SeedNoEdits
531 | ZetaFormat::V0304VariableEdit
532 | ZetaFormat::V0306SeedMultiRegions => 6,
533 }
534}
535
536pub fn get_prefill_for_format(
537 format: ZetaFormat,
538 context: &str,
539 editable_range: &Range<usize>,
540) -> String {
541 match format {
542 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
543 ZetaFormat::V0112MiddleAtEnd
544 | ZetaFormat::V0113Ordered
545 | ZetaFormat::V0114180EditableRegion
546 | ZetaFormat::V0120GitMergeMarkers
547 | ZetaFormat::V0131GitMergeMarkersPrefix
548 | ZetaFormat::V0211SeedCoder
549 | ZetaFormat::v0226Hashline
550 | ZetaFormat::V0304VariableEdit => String::new(),
551 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
552 }
553}
554
555pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
556 match format {
557 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
558 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
559 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
560 ZetaFormat::V0211SeedCoder
561 | ZetaFormat::V0304SeedNoEdits
562 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
563 ZetaFormat::V0112MiddleAtEnd
564 | ZetaFormat::V0113Ordered
565 | ZetaFormat::V0114180EditableRegion
566 | ZetaFormat::v0226Hashline
567 | ZetaFormat::V0304VariableEdit => None,
568 }
569}
570
571pub fn encode_patch_as_output_for_format(
572 format: ZetaFormat,
573 old_editable_region: &str,
574 patch: &str,
575 cursor_offset: Option<usize>,
576) -> Result<Option<String>> {
577 match format {
578 ZetaFormat::v0226Hashline => {
579 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
580 }
581 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
582 old_editable_region,
583 patch,
584 cursor_offset,
585 )
586 .map(Some),
587 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
588 Ok(seed_coder::no_edits(patch))
589 }
590 _ => Ok(None),
591 }
592}
593
/// Result of parsing a model response: the replacement text and the span
/// of the cursor excerpt it replaces.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
600
601/// Parse model output for the given zeta format
602pub fn parse_zeta2_model_output(
603 output: &str,
604 format: ZetaFormat,
605 prompt_inputs: &ZetaPromptInput,
606) -> Result<ParsedOutput> {
607 let output = match output_end_marker_for_format(format) {
608 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
609 None => output,
610 };
611
612 let (context, editable_range_in_context, context_range, _) =
613 resolve_cursor_region(prompt_inputs, format);
614 let context_start = context_range.start;
615 let old_editable_region = &context[editable_range_in_context.clone()];
616
617 let (range_in_context, output) = match format {
618 ZetaFormat::v0226Hashline => (
619 editable_range_in_context,
620 if hashline::output_has_edit_commands(output) {
621 hashline::apply_edit_commands(old_editable_region, output)
622 } else {
623 output.to_string()
624 },
625 ),
626 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
627 ZetaFormat::V0304SeedNoEdits => (
628 editable_range_in_context,
629 if output.starts_with(seed_coder::NO_EDITS) {
630 old_editable_region.to_string()
631 } else {
632 output.to_string()
633 },
634 ),
635 ZetaFormat::V0306SeedMultiRegions => (
636 editable_range_in_context,
637 if output.starts_with(seed_coder::NO_EDITS) {
638 old_editable_region.to_string()
639 } else {
640 multi_region::apply_marker_span(old_editable_region, output)?
641 },
642 ),
643 _ => (editable_range_in_context, output.to_string()),
644 };
645
646 let range_in_excerpt =
647 range_in_context.start + context_start..range_in_context.end + context_start;
648
649 Ok(ParsedOutput {
650 new_editable_region: output,
651 range_in_excerpt,
652 })
653}
654
/// Singular-named alias of `excerpt_ranges_for_format`; forwards both
/// arguments unchanged and returns its `(editable, context)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
661
662pub fn resolve_cursor_region(
663 input: &ZetaPromptInput,
664 format: ZetaFormat,
665) -> (&str, Range<usize>, Range<usize>, usize) {
666 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
667 let (editable_tokens, context_tokens) = token_limits_for_format(format);
668 compute_editable_and_context_ranges(
669 &input.cursor_excerpt,
670 input.cursor_offset_in_excerpt,
671 syntax_ranges,
672 editable_tokens,
673 context_tokens,
674 )
675 } else {
676 excerpt_range_for_format(format, &input.excerpt_ranges)
677 };
678 let context_start = context_range.start;
679 let context_text = &input.cursor_excerpt[context_range.clone()];
680 let adjusted_editable =
681 (editable_range.start - context_start)..(editable_range.end - context_start);
682 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
683
684 (
685 context_text,
686 adjusted_editable,
687 context_range,
688 adjusted_cursor,
689 )
690}
691
692pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
693 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
694 get_prefill_for_format(format, context, &editable_range)
695}
696
697fn format_edit_history_within_budget(
698 events: &[Arc<Event>],
699 file_marker: &str,
700 edit_history_name: &str,
701 max_tokens: usize,
702 max_edit_event_count: usize,
703) -> String {
704 let header = format!("{}{}\n", file_marker, edit_history_name);
705 let header_tokens = estimate_tokens(header.len());
706 if header_tokens >= max_tokens {
707 return String::new();
708 }
709
710 let mut event_strings: Vec<String> = Vec::new();
711 let mut total_tokens = header_tokens;
712
713 for event in events.iter().rev().take(max_edit_event_count) {
714 let mut event_str = String::new();
715 write_event(&mut event_str, event);
716 let event_tokens = estimate_tokens(event_str.len());
717
718 if total_tokens + event_tokens > max_tokens {
719 break;
720 }
721 total_tokens += event_tokens;
722 event_strings.push(event_str);
723 }
724
725 if event_strings.is_empty() {
726 return String::new();
727 }
728
729 let mut result = header;
730 for event_str in event_strings.iter().rev() {
731 result.push_str(event_str);
732 }
733 result
734}
735
736fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
737 let needs_newline = !excerpt.text.ends_with('\n');
738 let needs_ellipsis = excerpt.row_range.end < file_max_row;
739 let len = excerpt.text.len()
740 + if needs_newline { "\n".len() } else { 0 }
741 + if needs_ellipsis { "...\n".len() } else { 0 };
742 estimate_tokens(len)
743}
744
745pub fn format_related_files_within_budget(
746 related_files: &[RelatedFile],
747 file_prefix: &str,
748 file_suffix: &str,
749 max_tokens: usize,
750) -> String {
751 struct ExcerptCandidate {
752 file_ix: usize,
753 excerpt_ix: usize,
754 order: usize,
755 }
756
757 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
758 .iter()
759 .enumerate()
760 .flat_map(|(file_ix, file)| {
761 file.excerpts
762 .iter()
763 .enumerate()
764 .map(move |(excerpt_ix, e)| ExcerptCandidate {
765 file_ix,
766 excerpt_ix,
767 order: e.order,
768 })
769 })
770 .collect();
771
772 // Pre-compute file header strings and their token costs.
773 let file_headers: Vec<String> = related_files
774 .iter()
775 .map(|file| {
776 let path_str = file.path.to_string_lossy();
777 format!("{}{}\n", file_prefix, path_str)
778 })
779 .collect();
780
781 // Sort the excerpts by their order and determine how many fit within the budget.
782 let mut total_tokens = 0;
783 let mut included_excerpt_count = 0_usize;
784 let mut included_file_indices = vec![false; related_files.len()];
785 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
786 for candidate in &excerpt_candidates {
787 let file = &related_files[candidate.file_ix];
788 let excerpt = &file.excerpts[candidate.excerpt_ix];
789 let file_already_included = included_file_indices[candidate.file_ix];
790 let header_cost = if file_already_included {
791 0
792 } else {
793 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
794 };
795 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
796 if total_tokens + header_cost + excerpt_cost > max_tokens {
797 break;
798 }
799 total_tokens += header_cost + excerpt_cost;
800 if !file_already_included {
801 included_file_indices[candidate.file_ix] = true;
802 }
803 included_excerpt_count += 1;
804 }
805
806 excerpt_candidates.truncate(included_excerpt_count);
807 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
808
809 // Render all of the files that fit within the token budget, in the original order.
810 let mut result = String::new();
811 let mut last_file_ix = None;
812 for candidate in &excerpt_candidates {
813 if last_file_ix != Some(candidate.file_ix) {
814 if last_file_ix.is_some() {
815 result.push_str(file_suffix);
816 }
817 result.push_str(&file_headers[candidate.file_ix]);
818 last_file_ix = Some(candidate.file_ix);
819 }
820 let file = &related_files[candidate.file_ix];
821 let excerpt = &file.excerpts[candidate.excerpt_ix];
822 result.push_str(&excerpt.text);
823 if !result.ends_with('\n') {
824 result.push('\n');
825 }
826 if excerpt.row_range.end < file.max_row {
827 result.push_str("...\n");
828 }
829 }
830
831 result
832}
833
834pub fn write_related_files(
835 prompt: &mut String,
836 related_files: &[RelatedFile],
837) -> Vec<Range<usize>> {
838 let mut ranges = Vec::new();
839 for file in related_files {
840 let start = prompt.len();
841 let path_str = file.path.to_string_lossy();
842 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
843 for excerpt in &file.excerpts {
844 prompt.push_str(&excerpt.text);
845 if !prompt.ends_with('\n') {
846 prompt.push('\n');
847 }
848 if excerpt.row_range.end < file.max_row {
849 prompt.push_str("...\n");
850 }
851 }
852 let end = prompt.len();
853 ranges.push(start..end);
854 }
855 ranges
856}
857
858mod v0112_middle_at_end {
859 use super::*;
860
861 pub fn special_tokens() -> &'static [&'static str] {
862 &[
863 "<|fim_prefix|>",
864 "<|fim_suffix|>",
865 "<|fim_middle|>",
866 "<|file_sep|>",
867 CURSOR_MARKER,
868 ]
869 }
870
871 pub fn write_cursor_excerpt_section(
872 prompt: &mut String,
873 path: &Path,
874 context: &str,
875 editable_range: &Range<usize>,
876 cursor_offset: usize,
877 ) {
878 let path_str = path.to_string_lossy();
879 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
880
881 prompt.push_str("<|fim_prefix|>\n");
882 prompt.push_str(&context[..editable_range.start]);
883
884 prompt.push_str("<|fim_suffix|>\n");
885 prompt.push_str(&context[editable_range.end..]);
886 if !prompt.ends_with('\n') {
887 prompt.push('\n');
888 }
889
890 prompt.push_str("<|fim_middle|>current\n");
891 prompt.push_str(&context[editable_range.start..cursor_offset]);
892 prompt.push_str(CURSOR_MARKER);
893 prompt.push_str(&context[cursor_offset..editable_range.end]);
894 if !prompt.ends_with('\n') {
895 prompt.push('\n');
896 }
897
898 prompt.push_str("<|fim_middle|>updated\n");
899 }
900}
901
902mod v0113_ordered {
903 use super::*;
904
905 pub fn special_tokens() -> &'static [&'static str] {
906 &[
907 "<|fim_prefix|>",
908 "<|fim_suffix|>",
909 "<|fim_middle|>",
910 "<|file_sep|>",
911 CURSOR_MARKER,
912 ]
913 }
914
915 pub fn write_cursor_excerpt_section(
916 prompt: &mut String,
917 path: &Path,
918 context: &str,
919 editable_range: &Range<usize>,
920 cursor_offset: usize,
921 ) {
922 let path_str = path.to_string_lossy();
923 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
924
925 prompt.push_str("<|fim_prefix|>\n");
926 prompt.push_str(&context[..editable_range.start]);
927 if !prompt.ends_with('\n') {
928 prompt.push('\n');
929 }
930
931 prompt.push_str("<|fim_middle|>current\n");
932 prompt.push_str(&context[editable_range.start..cursor_offset]);
933 prompt.push_str(CURSOR_MARKER);
934 prompt.push_str(&context[cursor_offset..editable_range.end]);
935 if !prompt.ends_with('\n') {
936 prompt.push('\n');
937 }
938
939 prompt.push_str("<|fim_suffix|>\n");
940 prompt.push_str(&context[editable_range.end..]);
941 if !prompt.ends_with('\n') {
942 prompt.push('\n');
943 }
944
945 prompt.push_str("<|fim_middle|>updated\n");
946 }
947}
948
mod v0114180_editable_region {
    //! This format uses the same special tokens as `v0113_ordered`.

    use super::*;

    /// Special tokens used by this format (those of `v0113_ordered`).
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
956
957pub mod v0120_git_merge_markers {
958 //! A prompt that uses git-style merge conflict markers to represent the editable region.
959 //!
960 //! Example prompt:
961 //!
962 //! <|file_sep|>path/to/target_file.py
963 //! <|fim_prefix|>
964 //! code before editable region
965 //! <|fim_suffix|>
966 //! code after editable region
967 //! <|fim_middle|>
968 //! <<<<<<< CURRENT
969 //! code that
970 //! needs to<|user_cursor|>
971 //! be rewritten
972 //! =======
973 //!
974 //! Expected output (should be generated by the model):
975 //!
976 //! updated
977 //! code with
978 //! changes applied
979 //! >>>>>>> UPDATED
980
981 use super::*;
982
983 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
984 pub const SEPARATOR: &str = "=======\n";
985 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
986
987 pub fn special_tokens() -> &'static [&'static str] {
988 &[
989 "<|fim_prefix|>",
990 "<|fim_suffix|>",
991 "<|fim_middle|>",
992 "<|file_sep|>",
993 START_MARKER,
994 SEPARATOR,
995 END_MARKER,
996 CURSOR_MARKER,
997 ]
998 }
999
1000 pub fn write_cursor_excerpt_section(
1001 prompt: &mut String,
1002 path: &Path,
1003 context: &str,
1004 editable_range: &Range<usize>,
1005 cursor_offset: usize,
1006 ) {
1007 let path_str = path.to_string_lossy();
1008 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1009
1010 prompt.push_str("<|fim_prefix|>");
1011 prompt.push_str(&context[..editable_range.start]);
1012
1013 prompt.push_str("<|fim_suffix|>");
1014 prompt.push_str(&context[editable_range.end..]);
1015 if !prompt.ends_with('\n') {
1016 prompt.push('\n');
1017 }
1018
1019 prompt.push_str("<|fim_middle|>");
1020 prompt.push_str(START_MARKER);
1021 prompt.push_str(&context[editable_range.start..cursor_offset]);
1022 prompt.push_str(CURSOR_MARKER);
1023 prompt.push_str(&context[cursor_offset..editable_range.end]);
1024 if !prompt.ends_with('\n') {
1025 prompt.push('\n');
1026 }
1027 prompt.push_str(SEPARATOR);
1028 }
1029}
1030
1031pub mod v0131_git_merge_markers_prefix {
1032 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1033 //!
1034 //! Example prompt:
1035 //!
1036 //! <|file_sep|>path/to/target_file.py
1037 //! <|fim_prefix|>
1038 //! code before editable region
1039 //! <<<<<<< CURRENT
1040 //! code that
1041 //! needs to<|user_cursor|>
1042 //! be rewritten
1043 //! =======
1044 //! <|fim_suffix|>
1045 //! code after editable region
1046 //! <|fim_middle|>
1047 //!
1048 //! Expected output (should be generated by the model):
1049 //!
1050 //! updated
1051 //! code with
1052 //! changes applied
1053 //! >>>>>>> UPDATED
1054
1055 use super::*;
1056
1057 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1058 pub const SEPARATOR: &str = "=======\n";
1059 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1060
1061 pub fn special_tokens() -> &'static [&'static str] {
1062 &[
1063 "<|fim_prefix|>",
1064 "<|fim_suffix|>",
1065 "<|fim_middle|>",
1066 "<|file_sep|>",
1067 START_MARKER,
1068 SEPARATOR,
1069 END_MARKER,
1070 CURSOR_MARKER,
1071 ]
1072 }
1073
1074 pub fn write_cursor_excerpt_section(
1075 prompt: &mut String,
1076 path: &Path,
1077 context: &str,
1078 editable_range: &Range<usize>,
1079 cursor_offset: usize,
1080 ) {
1081 let path_str = path.to_string_lossy();
1082 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1083
1084 prompt.push_str("<|fim_prefix|>");
1085 prompt.push_str(&context[..editable_range.start]);
1086 prompt.push_str(START_MARKER);
1087 prompt.push_str(&context[editable_range.start..cursor_offset]);
1088 prompt.push_str(CURSOR_MARKER);
1089 prompt.push_str(&context[cursor_offset..editable_range.end]);
1090 if !prompt.ends_with('\n') {
1091 prompt.push('\n');
1092 }
1093 prompt.push_str(SEPARATOR);
1094
1095 prompt.push_str("<|fim_suffix|>");
1096 prompt.push_str(&context[editable_range.end..]);
1097 if !prompt.ends_with('\n') {
1098 prompt.push('\n');
1099 }
1100
1101 prompt.push_str("<|fim_middle|>");
1102 }
1103}
1104
1105pub mod v0211_prefill {
1106 use super::*;
1107
1108 pub fn special_tokens() -> &'static [&'static str] {
1109 v0131_git_merge_markers_prefix::special_tokens()
1110 }
1111
1112 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1113 let editable_region = &context[editable_range.start..editable_range.end];
1114
1115 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1116 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1117
1118 // Find a token boundary to avoid splitting tokens in the prefill.
1119 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1120 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1121 // the \n and consume any consecutive \n characters after it.
1122 let prefill = &editable_region[..prefill_len];
1123 match prefill.rfind('\n') {
1124 Some(pos) => {
1125 let mut end = pos + 1;
1126 while end < editable_region.len()
1127 && editable_region.as_bytes().get(end) == Some(&b'\n')
1128 {
1129 end += 1;
1130 }
1131 editable_region[..end].to_string()
1132 }
1133 // No newline found. Fall back to splitting before the last space
1134 // (word-level boundary)
1135 None => match prefill.rfind(' ') {
1136 Some(pos) => prefill[..pos].to_string(),
1137 None => prefill.to_string(),
1138 },
1139 }
1140 }
1141}
1142
1143pub mod hashline {
1144
1145 use std::fmt::Display;
1146
/// Marker that `write_cursor_excerpt_section` writes (followed by a newline)
/// at the very end of the hashline prompt.
pub const END_MARKER: &str = "<|fim_middle|>updated";
/// Marker that `write_cursor_excerpt_section` writes immediately before the
/// hashline-encoded editable region.
pub const START_MARKER: &str = "<|fim_middle|>current";

use super::*;

/// Prefix of a command line that replaces a single line or an inclusive
/// range of lines (`<|set|>3:c3` or `<|set|>1:46-3:4a`).
const SET_COMMAND_MARKER: &str = "<|set|>";
/// Prefix of a command line that inserts new lines after a referenced line,
/// or before the first line when no valid reference follows the marker.
const INSERT_COMMAND_MARKER: &str = "<|insert|>";
/// Marker meaning "leave the editable region unchanged".
pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1155
1156 pub fn special_tokens() -> &'static [&'static str] {
1157 return &[
1158 SET_COMMAND_MARKER,
1159 "<|set_range|>",
1160 INSERT_COMMAND_MARKER,
1161 NO_EDITS_COMMAND_MARKER,
1162 CURSOR_MARKER,
1163 "<|file_sep|>",
1164 "<|fim_prefix|>",
1165 "<|fim_suffix|>",
1166 "<|fim_middle|>",
1167 ];
1168 }
1169
/// Reference to a line of the editable region, written as `{index}:{hash}`
/// with the hash in two-digit lowercase hex, e.g. `3:c3` for line index 3
/// with hash 0xc3.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LineRef {
    /// Zero-based line index within the editable region.
    index: usize,
    /// One-byte hash of the line's content.
    hash: u8,
}

impl Display for LineRef {
    /// Formats the reference as `{index}:{hash:02x}`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { index, hash } = self;
        write!(f, "{index}:{hash:02x}")
    }
}
1182
/// Computes the one-byte hash of a line: the sum of its bytes, wrapping on
/// overflow. An empty line hashes to 0.
pub fn hash_line(line: &[u8]) -> u8 {
    line.iter()
        .fold(0u8, |sum, &byte| sum.wrapping_add(byte))
}
1190
1191 /// Write the hashline-encoded editable region into `out`. Each line of
1192 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1193 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1194 /// to the start of `editable_text`).
1195 pub fn write_hashline_editable_region(
1196 out: &mut String,
1197 editable_text: &str,
1198 cursor_offset_in_editable: usize,
1199 ) {
1200 let mut offset = 0;
1201 for (i, line) in editable_text.lines().enumerate() {
1202 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1203 && cursor_offset_in_editable < offset + line.len()
1204 {
1205 (
1206 &line[..cursor_offset_in_editable - offset],
1207 CURSOR_MARKER,
1208 &line[cursor_offset_in_editable - offset..],
1209 )
1210 } else {
1211 (line, "", "")
1212 };
1213 write!(
1214 out,
1215 "\n{}|{head}{cursor}{tail}",
1216 LineRef {
1217 index: i,
1218 hash: hash_line(line.as_bytes())
1219 }
1220 )
1221 .unwrap();
1222 offset += line.len() + 1;
1223 }
1224 }
1225
1226 pub fn write_cursor_excerpt_section(
1227 prompt: &mut String,
1228 path: &Path,
1229 context: &str,
1230 editable_range: &Range<usize>,
1231 cursor_offset: usize,
1232 ) {
1233 let path_str = path.to_string_lossy();
1234 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1235
1236 prompt.push_str("<|fim_prefix|>\n");
1237 prompt.push_str(&context[..editable_range.start]);
1238 prompt.push_str(START_MARKER);
1239
1240 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1241 let editable_region = &context[editable_range.clone()];
1242 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1243
1244 if !prompt.ends_with('\n') {
1245 prompt.push('\n');
1246 }
1247
1248 prompt.push_str("<|fim_suffix|>\n");
1249 prompt.push_str(&context[editable_range.end..]);
1250 if !prompt.ends_with('\n') {
1251 prompt.push('\n');
1252 }
1253
1254 prompt.push_str(END_MARKER);
1255 prompt.push('\n');
1256 }
1257
/// A single edit command parsed from the model output.
///
/// The `content` of each variant borrows from the model output string: it is
/// the text between the command line and the next command line (or the end
/// of the output).
#[derive(Debug)]
enum EditCommand<'a> {
    /// Replace a range of lines (inclusive on both ends). Single-line set is
    /// represented by `start == end`.
    Set {
        /// First line to replace.
        start: LineRef,
        /// Last line to replace (inclusive).
        end: LineRef,
        /// Replacement text.
        content: &'a str,
    },
    /// Insert new lines after the given line, or before the first line if
    /// `after` is `None`.
    Insert {
        /// Line to insert after; `None` means "before the first line".
        after: Option<LineRef>,
        /// Text to insert.
        content: &'a str,
    },
}
1275
1276 /// Parse a line reference like `3:c3` into a `LineRef`.
1277 fn parse_line_ref(s: &str) -> Option<LineRef> {
1278 let (idx_str, hash_str) = s.split_once(':')?;
1279 let index = idx_str.parse::<usize>().ok()?;
1280 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1281 Some(LineRef { index, hash })
1282 }
1283
1284 /// Parse the model output into a list of `EditCommand`s.
1285 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1286 let mut commands = Vec::new();
1287 let mut offset = 0usize;
1288
1289 while offset < model_output.len() {
1290 let next_nl = model_output[offset..]
1291 .find('\n')
1292 .map(|i| offset + i)
1293 .unwrap_or(model_output.len());
1294 let line = &model_output[offset..next_nl];
1295 let line_end = if next_nl < model_output.len() {
1296 next_nl + 1
1297 } else {
1298 next_nl
1299 };
1300
1301 let trimmed = line.trim();
1302 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1303 (true, spec)
1304 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1305 (false, spec)
1306 } else {
1307 offset = line_end;
1308 continue;
1309 };
1310
1311 let mut content_end = line_end;
1312 let mut scan = line_end;
1313
1314 while scan < model_output.len() {
1315 let body_nl = model_output[scan..]
1316 .find('\n')
1317 .map(|i| scan + i)
1318 .unwrap_or(model_output.len());
1319 let body_line = &model_output[scan..body_nl];
1320 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1321 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1322 {
1323 break;
1324 }
1325 scan = if body_nl < model_output.len() {
1326 body_nl + 1
1327 } else {
1328 body_nl
1329 };
1330 content_end = scan;
1331 }
1332
1333 let content = &model_output[line_end..content_end];
1334
1335 if is_set {
1336 if let Some((start_str, end_str)) = specifier.split_once('-') {
1337 if let (Some(start), Some(end)) =
1338 (parse_line_ref(start_str), parse_line_ref(end_str))
1339 {
1340 commands.push(EditCommand::Set {
1341 start,
1342 end,
1343 content,
1344 });
1345 }
1346 } else if let Some(target) = parse_line_ref(specifier) {
1347 commands.push(EditCommand::Set {
1348 start: target.clone(),
1349 end: target,
1350 content,
1351 });
1352 }
1353 } else {
1354 let after = parse_line_ref(specifier);
1355 commands.push(EditCommand::Insert { after, content });
1356 }
1357
1358 offset = scan;
1359 }
1360
1361 commands
1362 }
1363
/// Strips the `{line_num}:{hash}|` prefixes from each line of a
/// hashline-encoded editable region, returning the plain text content.
///
/// For every line, everything up to and including the first `|` is removed;
/// a line without `|` is kept unchanged. Lines are joined with `\n`, and the
/// result ends with a newline exactly when `region` does.
pub fn strip_hashline_prefixes(region: &str) -> String {
    let mut decoded = String::with_capacity(region.len());

    for (line_number, line) in region.lines().enumerate() {
        if line_number > 0 {
            decoded.push('\n');
        }
        let content = match line.split_once('|') {
            Some((_prefix, rest)) => rest,
            None => line,
        };
        decoded.push_str(content);
    }

    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
1379
1380 pub fn output_has_edit_commands(model_output: &str) -> bool {
1381 model_output.contains(SET_COMMAND_MARKER)
1382 || model_output.contains(INSERT_COMMAND_MARKER)
1383 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1384 }
1385
1386 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1387 /// original editable region text.
1388 ///
1389 /// `editable_region` is the original text of the editable region (without hash
1390 /// prefixes). `model_output` is the raw model response containing edit commands.
1391 ///
1392 /// Returns the full replacement text for the editable region.
1393 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1394 if model_output
1395 .trim_start()
1396 .starts_with(NO_EDITS_COMMAND_MARKER)
1397 {
1398 return editable_region.to_string();
1399 }
1400
1401 let original_lines: Vec<&str> = editable_region.lines().collect();
1402 let old_hashes: Vec<u8> = original_lines
1403 .iter()
1404 .map(|line| hash_line(line.as_bytes()))
1405 .collect();
1406
1407 let commands = parse_edit_commands(model_output);
1408
1409 // For set operations: indexed by start line → Some((end line index, content))
1410 // For insert operations: indexed by line index → vec of content to insert after
1411 // Insert-before-first is tracked separately.
1412 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1413 let mut insert_before_first: Vec<&str> = Vec::new();
1414 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1415
1416 for command in &commands {
1417 match command {
1418 EditCommand::Set {
1419 start,
1420 end,
1421 content,
1422 } => {
1423 if start.index < old_hashes.len()
1424 && end.index < old_hashes.len()
1425 && start.index <= end.index
1426 && old_hashes[start.index] == start.hash
1427 && old_hashes[end.index] == end.hash
1428 {
1429 set_ops[start.index] = Some((end.index, *content));
1430 }
1431 }
1432 EditCommand::Insert { after, content } => match after {
1433 None => insert_before_first.push(*content),
1434 Some(line_ref) => {
1435 if line_ref.index < old_hashes.len()
1436 && old_hashes[line_ref.index] == line_ref.hash
1437 {
1438 insert_after[line_ref.index].push(*content);
1439 }
1440 }
1441 },
1442 }
1443 }
1444
1445 let mut result = String::new();
1446
1447 // Emit any insertions before the first line
1448 for content in &insert_before_first {
1449 result.push_str(content);
1450 if !content.ends_with('\n') {
1451 result.push('\n');
1452 }
1453 }
1454
1455 let mut i = 0;
1456 while i < original_lines.len() {
1457 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1458 // Replace lines i..=end_index with the replacement content
1459 result.push_str(replacement);
1460 if !replacement.is_empty() && !replacement.ends_with('\n') {
1461 result.push('\n');
1462 }
1463 // Emit any insertions after the end of this set range
1464 if *end_index < insert_after.len() {
1465 for content in &insert_after[*end_index] {
1466 result.push_str(content);
1467 if !content.ends_with('\n') {
1468 result.push('\n');
1469 }
1470 }
1471 }
1472 i = end_index + 1;
1473 } else {
1474 // Keep the original line
1475 result.push_str(original_lines[i]);
1476 result.push('\n');
1477 // Emit any insertions after this line
1478 for content in &insert_after[i] {
1479 result.push_str(content);
1480 if !content.ends_with('\n') {
1481 result.push('\n');
1482 }
1483 }
1484 i += 1;
1485 }
1486 }
1487
1488 // Preserve trailing newline behavior: if the original ended with a
1489 // newline the result already has one; if it didn't, trim the extra one
1490 // we added.
1491 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1492 result.pop();
1493 }
1494
1495 result
1496 }
1497
1498 /// Convert a unified diff patch into hashline edit commands.
1499 ///
1500 /// Parses the unified diff `patch` directly to determine which lines of
1501 /// `old_text` are deleted/replaced and what new lines are added, then emits
1502 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1503 /// `{index}:{hash}` identifiers.
1504 ///
1505 /// `cursor_offset` is an optional byte offset into the first hunk's new
1506 /// text (context + additions) where the cursor marker should be placed.
1507 pub fn patch_to_edit_commands(
1508 old_text: &str,
1509 patch: &str,
1510 cursor_offset: Option<usize>,
1511 ) -> Result<String> {
1512 let old_lines: Vec<&str> = old_text.lines().collect();
1513 let old_hashes: Vec<u8> = old_lines
1514 .iter()
1515 .map(|line| hash_line(line.as_bytes()))
1516 .collect();
1517
1518 let mut result = String::new();
1519 let mut first_hunk = true;
1520
1521 struct Hunk<'a> {
1522 line_range: Range<usize>,
1523 new_text_lines: Vec<&'a str>,
1524 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1525 }
1526
1527 // Parse the patch line by line. We only care about hunk headers,
1528 // context, deletions, and additions.
1529 let mut old_line_index: usize = 0;
1530 let mut current_hunk: Option<Hunk> = None;
1531 // Byte offset tracking within the hunk's new text for cursor placement.
1532 let mut new_text_byte_offset: usize = 0;
1533 // The line index of the last old line seen before/in the current hunk
1534 // (used for insert-after reference).
1535 let mut last_old_line_before_hunk: Option<usize> = None;
1536
1537 fn flush_hunk(
1538 hunk: Hunk,
1539 last_old_line: Option<usize>,
1540 result: &mut String,
1541 old_hashes: &[u8],
1542 ) {
1543 if hunk.line_range.is_empty() {
1544 // Pure insertion — reference the old line to insert after when in bounds.
1545 if let Some(after) = last_old_line
1546 && let Some(&hash) = old_hashes.get(after)
1547 {
1548 write!(
1549 result,
1550 "{INSERT_COMMAND_MARKER}{}\n",
1551 LineRef { index: after, hash }
1552 )
1553 .unwrap();
1554 } else {
1555 result.push_str(INSERT_COMMAND_MARKER);
1556 result.push('\n');
1557 }
1558 } else {
1559 let start = hunk.line_range.start;
1560 let end_exclusive = hunk.line_range.end;
1561 let deleted_line_count = end_exclusive.saturating_sub(start);
1562
1563 if deleted_line_count == 1 {
1564 if let Some(&hash) = old_hashes.get(start) {
1565 write!(
1566 result,
1567 "{SET_COMMAND_MARKER}{}\n",
1568 LineRef { index: start, hash }
1569 )
1570 .unwrap();
1571 } else {
1572 result.push_str(SET_COMMAND_MARKER);
1573 result.push('\n');
1574 }
1575 } else {
1576 let end_inclusive = end_exclusive - 1;
1577 match (
1578 old_hashes.get(start).copied(),
1579 old_hashes.get(end_inclusive).copied(),
1580 ) {
1581 (Some(start_hash), Some(end_hash)) => {
1582 write!(
1583 result,
1584 "{SET_COMMAND_MARKER}{}-{}\n",
1585 LineRef {
1586 index: start,
1587 hash: start_hash
1588 },
1589 LineRef {
1590 index: end_inclusive,
1591 hash: end_hash
1592 }
1593 )
1594 .unwrap();
1595 }
1596 _ => {
1597 result.push_str(SET_COMMAND_MARKER);
1598 result.push('\n');
1599 }
1600 }
1601 }
1602 }
1603 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1604 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1605 && line_offset == cursor_line_offset
1606 {
1607 result.push_str(&line[..char_offset]);
1608 result.push_str(CURSOR_MARKER);
1609 result.push_str(&line[char_offset..]);
1610 continue;
1611 }
1612
1613 result.push_str(line);
1614 }
1615 }
1616
1617 for raw_line in patch.split_inclusive('\n') {
1618 if raw_line.starts_with("@@") {
1619 // Flush any pending change hunk from a previous patch hunk.
1620 if let Some(hunk) = current_hunk.take() {
1621 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1622 }
1623
1624 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1625 // We intentionally do not trust old_start as a direct local index into `old_text`,
1626 // because some patches are produced against a larger file region and carry
1627 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1628 if first_hunk {
1629 new_text_byte_offset = 0;
1630 first_hunk = false;
1631 }
1632 continue;
1633 }
1634
1635 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1636 continue;
1637 }
1638 if raw_line.starts_with("\\ No newline") {
1639 continue;
1640 }
1641
1642 if raw_line.starts_with('-') {
1643 // Extend or start a change hunk with this deleted old line.
1644 match &mut current_hunk {
1645 Some(Hunk {
1646 line_range: range, ..
1647 }) => range.end = old_line_index + 1,
1648 None => {
1649 current_hunk = Some(Hunk {
1650 line_range: old_line_index..old_line_index + 1,
1651 new_text_lines: Vec::new(),
1652 cursor_line_offset_in_new_text: None,
1653 });
1654 }
1655 }
1656 old_line_index += 1;
1657 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1658 // Place cursor marker if cursor_offset falls within this line.
1659 let mut cursor_line_offset = None;
1660 if let Some(cursor_off) = cursor_offset
1661 && (first_hunk
1662 || cursor_off >= new_text_byte_offset
1663 && cursor_off <= new_text_byte_offset + added_content.len())
1664 {
1665 let line_offset = added_content.floor_char_boundary(
1666 cursor_off
1667 .saturating_sub(new_text_byte_offset)
1668 .min(added_content.len()),
1669 );
1670 cursor_line_offset = Some(line_offset);
1671 }
1672
1673 new_text_byte_offset += added_content.len();
1674
1675 let hunk = current_hunk.get_or_insert(Hunk {
1676 line_range: old_line_index..old_line_index,
1677 new_text_lines: vec![],
1678 cursor_line_offset_in_new_text: None,
1679 });
1680 hunk.new_text_lines.push(added_content);
1681 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1682 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1683 } else {
1684 // Context line (starts with ' ' or is empty).
1685 if let Some(hunk) = current_hunk.take() {
1686 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1687 }
1688 last_old_line_before_hunk = Some(old_line_index);
1689 old_line_index += 1;
1690 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1691 new_text_byte_offset += content.len();
1692 }
1693 }
1694
1695 // Flush final group.
1696 if let Some(hunk) = current_hunk.take() {
1697 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1698 }
1699
1700 // Trim a single trailing newline.
1701 if result.ends_with('\n') {
1702 result.pop();
1703 }
1704
1705 if result.is_empty() {
1706 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1707 }
1708
1709 Ok(result)
1710 }
1711
1712 #[cfg(test)]
1713 mod tests {
1714 use super::*;
1715 use indoc::indoc;
1716
1717 #[test]
1718 fn test_format_cursor_region() {
1719 struct Case {
1720 name: &'static str,
1721 context: &'static str,
1722 editable_range: Range<usize>,
1723 cursor_offset: usize,
1724 expected: &'static str,
1725 }
1726
1727 let cases = [
1728 Case {
1729 name: "basic_cursor_placement",
1730 context: "hello world\n",
1731 editable_range: 0..12,
1732 cursor_offset: 5,
1733 expected: indoc! {"
1734 <|file_sep|>test.rs
1735 <|fim_prefix|>
1736 <|fim_middle|>current
1737 0:5c|hello<|user_cursor|> world
1738 <|fim_suffix|>
1739 <|fim_middle|>updated
1740 "},
1741 },
1742 Case {
1743 name: "multiline_cursor_on_second_line",
1744 context: "aaa\nbbb\nccc\n",
1745 editable_range: 0..12,
1746 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1747 expected: indoc! {"
1748 <|file_sep|>test.rs
1749 <|fim_prefix|>
1750 <|fim_middle|>current
1751 0:23|aaa
1752 1:26|b<|user_cursor|>bb
1753 2:29|ccc
1754 <|fim_suffix|>
1755 <|fim_middle|>updated
1756 "},
1757 },
1758 Case {
1759 name: "no_trailing_newline_in_context",
1760 context: "line1\nline2",
1761 editable_range: 0..11,
1762 cursor_offset: 3,
1763 expected: indoc! {"
1764 <|file_sep|>test.rs
1765 <|fim_prefix|>
1766 <|fim_middle|>current
1767 0:d9|lin<|user_cursor|>e1
1768 1:da|line2
1769 <|fim_suffix|>
1770 <|fim_middle|>updated
1771 "},
1772 },
1773 Case {
1774 name: "leading_newline_in_editable_region",
1775 context: "\nabc\n",
1776 editable_range: 0..5,
1777 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1778 expected: indoc! {"
1779 <|file_sep|>test.rs
1780 <|fim_prefix|>
1781 <|fim_middle|>current
1782 0:00|
1783 1:26|a<|user_cursor|>bc
1784 <|fim_suffix|>
1785 <|fim_middle|>updated
1786 "},
1787 },
1788 Case {
1789 name: "with_suffix",
1790 context: "abc\ndef",
1791 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1792 cursor_offset: 2,
1793 expected: indoc! {"
1794 <|file_sep|>test.rs
1795 <|fim_prefix|>
1796 <|fim_middle|>current
1797 0:26|ab<|user_cursor|>c
1798 <|fim_suffix|>
1799 def
1800 <|fim_middle|>updated
1801 "},
1802 },
1803 Case {
1804 name: "unicode_two_byte_chars",
1805 context: "héllo\n",
1806 editable_range: 0..7,
1807 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1808 expected: indoc! {"
1809 <|file_sep|>test.rs
1810 <|fim_prefix|>
1811 <|fim_middle|>current
1812 0:1b|hé<|user_cursor|>llo
1813 <|fim_suffix|>
1814 <|fim_middle|>updated
1815 "},
1816 },
1817 Case {
1818 name: "unicode_three_byte_chars",
1819 context: "日本語\n",
1820 editable_range: 0..10,
1821 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1822 expected: indoc! {"
1823 <|file_sep|>test.rs
1824 <|fim_prefix|>
1825 <|fim_middle|>current
1826 0:80|日本<|user_cursor|>語
1827 <|fim_suffix|>
1828 <|fim_middle|>updated
1829 "},
1830 },
1831 Case {
1832 name: "unicode_four_byte_chars",
1833 context: "a🌍b\n",
1834 editable_range: 0..7,
1835 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1836 expected: indoc! {"
1837 <|file_sep|>test.rs
1838 <|fim_prefix|>
1839 <|fim_middle|>current
1840 0:6b|a🌍<|user_cursor|>b
1841 <|fim_suffix|>
1842 <|fim_middle|>updated
1843 "},
1844 },
1845 Case {
1846 name: "cursor_at_start_of_region_not_placed",
1847 context: "abc\n",
1848 editable_range: 0..4,
1849 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1850 expected: indoc! {"
1851 <|file_sep|>test.rs
1852 <|fim_prefix|>
1853 <|fim_middle|>current
1854 0:26|abc
1855 <|fim_suffix|>
1856 <|fim_middle|>updated
1857 "},
1858 },
1859 Case {
1860 name: "cursor_at_end_of_line_not_placed",
1861 context: "abc\ndef\n",
1862 editable_range: 0..8,
1863 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1864 expected: indoc! {"
1865 <|file_sep|>test.rs
1866 <|fim_prefix|>
1867 <|fim_middle|>current
1868 0:26|abc
1869 1:2f|def
1870 <|fim_suffix|>
1871 <|fim_middle|>updated
1872 "},
1873 },
1874 Case {
1875 name: "cursor_offset_relative_to_context_not_editable_region",
1876 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1877 // write_cursor_excerpt_section must subtract it before comparing against
1878 // per-line offsets within the editable region.
1879 context: "pre\naaa\nbbb\nsuf\n",
1880 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1881 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1882 expected: indoc! {"
1883 <|file_sep|>test.rs
1884 <|fim_prefix|>
1885 pre
1886 <|fim_middle|>current
1887 0:23|aaa
1888 1:26|b<|user_cursor|>bb
1889 <|fim_suffix|>
1890 suf
1891 <|fim_middle|>updated
1892 "},
1893 },
1894 ];
1895
1896 for case in &cases {
1897 let mut prompt = String::new();
1898 hashline::write_cursor_excerpt_section(
1899 &mut prompt,
1900 Path::new("test.rs"),
1901 case.context,
1902 &case.editable_range,
1903 case.cursor_offset,
1904 );
1905 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1906 }
1907 }
1908
1909 #[test]
1910 fn test_apply_edit_commands() {
1911 struct Case {
1912 name: &'static str,
1913 original: &'static str,
1914 model_output: &'static str,
1915 expected: &'static str,
1916 }
1917
1918 let cases = vec![
1919 Case {
1920 name: "set_single_line",
1921 original: indoc! {"
1922 let mut total = 0;
1923 for product in products {
1924 total += ;
1925 }
1926 total
1927 "},
1928 model_output: indoc! {"
1929 <|set|>2:87
1930 total += product.price;
1931 "},
1932 expected: indoc! {"
1933 let mut total = 0;
1934 for product in products {
1935 total += product.price;
1936 }
1937 total
1938 "},
1939 },
1940 Case {
1941 name: "set_range",
1942 original: indoc! {"
1943 fn foo() {
1944 let x = 1;
1945 let y = 2;
1946 let z = 3;
1947 }
1948 "},
1949 model_output: indoc! {"
1950 <|set|>1:46-3:4a
1951 let sum = 6;
1952 "},
1953 expected: indoc! {"
1954 fn foo() {
1955 let sum = 6;
1956 }
1957 "},
1958 },
1959 Case {
1960 name: "insert_after_line",
1961 original: indoc! {"
1962 fn main() {
1963 let x = 1;
1964 }
1965 "},
1966 model_output: indoc! {"
1967 <|insert|>1:46
1968 let y = 2;
1969 "},
1970 expected: indoc! {"
1971 fn main() {
1972 let x = 1;
1973 let y = 2;
1974 }
1975 "},
1976 },
1977 Case {
1978 name: "insert_before_first",
1979 original: indoc! {"
1980 let x = 1;
1981 let y = 2;
1982 "},
1983 model_output: indoc! {"
1984 <|insert|>
1985 use std::io;
1986 "},
1987 expected: indoc! {"
1988 use std::io;
1989 let x = 1;
1990 let y = 2;
1991 "},
1992 },
1993 Case {
1994 name: "set_with_cursor_marker",
1995 original: indoc! {"
1996 fn main() {
1997 println!();
1998 }
1999 "},
2000 model_output: indoc! {"
2001 <|set|>1:34
2002 eprintln!(\"<|user_cursor|>\");
2003 "},
2004 expected: indoc! {"
2005 fn main() {
2006 eprintln!(\"<|user_cursor|>\");
2007 }
2008 "},
2009 },
2010 Case {
2011 name: "multiple_set_commands",
2012 original: indoc! {"
2013 aaa
2014 bbb
2015 ccc
2016 ddd
2017 "},
2018 model_output: indoc! {"
2019 <|set|>0:23
2020 AAA
2021 <|set|>2:29
2022 CCC
2023 "},
2024 expected: indoc! {"
2025 AAA
2026 bbb
2027 CCC
2028 ddd
2029 "},
2030 },
2031 Case {
2032 name: "set_range_multiline_replacement",
2033 original: indoc! {"
2034 fn handle_submit() {
2035 }
2036
2037 fn handle_keystroke() {
2038 "},
2039 model_output: indoc! {"
2040 <|set|>0:3f-1:7d
2041 fn handle_submit(modal_state: &mut ModalState) {
2042 <|user_cursor|>
2043 }
2044 "},
2045 expected: indoc! {"
2046 fn handle_submit(modal_state: &mut ModalState) {
2047 <|user_cursor|>
2048 }
2049
2050 fn handle_keystroke() {
2051 "},
2052 },
2053 Case {
2054 name: "no_edit_commands_returns_original",
2055 original: indoc! {"
2056 hello
2057 world
2058 "},
2059 model_output: "some random text with no commands",
2060 expected: indoc! {"
2061 hello
2062 world
2063 "},
2064 },
2065 Case {
2066 name: "no_edits_command_returns_original",
2067 original: indoc! {"
2068 hello
2069 world
2070 "},
2071 model_output: "<|no_edits|>",
2072 expected: indoc! {"
2073 hello
2074 world
2075 "},
2076 },
2077 Case {
2078 name: "wrong_hash_set_ignored",
2079 original: indoc! {"
2080 aaa
2081 bbb
2082 "},
2083 model_output: indoc! {"
2084 <|set|>0:ff
2085 ZZZ
2086 "},
2087 expected: indoc! {"
2088 aaa
2089 bbb
2090 "},
2091 },
2092 Case {
2093 name: "insert_and_set_combined",
2094 original: indoc! {"
2095 alpha
2096 beta
2097 gamma
2098 "},
2099 model_output: indoc! {"
2100 <|set|>0:06
2101 ALPHA
2102 <|insert|>1:9c
2103 beta_extra
2104 "},
2105 expected: indoc! {"
2106 ALPHA
2107 beta
2108 beta_extra
2109 gamma
2110 "},
2111 },
2112 Case {
2113 name: "no_trailing_newline_preserved",
2114 original: "hello\nworld",
2115 model_output: indoc! {"
2116 <|set|>0:14
2117 HELLO
2118 "},
2119 expected: "HELLO\nworld",
2120 },
2121 Case {
2122 name: "set_range_hash_mismatch_in_end_bound",
2123 original: indoc! {"
2124 one
2125 two
2126 three
2127 "},
2128 model_output: indoc! {"
2129 <|set|>0:42-2:ff
2130 ONE_TWO_THREE
2131 "},
2132 expected: indoc! {"
2133 one
2134 two
2135 three
2136 "},
2137 },
2138 Case {
2139 name: "set_range_start_greater_than_end_ignored",
2140 original: indoc! {"
2141 a
2142 b
2143 c
2144 "},
2145 model_output: indoc! {"
2146 <|set|>2:63-1:62
2147 X
2148 "},
2149 expected: indoc! {"
2150 a
2151 b
2152 c
2153 "},
2154 },
2155 Case {
2156 name: "insert_out_of_bounds_ignored",
2157 original: indoc! {"
2158 x
2159 y
2160 "},
2161 model_output: indoc! {"
2162 <|insert|>99:aa
2163 z
2164 "},
2165 expected: indoc! {"
2166 x
2167 y
2168 "},
2169 },
2170 Case {
2171 name: "set_out_of_bounds_ignored",
2172 original: indoc! {"
2173 x
2174 y
2175 "},
2176 model_output: indoc! {"
2177 <|set|>99:aa
2178 z
2179 "},
2180 expected: indoc! {"
2181 x
2182 y
2183 "},
2184 },
2185 Case {
2186 name: "malformed_set_command_ignored",
2187 original: indoc! {"
2188 alpha
2189 beta
2190 "},
2191 model_output: indoc! {"
2192 <|set|>not-a-line-ref
2193 UPDATED
2194 "},
2195 expected: indoc! {"
2196 alpha
2197 beta
2198 "},
2199 },
2200 Case {
2201 name: "malformed_insert_hash_treated_as_before_first",
2202 original: indoc! {"
2203 alpha
2204 beta
2205 "},
2206 model_output: indoc! {"
2207 <|insert|>1:nothex
2208 preamble
2209 "},
2210 expected: indoc! {"
2211 preamble
2212 alpha
2213 beta
2214 "},
2215 },
2216 Case {
2217 name: "set_then_insert_same_target_orders_insert_after_replacement",
2218 original: indoc! {"
2219 cat
2220 dog
2221 "},
2222 model_output: indoc! {"
2223 <|set|>0:38
2224 CAT
2225 <|insert|>0:38
2226 TAIL
2227 "},
2228 expected: indoc! {"
2229 CAT
2230 TAIL
2231 dog
2232 "},
2233 },
2234 Case {
2235 name: "overlapping_set_ranges_last_wins",
2236 original: indoc! {"
2237 a
2238 b
2239 c
2240 d
2241 "},
2242 model_output: indoc! {"
2243 <|set|>0:61-2:63
2244 FIRST
2245 <|set|>1:62-3:64
2246 SECOND
2247 "},
2248 expected: indoc! {"
2249 FIRST
2250 d
2251 "},
2252 },
2253 Case {
2254 name: "insert_before_first_and_after_line",
2255 original: indoc! {"
2256 a
2257 b
2258 "},
2259 model_output: indoc! {"
2260 <|insert|>
2261 HEAD
2262 <|insert|>0:61
2263 MID
2264 "},
2265 expected: indoc! {"
2266 HEAD
2267 a
2268 MID
2269 b
2270 "},
2271 },
2272 ];
2273
2274 for case in &cases {
2275 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2276 assert_eq!(result, case.expected, "failed case: {}", case.name);
2277 }
2278 }
2279
2280 #[test]
2281 fn test_output_has_edit_commands() {
2282 assert!(hashline::output_has_edit_commands(&format!(
2283 "{}0:ab\nnew",
2284 SET_COMMAND_MARKER
2285 )));
2286 assert!(hashline::output_has_edit_commands(&format!(
2287 "{}0:ab\nnew",
2288 INSERT_COMMAND_MARKER
2289 )));
2290 assert!(hashline::output_has_edit_commands(&format!(
2291 "some text\n{}1:cd\nstuff",
2292 SET_COMMAND_MARKER
2293 )));
2294 assert!(!hashline::output_has_edit_commands("just plain text"));
2295 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2296 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2297 }
2298
2299 // ---- hashline::patch_to_edit_commands round-trip tests ----
2300
        /// Round-trip test: each case converts a unified diff into edit commands
        /// with `hashline::patch_to_edit_commands`, applies those commands to
        /// `old` with `hashline::apply_edit_commands`, and expects the result to
        /// equal `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            /// One table entry: the original text, the patch to convert, and the
            /// text expected after applying the generated commands.
            struct Case {
                name: &'static str,
                old: &'static str,
                patch: &'static str,
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                // The only case whose expected text contains the cursor marker;
                // the loop below derives `cursor_offset` from its position.
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                // The hunk header claims line 20 although `old` has two lines;
                // the insertion is still expected to land between them.
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                // A hunk header with no body: the text must come back unchanged,
                // and the output must still satisfy `output_has_edit_commands`.
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2599 }
2600}
2601
2602pub mod seed_coder {
2603 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2604 //!
2605 //! Seed-Coder uses different FIM tokens and order than Qwen:
2606 //! - SPM order: suffix comes FIRST, then prefix, then middle
2607 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2608 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2609 //!
2610 //! All context (related files, edit history) goes in the PREFIX section.
2611 //! The suffix contains only code after the editable region.
2612 //!
2613 //! Example prompt:
2614 //!
2615 //! <[fim-suffix]>
2616 //! code after editable region
2617 //! <[fim-prefix]><filename>related/file.py
2618 //! related file content
2619 //!
2620 //! <filename>edit_history
2621 //! --- a/some_file.py
2622 //! +++ b/some_file.py
2623 //! -old
2624 //! +new
2625 //!
2626 //! <filename>path/to/target_file.py
2627 //! code before editable region
2628 //! <<<<<<< CURRENT
2629 //! code that
2630 //! needs to<|user_cursor|>
2631 //! be rewritten
2632 //! =======
2633 //! <[fim-middle]>
2634 //!
2635 //! Expected output (model generates):
2636 //!
2637 //! updated
2638 //! code with
2639 //! changes applied
2640 //! >>>>>>> UPDATED
2641
2642 use super::*;
2643
    /// FIM token that opens the suffix section, which comes first in SPM order.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// FIM token that opens the prefix section (related files, edit history,
    /// and the target file up to the end of the editable region).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// FIM token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// StarCoder-style file marker, written immediately before a path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Opens the current contents of the editable region in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Closes the current contents of the editable region in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Terminates the model's expected output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Text used in place of updated code when a patch contains no changes
    /// (see `no_edits`).
    pub const NO_EDITS: &str = "NO_EDITS\n";
2654
2655 pub fn special_tokens() -> &'static [&'static str] {
2656 &[
2657 FIM_SUFFIX,
2658 FIM_PREFIX,
2659 FIM_MIDDLE,
2660 FILE_MARKER,
2661 START_MARKER,
2662 SEPARATOR,
2663 END_MARKER,
2664 CURSOR_MARKER,
2665 ]
2666 }
2667
2668 pub fn write_cursor_excerpt_section(
2669 prompt: &mut String,
2670 path: &Path,
2671 context: &str,
2672 editable_range: &Range<usize>,
2673 cursor_offset: usize,
2674 ) {
2675 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2676 prompt.push_str(§ion);
2677 }
2678
2679 pub fn format_prompt_with_budget(
2680 path: &Path,
2681 context: &str,
2682 editable_range: &Range<usize>,
2683 cursor_offset: usize,
2684 events: &[Arc<Event>],
2685 related_files: &[RelatedFile],
2686 max_tokens: usize,
2687 ) -> String {
2688 let cursor_prefix_section =
2689 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2690 assemble_fim_prompt(
2691 context,
2692 editable_range,
2693 &cursor_prefix_section,
2694 events,
2695 related_files,
2696 max_tokens,
2697 )
2698 }
2699
2700 pub fn assemble_fim_prompt(
2701 context: &str,
2702 editable_range: &Range<usize>,
2703 cursor_prefix_section: &str,
2704 events: &[Arc<Event>],
2705 related_files: &[RelatedFile],
2706 max_tokens: usize,
2707 ) -> String {
2708 let suffix_section = build_suffix_section(context, editable_range);
2709
2710 let suffix_tokens = estimate_tokens(suffix_section.len());
2711 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2712 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2713
2714 let edit_history_section = super::format_edit_history_within_budget(
2715 events,
2716 FILE_MARKER,
2717 "edit_history",
2718 budget_after_cursor,
2719 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2720 );
2721 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2722 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2723
2724 let related_files_section = super::format_related_files_within_budget(
2725 related_files,
2726 FILE_MARKER,
2727 "",
2728 budget_after_edit_history,
2729 );
2730
2731 let mut prompt = String::new();
2732 prompt.push_str(&suffix_section);
2733 prompt.push_str(FIM_PREFIX);
2734 prompt.push_str(&related_files_section);
2735 if !related_files_section.is_empty() {
2736 prompt.push('\n');
2737 }
2738 prompt.push_str(&edit_history_section);
2739 if !edit_history_section.is_empty() {
2740 prompt.push('\n');
2741 }
2742 prompt.push_str(cursor_prefix_section);
2743 prompt.push_str(FIM_MIDDLE);
2744 prompt
2745 }
2746
2747 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2748 let mut section = String::new();
2749 section.push_str(FIM_SUFFIX);
2750 section.push_str(&context[editable_range.end..]);
2751 if !section.ends_with('\n') {
2752 section.push('\n');
2753 }
2754 section
2755 }
2756
2757 fn build_cursor_prefix_section(
2758 path: &Path,
2759 context: &str,
2760 editable_range: &Range<usize>,
2761 cursor_offset: usize,
2762 ) -> String {
2763 let mut section = String::new();
2764 let path_str = path.to_string_lossy();
2765 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2766
2767 section.push_str(&context[..editable_range.start]);
2768 section.push_str(START_MARKER);
2769 section.push_str(&context[editable_range.start..cursor_offset]);
2770 section.push_str(CURSOR_MARKER);
2771 section.push_str(&context[cursor_offset..editable_range.end]);
2772 if !section.ends_with('\n') {
2773 section.push('\n');
2774 }
2775 section.push_str(SEPARATOR);
2776 section
2777 }
2778
2779 /// Format patch as containing no changes if it's empty; otherwise return None.
2780 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2781 // Count lines in the patch
2782 let empty_patch = patch.lines().count() <= 3;
2783 if empty_patch {
2784 Some(format!("{NO_EDITS}{END_MARKER}"))
2785 } else {
2786 None
2787 }
2788 }
2789}
2790
2791pub mod v0304_variable_edit {
2792 //! A prompt format with no fixed editable region. The entire context is shown
2793 //! to the model, and it chooses which text to replace by outputting surrounding
2794 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2795 //! text.
2796 //!
2797 //! Example prompt:
2798 //!
2799 //! <|file_sep|>path/to/file.py
2800 //! zero
2801 //! one
2802 //! two
2803 //! three<|user_cursor|>
2804 //! four
2805 //! five
2806 //! <|fim_prefix|>
    //!
2808 //! Expected output (model generates):
2809 //!
2810 //! two
2811 //! <|fim_middle|>
2812 //! THREE
2813 //! <|fim_suffix|>
2814 //! four
2815 //!
2816 //! The output means: find "two\n...\nfour" in the context, and replace
2817 //! everything between "two\n" and "four" with "THREE\n".
2818
2819 use super::*;
2820
2821 pub fn special_tokens() -> &'static [&'static str] {
2822 &[
2823 "<|fim_prefix|>",
2824 "<|fim_suffix|>",
2825 "<|fim_middle|>",
2826 "<|file_sep|>",
2827 CURSOR_MARKER,
2828 ]
2829 }
2830
2831 pub fn write_cursor_excerpt_section(
2832 prompt: &mut String,
2833 path: &Path,
2834 context: &str,
2835 cursor_offset: usize,
2836 ) {
2837 let path_str = path.to_string_lossy();
2838 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2839
2840 prompt.push_str(&context[..cursor_offset]);
2841 prompt.push_str(CURSOR_MARKER);
2842 prompt.push_str(&context[cursor_offset..]);
2843 if !prompt.ends_with('\n') {
2844 prompt.push('\n');
2845 }
2846 prompt.push_str("<|fim_prefix|>\n")
2847 }
2848
2849 /// Apply a variable-edit model output to the original context text.
2850 ///
2851 /// The model output has the form:
2852 ///
2853 /// - prefix context lines
2854 /// - `<|fim_middle|>`
2855 /// - new text
2856 /// - `<|fim_suffix|>`
2857 /// - suffix context lines
2858 ///
2859 /// We locate the prefix/suffix context lines in the original text and replace
2860 /// everything between them with the new text.
2861 pub fn apply_variable_edit(
2862 context: &str,
2863 model_output: &str,
2864 ) -> Result<(Range<usize>, String)> {
2865 let (prefix_context, rest) = model_output
2866 .split_once("<|fim_middle|>\n")
2867 .or_else(|| model_output.split_once("<|fim_middle|>"))
2868 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2869
2870 let (new_text, suffix_context) = rest
2871 .split_once("<|fim_suffix|>\n")
2872 .or_else(|| rest.split_once("<|fim_suffix|>"))
2873 .unwrap_or((rest, ""));
2874
2875 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2876 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2877 } else {
2878 suffix_context
2879 };
2880
2881 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2882 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2883 + prefix_context.len();
2884 let suffix_offset = if suffix_context.is_empty() {
2885 context.len()
2886 } else {
2887 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2888 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2889 + prefix_offset
2890 };
2891
2892 let edit_range = prefix_offset..suffix_offset;
2893 return Ok((edit_range, new_text.to_string()));
2894 }
2895
2896 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2897 if needle.is_empty() {
2898 return Some(0);
2899 }
2900
2901 haystack.match_indices(needle).find_map(|(offset, _)| {
2902 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2903 matched_line_start.then_some(offset)
2904 })
2905 }
2906
2907 /// Convert a unified diff patch into the variable-edit output format.
2908 ///
2909 /// Parses `patch` as a unified diff against `old_text` and produces model
2910 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2911 /// delimiters. The diff is resolved by content matching rather than line
2912 /// numbers.
2913 pub fn patch_to_variable_edit_output(
2914 old_text: &str,
2915 patch: &str,
2916 cursor_offset: Option<usize>,
2917 ) -> Result<String> {
2918 // Parse the unified diff into hunks. Each hunk has an `old_context`
2919 // string (context + deleted lines interleaved in order) and a list of
2920 // edits expressed as byte ranges within that context plus replacement
2921 // text.
2922 let hunks = parse_hunks(patch);
2923 if hunks.is_empty() {
2924 return Ok(String::new());
2925 }
2926
2927 // Apply each hunk by finding its old_context in the text and
2928 // performing the edits. We search forward from where the previous
2929 // hunk ended so that hunks are applied in order.
2930 let mut new_text = old_text.to_string();
2931 let mut search_from: usize = 0;
2932 let mut first_hunk_pos: Option<usize> = None;
2933
2934 for hunk in &hunks {
2935 let context_pos = new_text[search_from..]
2936 .find(&hunk.old_context)
2937 .map(|pos| pos + search_from)
2938 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2939
2940 if first_hunk_pos.is_none() {
2941 first_hunk_pos = Some(context_pos);
2942 }
2943
2944 // Apply edits in reverse order so byte offsets remain valid.
2945 for edit in hunk.edits.iter().rev() {
2946 let abs_start = context_pos + edit.range.start;
2947 let abs_end = context_pos + edit.range.end;
2948 new_text.replace_range(abs_start..abs_end, &edit.text);
2949 }
2950
2951 // Advance past this hunk's region in the (now modified) text.
2952 let new_region_len: usize =
2953 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2954 len + edit.text.len() - (edit.range.end - edit.range.start)
2955 });
2956 search_from = context_pos + new_region_len;
2957 }
2958
2959 // Now we have old_text and new_text. Find the changed line range by
2960 // comparing them.
2961 let old_lines: Vec<&str> = old_text.lines().collect();
2962 let new_lines: Vec<&str> = new_text.lines().collect();
2963
2964 // Find first differing line.
2965 let first_changed_row = old_lines
2966 .iter()
2967 .zip(new_lines.iter())
2968 .position(|(a, b)| a != b)
2969 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2970
2971 // Find last differing line (from the end).
2972 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2973 let common_suffix = old_lines
2974 .iter()
2975 .rev()
2976 .zip(new_lines.iter().rev())
2977 .take(max_suffix)
2978 .take_while(|(a, b)| a == b)
2979 .count();
2980
2981 let old_end = old_lines.len() - common_suffix;
2982 let new_end = new_lines.len() - common_suffix;
2983
2984 if first_changed_row == old_end && first_changed_row == new_end {
2985 return Ok(String::new());
2986 }
2987
2988 // Build the replacement text from new_lines[first_diff..new_end].
2989 let mut merged_new_text = String::new();
2990 for line in &new_lines[first_changed_row..new_end] {
2991 merged_new_text.push_str(line);
2992 merged_new_text.push('\n');
2993 }
2994
2995 // cursor_offset is relative to the first hunk's new content in
2996 // new_text. Translate it to an offset within merged_new_text, which
2997 // only contains lines first_diff..new_end of new_text.
2998 if let Some(hunk_offset) = cursor_offset {
2999 let hunk_start = first_hunk_pos.unwrap_or(0);
3000 let absolute_pos = hunk_start + hunk_offset;
3001
3002 // Byte offset where first_diff starts in new_text.
3003 let merged_start: usize = new_lines[..first_changed_row]
3004 .iter()
3005 .map(|line| line.len() + 1)
3006 .sum();
3007
3008 if absolute_pos >= merged_start {
3009 let relative_offset = absolute_pos - merged_start;
3010 if relative_offset <= merged_new_text.len() {
3011 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3012 }
3013 }
3014 }
3015
3016 // Build output with 2 lines of context above and below.
3017 let context_lines_count = 2;
3018 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3019 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3020
3021 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3022 let pattern = &lines[line_range];
3023 let pattern_len = pattern.len();
3024
3025 let mut count = 0;
3026 for offset in 0..=lines.len() - pattern_len {
3027 if &lines[offset..offset + pattern_len] == pattern {
3028 count += 1;
3029 }
3030 }
3031 count
3032 }
3033
3034 // Expand prefix and suffix until they are unique
3035 while prefix_start > 0 {
3036 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3037 prefix_start -= 1;
3038 } else {
3039 break;
3040 }
3041 }
3042 while suffix_end < old_lines.len() {
3043 if count_matches(old_end..suffix_end, &old_lines) > 1 {
3044 suffix_end += 1;
3045 } else {
3046 break;
3047 }
3048 }
3049
3050 let mut output = String::new();
3051 for line in &old_lines[prefix_start..first_changed_row] {
3052 output.push_str(line);
3053 output.push('\n');
3054 }
3055 output.push_str("<|fim_middle|>\n");
3056 output.push_str(&merged_new_text);
3057 output.push_str("<|fim_suffix|>\n");
3058 for line in &old_lines[old_end..suffix_end] {
3059 output.push_str(line);
3060 output.push('\n');
3061 }
3062
3063 Ok(output)
3064 }
3065
3066 struct ParsedHunk {
3067 old_context: String,
3068 edits: Vec<ParsedEdit>,
3069 }
3070
3071 struct ParsedEdit {
3072 range: Range<usize>,
3073 text: String,
3074 }
3075
3076 /// Parse a unified diff into content-based hunks. Each hunk contains an
3077 /// `old_context` string (context lines + deleted lines, which together
3078 /// form the text that should be found in the original) and a list of edits
3079 /// expressed as byte ranges within that context.
3080 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3081 let mut hunks = Vec::new();
3082 let mut current: Option<ParsedHunk> = None;
3083
3084 for line in patch.lines() {
3085 if line.starts_with("@@") {
3086 if let Some(hunk) = current.take() {
3087 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3088 hunks.push(hunk);
3089 }
3090 }
3091 current = Some(ParsedHunk {
3092 old_context: String::new(),
3093 edits: Vec::new(),
3094 });
3095 } else if line.starts_with("---") || line.starts_with("+++") {
3096 continue;
3097 } else if let Some(hunk) = &mut current {
3098 if let Some(added) = line.strip_prefix('+') {
3099 let pos = hunk.old_context.len();
3100 if let Some(last_edit) = hunk.edits.last_mut() {
3101 if last_edit.range.end == pos {
3102 writeln!(&mut last_edit.text, "{added}").ok();
3103 continue;
3104 }
3105 }
3106 hunk.edits.push(ParsedEdit {
3107 range: pos..pos,
3108 text: format!("{added}\n"),
3109 });
3110 } else if let Some(removed) = line.strip_prefix('-') {
3111 let start = hunk.old_context.len();
3112 writeln!(&mut hunk.old_context, "{removed}").ok();
3113 let end = hunk.old_context.len();
3114 if let Some(last_edit) = hunk.edits.last_mut() {
3115 if last_edit.range.end == start {
3116 last_edit.range.end = end;
3117 continue;
3118 }
3119 }
3120 hunk.edits.push(ParsedEdit {
3121 range: start..end,
3122 text: String::new(),
3123 });
3124 } else {
3125 let ctx = line.strip_prefix(' ').unwrap_or(line);
3126 writeln!(&mut hunk.old_context, "{ctx}").ok();
3127 }
3128 }
3129 }
3130
3131 if let Some(hunk) = current {
3132 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3133 hunks.push(hunk);
3134 }
3135 }
3136
3137 hunks
3138 }
3139
3140 #[cfg(test)]
3141 mod tests {
3142 use super::*;
3143 use indoc::indoc;
3144
        /// Table-driven test for `apply_variable_edit`: each case resolves
        /// `model_output` against `original`, applies the returned range and
        /// replacement, and compares the result with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            /// One table entry: the original text, the model output to resolve,
            /// and the text expected after the edit is applied.
            struct Case {
                name: &'static str,
                original: &'static str,
                model_output: &'static str,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                // Empty new text between the two markers removes the lines
                // between the prefix and suffix context.
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                // No prefix context: the edit starts at offset 0.
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                // No suffix context: the edit runs to the end of the text.
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3361
3362 #[test]
3363 fn test_patch_to_variable_edit() {
3364 struct Case {
3365 name: &'static str,
3366 old: &'static str,
3367 patch: &'static str,
3368 cursor_offset: Option<usize>,
3369 expected_variable_edit: &'static str,
3370 expected_after_apply: &'static str,
3371 }
3372
3373 let cases = [
3374 Case {
3375 name: "simple_replacement",
3376 old: indoc! {"
3377 zero
3378 one
3379 two
3380 three
3381 four
3382 five
3383 "},
3384 patch: indoc! {"
3385 @@ -3,3 +3,3 @@
3386 two
3387 -three
3388 +THREE
3389 four
3390 "},
3391 cursor_offset: None,
3392 expected_variable_edit: indoc! {"
3393 one
3394 two
3395 <|fim_middle|>
3396 THREE
3397 <|fim_suffix|>
3398 four
3399 five
3400 "},
3401 expected_after_apply: indoc! {"
3402 zero
3403 one
3404 two
3405 THREE
3406 four
3407 five
3408 "},
3409 },
3410 Case {
3411 name: "insertion",
3412 old: indoc! {"
3413 a
3414 b
3415 c
3416 d
3417 e
3418 "},
3419 patch: indoc! {"
3420 @@ -2,0 +3,1 @@
3421 b
3422 +X
3423 c
3424 "},
3425 cursor_offset: None,
3426 expected_variable_edit: indoc! {"
3427 a
3428 b
3429 <|fim_middle|>
3430 X
3431 <|fim_suffix|>
3432 c
3433 d
3434 "},
3435 expected_after_apply: indoc! {"
3436 a
3437 b
3438 X
3439 c
3440 d
3441 e
3442 "},
3443 },
3444 Case {
3445 name: "deletion",
3446 old: indoc! {"
3447 a
3448 b
3449 c
3450 d
3451 e
3452 "},
3453 patch: indoc! {"
3454 @@ -2,3 +2,2 @@
3455 b
3456 -c
3457 d
3458 "},
3459 cursor_offset: None,
3460 expected_variable_edit: indoc! {"
3461 a
3462 b
3463 <|fim_middle|>
3464 <|fim_suffix|>
3465 d
3466 e
3467 "},
3468 expected_after_apply: indoc! {"
3469 a
3470 b
3471 d
3472 e
3473 "},
3474 },
3475 Case {
3476 name: "edit_near_start",
3477 old: indoc! {"
3478 first
3479 second
3480 third
3481 fourth
3482 "},
3483 patch: indoc! {"
3484 @@ -1,1 +1,1 @@
3485 -first
3486 +FIRST
3487 "},
3488 cursor_offset: None,
3489 expected_variable_edit: indoc! {"
3490 <|fim_middle|>
3491 FIRST
3492 <|fim_suffix|>
3493 second
3494 third
3495 "},
3496 expected_after_apply: indoc! {"
3497 FIRST
3498 second
3499 third
3500 fourth
3501 "},
3502 },
3503 Case {
3504 name: "edit_near_end",
3505 old: indoc! {"
3506 first
3507 second
3508 third
3509 fourth
3510 "},
3511 patch: indoc! {"
3512 @@ -4,1 +4,1 @@
3513 -fourth
3514 +FOURTH
3515 "},
3516 cursor_offset: None,
3517 expected_variable_edit: indoc! {"
3518 second
3519 third
3520 <|fim_middle|>
3521 FOURTH
3522 <|fim_suffix|>
3523 "},
3524 expected_after_apply: indoc! {"
3525 first
3526 second
3527 third
3528 FOURTH
3529 "},
3530 },
3531 Case {
3532 name: "cursor_at_start_of_replacement",
3533 old: indoc! {"
3534 zero
3535 one
3536 two
3537 three
3538 four
3539 five
3540 "},
3541 patch: indoc! {"
3542 @@ -3,3 +3,3 @@
3543 two
3544 -three
3545 +THREE
3546 four
3547 "},
3548 cursor_offset: Some(4),
3549 expected_variable_edit: indoc! {"
3550 one
3551 two
3552 <|fim_middle|>
3553 <|user_cursor|>THREE
3554 <|fim_suffix|>
3555 four
3556 five
3557 "},
3558 expected_after_apply: indoc! {"
3559 zero
3560 one
3561 two
3562 <|user_cursor|>THREE
3563 four
3564 five
3565 "},
3566 },
3567 Case {
3568 name: "cursor_in_middle_of_replacement",
3569 old: indoc! {"
3570 zero
3571 one
3572 two
3573 three
3574 four
3575 five
3576 "},
3577 patch: indoc! {"
3578 @@ -3,3 +3,3 @@
3579 two
3580 -three
3581 +THREE
3582 four
3583 "},
3584 cursor_offset: Some(6),
3585 expected_variable_edit: indoc! {"
3586 one
3587 two
3588 <|fim_middle|>
3589 TH<|user_cursor|>REE
3590 <|fim_suffix|>
3591 four
3592 five
3593 "},
3594 expected_after_apply: indoc! {"
3595 zero
3596 one
3597 two
3598 TH<|user_cursor|>REE
3599 four
3600 five
3601 "},
3602 },
3603 Case {
3604 name: "expands_context_when_two_lines_not_unique_before_and_after",
3605 old: indoc! {"
3606 one
3607 a
3608 b
3609 c
3610 d
3611 two
3612 a
3613 b
3614 c
3615 d
3616 three
3617 a
3618 b
3619 c
3620 d
3621 four
3622 "},
3623 patch: indoc! {"
3624 @@ -4,5 +4,5 @@
3625 two
3626 a
3627 b
3628 -c
3629 +C
3630 d
3631 three
3632 "},
3633 cursor_offset: None,
3634 expected_variable_edit: indoc! {"
3635 two
3636 a
3637 b
3638 <|fim_middle|>
3639 C
3640 <|fim_suffix|>
3641 d
3642 three
3643 "},
3644 expected_after_apply: indoc! {"
3645 one
3646 a
3647 b
3648 c
3649 d
3650 two
3651 a
3652 b
3653 C
3654 d
3655 three
3656 a
3657 b
3658 c
3659 d
3660 four
3661 "},
3662 },
3663 Case {
3664 name: "expands_context_when_two_lines_not_unique_before_and_after",
3665 old: indoc! {"
3666 {
3667 {
3668 one();
3669 }
3670 }
3671 {
3672 {
3673 two();
3674 }
3675 }
3676 {
3677 {
3678 three();
3679 }
3680 }
3681 {
3682 {
3683 four();
3684 }
3685 }
3686 "},
3687 patch: indoc! {"
3688 @@ -4,5 +4,5 @@
3689 {
3690 - two();
3691 + TWO();
3692 }
3693 "},
3694 cursor_offset: None,
3695 expected_variable_edit: indoc! {"
3696 one();
3697 }
3698 }
3699 {
3700 {
3701 <|fim_middle|>
3702 TWO();
3703 <|fim_suffix|>
3704 }
3705 }
3706 {
3707 {
3708 three();
3709 "},
3710 expected_after_apply: indoc! {"
3711 {
3712 {
3713 one();
3714 }
3715 }
3716 {
3717 {
3718 TWO();
3719 }
3720 }
3721 {
3722 {
3723 three();
3724 }
3725 }
3726 {
3727 {
3728 four();
3729 }
3730 }
3731 "},
3732 },
3733 ];
3734
3735 for case in cases {
3736 let output =
3737 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3738 .unwrap_or_else(|error| {
3739 panic!("failed converting patch for {}: {error}", case.name)
3740 });
3741 assert_eq!(
3742 output, case.expected_variable_edit,
3743 "patch->variable_edit mismatch for {}",
3744 case.name
3745 );
3746
3747 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3748 .unwrap_or_else(|error| {
3749 panic!("failed applying variable_edit for {}: {error}", case.name)
3750 });
3751 let mut edited_by_variable_edit = case.old.to_string();
3752 edited_by_variable_edit.replace_range(edit_range, &replacement);
3753 assert_eq!(
3754 edited_by_variable_edit, case.expected_after_apply,
3755 "variable_edit apply mismatch for {}",
3756 case.name
3757 );
3758
3759 let (expected_edit_range, expected_replacement) =
3760 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3761 |error| {
3762 panic!(
3763 "failed applying expected variable_edit for {}: {error}",
3764 case.name
3765 )
3766 },
3767 );
3768 let mut edited_by_expected_variable_edit = case.old.to_string();
3769 edited_by_expected_variable_edit
3770 .replace_range(expected_edit_range, &expected_replacement);
3771 assert_eq!(
3772 edited_by_expected_variable_edit, case.expected_after_apply,
3773 "expected variable_edit apply mismatch for {}",
3774 case.name
3775 );
3776 }
3777 }
3778
3779 #[test]
3780 fn test_write_cursor_excerpt_section() {
3781 let path = Path::new("test.rs");
3782 let context = "fn main() {\n hello();\n}\n";
3783 let cursor_offset = 17;
3784 let mut prompt = String::new();
3785 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3786 assert_eq!(
3787 prompt,
3788 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3789 );
3790 }
3791 }
3792}
3793
3794/// The zeta1 prompt format
3795pub mod zeta1 {
3796 use super::*;
3797 use std::fmt::Write;
3798
    /// Cursor marker of the zeta1 format. Inside this module it takes
    /// precedence over the crate-level `CURSOR_MARKER` (`<|user_cursor|>`) that
    /// `use super::*` would otherwise bring into scope.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    // NOTE(review): the three markers below are not referenced in the part of
    // this module visible here; presumably they delimit the file start and the
    // editable region inside the rendered excerpt — confirm against the
    // excerpt formatter.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3803
3804 const INSTRUCTION_HEADER: &str = concat!(
3805 "### Instruction:\n",
3806 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3807 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3808 "into account the cursor location.\n\n",
3809 "### User Edits:\n\n"
3810 );
3811 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3812 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3813
3814 /// Formats a complete zeta1 prompt from the input events and excerpt.
3815 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3816 let mut prompt = String::with_capacity(
3817 INSTRUCTION_HEADER.len()
3818 + input_events.len()
3819 + EXCERPT_HEADER.len()
3820 + input_excerpt.len()
3821 + RESPONSE_HEADER.len(),
3822 );
3823 prompt.push_str(INSTRUCTION_HEADER);
3824 prompt.push_str(input_events);
3825 prompt.push_str(EXCERPT_HEADER);
3826 prompt.push_str(input_excerpt);
3827 prompt.push_str(RESPONSE_HEADER);
3828 prompt
3829 }
3830
3831 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3832 /// editable and context byte-offset ranges within `cursor_excerpt`.
3833 pub fn format_zeta1_from_input(
3834 input: &ZetaPromptInput,
3835 editable_range: Range<usize>,
3836 context_range: Range<usize>,
3837 ) -> String {
3838 let events = format_zeta1_events(&input.events);
3839 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3840 format_zeta1_prompt(&events, &excerpt)
3841 }
3842
3843 /// Formats events in zeta1 style (oldest first).
3844 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3845 let mut result = String::new();
3846 for event in
3847 events
3848 .iter()
3849 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3850 &ZetaFormat::V0114180EditableRegion,
3851 )))
3852 {
3853 let event_string = format_zeta1_event(event);
3854 if event_string.is_empty() {
3855 continue;
3856 }
3857 if !result.is_empty() {
3858 result.push_str("\n\n");
3859 }
3860 result.push_str(&event_string);
3861 }
3862 result
3863 }
3864
3865 fn format_zeta1_event(event: &Event) -> String {
3866 match event {
3867 Event::BufferChange {
3868 path,
3869 old_path,
3870 diff,
3871 ..
3872 } => {
3873 let mut prompt = String::new();
3874 if old_path != path {
3875 writeln!(
3876 prompt,
3877 "User renamed {} to {}\n",
3878 old_path.display(),
3879 path.display()
3880 )
3881 .ok();
3882 }
3883 if !diff.is_empty() {
3884 write!(
3885 prompt,
3886 "User edited {}:\n```diff\n{}\n```",
3887 path.display(),
3888 diff
3889 )
3890 .ok();
3891 }
3892 prompt
3893 }
3894 }
3895 }
3896
3897 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3898 /// within `cursor_excerpt`.
3899 fn format_zeta1_excerpt(
3900 input: &ZetaPromptInput,
3901 editable_range: Range<usize>,
3902 context_range: Range<usize>,
3903 ) -> String {
3904 let path_str = input.cursor_path.to_string_lossy();
3905 let excerpt = &*input.cursor_excerpt;
3906 let cursor_offset = input.cursor_offset_in_excerpt;
3907
3908 let mut prompt = String::new();
3909 writeln!(&mut prompt, "```{path_str}").ok();
3910
3911 let starts_at_file_beginning =
3912 input.excerpt_start_row == Some(0) && context_range.start == 0;
3913 if starts_at_file_beginning {
3914 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3915 }
3916
3917 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3918
3919 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3920 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3921 prompt.push_str(CURSOR_MARKER);
3922 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3923 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3924
3925 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3926 write!(prompt, "\n```").ok();
3927
3928 prompt
3929 }
3930
3931 /// Cleans zeta1 model output by extracting content between editable region
3932 /// markers and converting the zeta1 cursor marker to the universal one.
3933 /// Returns `None` if the output doesn't contain the expected markers.
3934 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3935 let content = output.replace(CURSOR_MARKER, "");
3936
3937 let content_start = content
3938 .find(EDITABLE_REGION_START_MARKER)
3939 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3940 .map(|pos| {
3941 if content.as_bytes().get(pos) == Some(&b'\n') {
3942 pos + 1
3943 } else {
3944 pos
3945 }
3946 })
3947 .unwrap_or(0);
3948
3949 let content_end = content
3950 .find(EDITABLE_REGION_END_MARKER)
3951 .map(|pos| {
3952 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3953 pos - 1
3954 } else {
3955 pos
3956 }
3957 })
3958 .unwrap_or(content.len());
3959
3960 if content_start > content_end {
3961 return Some(String::new());
3962 }
3963
3964 let extracted = &content[content_start..content_end];
3965
3966 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3967 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3968 let text_before_cursor = text_before_cursor
3969 .find(EDITABLE_REGION_START_MARKER)
3970 .map(|pos| {
3971 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3972 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3973 after_marker + 1
3974 } else {
3975 after_marker
3976 }
3977 })
3978 .unwrap_or(0);
3979 let offset_in_extracted = zeta1_cursor_pos
3980 .saturating_sub(text_before_cursor)
3981 .min(extracted.len());
3982 offset_in_extracted
3983 });
3984
3985 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3986 if let Some(offset) = cursor_offset {
3987 result.push_str(&extracted[..offset]);
3988 result.push_str(super::CURSOR_MARKER);
3989 result.push_str(&extracted[offset..]);
3990 } else {
3991 result.push_str(extracted);
3992 }
3993
3994 Some(result)
3995 }
3996}
3997
3998#[cfg(test)]
3999mod tests {
4000 use super::*;
4001 use indoc::indoc;
4002
4003 fn make_input(
4004 cursor_excerpt: &str,
4005 editable_range: Range<usize>,
4006 cursor_offset: usize,
4007 events: Vec<Event>,
4008 related_files: Vec<RelatedFile>,
4009 ) -> ZetaPromptInput {
4010 let context_range = 0..cursor_excerpt.len();
4011 ZetaPromptInput {
4012 cursor_path: Path::new("test.rs").into(),
4013 cursor_excerpt: cursor_excerpt.into(),
4014 cursor_offset_in_excerpt: cursor_offset,
4015 excerpt_start_row: None,
4016 events: events.into_iter().map(Arc::new).collect(),
4017 related_files: Some(related_files),
4018 active_buffer_diagnostics: vec![],
4019 excerpt_ranges: ExcerptRanges {
4020 editable_150: editable_range.clone(),
4021 editable_180: editable_range.clone(),
4022 editable_350: editable_range,
4023 editable_150_context_350: context_range.clone(),
4024 editable_180_context_350: context_range.clone(),
4025 editable_350_context_150: context_range,
4026 ..Default::default()
4027 },
4028 syntax_ranges: None,
4029 experiment: None,
4030 in_open_source_repo: false,
4031 can_collect_data: false,
4032 repo_url: None,
4033 }
4034 }
4035
4036 fn make_input_with_context_range(
4037 excerpt: &str,
4038 editable_range: Range<usize>,
4039 context_range: Range<usize>,
4040 cursor_offset: usize,
4041 ) -> ZetaPromptInput {
4042 ZetaPromptInput {
4043 cursor_path: Path::new("test.rs").into(),
4044 cursor_excerpt: excerpt.into(),
4045 cursor_offset_in_excerpt: cursor_offset,
4046 excerpt_start_row: None,
4047 events: vec![],
4048 related_files: Some(vec![]),
4049 active_buffer_diagnostics: vec![],
4050 excerpt_ranges: ExcerptRanges {
4051 editable_150: editable_range.clone(),
4052 editable_180: editable_range.clone(),
4053 editable_350: editable_range,
4054 editable_150_context_350: context_range.clone(),
4055 editable_180_context_350: context_range.clone(),
4056 editable_350_context_150: context_range,
4057 ..Default::default()
4058 },
4059 syntax_ranges: None,
4060 experiment: None,
4061 in_open_source_repo: false,
4062 can_collect_data: false,
4063 repo_url: None,
4064 }
4065 }
4066
4067 fn make_event(path: &str, diff: &str) -> Event {
4068 Event::BufferChange {
4069 path: Path::new(path).into(),
4070 old_path: Path::new(path).into(),
4071 diff: diff.to_string(),
4072 predicted: false,
4073 in_open_source_repo: false,
4074 }
4075 }
4076
4077 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4078 RelatedFile {
4079 path: Path::new(path).into(),
4080 max_row: content.lines().count() as u32,
4081 excerpts: vec![RelatedExcerpt {
4082 row_range: 0..content.lines().count() as u32,
4083 text: content.into(),
4084 order: 0,
4085 }],
4086 in_open_source_repo: false,
4087 }
4088 }
4089
4090 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4091 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4092 }
4093
 /// With a 10000-token budget nothing is dropped: the expected prompt contains
 /// the related file, the edit history, and the cursor excerpt split into
 /// prefix, current editable region (with the cursor marker) and suffix.
 #[test]
 fn test_no_truncation_when_within_budget() {
 let input = make_input(
 "prefix\neditable\nsuffix",
 7..15,
 10,
 vec![make_event("a.rs", "-old\n+new\n")],
 vec![make_related_file("related.rs", "fn helper() {}\n")],
 );

 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>related.rs
 fn helper() {}
 <|file_sep|>edit history
 --- a/a.rs
 +++ b/a.rs
 -old
 +new
 <|file_sep|>test.rs
 <|fim_prefix|>
 prefix
 <|fim_middle|>current
 edi<|user_cursor|>table
 <|fim_suffix|>

 suffix
 <|fim_middle|>updated
 "#}
 );
 }
4126
 /// Formats the same input twice: with a 10000-token budget the edit history
 /// section is present, while with a budget of 50 the expected prompt keeps
 /// both related files and the cursor excerpt but has no edit history.
 #[test]
 fn test_truncation_drops_edit_history_when_budget_tight() {
 let input = make_input(
 "code",
 0..4,
 2,
 vec![make_event("a.rs", "-x\n+y\n")],
 vec![
 make_related_file("r1.rs", "a\n"),
 make_related_file("r2.rs", "b\n"),
 ],
 );

 // Large budget: everything is included.
 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>r1.rs
 a
 <|file_sep|>r2.rs
 b
 <|file_sep|>edit history
 --- a/a.rs
 +++ b/a.rs
 -x
 +y
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 co<|user_cursor|>de
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );

 // Tight budget: the edit history section is gone.
 assert_eq!(
 format_with_budget(&input, 50),
 indoc! {r#"
 <|file_sep|>r1.rs
 a
 <|file_sep|>r2.rs
 b
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 co<|user_cursor|>de
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4177
 /// A related file with three order-0 excerpts is rendered in full with a
 /// large budget (excerpts separated by `...`), and with a budget of 50 only
 /// the first excerpt followed by `...` is expected.
 #[test]
 fn test_truncation_includes_partial_excerpts() {
 let input = make_input(
 "x",
 0..1,
 0,
 vec![],
 vec![RelatedFile {
 path: Path::new("big.rs").into(),
 max_row: 30,
 in_open_source_repo: false,
 excerpts: vec![
 RelatedExcerpt {
 row_range: 0..10,
 text: "first excerpt\n".into(),
 order: 0,
 },
 RelatedExcerpt {
 row_range: 10..20,
 text: "second excerpt\n".into(),
 order: 0,
 },
 RelatedExcerpt {
 row_range: 20..30,
 text: "third excerpt\n".into(),
 order: 0,
 },
 ],
 }],
 );

 // Large budget: all three excerpts are present.
 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>big.rs
 first excerpt
 ...
 second excerpt
 ...
 third excerpt
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );

 // Tight budget: only the first excerpt remains.
 assert_eq!(
 format_with_budget(&input, 50),
 indoc! {r#"
 <|file_sep|>big.rs
 first excerpt
 ...
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4242
 /// Excerpts with a lower `order` value win when the budget cannot fit every
 /// related file: with budget 52 only `file_b.rs` (order 1) is expected,
 /// while `file_a.rs` (order 5) is omitted.
 #[test]
 fn test_truncation_prioritizes_lower_order_excerpts() {
 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
 // With tight budget, only the lower-order excerpt from file_b should be included.
 let input = make_input(
 "x",
 0..1,
 0,
 vec![],
 vec![
 RelatedFile {
 path: Path::new("file_a.rs").into(),
 max_row: 10,
 in_open_source_repo: false,
 excerpts: vec![RelatedExcerpt {
 row_range: 0..10,
 text: "low priority content\n".into(),
 order: 5,
 }],
 },
 RelatedFile {
 path: Path::new("file_b.rs").into(),
 max_row: 10,
 in_open_source_repo: false,
 excerpts: vec![RelatedExcerpt {
 row_range: 0..10,
 text: "high priority content\n".into(),
 order: 1,
 }],
 },
 ],
 );

 // With large budget, both files included; rendered in stable lexicographic order.
 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>file_a.rs
 low priority content
 <|file_sep|>file_b.rs
 high priority content
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );

 // With tight budget, only file_b (lower order) fits.
 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
 // file_a would need another 14 tokens, which doesn't fit.
 assert_eq!(
 format_with_budget(&input, 52),
 indoc! {r#"
 <|file_sep|>file_b.rs
 high priority content
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4311
 /// Order-based truncation also applies inside a single related file: with
 /// budget 55 the two order-1 excerpts are expected and the order-3 excerpt
 /// is omitted.
 #[test]
 fn test_truncation_drops_high_order_excerpts_within_file() {
 // A single file has excerpts at order 1 and order 3. With a tight budget,
 // only the order-1 excerpts are included while the order-3 excerpt is
 // dropped — even though they belong to the same file. This also preserves
 // the parent invariant: parent outline items have order ≤ their best
 // child, so they're always included when any child is.
 let input = make_input(
 "x",
 0..1,
 0,
 vec![],
 vec![RelatedFile {
 path: Path::new("mod.rs").into(),
 max_row: 30,
 in_open_source_repo: false,
 excerpts: vec![
 RelatedExcerpt {
 row_range: 0..5,
 text: "mod header\n".into(),
 order: 1,
 },
 RelatedExcerpt {
 row_range: 5..15,
 text: "important fn\n".into(),
 order: 1,
 },
 RelatedExcerpt {
 row_range: 15..30,
 text: "less important fn\n".into(),
 order: 3,
 },
 ],
 }],
 );

 // With large budget, all three excerpts included.
 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>mod.rs
 mod header
 ...
 important fn
 ...
 less important fn
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );

 // With tight budget, only order<=1 excerpts included (header + important fn).
 assert_eq!(
 format_with_budget(&input, 55),
 indoc! {r#"
 <|file_sep|>mod.rs
 mod header
 ...
 important fn
 ...
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4385
 /// With two edit events and a budget of 55, only the later event (`new.rs`)
 /// is expected in the edit history; with a large budget both appear in
 /// their original order.
 #[test]
 fn test_truncation_drops_older_events_first() {
 let input = make_input(
 "x",
 0..1,
 0,
 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
 vec![],
 );

 // Large budget: both events are listed.
 assert_eq!(
 format_with_budget(&input, 10000),
 indoc! {r#"
 <|file_sep|>edit history
 --- a/old.rs
 +++ b/old.rs
 -1
 --- a/new.rs
 +++ b/new.rs
 -2
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );

 // Tight budget: the `old.rs` event is dropped.
 assert_eq!(
 format_with_budget(&input, 55),
 indoc! {r#"
 <|file_sep|>edit history
 --- a/new.rs
 +++ b/new.rs
 -2
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 <|user_cursor|>x
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4431
 /// With a budget of 30 the expected prompt contains only the cursor excerpt
 /// section; neither the related file nor the edit history appears.
 #[test]
 fn test_cursor_excerpt_always_included_with_minimal_budget() {
 let input = make_input(
 "fn main() {}",
 0..12,
 3,
 vec![make_event("a.rs", "-old\n+new\n")],
 vec![make_related_file("related.rs", "helper\n")],
 );

 assert_eq!(
 format_with_budget(&input, 30),
 indoc! {r#"
 <|file_sep|>test.rs
 <|fim_prefix|>
 <|fim_middle|>current
 fn <|user_cursor|>main() {}
 <|fim_suffix|>
 <|fim_middle|>updated
 "#}
 );
 }
4454
4455 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4456 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4457 }
4458
4459 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4460 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4461 }
4462
 /// Pins the SeedCoder layout: the suffix section comes first, then the
 /// prefix section with the related file, the edit history and the cursor
 /// file, where the editable region follows a `<<<<<<< CURRENT` line and the
 /// prompt ends with `=======` and `<[fim-middle]>`.
 #[test]
 fn test_seed_coder_basic_format() {
 let input = make_input(
 "prefix\neditable\nsuffix",
 7..15,
 10,
 vec![make_event("a.rs", "-old\n+new\n")],
 vec![make_related_file("related.rs", "fn helper() {}\n")],
 );

 assert_eq!(
 format_seed_coder(&input),
 indoc! {r#"
 <[fim-suffix]>
 suffix
 <[fim-prefix]><filename>related.rs
 fn helper() {}

 <filename>edit_history
 --- a/a.rs
 +++ b/a.rs
 -old
 +new

 <filename>test.rs
 prefix
 <<<<<<< CURRENT
 edi<|user_cursor|>table
 =======
 <[fim-middle]>"#}
 );
 }
4495
 /// With no events and no related files, the expected SeedCoder prompt
 /// contains only the suffix section and the cursor file.
 #[test]
 fn test_seed_coder_no_context() {
 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

 assert_eq!(
 format_seed_coder(&input),
 indoc! {r#"
 <[fim-suffix]>
 after
 <[fim-prefix]><filename>test.rs
 before
 <<<<<<< CURRENT
 mid<|user_cursor|>dle
 =======
 <[fim-middle]>"#}
 );
 }
4513
 /// With a budget of 30 the SeedCoder prompt is expected to drop both the
 /// related file and the edit history while keeping the cursor section.
 #[test]
 fn test_seed_coder_truncation_drops_context() {
 let input = make_input(
 "code",
 0..4,
 2,
 vec![make_event("a.rs", "-x\n+y\n")],
 vec![make_related_file("r1.rs", "content\n")],
 );

 // With large budget, everything is included
 assert_eq!(
 format_seed_coder(&input),
 indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>r1.rs
 content

 <filename>edit_history
 --- a/a.rs
 +++ b/a.rs
 -x
 +y

 <filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
 );

 // With tight budget, context is dropped but cursor section remains
 assert_eq!(
 format_seed_coder_with_budget(&input, 30),
 indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
 );
 }
4557
 /// In the SeedCoder format, a related file whose excerpt has the lower
 /// `order` value is kept under a tight budget: with budget 44 only
 /// `high_prio.rs` (order 1) is expected, not `low_prio.rs` (order 10).
 #[test]
 fn test_seed_coder_truncation_prioritizes_lower_order() {
 let input = make_input(
 "code",
 0..4,
 2,
 vec![],
 vec![
 RelatedFile {
 path: Path::new("low_prio.rs").into(),
 max_row: 5,
 in_open_source_repo: false,
 excerpts: vec![RelatedExcerpt {
 row_range: 0..5,
 text: "low prio\n".into(),
 order: 10,
 }],
 },
 RelatedFile {
 path: Path::new("high_prio.rs").into(),
 max_row: 5,
 in_open_source_repo: false,
 excerpts: vec![RelatedExcerpt {
 row_range: 0..5,
 text: "high prio\n".into(),
 order: 1,
 }],
 },
 ],
 );

 // With large budget, both included; rendered in stable lexicographic order.
 assert_eq!(
 format_seed_coder(&input),
 indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>low_prio.rs
 low prio
 <filename>high_prio.rs
 high prio

 <filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
 );

 // With tight budget, only high_prio included.
 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
 assert_eq!(
 format_seed_coder_with_budget(&input, 44),
 indoc! {r#"
 <[fim-suffix]>
 <[fim-prefix]><filename>high_prio.rs
 high prio

 <filename>test.rs
 <<<<<<< CURRENT
 co<|user_cursor|>de
 =======
 <[fim-middle]>"#}
 );
 }
4623
/// Pins the full zeta1 prompt for an excerpt that starts at the top of the
/// file: instruction header, one formatted edit event, and the excerpt with
/// the start-of-file, editable-region and cursor markers.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The body line is indented with four spaces; the byte offsets below
    // (cursor at 30, editable region 15..41) depend on that width.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 15..41,
            editable_180: 15..41,
            editable_350: 15..41,
            editable_150_context_350: 0..excerpt.len(),
            editable_180_context_350: 0..excerpt.len(),
            editable_350_context_150: 0..excerpt.len(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4688
/// When `excerpt_start_row` is not 0, the zeta1 prompt must not contain the
/// start-of-file marker, and with no events the "User Edits" section is empty.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // Four-space indentation makes the excerpt exactly 28 bytes long, which
    // the `0..28` ranges and the cursor offset of 15 rely on.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4745
/// Uses an editable range that is a strict sub-range of the context range and
/// checks that the text before and after it is rendered outside the
/// editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Four-space indentation is required for the offsets below: the editable
    // range 10..37 covers `fn foo() {` through the closing `}`, and the
    // cursor offset 25 sits right before `let`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4808
/// Exercises the event-count limit of `format_edit_history_within_budget`
/// with an unlimited token budget: a limit of 5 keeps all three events, a
/// limit of 2 keeps the last two, and a limit of 0 yields an empty string.
#[test]
fn test_max_event_count() {
    /// Builds an event whose path and diff both embed `index`.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }
    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Limit above the number of events: everything is kept.
    let edit_history_section = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        5,
    );

    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Limit of two: only the two most recent events remain.
    let edit_history_section = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        2,
    );

    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
            "
        )
    );

    // Limit of zero: the whole section is omitted.
    let edit_history_section = format_edit_history_within_budget(
        &input.events,
        "<|file_sep|>",
        "edit history",
        usize::MAX,
        0,
    );

    assert_eq!(&edit_history_section, "");
}
4889
/// The text between the editable-region markers is returned without the
/// markers and without the newlines that directly adjoin them.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line keeps its four-space indentation relative to
    // `fn main`, matching the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
4903
/// The zeta1 cursor marker inside the editable region is replaced by the
/// crate-level `<|user_cursor|>` marker at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line keeps its four-space indentation relative to
    // `fn main`, matching the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
4920
 /// Output without any editable-region markers is returned unchanged.
 #[test]
 fn test_clean_zeta1_model_output_no_markers() {
 let output = "fn main() {}\n";
 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
 assert_eq!(cleaned, "fn main() {}\n");
 }
4927
 /// A start marker followed directly by the end marker (separated only by a
 /// newline) cleans to an empty string.
 #[test]
 fn test_clean_zeta1_model_output_empty_region() {
 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
 assert_eq!(cleaned, "");
 }
4934
4935 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4936 let mut result = excerpt.to_string();
4937 result.replace_range(
4938 parsed_output.range_in_excerpt.clone(),
4939 &parsed_output.new_editable_region,
4940 );
4941 result
4942 }
4943
 /// Parses a `V0131GitMergeMarkersPrefix` model output and applies it to the
 /// excerpt: only the editable line is expected to change, while the text
 /// around it stays as it was.
 #[test]
 fn test_parse_zeta2_model_output() {
 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
 let context_start = excerpt.find("ctx start").unwrap();
 let context_end = excerpt.find("after ctx").unwrap();
 let editable_start = excerpt.find("editable old").unwrap();
 let editable_end = editable_start + "editable old\n".len();
 let input = make_input_with_context_range(
 excerpt,
 editable_start..editable_end,
 context_start..context_end,
 editable_start,
 );

 let output = parse_zeta2_model_output(
 "editable new\n>>>>>>> UPDATED\n",
 ZetaFormat::V0131GitMergeMarkersPrefix,
 &input,
 )
 .unwrap();

 assert_eq!(
 apply_edit(excerpt, &output),
 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
 );
 }
4970
 /// When the model output repeats the current editable text, applying the
 /// parsed edit must reproduce the original excerpt.
 #[test]
 fn test_parse_zeta2_model_output_identity() {
 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
 let editable_start = excerpt.find("bbb").unwrap();
 let editable_end = excerpt.find("ddd").unwrap();
 let input = make_input_with_context_range(
 excerpt,
 editable_start..editable_end,
 0..excerpt.len(),
 editable_start,
 );

 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
 let output =
 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();

 assert_eq!(apply_edit(excerpt, &output), excerpt);
 }
4989
 /// Model output with and without the trailing `>>>>>>> UPDATED` line must
 /// produce the same edited excerpt.
 #[test]
 fn test_parse_zeta2_model_output_strips_end_marker() {
 let excerpt = "hello\nworld\n";
 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);

 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
 let output1 =
 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();

 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
 }
5003}