1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker written into the prompt at the position of the user's cursor.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt builder.
pub const MAX_PROMPT_TOKENS: usize = 4_096;

/// Fraction of the editable region that may be used for prefill.
///
/// Larger values may result in more robust generation, but the prefilled
/// part of the region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.10; // 10%
23
/// Rough token estimate for `bytes` bytes of text: one token per three
/// bytes, rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
28#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
29pub struct ZetaPromptInput {
30 pub cursor_path: Arc<Path>,
31 pub cursor_excerpt: Arc<str>,
32 pub cursor_offset_in_excerpt: usize,
33 #[serde(default, skip_serializing_if = "Option::is_none")]
34 pub excerpt_start_row: Option<u32>,
35 pub events: Vec<Arc<Event>>,
36 #[serde(default)]
37 pub related_files: Option<Vec<RelatedFile>>,
38 #[serde(default, skip_serializing_if = "Vec::is_empty")]
39 pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
40 /// These ranges let the server select model-appropriate subsets.
41 pub excerpt_ranges: ExcerptRanges,
42 /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
43 /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
44 /// When present, the server uses these to compute editable/context ranges
45 /// instead of `excerpt_ranges`.
46 #[serde(default, skip_serializing_if = "Option::is_none")]
47 pub syntax_ranges: Option<Vec<Range<usize>>>,
48 /// The name of the edit prediction model experiment to use.
49 #[serde(default, skip_serializing_if = "Option::is_none")]
50 pub experiment: Option<String>,
51 #[serde(default)]
52 pub in_open_source_repo: bool,
53 #[serde(default)]
54 pub can_collect_data: bool,
55 #[serde(default, skip_serializing_if = "Option::is_none")]
56 pub repo_url: Option<String>,
57}
58
59#[derive(
60 Default,
61 Clone,
62 Copy,
63 Debug,
64 PartialEq,
65 Eq,
66 Hash,
67 EnumIter,
68 IntoStaticStr,
69 Serialize,
70 Deserialize,
71)]
72#[allow(non_camel_case_types)]
73pub enum ZetaFormat {
74 V0112MiddleAtEnd,
75 V0113Ordered,
76 V0114180EditableRegion,
77 V0120GitMergeMarkers,
78 #[default]
79 V0131GitMergeMarkersPrefix,
80 V0211Prefill,
81 V0211SeedCoder,
82 v0226Hashline,
83 V0304VariableEdit,
84 V0304SeedNoEdits,
85 V0306SeedMultiRegions,
86}
87
88impl std::fmt::Display for ZetaFormat {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(f, "{}", <&'static str>::from(self))
91 }
92}
93
94impl ZetaFormat {
95 pub fn parse(format_name: &str) -> Result<Self> {
96 let mut results = ZetaFormat::iter().filter(|version| {
97 <&'static str>::from(version)
98 .to_lowercase()
99 .contains(&format_name.to_lowercase())
100 });
101 let Some(result) = results.next() else {
102 anyhow::bail!(
103 "`{format_name}` did not match any of:\n{}",
104 Self::options_as_string()
105 );
106 };
107 if results.next().is_some() {
108 anyhow::bail!(
109 "`{format_name}` matched more than one of:\n{}",
110 Self::options_as_string()
111 );
112 }
113 Ok(result)
114 }
115
116 pub fn options_as_string() -> String {
117 ZetaFormat::iter()
118 .map(|format| format!("- {}\n", <&'static str>::from(format)))
119 .collect::<Vec<_>>()
120 .concat()
121 }
122}
123
124#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
125#[serde(tag = "event")]
126pub enum Event {
127 BufferChange {
128 path: Arc<Path>,
129 old_path: Arc<Path>,
130 diff: String,
131 predicted: bool,
132 in_open_source_repo: bool,
133 },
134}
135
136impl Event {
137 pub fn in_open_source_repo(&self) -> bool {
138 match self {
139 Event::BufferChange {
140 in_open_source_repo,
141 ..
142 } => *in_open_source_repo,
143 }
144 }
145}
146
147pub fn write_event(prompt: &mut String, event: &Event) {
148 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
149 for component in path.components() {
150 prompt.push('/');
151 write!(prompt, "{}", component.as_os_str().display()).ok();
152 }
153 }
154 match event {
155 Event::BufferChange {
156 path,
157 old_path,
158 diff,
159 predicted,
160 in_open_source_repo: _,
161 } => {
162 if *predicted {
163 prompt.push_str("// User accepted prediction:\n");
164 }
165 prompt.push_str("--- a");
166 write_path_as_unix_str(prompt, old_path.as_ref());
167 prompt.push_str("\n+++ b");
168 write_path_as_unix_str(prompt, path.as_ref());
169 prompt.push('\n');
170 prompt.push_str(diff);
171 }
172 }
173}
174
175#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
176pub struct ActiveBufferDiagnostic {
177 pub severity: Option<i32>,
178 pub message: String,
179 pub snippet: String,
180 pub snippet_buffer_row_range: Range<u32>,
181 pub diagnostic_range_in_snippet: Range<usize>,
182}
183
184#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
185pub struct RelatedFile {
186 pub path: Arc<Path>,
187 pub max_row: u32,
188 pub excerpts: Vec<RelatedExcerpt>,
189 #[serde(default)]
190 pub in_open_source_repo: bool,
191}
192
193#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
194pub struct RelatedExcerpt {
195 pub row_range: Range<u32>,
196 pub text: Arc<str>,
197 #[serde(default)]
198 pub order: usize,
199}
200
201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
202 special_tokens_for_format(format)
203 .iter()
204 .any(|token| input.cursor_excerpt.contains(token))
205}
206
207pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
208 format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
209}
210
211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
212 match format {
213 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
214 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
215 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
216 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
217 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
218 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
219 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
220 ZetaFormat::v0226Hashline => hashline::special_tokens(),
221 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
222 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
223 ZetaFormat::V0306SeedMultiRegions => {
224 static TOKENS: &[&str] = &[
225 seed_coder::FIM_SUFFIX,
226 seed_coder::FIM_PREFIX,
227 seed_coder::FIM_MIDDLE,
228 seed_coder::FILE_MARKER,
229 seed_coder::START_MARKER,
230 seed_coder::SEPARATOR,
231 seed_coder::END_MARKER,
232 CURSOR_MARKER,
233 multi_region::MARKER_TAG_PREFIX,
234 ];
235 TOKENS
236 }
237 }
238}
239
240/// Returns the (editable_token_limit, context_token_limit) for a given format.
241pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
242 match format {
243 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
244 ZetaFormat::V0114180EditableRegion => (180, 350),
245 ZetaFormat::V0120GitMergeMarkers
246 | ZetaFormat::V0131GitMergeMarkersPrefix
247 | ZetaFormat::V0211Prefill
248 | ZetaFormat::V0211SeedCoder
249 | ZetaFormat::v0226Hashline
250 | ZetaFormat::V0306SeedMultiRegions
251 | ZetaFormat::V0304SeedNoEdits => (350, 150),
252 ZetaFormat::V0304VariableEdit => (1024, 0),
253 }
254}
255
256pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
257 match format {
258 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
259 ZetaFormat::V0112MiddleAtEnd
260 | ZetaFormat::V0113Ordered
261 | ZetaFormat::V0114180EditableRegion
262 | ZetaFormat::V0120GitMergeMarkers
263 | ZetaFormat::V0131GitMergeMarkersPrefix
264 | ZetaFormat::V0211Prefill
265 | ZetaFormat::V0211SeedCoder
266 | ZetaFormat::V0304VariableEdit
267 | ZetaFormat::V0306SeedMultiRegions
268 | ZetaFormat::V0304SeedNoEdits => &[],
269 }
270}
271
272pub fn excerpt_ranges_for_format(
273 format: ZetaFormat,
274 ranges: &ExcerptRanges,
275) -> (Range<usize>, Range<usize>) {
276 match format {
277 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
278 ranges.editable_150.clone(),
279 ranges.editable_150_context_350.clone(),
280 ),
281 ZetaFormat::V0114180EditableRegion => (
282 ranges.editable_180.clone(),
283 ranges.editable_180_context_350.clone(),
284 ),
285 ZetaFormat::V0120GitMergeMarkers
286 | ZetaFormat::V0131GitMergeMarkersPrefix
287 | ZetaFormat::V0211Prefill
288 | ZetaFormat::V0211SeedCoder
289 | ZetaFormat::v0226Hashline
290 | ZetaFormat::V0304SeedNoEdits
291 | ZetaFormat::V0306SeedMultiRegions => (
292 ranges.editable_350.clone(),
293 ranges.editable_350_context_150.clone(),
294 ),
295 ZetaFormat::V0304VariableEdit => {
296 let context = ranges
297 .editable_350_context_1024
298 .clone()
299 .or(ranges.editable_350_context_512.clone())
300 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
301 (context.clone(), context)
302 }
303 }
304}
305
306pub fn write_cursor_excerpt_section_for_format(
307 format: ZetaFormat,
308 prompt: &mut String,
309 path: &Path,
310 context: &str,
311 editable_range: &Range<usize>,
312 cursor_offset: usize,
313) {
314 match format {
315 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
316 prompt,
317 path,
318 context,
319 editable_range,
320 cursor_offset,
321 ),
322 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
323 v0113_ordered::write_cursor_excerpt_section(
324 prompt,
325 path,
326 context,
327 editable_range,
328 cursor_offset,
329 )
330 }
331 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
332 prompt,
333 path,
334 context,
335 editable_range,
336 cursor_offset,
337 ),
338 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
339 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
340 prompt,
341 path,
342 context,
343 editable_range,
344 cursor_offset,
345 )
346 }
347 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
348 seed_coder::write_cursor_excerpt_section(
349 prompt,
350 path,
351 context,
352 editable_range,
353 cursor_offset,
354 )
355 }
356 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
357 prompt,
358 path,
359 context,
360 editable_range,
361 cursor_offset,
362 ),
363 ZetaFormat::V0304VariableEdit => {
364 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
365 }
366 ZetaFormat::V0306SeedMultiRegions => {
367 prompt.push_str(&build_v0306_cursor_prefix(
368 path,
369 context,
370 editable_range,
371 cursor_offset,
372 ));
373 }
374 }
375}
376
377fn build_v0306_cursor_prefix(
378 path: &Path,
379 context: &str,
380 editable_range: &Range<usize>,
381 cursor_offset: usize,
382) -> String {
383 let mut section = String::new();
384 let path_str = path.to_string_lossy();
385 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
386
387 section.push_str(&context[..editable_range.start]);
388 section.push_str(seed_coder::START_MARKER);
389
390 let editable_text = &context[editable_range.clone()];
391 let cursor_in_editable = cursor_offset - editable_range.start;
392 multi_region::write_editable_with_markers(
393 &mut section,
394 editable_text,
395 cursor_in_editable,
396 CURSOR_MARKER,
397 );
398
399 if !section.ends_with('\n') {
400 section.push('\n');
401 }
402 section.push_str(seed_coder::SEPARATOR);
403 section
404}
405
/// Converts a byte range within `text` into a range of rows.
///
/// The start row is the number of newlines before `range.start`. The end row
/// is the start row plus the number of newlines inside the range, plus one
/// when the text up to `range.end` does not end with a newline.
///
/// # Panics
///
/// Panics if `range` is not a valid slice range for `text`.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let count_newlines = |slice: &str| slice.matches('\n').count() as u32;

    let start_row = count_newlines(&text[..range.start]);
    let rows_spanned = count_newlines(&text[range.start..range.end]);
    // A final line without a trailing newline still counts as a row.
    let partial_last_row = u32::from(!text[..range.end].ends_with('\n'));

    start_row..start_row + rows_spanned + partial_last_row
}
414
415pub fn format_prompt_with_budget_for_format(
416 input: &ZetaPromptInput,
417 format: ZetaFormat,
418 max_tokens: usize,
419) -> Option<String> {
420 let (context, editable_range, context_range, cursor_offset) =
421 resolve_cursor_region(input, format);
422 let path = &*input.cursor_path;
423
424 let empty_files = Vec::new();
425 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
426 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
427 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
428 let row_range = relative_row_range.start + cursor_excerpt_start_row
429 ..relative_row_range.end + cursor_excerpt_start_row;
430 &filter_redundant_excerpts(
431 input_related_files.to_vec(),
432 input.cursor_path.as_ref(),
433 row_range,
434 )
435 } else {
436 input_related_files
437 };
438
439 let prompt = match format {
440 ZetaFormat::V0211SeedCoder
441 | ZetaFormat::V0304SeedNoEdits
442 | ZetaFormat::V0306SeedMultiRegions => {
443 let mut cursor_section = String::new();
444 write_cursor_excerpt_section_for_format(
445 format,
446 &mut cursor_section,
447 path,
448 context,
449 &editable_range,
450 cursor_offset,
451 );
452
453 seed_coder::assemble_fim_prompt(
454 context,
455 &editable_range,
456 &cursor_section,
457 &input.events,
458 related_files,
459 max_tokens,
460 )
461 }
462 _ => {
463 let mut cursor_section = String::new();
464 write_cursor_excerpt_section_for_format(
465 format,
466 &mut cursor_section,
467 path,
468 context,
469 &editable_range,
470 cursor_offset,
471 );
472
473 let cursor_tokens = estimate_tokens(cursor_section.len());
474 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
475
476 let edit_history_section = format_edit_history_within_budget(
477 &input.events,
478 "<|file_sep|>",
479 "edit history",
480 budget_after_cursor,
481 max_edit_event_count_for_format(&format),
482 );
483 let edit_history_tokens = estimate_tokens(edit_history_section.len());
484 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
485
486 let related_files_section = format_related_files_within_budget(
487 &related_files,
488 "<|file_sep|>",
489 "",
490 budget_after_edit_history,
491 );
492
493 let mut prompt = String::new();
494 prompt.push_str(&related_files_section);
495 prompt.push_str(&edit_history_section);
496 prompt.push_str(&cursor_section);
497 prompt
498 }
499 };
500 let prompt_tokens = estimate_tokens(prompt.len());
501 if prompt_tokens > max_tokens {
502 return None;
503 }
504 return Some(prompt);
505}
506
507pub fn filter_redundant_excerpts(
508 mut related_files: Vec<RelatedFile>,
509 cursor_path: &Path,
510 cursor_row_range: Range<u32>,
511) -> Vec<RelatedFile> {
512 for file in &mut related_files {
513 if file.path.as_ref() == cursor_path {
514 file.excerpts.retain(|excerpt| {
515 excerpt.row_range.start < cursor_row_range.start
516 || excerpt.row_range.end > cursor_row_range.end
517 });
518 }
519 }
520 related_files.retain(|file| !file.excerpts.is_empty());
521 related_files
522}
523
524pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
525 match format {
526 ZetaFormat::V0112MiddleAtEnd
527 | ZetaFormat::V0113Ordered
528 | ZetaFormat::V0114180EditableRegion
529 | ZetaFormat::V0120GitMergeMarkers
530 | ZetaFormat::V0131GitMergeMarkersPrefix
531 | ZetaFormat::V0211Prefill
532 | ZetaFormat::V0211SeedCoder
533 | ZetaFormat::v0226Hashline
534 | ZetaFormat::V0304SeedNoEdits
535 | ZetaFormat::V0304VariableEdit
536 | ZetaFormat::V0306SeedMultiRegions => 6,
537 }
538}
539
540pub fn get_prefill_for_format(
541 format: ZetaFormat,
542 context: &str,
543 editable_range: &Range<usize>,
544) -> String {
545 match format {
546 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
547 ZetaFormat::V0112MiddleAtEnd
548 | ZetaFormat::V0113Ordered
549 | ZetaFormat::V0114180EditableRegion
550 | ZetaFormat::V0120GitMergeMarkers
551 | ZetaFormat::V0131GitMergeMarkersPrefix
552 | ZetaFormat::V0211SeedCoder
553 | ZetaFormat::v0226Hashline
554 | ZetaFormat::V0304VariableEdit => String::new(),
555 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
556 }
557}
558
559pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
560 match format {
561 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
562 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
563 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
564 ZetaFormat::V0211SeedCoder
565 | ZetaFormat::V0304SeedNoEdits
566 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
567 ZetaFormat::V0112MiddleAtEnd
568 | ZetaFormat::V0113Ordered
569 | ZetaFormat::V0114180EditableRegion
570 | ZetaFormat::v0226Hashline
571 | ZetaFormat::V0304VariableEdit => None,
572 }
573}
574
575pub fn encode_patch_as_output_for_format(
576 format: ZetaFormat,
577 old_editable_region: &str,
578 patch: &str,
579 cursor_offset: Option<usize>,
580) -> Result<Option<String>> {
581 match format {
582 ZetaFormat::v0226Hashline => {
583 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
584 }
585 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
586 old_editable_region,
587 patch,
588 cursor_offset,
589 )
590 .map(Some),
591 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
592 Ok(seed_coder::no_edits(patch))
593 }
594 _ => Ok(None),
595 }
596}
597
/// Result of parsing model output: replacement text and the range of the
/// cursor excerpt that it replaces.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
604
605/// Parse model output for the given zeta format
606pub fn parse_zeta2_model_output(
607 output: &str,
608 format: ZetaFormat,
609 prompt_inputs: &ZetaPromptInput,
610) -> Result<ParsedOutput> {
611 let output = match output_end_marker_for_format(format) {
612 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
613 None => output,
614 };
615
616 let (context, editable_range_in_context, context_range, _) =
617 resolve_cursor_region(prompt_inputs, format);
618 let context_start = context_range.start;
619 let old_editable_region = &context[editable_range_in_context.clone()];
620
621 let (range_in_context, output) = match format {
622 ZetaFormat::v0226Hashline => (
623 editable_range_in_context,
624 if hashline::output_has_edit_commands(output) {
625 hashline::apply_edit_commands(old_editable_region, output)
626 } else {
627 output.to_string()
628 },
629 ),
630 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
631 ZetaFormat::V0304SeedNoEdits => (
632 editable_range_in_context,
633 if output.starts_with(seed_coder::NO_EDITS) {
634 old_editable_region.to_string()
635 } else {
636 output.to_string()
637 },
638 ),
639 ZetaFormat::V0306SeedMultiRegions => (
640 editable_range_in_context,
641 if output.starts_with(seed_coder::NO_EDITS) {
642 old_editable_region.to_string()
643 } else {
644 multi_region::apply_marker_span(old_editable_region, output)?
645 },
646 ),
647 _ => (editable_range_in_context, output.to_string()),
648 };
649
650 let range_in_excerpt =
651 range_in_context.start + context_start..range_in_context.end + context_start;
652
653 Ok(ParsedOutput {
654 new_editable_region: output,
655 range_in_excerpt,
656 })
657}
658
659pub fn excerpt_range_for_format(
660 format: ZetaFormat,
661 ranges: &ExcerptRanges,
662) -> (Range<usize>, Range<usize>) {
663 excerpt_ranges_for_format(format, ranges)
664}
665
666pub fn resolve_cursor_region(
667 input: &ZetaPromptInput,
668 format: ZetaFormat,
669) -> (&str, Range<usize>, Range<usize>, usize) {
670 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
671 let (editable_tokens, context_tokens) = token_limits_for_format(format);
672 compute_editable_and_context_ranges(
673 &input.cursor_excerpt,
674 input.cursor_offset_in_excerpt,
675 syntax_ranges,
676 editable_tokens,
677 context_tokens,
678 )
679 } else {
680 excerpt_range_for_format(format, &input.excerpt_ranges)
681 };
682 let context_start = context_range.start;
683 let context_text = &input.cursor_excerpt[context_range.clone()];
684 let adjusted_editable =
685 (editable_range.start - context_start)..(editable_range.end - context_start);
686 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
687
688 (
689 context_text,
690 adjusted_editable,
691 context_range,
692 adjusted_cursor,
693 )
694}
695
696pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
697 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
698 get_prefill_for_format(format, context, &editable_range)
699}
700
701fn format_edit_history_within_budget(
702 events: &[Arc<Event>],
703 file_marker: &str,
704 edit_history_name: &str,
705 max_tokens: usize,
706 max_edit_event_count: usize,
707) -> String {
708 let header = format!("{}{}\n", file_marker, edit_history_name);
709 let header_tokens = estimate_tokens(header.len());
710 if header_tokens >= max_tokens {
711 return String::new();
712 }
713
714 let mut event_strings: Vec<String> = Vec::new();
715 let mut total_tokens = header_tokens;
716
717 for event in events.iter().rev().take(max_edit_event_count) {
718 let mut event_str = String::new();
719 write_event(&mut event_str, event);
720 let event_tokens = estimate_tokens(event_str.len());
721
722 if total_tokens + event_tokens > max_tokens {
723 break;
724 }
725 total_tokens += event_tokens;
726 event_strings.push(event_str);
727 }
728
729 if event_strings.is_empty() {
730 return String::new();
731 }
732
733 let mut result = header;
734 for event_str in event_strings.iter().rev() {
735 result.push_str(event_str);
736 }
737 result
738}
739
740fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
741 let needs_newline = !excerpt.text.ends_with('\n');
742 let needs_ellipsis = excerpt.row_range.end < file_max_row;
743 let len = excerpt.text.len()
744 + if needs_newline { "\n".len() } else { 0 }
745 + if needs_ellipsis { "...\n".len() } else { 0 };
746 estimate_tokens(len)
747}
748
749pub fn format_related_files_within_budget(
750 related_files: &[RelatedFile],
751 file_prefix: &str,
752 file_suffix: &str,
753 max_tokens: usize,
754) -> String {
755 struct ExcerptCandidate {
756 file_ix: usize,
757 excerpt_ix: usize,
758 order: usize,
759 }
760
761 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
762 .iter()
763 .enumerate()
764 .flat_map(|(file_ix, file)| {
765 file.excerpts
766 .iter()
767 .enumerate()
768 .map(move |(excerpt_ix, e)| ExcerptCandidate {
769 file_ix,
770 excerpt_ix,
771 order: e.order,
772 })
773 })
774 .collect();
775
776 // Pre-compute file header strings and their token costs.
777 let file_headers: Vec<String> = related_files
778 .iter()
779 .map(|file| {
780 let path_str = file.path.to_string_lossy();
781 format!("{}{}\n", file_prefix, path_str)
782 })
783 .collect();
784
785 // Sort the excerpts by their order and determine how many fit within the budget.
786 let mut total_tokens = 0;
787 let mut included_excerpt_count = 0_usize;
788 let mut included_file_indices = vec![false; related_files.len()];
789 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
790 for candidate in &excerpt_candidates {
791 let file = &related_files[candidate.file_ix];
792 let excerpt = &file.excerpts[candidate.excerpt_ix];
793 let file_already_included = included_file_indices[candidate.file_ix];
794 let header_cost = if file_already_included {
795 0
796 } else {
797 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
798 };
799 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
800 if total_tokens + header_cost + excerpt_cost > max_tokens {
801 break;
802 }
803 total_tokens += header_cost + excerpt_cost;
804 if !file_already_included {
805 included_file_indices[candidate.file_ix] = true;
806 }
807 included_excerpt_count += 1;
808 }
809
810 excerpt_candidates.truncate(included_excerpt_count);
811 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
812
813 // Render all of the files that fit within the token budget, in the original order.
814 let mut result = String::new();
815 let mut last_file_ix = None;
816 for candidate in &excerpt_candidates {
817 if last_file_ix != Some(candidate.file_ix) {
818 if last_file_ix.is_some() {
819 result.push_str(file_suffix);
820 }
821 result.push_str(&file_headers[candidate.file_ix]);
822 last_file_ix = Some(candidate.file_ix);
823 }
824 let file = &related_files[candidate.file_ix];
825 let excerpt = &file.excerpts[candidate.excerpt_ix];
826 result.push_str(&excerpt.text);
827 if !result.ends_with('\n') {
828 result.push('\n');
829 }
830 if excerpt.row_range.end < file.max_row {
831 result.push_str("...\n");
832 }
833 }
834
835 result
836}
837
838pub fn write_related_files(
839 prompt: &mut String,
840 related_files: &[RelatedFile],
841) -> Vec<Range<usize>> {
842 let mut ranges = Vec::new();
843 for file in related_files {
844 let start = prompt.len();
845 let path_str = file.path.to_string_lossy();
846 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
847 for excerpt in &file.excerpts {
848 prompt.push_str(&excerpt.text);
849 if !prompt.ends_with('\n') {
850 prompt.push('\n');
851 }
852 if excerpt.row_range.end < file.max_row {
853 prompt.push_str("...\n");
854 }
855 }
856 let end = prompt.len();
857 ranges.push(start..end);
858 }
859 ranges
860}
861
862mod v0112_middle_at_end {
863 use super::*;
864
865 pub fn special_tokens() -> &'static [&'static str] {
866 &[
867 "<|fim_prefix|>",
868 "<|fim_suffix|>",
869 "<|fim_middle|>",
870 "<|file_sep|>",
871 CURSOR_MARKER,
872 ]
873 }
874
875 pub fn write_cursor_excerpt_section(
876 prompt: &mut String,
877 path: &Path,
878 context: &str,
879 editable_range: &Range<usize>,
880 cursor_offset: usize,
881 ) {
882 let path_str = path.to_string_lossy();
883 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
884
885 prompt.push_str("<|fim_prefix|>\n");
886 prompt.push_str(&context[..editable_range.start]);
887
888 prompt.push_str("<|fim_suffix|>\n");
889 prompt.push_str(&context[editable_range.end..]);
890 if !prompt.ends_with('\n') {
891 prompt.push('\n');
892 }
893
894 prompt.push_str("<|fim_middle|>current\n");
895 prompt.push_str(&context[editable_range.start..cursor_offset]);
896 prompt.push_str(CURSOR_MARKER);
897 prompt.push_str(&context[cursor_offset..editable_range.end]);
898 if !prompt.ends_with('\n') {
899 prompt.push('\n');
900 }
901
902 prompt.push_str("<|fim_middle|>updated\n");
903 }
904}
905
906mod v0113_ordered {
907 use super::*;
908
909 pub fn special_tokens() -> &'static [&'static str] {
910 &[
911 "<|fim_prefix|>",
912 "<|fim_suffix|>",
913 "<|fim_middle|>",
914 "<|file_sep|>",
915 CURSOR_MARKER,
916 ]
917 }
918
919 pub fn write_cursor_excerpt_section(
920 prompt: &mut String,
921 path: &Path,
922 context: &str,
923 editable_range: &Range<usize>,
924 cursor_offset: usize,
925 ) {
926 let path_str = path.to_string_lossy();
927 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
928
929 prompt.push_str("<|fim_prefix|>\n");
930 prompt.push_str(&context[..editable_range.start]);
931 if !prompt.ends_with('\n') {
932 prompt.push('\n');
933 }
934
935 prompt.push_str("<|fim_middle|>current\n");
936 prompt.push_str(&context[editable_range.start..cursor_offset]);
937 prompt.push_str(CURSOR_MARKER);
938 prompt.push_str(&context[cursor_offset..editable_range.end]);
939 if !prompt.ends_with('\n') {
940 prompt.push('\n');
941 }
942
943 prompt.push_str("<|fim_suffix|>\n");
944 prompt.push_str(&context[editable_range.end..]);
945 if !prompt.ends_with('\n') {
946 prompt.push('\n');
947 }
948
949 prompt.push_str("<|fim_middle|>updated\n");
950 }
951}
952
953mod v0114180_editable_region {
954 use super::*;
955
956 pub fn special_tokens() -> &'static [&'static str] {
957 v0113_ordered::special_tokens()
958 }
959}
960
961pub mod v0120_git_merge_markers {
962 //! A prompt that uses git-style merge conflict markers to represent the editable region.
963 //!
964 //! Example prompt:
965 //!
966 //! <|file_sep|>path/to/target_file.py
967 //! <|fim_prefix|>
968 //! code before editable region
969 //! <|fim_suffix|>
970 //! code after editable region
971 //! <|fim_middle|>
972 //! <<<<<<< CURRENT
973 //! code that
974 //! needs to<|user_cursor|>
975 //! be rewritten
976 //! =======
977 //!
978 //! Expected output (should be generated by the model):
979 //!
980 //! updated
981 //! code with
982 //! changes applied
983 //! >>>>>>> UPDATED
984
985 use super::*;
986
987 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
988 pub const SEPARATOR: &str = "=======\n";
989 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
990
991 pub fn special_tokens() -> &'static [&'static str] {
992 &[
993 "<|fim_prefix|>",
994 "<|fim_suffix|>",
995 "<|fim_middle|>",
996 "<|file_sep|>",
997 START_MARKER,
998 SEPARATOR,
999 END_MARKER,
1000 CURSOR_MARKER,
1001 ]
1002 }
1003
1004 pub fn write_cursor_excerpt_section(
1005 prompt: &mut String,
1006 path: &Path,
1007 context: &str,
1008 editable_range: &Range<usize>,
1009 cursor_offset: usize,
1010 ) {
1011 let path_str = path.to_string_lossy();
1012 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1013
1014 prompt.push_str("<|fim_prefix|>");
1015 prompt.push_str(&context[..editable_range.start]);
1016
1017 prompt.push_str("<|fim_suffix|>");
1018 prompt.push_str(&context[editable_range.end..]);
1019 if !prompt.ends_with('\n') {
1020 prompt.push('\n');
1021 }
1022
1023 prompt.push_str("<|fim_middle|>");
1024 prompt.push_str(START_MARKER);
1025 prompt.push_str(&context[editable_range.start..cursor_offset]);
1026 prompt.push_str(CURSOR_MARKER);
1027 prompt.push_str(&context[cursor_offset..editable_range.end]);
1028 if !prompt.ends_with('\n') {
1029 prompt.push('\n');
1030 }
1031 prompt.push_str(SEPARATOR);
1032 }
1033}
1034
1035pub mod v0131_git_merge_markers_prefix {
1036 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1037 //!
1038 //! Example prompt:
1039 //!
1040 //! <|file_sep|>path/to/target_file.py
1041 //! <|fim_prefix|>
1042 //! code before editable region
1043 //! <<<<<<< CURRENT
1044 //! code that
1045 //! needs to<|user_cursor|>
1046 //! be rewritten
1047 //! =======
1048 //! <|fim_suffix|>
1049 //! code after editable region
1050 //! <|fim_middle|>
1051 //!
1052 //! Expected output (should be generated by the model):
1053 //!
1054 //! updated
1055 //! code with
1056 //! changes applied
1057 //! >>>>>>> UPDATED
1058
1059 use super::*;
1060
1061 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1062 pub const SEPARATOR: &str = "=======\n";
1063 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1064
1065 pub fn special_tokens() -> &'static [&'static str] {
1066 &[
1067 "<|fim_prefix|>",
1068 "<|fim_suffix|>",
1069 "<|fim_middle|>",
1070 "<|file_sep|>",
1071 START_MARKER,
1072 SEPARATOR,
1073 END_MARKER,
1074 CURSOR_MARKER,
1075 ]
1076 }
1077
1078 pub fn write_cursor_excerpt_section(
1079 prompt: &mut String,
1080 path: &Path,
1081 context: &str,
1082 editable_range: &Range<usize>,
1083 cursor_offset: usize,
1084 ) {
1085 let path_str = path.to_string_lossy();
1086 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1087
1088 prompt.push_str("<|fim_prefix|>");
1089 prompt.push_str(&context[..editable_range.start]);
1090 prompt.push_str(START_MARKER);
1091 prompt.push_str(&context[editable_range.start..cursor_offset]);
1092 prompt.push_str(CURSOR_MARKER);
1093 prompt.push_str(&context[cursor_offset..editable_range.end]);
1094 if !prompt.ends_with('\n') {
1095 prompt.push('\n');
1096 }
1097 prompt.push_str(SEPARATOR);
1098
1099 prompt.push_str("<|fim_suffix|>");
1100 prompt.push_str(&context[editable_range.end..]);
1101 if !prompt.ends_with('\n') {
1102 prompt.push('\n');
1103 }
1104
1105 prompt.push_str("<|fim_middle|>");
1106 }
1107}
1108
1109pub mod v0211_prefill {
1110 use super::*;
1111
1112 pub fn special_tokens() -> &'static [&'static str] {
1113 v0131_git_merge_markers_prefix::special_tokens()
1114 }
1115
1116 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1117 let editable_region = &context[editable_range.start..editable_range.end];
1118
1119 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1120 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1121
1122 // Find a token boundary to avoid splitting tokens in the prefill.
1123 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1124 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1125 // the \n and consume any consecutive \n characters after it.
1126 let prefill = &editable_region[..prefill_len];
1127 match prefill.rfind('\n') {
1128 Some(pos) => {
1129 let mut end = pos + 1;
1130 while end < editable_region.len()
1131 && editable_region.as_bytes().get(end) == Some(&b'\n')
1132 {
1133 end += 1;
1134 }
1135 editable_region[..end].to_string()
1136 }
1137 // No newline found. Fall back to splitting before the last space
1138 // (word-level boundary)
1139 None => match prefill.rfind(' ') {
1140 Some(pos) => prefill[..pos].to_string(),
1141 None => prefill.to_string(),
1142 },
1143 }
1144 }
1145}
1146
1147pub mod hashline {
1148
1149 use std::fmt::Display;
1150
    /// Written as the last line of the prompt by `write_cursor_excerpt_section`,
    /// after the suffix text.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written right before the hashline-encoded editable region by
    /// `write_cursor_excerpt_section`.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that starts a "replace lines" command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert lines" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Model output meaning "leave the editable region unchanged"; also what
    /// `patch_to_edit_commands` returns when a patch yields no commands.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1159
1160 pub fn special_tokens() -> &'static [&'static str] {
1161 return &[
1162 SET_COMMAND_MARKER,
1163 "<|set_range|>",
1164 INSERT_COMMAND_MARKER,
1165 NO_EDITS_COMMAND_MARKER,
1166 CURSOR_MARKER,
1167 "<|file_sep|>",
1168 "<|fim_prefix|>",
1169 "<|fim_suffix|>",
1170 "<|fim_middle|>",
1171 ];
1172 }
1173
1174 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1175 #[derive(Debug, Clone, PartialEq, Eq)]
1176 struct LineRef {
1177 index: usize,
1178 hash: u8,
1179 }
1180
1181 impl Display for LineRef {
1182 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1183 write!(f, "{}:{:02x}", self.index, self.hash)
1184 }
1185 }
1186
1187 pub fn hash_line(line: &[u8]) -> u8 {
1188 let mut h: u8 = 0;
1189 for &byte in line {
1190 h = h.wrapping_add(byte);
1191 }
1192 return h;
1193 }
1194
1195 /// Write the hashline-encoded editable region into `out`. Each line of
1196 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1197 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1198 /// to the start of `editable_text`).
1199 pub fn write_hashline_editable_region(
1200 out: &mut String,
1201 editable_text: &str,
1202 cursor_offset_in_editable: usize,
1203 ) {
1204 let mut offset = 0;
1205 for (i, line) in editable_text.lines().enumerate() {
1206 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1207 && cursor_offset_in_editable < offset + line.len()
1208 {
1209 (
1210 &line[..cursor_offset_in_editable - offset],
1211 CURSOR_MARKER,
1212 &line[cursor_offset_in_editable - offset..],
1213 )
1214 } else {
1215 (line, "", "")
1216 };
1217 write!(
1218 out,
1219 "\n{}|{head}{cursor}{tail}",
1220 LineRef {
1221 index: i,
1222 hash: hash_line(line.as_bytes())
1223 }
1224 )
1225 .unwrap();
1226 offset += line.len() + 1;
1227 }
1228 }
1229
1230 pub fn write_cursor_excerpt_section(
1231 prompt: &mut String,
1232 path: &Path,
1233 context: &str,
1234 editable_range: &Range<usize>,
1235 cursor_offset: usize,
1236 ) {
1237 let path_str = path.to_string_lossy();
1238 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1239
1240 prompt.push_str("<|fim_prefix|>\n");
1241 prompt.push_str(&context[..editable_range.start]);
1242 prompt.push_str(START_MARKER);
1243
1244 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1245 let editable_region = &context[editable_range.clone()];
1246 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1247
1248 if !prompt.ends_with('\n') {
1249 prompt.push('\n');
1250 }
1251
1252 prompt.push_str("<|fim_suffix|>\n");
1253 prompt.push_str(&context[editable_range.end..]);
1254 if !prompt.ends_with('\n') {
1255 prompt.push('\n');
1256 }
1257
1258 prompt.push_str(END_MARKER);
1259 prompt.push('\n');
1260 }
1261
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows the command's body directly from the model output:
    /// every line after the command line up to (not including) the next
    /// command line, line terminators included.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the range to replace.
            start: LineRef,
            // Last line of the range to replace (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` also results from an absent or
            // unparsable line reference on the command line.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1279
1280 /// Parse a line reference like `3:c3` into a `LineRef`.
1281 fn parse_line_ref(s: &str) -> Option<LineRef> {
1282 let (idx_str, hash_str) = s.split_once(':')?;
1283 let index = idx_str.parse::<usize>().ok()?;
1284 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1285 Some(LineRef { index, hash })
1286 }
1287
1288 /// Parse the model output into a list of `EditCommand`s.
1289 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1290 let mut commands = Vec::new();
1291 let mut offset = 0usize;
1292
1293 while offset < model_output.len() {
1294 let next_nl = model_output[offset..]
1295 .find('\n')
1296 .map(|i| offset + i)
1297 .unwrap_or(model_output.len());
1298 let line = &model_output[offset..next_nl];
1299 let line_end = if next_nl < model_output.len() {
1300 next_nl + 1
1301 } else {
1302 next_nl
1303 };
1304
1305 let trimmed = line.trim();
1306 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1307 (true, spec)
1308 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1309 (false, spec)
1310 } else {
1311 offset = line_end;
1312 continue;
1313 };
1314
1315 let mut content_end = line_end;
1316 let mut scan = line_end;
1317
1318 while scan < model_output.len() {
1319 let body_nl = model_output[scan..]
1320 .find('\n')
1321 .map(|i| scan + i)
1322 .unwrap_or(model_output.len());
1323 let body_line = &model_output[scan..body_nl];
1324 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1325 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1326 {
1327 break;
1328 }
1329 scan = if body_nl < model_output.len() {
1330 body_nl + 1
1331 } else {
1332 body_nl
1333 };
1334 content_end = scan;
1335 }
1336
1337 let content = &model_output[line_end..content_end];
1338
1339 if is_set {
1340 if let Some((start_str, end_str)) = specifier.split_once('-') {
1341 if let (Some(start), Some(end)) =
1342 (parse_line_ref(start_str), parse_line_ref(end_str))
1343 {
1344 commands.push(EditCommand::Set {
1345 start,
1346 end,
1347 content,
1348 });
1349 }
1350 } else if let Some(target) = parse_line_ref(specifier) {
1351 commands.push(EditCommand::Set {
1352 start: target.clone(),
1353 end: target,
1354 content,
1355 });
1356 }
1357 } else {
1358 let after = parse_line_ref(specifier);
1359 commands.push(EditCommand::Insert { after, content });
1360 }
1361
1362 offset = scan;
1363 }
1364
1365 commands
1366 }
1367
1368 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1369 /// (as opposed to being a plain full-replacement output).
1370 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1371 /// editable region, returning the plain text content.
1372 pub fn strip_hashline_prefixes(region: &str) -> String {
1373 let mut decoded: String = region
1374 .lines()
1375 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1376 .collect::<Vec<_>>()
1377 .join("\n");
1378 if region.ends_with('\n') {
1379 decoded.push('\n');
1380 }
1381 decoded
1382 }
1383
1384 pub fn output_has_edit_commands(model_output: &str) -> bool {
1385 model_output.contains(SET_COMMAND_MARKER)
1386 || model_output.contains(INSERT_COMMAND_MARKER)
1387 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1388 }
1389
1390 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1391 /// original editable region text.
1392 ///
1393 /// `editable_region` is the original text of the editable region (without hash
1394 /// prefixes). `model_output` is the raw model response containing edit commands.
1395 ///
1396 /// Returns the full replacement text for the editable region.
1397 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1398 if model_output
1399 .trim_start()
1400 .starts_with(NO_EDITS_COMMAND_MARKER)
1401 {
1402 return editable_region.to_string();
1403 }
1404
1405 let original_lines: Vec<&str> = editable_region.lines().collect();
1406 let old_hashes: Vec<u8> = original_lines
1407 .iter()
1408 .map(|line| hash_line(line.as_bytes()))
1409 .collect();
1410
1411 let commands = parse_edit_commands(model_output);
1412
1413 // For set operations: indexed by start line → Some((end line index, content))
1414 // For insert operations: indexed by line index → vec of content to insert after
1415 // Insert-before-first is tracked separately.
1416 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1417 let mut insert_before_first: Vec<&str> = Vec::new();
1418 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1419
1420 for command in &commands {
1421 match command {
1422 EditCommand::Set {
1423 start,
1424 end,
1425 content,
1426 } => {
1427 if start.index < old_hashes.len()
1428 && end.index < old_hashes.len()
1429 && start.index <= end.index
1430 && old_hashes[start.index] == start.hash
1431 && old_hashes[end.index] == end.hash
1432 {
1433 set_ops[start.index] = Some((end.index, *content));
1434 }
1435 }
1436 EditCommand::Insert { after, content } => match after {
1437 None => insert_before_first.push(*content),
1438 Some(line_ref) => {
1439 if line_ref.index < old_hashes.len()
1440 && old_hashes[line_ref.index] == line_ref.hash
1441 {
1442 insert_after[line_ref.index].push(*content);
1443 }
1444 }
1445 },
1446 }
1447 }
1448
1449 let mut result = String::new();
1450
1451 // Emit any insertions before the first line
1452 for content in &insert_before_first {
1453 result.push_str(content);
1454 if !content.ends_with('\n') {
1455 result.push('\n');
1456 }
1457 }
1458
1459 let mut i = 0;
1460 while i < original_lines.len() {
1461 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1462 // Replace lines i..=end_index with the replacement content
1463 result.push_str(replacement);
1464 if !replacement.is_empty() && !replacement.ends_with('\n') {
1465 result.push('\n');
1466 }
1467 // Emit any insertions after the end of this set range
1468 if *end_index < insert_after.len() {
1469 for content in &insert_after[*end_index] {
1470 result.push_str(content);
1471 if !content.ends_with('\n') {
1472 result.push('\n');
1473 }
1474 }
1475 }
1476 i = end_index + 1;
1477 } else {
1478 // Keep the original line
1479 result.push_str(original_lines[i]);
1480 result.push('\n');
1481 // Emit any insertions after this line
1482 for content in &insert_after[i] {
1483 result.push_str(content);
1484 if !content.ends_with('\n') {
1485 result.push('\n');
1486 }
1487 }
1488 i += 1;
1489 }
1490 }
1491
1492 // Preserve trailing newline behavior: if the original ended with a
1493 // newline the result already has one; if it didn't, trim the extra one
1494 // we added.
1495 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1496 result.pop();
1497 }
1498
1499 result
1500 }
1501
1502 /// Convert a unified diff patch into hashline edit commands.
1503 ///
1504 /// Parses the unified diff `patch` directly to determine which lines of
1505 /// `old_text` are deleted/replaced and what new lines are added, then emits
1506 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1507 /// `{index}:{hash}` identifiers.
1508 ///
1509 /// `cursor_offset` is an optional byte offset into the first hunk's new
1510 /// text (context + additions) where the cursor marker should be placed.
1511 pub fn patch_to_edit_commands(
1512 old_text: &str,
1513 patch: &str,
1514 cursor_offset: Option<usize>,
1515 ) -> Result<String> {
1516 let old_lines: Vec<&str> = old_text.lines().collect();
1517 let old_hashes: Vec<u8> = old_lines
1518 .iter()
1519 .map(|line| hash_line(line.as_bytes()))
1520 .collect();
1521
1522 let mut result = String::new();
1523 let mut first_hunk = true;
1524
1525 struct Hunk<'a> {
1526 line_range: Range<usize>,
1527 new_text_lines: Vec<&'a str>,
1528 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1529 }
1530
1531 // Parse the patch line by line. We only care about hunk headers,
1532 // context, deletions, and additions.
1533 let mut old_line_index: usize = 0;
1534 let mut current_hunk: Option<Hunk> = None;
1535 // Byte offset tracking within the hunk's new text for cursor placement.
1536 let mut new_text_byte_offset: usize = 0;
1537 // The line index of the last old line seen before/in the current hunk
1538 // (used for insert-after reference).
1539 let mut last_old_line_before_hunk: Option<usize> = None;
1540
1541 fn flush_hunk(
1542 hunk: Hunk,
1543 last_old_line: Option<usize>,
1544 result: &mut String,
1545 old_hashes: &[u8],
1546 ) {
1547 if hunk.line_range.is_empty() {
1548 // Pure insertion — reference the old line to insert after when in bounds.
1549 if let Some(after) = last_old_line
1550 && let Some(&hash) = old_hashes.get(after)
1551 {
1552 write!(
1553 result,
1554 "{INSERT_COMMAND_MARKER}{}\n",
1555 LineRef { index: after, hash }
1556 )
1557 .unwrap();
1558 } else {
1559 result.push_str(INSERT_COMMAND_MARKER);
1560 result.push('\n');
1561 }
1562 } else {
1563 let start = hunk.line_range.start;
1564 let end_exclusive = hunk.line_range.end;
1565 let deleted_line_count = end_exclusive.saturating_sub(start);
1566
1567 if deleted_line_count == 1 {
1568 if let Some(&hash) = old_hashes.get(start) {
1569 write!(
1570 result,
1571 "{SET_COMMAND_MARKER}{}\n",
1572 LineRef { index: start, hash }
1573 )
1574 .unwrap();
1575 } else {
1576 result.push_str(SET_COMMAND_MARKER);
1577 result.push('\n');
1578 }
1579 } else {
1580 let end_inclusive = end_exclusive - 1;
1581 match (
1582 old_hashes.get(start).copied(),
1583 old_hashes.get(end_inclusive).copied(),
1584 ) {
1585 (Some(start_hash), Some(end_hash)) => {
1586 write!(
1587 result,
1588 "{SET_COMMAND_MARKER}{}-{}\n",
1589 LineRef {
1590 index: start,
1591 hash: start_hash
1592 },
1593 LineRef {
1594 index: end_inclusive,
1595 hash: end_hash
1596 }
1597 )
1598 .unwrap();
1599 }
1600 _ => {
1601 result.push_str(SET_COMMAND_MARKER);
1602 result.push('\n');
1603 }
1604 }
1605 }
1606 }
1607 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1608 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1609 && line_offset == cursor_line_offset
1610 {
1611 result.push_str(&line[..char_offset]);
1612 result.push_str(CURSOR_MARKER);
1613 result.push_str(&line[char_offset..]);
1614 continue;
1615 }
1616
1617 result.push_str(line);
1618 }
1619 }
1620
1621 for raw_line in patch.split_inclusive('\n') {
1622 if raw_line.starts_with("@@") {
1623 // Flush any pending change hunk from a previous patch hunk.
1624 if let Some(hunk) = current_hunk.take() {
1625 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1626 }
1627
1628 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1629 // We intentionally do not trust old_start as a direct local index into `old_text`,
1630 // because some patches are produced against a larger file region and carry
1631 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1632 if first_hunk {
1633 new_text_byte_offset = 0;
1634 first_hunk = false;
1635 }
1636 continue;
1637 }
1638
1639 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1640 continue;
1641 }
1642 if raw_line.starts_with("\\ No newline") {
1643 continue;
1644 }
1645
1646 if raw_line.starts_with('-') {
1647 // Extend or start a change hunk with this deleted old line.
1648 match &mut current_hunk {
1649 Some(Hunk {
1650 line_range: range, ..
1651 }) => range.end = old_line_index + 1,
1652 None => {
1653 current_hunk = Some(Hunk {
1654 line_range: old_line_index..old_line_index + 1,
1655 new_text_lines: Vec::new(),
1656 cursor_line_offset_in_new_text: None,
1657 });
1658 }
1659 }
1660 old_line_index += 1;
1661 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1662 // Place cursor marker if cursor_offset falls within this line.
1663 let mut cursor_line_offset = None;
1664 if let Some(cursor_off) = cursor_offset
1665 && (first_hunk
1666 || cursor_off >= new_text_byte_offset
1667 && cursor_off <= new_text_byte_offset + added_content.len())
1668 {
1669 let line_offset = added_content.floor_char_boundary(
1670 cursor_off
1671 .saturating_sub(new_text_byte_offset)
1672 .min(added_content.len()),
1673 );
1674 cursor_line_offset = Some(line_offset);
1675 }
1676
1677 new_text_byte_offset += added_content.len();
1678
1679 let hunk = current_hunk.get_or_insert(Hunk {
1680 line_range: old_line_index..old_line_index,
1681 new_text_lines: vec![],
1682 cursor_line_offset_in_new_text: None,
1683 });
1684 hunk.new_text_lines.push(added_content);
1685 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1686 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1687 } else {
1688 // Context line (starts with ' ' or is empty).
1689 if let Some(hunk) = current_hunk.take() {
1690 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1691 }
1692 last_old_line_before_hunk = Some(old_line_index);
1693 old_line_index += 1;
1694 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1695 new_text_byte_offset += content.len();
1696 }
1697 }
1698
1699 // Flush final group.
1700 if let Some(hunk) = current_hunk.take() {
1701 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1702 }
1703
1704 // Trim a single trailing newline.
1705 if result.ends_with('\n') {
1706 result.pop();
1707 }
1708
1709 if result.is_empty() {
1710 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1711 }
1712
1713 Ok(result)
1714 }
1715
1716 #[cfg(test)]
1717 mod tests {
1718 use super::*;
1719 use indoc::indoc;
1720
1721 #[test]
1722 fn test_format_cursor_region() {
1723 struct Case {
1724 name: &'static str,
1725 context: &'static str,
1726 editable_range: Range<usize>,
1727 cursor_offset: usize,
1728 expected: &'static str,
1729 }
1730
1731 let cases = [
1732 Case {
1733 name: "basic_cursor_placement",
1734 context: "hello world\n",
1735 editable_range: 0..12,
1736 cursor_offset: 5,
1737 expected: indoc! {"
1738 <|file_sep|>test.rs
1739 <|fim_prefix|>
1740 <|fim_middle|>current
1741 0:5c|hello<|user_cursor|> world
1742 <|fim_suffix|>
1743 <|fim_middle|>updated
1744 "},
1745 },
1746 Case {
1747 name: "multiline_cursor_on_second_line",
1748 context: "aaa\nbbb\nccc\n",
1749 editable_range: 0..12,
1750 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1751 expected: indoc! {"
1752 <|file_sep|>test.rs
1753 <|fim_prefix|>
1754 <|fim_middle|>current
1755 0:23|aaa
1756 1:26|b<|user_cursor|>bb
1757 2:29|ccc
1758 <|fim_suffix|>
1759 <|fim_middle|>updated
1760 "},
1761 },
1762 Case {
1763 name: "no_trailing_newline_in_context",
1764 context: "line1\nline2",
1765 editable_range: 0..11,
1766 cursor_offset: 3,
1767 expected: indoc! {"
1768 <|file_sep|>test.rs
1769 <|fim_prefix|>
1770 <|fim_middle|>current
1771 0:d9|lin<|user_cursor|>e1
1772 1:da|line2
1773 <|fim_suffix|>
1774 <|fim_middle|>updated
1775 "},
1776 },
1777 Case {
1778 name: "leading_newline_in_editable_region",
1779 context: "\nabc\n",
1780 editable_range: 0..5,
1781 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1782 expected: indoc! {"
1783 <|file_sep|>test.rs
1784 <|fim_prefix|>
1785 <|fim_middle|>current
1786 0:00|
1787 1:26|a<|user_cursor|>bc
1788 <|fim_suffix|>
1789 <|fim_middle|>updated
1790 "},
1791 },
1792 Case {
1793 name: "with_suffix",
1794 context: "abc\ndef",
1795 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1796 cursor_offset: 2,
1797 expected: indoc! {"
1798 <|file_sep|>test.rs
1799 <|fim_prefix|>
1800 <|fim_middle|>current
1801 0:26|ab<|user_cursor|>c
1802 <|fim_suffix|>
1803 def
1804 <|fim_middle|>updated
1805 "},
1806 },
1807 Case {
1808 name: "unicode_two_byte_chars",
1809 context: "héllo\n",
1810 editable_range: 0..7,
1811 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1812 expected: indoc! {"
1813 <|file_sep|>test.rs
1814 <|fim_prefix|>
1815 <|fim_middle|>current
1816 0:1b|hé<|user_cursor|>llo
1817 <|fim_suffix|>
1818 <|fim_middle|>updated
1819 "},
1820 },
1821 Case {
1822 name: "unicode_three_byte_chars",
1823 context: "日本語\n",
1824 editable_range: 0..10,
1825 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1826 expected: indoc! {"
1827 <|file_sep|>test.rs
1828 <|fim_prefix|>
1829 <|fim_middle|>current
1830 0:80|日本<|user_cursor|>語
1831 <|fim_suffix|>
1832 <|fim_middle|>updated
1833 "},
1834 },
1835 Case {
1836 name: "unicode_four_byte_chars",
1837 context: "a🌍b\n",
1838 editable_range: 0..7,
1839 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1840 expected: indoc! {"
1841 <|file_sep|>test.rs
1842 <|fim_prefix|>
1843 <|fim_middle|>current
1844 0:6b|a🌍<|user_cursor|>b
1845 <|fim_suffix|>
1846 <|fim_middle|>updated
1847 "},
1848 },
1849 Case {
1850 name: "cursor_at_start_of_region_not_placed",
1851 context: "abc\n",
1852 editable_range: 0..4,
1853 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1854 expected: indoc! {"
1855 <|file_sep|>test.rs
1856 <|fim_prefix|>
1857 <|fim_middle|>current
1858 0:26|abc
1859 <|fim_suffix|>
1860 <|fim_middle|>updated
1861 "},
1862 },
1863 Case {
1864 name: "cursor_at_end_of_line_not_placed",
1865 context: "abc\ndef\n",
1866 editable_range: 0..8,
1867 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1868 expected: indoc! {"
1869 <|file_sep|>test.rs
1870 <|fim_prefix|>
1871 <|fim_middle|>current
1872 0:26|abc
1873 1:2f|def
1874 <|fim_suffix|>
1875 <|fim_middle|>updated
1876 "},
1877 },
1878 Case {
1879 name: "cursor_offset_relative_to_context_not_editable_region",
1880 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1881 // write_cursor_excerpt_section must subtract it before comparing against
1882 // per-line offsets within the editable region.
1883 context: "pre\naaa\nbbb\nsuf\n",
1884 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1885 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1886 expected: indoc! {"
1887 <|file_sep|>test.rs
1888 <|fim_prefix|>
1889 pre
1890 <|fim_middle|>current
1891 0:23|aaa
1892 1:26|b<|user_cursor|>bb
1893 <|fim_suffix|>
1894 suf
1895 <|fim_middle|>updated
1896 "},
1897 },
1898 ];
1899
1900 for case in &cases {
1901 let mut prompt = String::new();
1902 hashline::write_cursor_excerpt_section(
1903 &mut prompt,
1904 Path::new("test.rs"),
1905 case.context,
1906 &case.editable_range,
1907 case.cursor_offset,
1908 );
1909 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1910 }
1911 }
1912
1913 #[test]
1914 fn test_apply_edit_commands() {
1915 struct Case {
1916 name: &'static str,
1917 original: &'static str,
1918 model_output: &'static str,
1919 expected: &'static str,
1920 }
1921
1922 let cases = vec![
1923 Case {
1924 name: "set_single_line",
1925 original: indoc! {"
1926 let mut total = 0;
1927 for product in products {
1928 total += ;
1929 }
1930 total
1931 "},
1932 model_output: indoc! {"
1933 <|set|>2:87
1934 total += product.price;
1935 "},
1936 expected: indoc! {"
1937 let mut total = 0;
1938 for product in products {
1939 total += product.price;
1940 }
1941 total
1942 "},
1943 },
1944 Case {
1945 name: "set_range",
1946 original: indoc! {"
1947 fn foo() {
1948 let x = 1;
1949 let y = 2;
1950 let z = 3;
1951 }
1952 "},
1953 model_output: indoc! {"
1954 <|set|>1:46-3:4a
1955 let sum = 6;
1956 "},
1957 expected: indoc! {"
1958 fn foo() {
1959 let sum = 6;
1960 }
1961 "},
1962 },
1963 Case {
1964 name: "insert_after_line",
1965 original: indoc! {"
1966 fn main() {
1967 let x = 1;
1968 }
1969 "},
1970 model_output: indoc! {"
1971 <|insert|>1:46
1972 let y = 2;
1973 "},
1974 expected: indoc! {"
1975 fn main() {
1976 let x = 1;
1977 let y = 2;
1978 }
1979 "},
1980 },
1981 Case {
1982 name: "insert_before_first",
1983 original: indoc! {"
1984 let x = 1;
1985 let y = 2;
1986 "},
1987 model_output: indoc! {"
1988 <|insert|>
1989 use std::io;
1990 "},
1991 expected: indoc! {"
1992 use std::io;
1993 let x = 1;
1994 let y = 2;
1995 "},
1996 },
1997 Case {
1998 name: "set_with_cursor_marker",
1999 original: indoc! {"
2000 fn main() {
2001 println!();
2002 }
2003 "},
2004 model_output: indoc! {"
2005 <|set|>1:34
2006 eprintln!(\"<|user_cursor|>\");
2007 "},
2008 expected: indoc! {"
2009 fn main() {
2010 eprintln!(\"<|user_cursor|>\");
2011 }
2012 "},
2013 },
2014 Case {
2015 name: "multiple_set_commands",
2016 original: indoc! {"
2017 aaa
2018 bbb
2019 ccc
2020 ddd
2021 "},
2022 model_output: indoc! {"
2023 <|set|>0:23
2024 AAA
2025 <|set|>2:29
2026 CCC
2027 "},
2028 expected: indoc! {"
2029 AAA
2030 bbb
2031 CCC
2032 ddd
2033 "},
2034 },
2035 Case {
2036 name: "set_range_multiline_replacement",
2037 original: indoc! {"
2038 fn handle_submit() {
2039 }
2040
2041 fn handle_keystroke() {
2042 "},
2043 model_output: indoc! {"
2044 <|set|>0:3f-1:7d
2045 fn handle_submit(modal_state: &mut ModalState) {
2046 <|user_cursor|>
2047 }
2048 "},
2049 expected: indoc! {"
2050 fn handle_submit(modal_state: &mut ModalState) {
2051 <|user_cursor|>
2052 }
2053
2054 fn handle_keystroke() {
2055 "},
2056 },
2057 Case {
2058 name: "no_edit_commands_returns_original",
2059 original: indoc! {"
2060 hello
2061 world
2062 "},
2063 model_output: "some random text with no commands",
2064 expected: indoc! {"
2065 hello
2066 world
2067 "},
2068 },
2069 Case {
2070 name: "no_edits_command_returns_original",
2071 original: indoc! {"
2072 hello
2073 world
2074 "},
2075 model_output: "<|no_edits|>",
2076 expected: indoc! {"
2077 hello
2078 world
2079 "},
2080 },
2081 Case {
2082 name: "wrong_hash_set_ignored",
2083 original: indoc! {"
2084 aaa
2085 bbb
2086 "},
2087 model_output: indoc! {"
2088 <|set|>0:ff
2089 ZZZ
2090 "},
2091 expected: indoc! {"
2092 aaa
2093 bbb
2094 "},
2095 },
2096 Case {
2097 name: "insert_and_set_combined",
2098 original: indoc! {"
2099 alpha
2100 beta
2101 gamma
2102 "},
2103 model_output: indoc! {"
2104 <|set|>0:06
2105 ALPHA
2106 <|insert|>1:9c
2107 beta_extra
2108 "},
2109 expected: indoc! {"
2110 ALPHA
2111 beta
2112 beta_extra
2113 gamma
2114 "},
2115 },
2116 Case {
2117 name: "no_trailing_newline_preserved",
2118 original: "hello\nworld",
2119 model_output: indoc! {"
2120 <|set|>0:14
2121 HELLO
2122 "},
2123 expected: "HELLO\nworld",
2124 },
2125 Case {
2126 name: "set_range_hash_mismatch_in_end_bound",
2127 original: indoc! {"
2128 one
2129 two
2130 three
2131 "},
2132 model_output: indoc! {"
2133 <|set|>0:42-2:ff
2134 ONE_TWO_THREE
2135 "},
2136 expected: indoc! {"
2137 one
2138 two
2139 three
2140 "},
2141 },
2142 Case {
2143 name: "set_range_start_greater_than_end_ignored",
2144 original: indoc! {"
2145 a
2146 b
2147 c
2148 "},
2149 model_output: indoc! {"
2150 <|set|>2:63-1:62
2151 X
2152 "},
2153 expected: indoc! {"
2154 a
2155 b
2156 c
2157 "},
2158 },
2159 Case {
2160 name: "insert_out_of_bounds_ignored",
2161 original: indoc! {"
2162 x
2163 y
2164 "},
2165 model_output: indoc! {"
2166 <|insert|>99:aa
2167 z
2168 "},
2169 expected: indoc! {"
2170 x
2171 y
2172 "},
2173 },
2174 Case {
2175 name: "set_out_of_bounds_ignored",
2176 original: indoc! {"
2177 x
2178 y
2179 "},
2180 model_output: indoc! {"
2181 <|set|>99:aa
2182 z
2183 "},
2184 expected: indoc! {"
2185 x
2186 y
2187 "},
2188 },
2189 Case {
2190 name: "malformed_set_command_ignored",
2191 original: indoc! {"
2192 alpha
2193 beta
2194 "},
2195 model_output: indoc! {"
2196 <|set|>not-a-line-ref
2197 UPDATED
2198 "},
2199 expected: indoc! {"
2200 alpha
2201 beta
2202 "},
2203 },
2204 Case {
2205 name: "malformed_insert_hash_treated_as_before_first",
2206 original: indoc! {"
2207 alpha
2208 beta
2209 "},
2210 model_output: indoc! {"
2211 <|insert|>1:nothex
2212 preamble
2213 "},
2214 expected: indoc! {"
2215 preamble
2216 alpha
2217 beta
2218 "},
2219 },
2220 Case {
2221 name: "set_then_insert_same_target_orders_insert_after_replacement",
2222 original: indoc! {"
2223 cat
2224 dog
2225 "},
2226 model_output: indoc! {"
2227 <|set|>0:38
2228 CAT
2229 <|insert|>0:38
2230 TAIL
2231 "},
2232 expected: indoc! {"
2233 CAT
2234 TAIL
2235 dog
2236 "},
2237 },
2238 Case {
2239 name: "overlapping_set_ranges_last_wins",
2240 original: indoc! {"
2241 a
2242 b
2243 c
2244 d
2245 "},
2246 model_output: indoc! {"
2247 <|set|>0:61-2:63
2248 FIRST
2249 <|set|>1:62-3:64
2250 SECOND
2251 "},
2252 expected: indoc! {"
2253 FIRST
2254 d
2255 "},
2256 },
2257 Case {
2258 name: "insert_before_first_and_after_line",
2259 original: indoc! {"
2260 a
2261 b
2262 "},
2263 model_output: indoc! {"
2264 <|insert|>
2265 HEAD
2266 <|insert|>0:61
2267 MID
2268 "},
2269 expected: indoc! {"
2270 HEAD
2271 a
2272 MID
2273 b
2274 "},
2275 },
2276 ];
2277
2278 for case in &cases {
2279 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2280 assert_eq!(result, case.expected, "failed case: {}", case.name);
2281 }
2282 }
2283
2284 #[test]
2285 fn test_output_has_edit_commands() {
2286 assert!(hashline::output_has_edit_commands(&format!(
2287 "{}0:ab\nnew",
2288 SET_COMMAND_MARKER
2289 )));
2290 assert!(hashline::output_has_edit_commands(&format!(
2291 "{}0:ab\nnew",
2292 INSERT_COMMAND_MARKER
2293 )));
2294 assert!(hashline::output_has_edit_commands(&format!(
2295 "some text\n{}1:cd\nstuff",
2296 SET_COMMAND_MARKER
2297 )));
2298 assert!(!hashline::output_has_edit_commands("just plain text"));
2299 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2300 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2301 }
2302
2303 // ---- hashline::patch_to_edit_commands round-trip tests ----
2304
        /// Round-trip test: each unified-diff `patch` is converted to edit
        /// commands with `hashline::patch_to_edit_commands`, the commands are
        /// applied to `old` with `hashline::apply_edit_commands`, and the result
        /// must equal `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label used in failure messages.
                name: &'static str,
                // Text the patch is resolved against.
                old: &'static str,
                // Unified-diff input.
                patch: &'static str,
                // Expected text after applying the generated commands. May contain
                // `CURSOR_MARKER`; its position is used as the cursor offset.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += ;
                        }
                        total
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                         let mut total = 0;
                         for product in products {
                        -    total += ;
                        +    total += product.price;
                         }
                         total
                    "},
                    expected_new: indoc! {"
                        let mut total = 0;
                        for product in products {
                            total += product.price;
                        }
                        total
                    "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                        fn foo() {
                            let x = 1;
                            let y = 2;
                            let z = 3;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,3 @@
                         fn foo() {
                        -    let x = 1;
                        -    let y = 2;
                        -    let z = 3;
                        +    let sum = 1 + 2 + 3;
                         }
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            let sum = 1 + 2 + 3;
                        }
                    "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                        fn main() {
                            let x = 1;
                        }
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,4 @@
                         fn main() {
                             let x = 1;
                        +    let y = 2;
                         }
                    "},
                    expected_new: indoc! {"
                        fn main() {
                            let x = 1;
                            let y = 2;
                        }
                    "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                        let x = 1;
                        let y = 2;
                    "},
                    patch: indoc! {"
                        @@ -1,2 +1,3 @@
                        +use std::io;
                         let x = 1;
                         let y = 2;
                    "},
                    expected_new: indoc! {"
                        use std::io;
                        let x = 1;
                        let y = 2;
                    "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                        ddd
                    "},
                    patch: indoc! {"
                        @@ -1,4 +1,2 @@
                         aaa
                        -bbb
                        -ccc
                         ddd
                    "},
                    expected_new: indoc! {"
                        aaa
                        ddd
                    "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                        alpha
                        beta
                        gamma
                        delta
                        epsilon
                    "},
                    patch: indoc! {"
                        @@ -1,5 +1,5 @@
                        -alpha
                        +ALPHA
                         beta
                         gamma
                        -delta
                        +DELTA
                         epsilon
                    "},
                    expected_new: indoc! {"
                        ALPHA
                        beta
                        gamma
                        DELTA
                        epsilon
                    "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            modal_state.dismiss();
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,4 @@
                         fn handle() {
                             modal_state.close();
                        +    eprintln!("");
                             modal_state.dismiss();
                    "#},
                    expected_new: indoc! {r#"
                        fn handle() {
                            modal_state.close();
                            eprintln!("");
                            modal_state.dismiss();
                    "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                        aaa
                        bbb
                        ccc
                    "},
                    patch: indoc! {"
                        @@ -1,3 +1,3 @@
                        -aaa
                        -bbb
                        -ccc
                        +xxx
                        +yyy
                        +zzz
                    "},
                    expected_new: indoc! {"
                        xxx
                        yyy
                        zzz
                    "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn

                        fn handle_keystroke() {
                    "},
                    patch: indoc! {"
                        @@ -1,6 +1,8 @@
                         fn foo() {
                             modal_state.dismiss();
                         }

                        -fn
                        +fn handle_submit() {
                        +    todo()
                        +}

                         fn handle_keystroke() {
                    "},
                    expected_new: indoc! {"
                        fn foo() {
                            modal_state.dismiss();
                        }

                        fn handle_submit() {
                            todo()
                        }

                        fn handle_keystroke() {
                    "},
                },
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                        fn main() {
                            println!();
                        }
                    "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                         fn main() {
                        -    println!();
                        +    eprintln!("");
                         }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    // The hunk header's line numbers do not match `old`.
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                         aaa
                        +xxx
                         bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                Case {
                    // A hunk header with no body lines: the text must be unchanged.
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every case, including the empty patch, must yield output that
                // `output_has_edit_commands` accepts.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2603 }
2604}
2605
2606pub mod seed_coder {
2607 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2608 //!
2609 //! Seed-Coder uses different FIM tokens and order than Qwen:
2610 //! - SPM order: suffix comes FIRST, then prefix, then middle
2611 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2612 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2613 //!
2614 //! All context (related files, edit history) goes in the PREFIX section.
2615 //! The suffix contains only code after the editable region.
2616 //!
2617 //! Example prompt:
2618 //!
2619 //! <[fim-suffix]>
2620 //! code after editable region
2621 //! <[fim-prefix]><filename>related/file.py
2622 //! related file content
2623 //!
2624 //! <filename>edit_history
2625 //! --- a/some_file.py
2626 //! +++ b/some_file.py
2627 //! -old
2628 //! +new
2629 //!
2630 //! <filename>path/to/target_file.py
2631 //! code before editable region
2632 //! <<<<<<< CURRENT
2633 //! code that
2634 //! needs to<|user_cursor|>
2635 //! be rewritten
2636 //! =======
2637 //! <[fim-middle]>
2638 //!
2639 //! Expected output (model generates):
2640 //!
2641 //! updated
2642 //! code with
2643 //! changes applied
2644 //! >>>>>>> UPDATED
2645
2646 use super::*;
2647
    /// FIM token that opens the suffix section; it comes first in the prompt (SPM order).
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// FIM token that opens the prefix section (related files, edit history, cursor file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// FIM token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// StarCoder-style marker placed directly before a file path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Written immediately before the editable region of the cursor file.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Written after the editable region, at the end of the cursor prefix section.
    pub const SEPARATOR: &str = "=======\n";
    /// Terminates the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used by `no_edits` to represent a patch with no changes.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2658
2659 pub fn special_tokens() -> &'static [&'static str] {
2660 &[
2661 FIM_SUFFIX,
2662 FIM_PREFIX,
2663 FIM_MIDDLE,
2664 FILE_MARKER,
2665 START_MARKER,
2666 SEPARATOR,
2667 END_MARKER,
2668 CURSOR_MARKER,
2669 ]
2670 }
2671
2672 pub fn write_cursor_excerpt_section(
2673 prompt: &mut String,
2674 path: &Path,
2675 context: &str,
2676 editable_range: &Range<usize>,
2677 cursor_offset: usize,
2678 ) {
2679 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2680 prompt.push_str(§ion);
2681 }
2682
2683 pub fn format_prompt_with_budget(
2684 path: &Path,
2685 context: &str,
2686 editable_range: &Range<usize>,
2687 cursor_offset: usize,
2688 events: &[Arc<Event>],
2689 related_files: &[RelatedFile],
2690 max_tokens: usize,
2691 ) -> String {
2692 let cursor_prefix_section =
2693 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2694 assemble_fim_prompt(
2695 context,
2696 editable_range,
2697 &cursor_prefix_section,
2698 events,
2699 related_files,
2700 max_tokens,
2701 )
2702 }
2703
2704 pub fn assemble_fim_prompt(
2705 context: &str,
2706 editable_range: &Range<usize>,
2707 cursor_prefix_section: &str,
2708 events: &[Arc<Event>],
2709 related_files: &[RelatedFile],
2710 max_tokens: usize,
2711 ) -> String {
2712 let suffix_section = build_suffix_section(context, editable_range);
2713
2714 let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2715 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2716 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2717
2718 let edit_history_section = super::format_edit_history_within_budget(
2719 events,
2720 FILE_MARKER,
2721 "edit_history",
2722 budget_after_cursor,
2723 max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2724 );
2725 let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2726 let budget_after_edit_history =
2727 budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2728
2729 let related_files_section = super::format_related_files_within_budget(
2730 related_files,
2731 FILE_MARKER,
2732 "",
2733 budget_after_edit_history,
2734 );
2735
2736 let mut prompt = String::new();
2737 prompt.push_str(&suffix_section);
2738 prompt.push_str(FIM_PREFIX);
2739 prompt.push_str(&related_files_section);
2740 if !related_files_section.is_empty() {
2741 prompt.push('\n');
2742 }
2743 prompt.push_str(&edit_history_section);
2744 if !edit_history_section.is_empty() {
2745 prompt.push('\n');
2746 }
2747 prompt.push_str(cursor_prefix_section);
2748 prompt.push_str(FIM_MIDDLE);
2749
2750 prompt
2751 }
2752
2753 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2754 let mut section = String::new();
2755 section.push_str(FIM_SUFFIX);
2756 section.push_str(&context[editable_range.end..]);
2757 if !section.ends_with('\n') {
2758 section.push('\n');
2759 }
2760 section
2761 }
2762
2763 fn build_cursor_prefix_section(
2764 path: &Path,
2765 context: &str,
2766 editable_range: &Range<usize>,
2767 cursor_offset: usize,
2768 ) -> String {
2769 let mut section = String::new();
2770 let path_str = path.to_string_lossy();
2771 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2772
2773 section.push_str(&context[..editable_range.start]);
2774 section.push_str(START_MARKER);
2775 section.push_str(&context[editable_range.start..cursor_offset]);
2776 section.push_str(CURSOR_MARKER);
2777 section.push_str(&context[cursor_offset..editable_range.end]);
2778 if !section.ends_with('\n') {
2779 section.push('\n');
2780 }
2781 section.push_str(SEPARATOR);
2782 section
2783 }
2784
2785 /// Format patch as containing no changes if it's empty; otherwise return None.
2786 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2787 // Count lines in the patch
2788 let empty_patch = patch.lines().count() <= 3;
2789 if empty_patch {
2790 Some(format!("{NO_EDITS}{END_MARKER}"))
2791 } else {
2792 None
2793 }
2794 }
2795}
2796
2797pub mod v0304_variable_edit {
2798 //! A prompt format with no fixed editable region. The entire context is shown
2799 //! to the model, and it chooses which text to replace by outputting surrounding
2800 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2801 //! text.
2802 //!
2803 //! Example prompt:
2804 //!
2805 //! <|file_sep|>path/to/file.py
2806 //! zero
2807 //! one
2808 //! two
2809 //! three<|user_cursor|>
2810 //! four
2811 //! five
2812 //! <|fim_prefix|>
    //!
2814 //! Expected output (model generates):
2815 //!
2816 //! two
2817 //! <|fim_middle|>
2818 //! THREE
2819 //! <|fim_suffix|>
2820 //! four
2821 //!
2822 //! The output means: find "two\n...\nfour" in the context, and replace
2823 //! everything between "two\n" and "four" with "THREE\n".
2824
2825 use super::*;
2826
2827 pub fn special_tokens() -> &'static [&'static str] {
2828 &[
2829 "<|fim_prefix|>",
2830 "<|fim_suffix|>",
2831 "<|fim_middle|>",
2832 "<|file_sep|>",
2833 CURSOR_MARKER,
2834 ]
2835 }
2836
2837 pub fn write_cursor_excerpt_section(
2838 prompt: &mut String,
2839 path: &Path,
2840 context: &str,
2841 cursor_offset: usize,
2842 ) {
2843 let path_str = path.to_string_lossy();
2844 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2845
2846 prompt.push_str(&context[..cursor_offset]);
2847 prompt.push_str(CURSOR_MARKER);
2848 prompt.push_str(&context[cursor_offset..]);
2849 if !prompt.ends_with('\n') {
2850 prompt.push('\n');
2851 }
2852 prompt.push_str("<|fim_prefix|>\n")
2853 }
2854
2855 /// Apply a variable-edit model output to the original context text.
2856 ///
2857 /// The model output has the form:
2858 ///
2859 /// - prefix context lines
2860 /// - `<|fim_middle|>`
2861 /// - new text
2862 /// - `<|fim_suffix|>`
2863 /// - suffix context lines
2864 ///
2865 /// We locate the prefix/suffix context lines in the original text and replace
2866 /// everything between them with the new text.
2867 pub fn apply_variable_edit(
2868 context: &str,
2869 model_output: &str,
2870 ) -> Result<(Range<usize>, String)> {
2871 let (prefix_context, rest) = model_output
2872 .split_once("<|fim_middle|>\n")
2873 .or_else(|| model_output.split_once("<|fim_middle|>"))
2874 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2875
2876 let (new_text, suffix_context) = rest
2877 .split_once("<|fim_suffix|>\n")
2878 .or_else(|| rest.split_once("<|fim_suffix|>"))
2879 .unwrap_or((rest, ""));
2880
2881 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2882 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2883 } else {
2884 suffix_context
2885 };
2886
2887 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2888 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2889 + prefix_context.len();
2890 let suffix_offset = if suffix_context.is_empty() {
2891 context.len()
2892 } else {
2893 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2894 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2895 + prefix_offset
2896 };
2897
2898 let edit_range = prefix_offset..suffix_offset;
2899 return Ok((edit_range, new_text.to_string()));
2900 }
2901
2902 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2903 if needle.is_empty() {
2904 return Some(0);
2905 }
2906
2907 haystack.match_indices(needle).find_map(|(offset, _)| {
2908 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2909 matched_line_start.then_some(offset)
2910 })
2911 }
2912
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// All hunks are applied first; the output then describes a single edit
    /// spanning from the first changed line to the last changed line.
    ///
    /// `cursor_offset`, when given, is a byte offset relative to the position
    /// where the first hunk's context was found. `CURSOR_MARKER` is inserted
    /// into the replacement text only if that position falls inside it.
    ///
    /// Returns an empty string when the patch has no hunks or when applying it
    /// changes no lines.
    ///
    /// # Errors
    ///
    /// Fails when a hunk's old context cannot be found in the text at or after
    /// the end of the previous hunk.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            // Remember where the first hunk landed; the cursor offset is
            // relative to it.
            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, the change starts where the shorter one ends.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). The common suffix is capped
        // so it cannot overlap the common prefix.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Nothing changed at line granularity.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_changed_row..new_end].
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_changed_row..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_changed_row starts in new_text, counting
            // one byte of line terminator per line.
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // A cursor outside the replacement text is dropped.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // anywhere in `lines` (including the run itself).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context, delimiter, replacement, delimiter, suffix context.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3071
3072 struct ParsedHunk {
3073 old_context: String,
3074 edits: Vec<ParsedEdit>,
3075 }
3076
3077 struct ParsedEdit {
3078 range: Range<usize>,
3079 text: String,
3080 }
3081
3082 /// Parse a unified diff into content-based hunks. Each hunk contains an
3083 /// `old_context` string (context lines + deleted lines, which together
3084 /// form the text that should be found in the original) and a list of edits
3085 /// expressed as byte ranges within that context.
3086 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3087 let mut hunks = Vec::new();
3088 let mut current: Option<ParsedHunk> = None;
3089
3090 for line in patch.lines() {
3091 if line.starts_with("@@") {
3092 if let Some(hunk) = current.take() {
3093 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3094 hunks.push(hunk);
3095 }
3096 }
3097 current = Some(ParsedHunk {
3098 old_context: String::new(),
3099 edits: Vec::new(),
3100 });
3101 } else if line.starts_with("---") || line.starts_with("+++") {
3102 continue;
3103 } else if let Some(hunk) = &mut current {
3104 if let Some(added) = line.strip_prefix('+') {
3105 let pos = hunk.old_context.len();
3106 if let Some(last_edit) = hunk.edits.last_mut() {
3107 if last_edit.range.end == pos {
3108 writeln!(&mut last_edit.text, "{added}").ok();
3109 continue;
3110 }
3111 }
3112 hunk.edits.push(ParsedEdit {
3113 range: pos..pos,
3114 text: format!("{added}\n"),
3115 });
3116 } else if let Some(removed) = line.strip_prefix('-') {
3117 let start = hunk.old_context.len();
3118 writeln!(&mut hunk.old_context, "{removed}").ok();
3119 let end = hunk.old_context.len();
3120 if let Some(last_edit) = hunk.edits.last_mut() {
3121 if last_edit.range.end == start {
3122 last_edit.range.end = end;
3123 continue;
3124 }
3125 }
3126 hunk.edits.push(ParsedEdit {
3127 range: start..end,
3128 text: String::new(),
3129 });
3130 } else {
3131 let ctx = line.strip_prefix(' ').unwrap_or(line);
3132 writeln!(&mut hunk.old_context, "{ctx}").ok();
3133 }
3134 }
3135 }
3136
3137 if let Some(hunk) = current {
3138 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3139 hunks.push(hunk);
3140 }
3141 }
3142
3143 hunks
3144 }
3145
3146 #[cfg(test)]
3147 mod tests {
3148 use super::*;
3149 use indoc::indoc;
3150
        /// Table-driven test for `apply_variable_edit`: the returned range and
        /// replacement are applied to `original` and compared with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label used in failure messages.
                name: &'static str,
                // Context text the model output refers to.
                original: &'static str,
                // Model output in variable-edit format.
                model_output: &'static str,
                // `original` after the edit has been applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                // Apply the returned edit to a copy of the original text.
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3367
3368 #[test]
3369 fn test_patch_to_variable_edit() {
3370 struct Case {
3371 name: &'static str,
3372 old: &'static str,
3373 patch: &'static str,
3374 cursor_offset: Option<usize>,
3375 expected_variable_edit: &'static str,
3376 expected_after_apply: &'static str,
3377 }
3378
3379 let cases = [
3380 Case {
3381 name: "simple_replacement",
3382 old: indoc! {"
3383 zero
3384 one
3385 two
3386 three
3387 four
3388 five
3389 "},
3390 patch: indoc! {"
3391 @@ -3,3 +3,3 @@
3392 two
3393 -three
3394 +THREE
3395 four
3396 "},
3397 cursor_offset: None,
3398 expected_variable_edit: indoc! {"
3399 one
3400 two
3401 <|fim_middle|>
3402 THREE
3403 <|fim_suffix|>
3404 four
3405 five
3406 "},
3407 expected_after_apply: indoc! {"
3408 zero
3409 one
3410 two
3411 THREE
3412 four
3413 five
3414 "},
3415 },
3416 Case {
3417 name: "insertion",
3418 old: indoc! {"
3419 a
3420 b
3421 c
3422 d
3423 e
3424 "},
3425 patch: indoc! {"
3426 @@ -2,0 +3,1 @@
3427 b
3428 +X
3429 c
3430 "},
3431 cursor_offset: None,
3432 expected_variable_edit: indoc! {"
3433 a
3434 b
3435 <|fim_middle|>
3436 X
3437 <|fim_suffix|>
3438 c
3439 d
3440 "},
3441 expected_after_apply: indoc! {"
3442 a
3443 b
3444 X
3445 c
3446 d
3447 e
3448 "},
3449 },
3450 Case {
3451 name: "deletion",
3452 old: indoc! {"
3453 a
3454 b
3455 c
3456 d
3457 e
3458 "},
3459 patch: indoc! {"
3460 @@ -2,3 +2,2 @@
3461 b
3462 -c
3463 d
3464 "},
3465 cursor_offset: None,
3466 expected_variable_edit: indoc! {"
3467 a
3468 b
3469 <|fim_middle|>
3470 <|fim_suffix|>
3471 d
3472 e
3473 "},
3474 expected_after_apply: indoc! {"
3475 a
3476 b
3477 d
3478 e
3479 "},
3480 },
3481 Case {
3482 name: "edit_near_start",
3483 old: indoc! {"
3484 first
3485 second
3486 third
3487 fourth
3488 "},
3489 patch: indoc! {"
3490 @@ -1,1 +1,1 @@
3491 -first
3492 +FIRST
3493 "},
3494 cursor_offset: None,
3495 expected_variable_edit: indoc! {"
3496 <|fim_middle|>
3497 FIRST
3498 <|fim_suffix|>
3499 second
3500 third
3501 "},
3502 expected_after_apply: indoc! {"
3503 FIRST
3504 second
3505 third
3506 fourth
3507 "},
3508 },
3509 Case {
3510 name: "edit_near_end",
3511 old: indoc! {"
3512 first
3513 second
3514 third
3515 fourth
3516 "},
3517 patch: indoc! {"
3518 @@ -4,1 +4,1 @@
3519 -fourth
3520 +FOURTH
3521 "},
3522 cursor_offset: None,
3523 expected_variable_edit: indoc! {"
3524 second
3525 third
3526 <|fim_middle|>
3527 FOURTH
3528 <|fim_suffix|>
3529 "},
3530 expected_after_apply: indoc! {"
3531 first
3532 second
3533 third
3534 FOURTH
3535 "},
3536 },
3537 Case {
3538 name: "cursor_at_start_of_replacement",
3539 old: indoc! {"
3540 zero
3541 one
3542 two
3543 three
3544 four
3545 five
3546 "},
3547 patch: indoc! {"
3548 @@ -3,3 +3,3 @@
3549 two
3550 -three
3551 +THREE
3552 four
3553 "},
3554 cursor_offset: Some(4),
3555 expected_variable_edit: indoc! {"
3556 one
3557 two
3558 <|fim_middle|>
3559 <|user_cursor|>THREE
3560 <|fim_suffix|>
3561 four
3562 five
3563 "},
3564 expected_after_apply: indoc! {"
3565 zero
3566 one
3567 two
3568 <|user_cursor|>THREE
3569 four
3570 five
3571 "},
3572 },
3573 Case {
3574 name: "cursor_in_middle_of_replacement",
3575 old: indoc! {"
3576 zero
3577 one
3578 two
3579 three
3580 four
3581 five
3582 "},
3583 patch: indoc! {"
3584 @@ -3,3 +3,3 @@
3585 two
3586 -three
3587 +THREE
3588 four
3589 "},
3590 cursor_offset: Some(6),
3591 expected_variable_edit: indoc! {"
3592 one
3593 two
3594 <|fim_middle|>
3595 TH<|user_cursor|>REE
3596 <|fim_suffix|>
3597 four
3598 five
3599 "},
3600 expected_after_apply: indoc! {"
3601 zero
3602 one
3603 two
3604 TH<|user_cursor|>REE
3605 four
3606 five
3607 "},
3608 },
3609 Case {
3610 name: "expands_context_when_two_lines_not_unique_before_and_after",
3611 old: indoc! {"
3612 one
3613 a
3614 b
3615 c
3616 d
3617 two
3618 a
3619 b
3620 c
3621 d
3622 three
3623 a
3624 b
3625 c
3626 d
3627 four
3628 "},
3629 patch: indoc! {"
3630 @@ -4,5 +4,5 @@
3631 two
3632 a
3633 b
3634 -c
3635 +C
3636 d
3637 three
3638 "},
3639 cursor_offset: None,
3640 expected_variable_edit: indoc! {"
3641 two
3642 a
3643 b
3644 <|fim_middle|>
3645 C
3646 <|fim_suffix|>
3647 d
3648 three
3649 "},
3650 expected_after_apply: indoc! {"
3651 one
3652 a
3653 b
3654 c
3655 d
3656 two
3657 a
3658 b
3659 C
3660 d
3661 three
3662 a
3663 b
3664 c
3665 d
3666 four
3667 "},
3668 },
3669 Case {
3670 name: "expands_context_when_two_lines_not_unique_before_and_after",
3671 old: indoc! {"
3672 {
3673 {
3674 one();
3675 }
3676 }
3677 {
3678 {
3679 two();
3680 }
3681 }
3682 {
3683 {
3684 three();
3685 }
3686 }
3687 {
3688 {
3689 four();
3690 }
3691 }
3692 "},
3693 patch: indoc! {"
3694 @@ -4,5 +4,5 @@
3695 {
3696 - two();
3697 + TWO();
3698 }
3699 "},
3700 cursor_offset: None,
3701 expected_variable_edit: indoc! {"
3702 one();
3703 }
3704 }
3705 {
3706 {
3707 <|fim_middle|>
3708 TWO();
3709 <|fim_suffix|>
3710 }
3711 }
3712 {
3713 {
3714 three();
3715 "},
3716 expected_after_apply: indoc! {"
3717 {
3718 {
3719 one();
3720 }
3721 }
3722 {
3723 {
3724 TWO();
3725 }
3726 }
3727 {
3728 {
3729 three();
3730 }
3731 }
3732 {
3733 {
3734 four();
3735 }
3736 }
3737 "},
3738 },
3739 ];
3740
3741 for case in cases {
3742 let output =
3743 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3744 .unwrap_or_else(|error| {
3745 panic!("failed converting patch for {}: {error}", case.name)
3746 });
3747 assert_eq!(
3748 output, case.expected_variable_edit,
3749 "patch->variable_edit mismatch for {}",
3750 case.name
3751 );
3752
3753 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3754 .unwrap_or_else(|error| {
3755 panic!("failed applying variable_edit for {}: {error}", case.name)
3756 });
3757 let mut edited_by_variable_edit = case.old.to_string();
3758 edited_by_variable_edit.replace_range(edit_range, &replacement);
3759 assert_eq!(
3760 edited_by_variable_edit, case.expected_after_apply,
3761 "variable_edit apply mismatch for {}",
3762 case.name
3763 );
3764
3765 let (expected_edit_range, expected_replacement) =
3766 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3767 |error| {
3768 panic!(
3769 "failed applying expected variable_edit for {}: {error}",
3770 case.name
3771 )
3772 },
3773 );
3774 let mut edited_by_expected_variable_edit = case.old.to_string();
3775 edited_by_expected_variable_edit
3776 .replace_range(expected_edit_range, &expected_replacement);
3777 assert_eq!(
3778 edited_by_expected_variable_edit, case.expected_after_apply,
3779 "expected variable_edit apply mismatch for {}",
3780 case.name
3781 );
3782 }
3783 }
3784
3785 #[test]
3786 fn test_write_cursor_excerpt_section() {
3787 let path = Path::new("test.rs");
3788 let context = "fn main() {\n hello();\n}\n";
3789 let cursor_offset = 17;
3790 let mut prompt = String::new();
3791 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3792 assert_eq!(
3793 prompt,
3794 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3795 );
3796 }
3797 }
3798}
3799
3800/// The zeta1 prompt format
3801pub mod zeta1 {
3802 use super::*;
3803 use std::fmt::Write;
3804
    /// Cursor marker used by the zeta1 format. It shadows the crate-level
    /// `CURSOR_MARKER` (`<|user_cursor|>`) inside this module.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// NOTE(review): presumably marks that the excerpt begins at the start of
    /// the file — confirm against the excerpt formatter.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// NOTE(review): presumably opens the editable region of the excerpt — confirm.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// NOTE(review): presumably closes the editable region of the excerpt — confirm.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    // Fixed instruction text that opens every zeta1 prompt; the formatted
    // user edits follow it directly.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    // Heading placed between the user edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    // Heading that ends the prompt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3819
3820 /// Formats a complete zeta1 prompt from the input events and excerpt.
3821 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3822 let mut prompt = String::with_capacity(
3823 INSTRUCTION_HEADER.len()
3824 + input_events.len()
3825 + EXCERPT_HEADER.len()
3826 + input_excerpt.len()
3827 + RESPONSE_HEADER.len(),
3828 );
3829 prompt.push_str(INSTRUCTION_HEADER);
3830 prompt.push_str(input_events);
3831 prompt.push_str(EXCERPT_HEADER);
3832 prompt.push_str(input_excerpt);
3833 prompt.push_str(RESPONSE_HEADER);
3834 prompt
3835 }
3836
3837 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3838 /// editable and context byte-offset ranges within `cursor_excerpt`.
3839 pub fn format_zeta1_from_input(
3840 input: &ZetaPromptInput,
3841 editable_range: Range<usize>,
3842 context_range: Range<usize>,
3843 ) -> String {
3844 let events = format_zeta1_events(&input.events);
3845 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3846 format_zeta1_prompt(&events, &excerpt)
3847 }
3848
3849 /// Formats events in zeta1 style (oldest first).
3850 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3851 let mut result = String::new();
3852 for event in
3853 events
3854 .iter()
3855 .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3856 &ZetaFormat::V0114180EditableRegion,
3857 )))
3858 {
3859 let event_string = format_zeta1_event(event);
3860 if event_string.is_empty() {
3861 continue;
3862 }
3863 if !result.is_empty() {
3864 result.push_str("\n\n");
3865 }
3866 result.push_str(&event_string);
3867 }
3868 result
3869 }
3870
3871 fn format_zeta1_event(event: &Event) -> String {
3872 match event {
3873 Event::BufferChange {
3874 path,
3875 old_path,
3876 diff,
3877 ..
3878 } => {
3879 let mut prompt = String::new();
3880 if old_path != path {
3881 writeln!(
3882 prompt,
3883 "User renamed {} to {}\n",
3884 old_path.display(),
3885 path.display()
3886 )
3887 .ok();
3888 }
3889 if !diff.is_empty() {
3890 write!(
3891 prompt,
3892 "User edited {}:\n```diff\n{}\n```",
3893 path.display(),
3894 diff
3895 )
3896 .ok();
3897 }
3898 prompt
3899 }
3900 }
3901 }
3902
3903 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3904 /// within `cursor_excerpt`.
3905 fn format_zeta1_excerpt(
3906 input: &ZetaPromptInput,
3907 editable_range: Range<usize>,
3908 context_range: Range<usize>,
3909 ) -> String {
3910 let path_str = input.cursor_path.to_string_lossy();
3911 let excerpt = &*input.cursor_excerpt;
3912 let cursor_offset = input.cursor_offset_in_excerpt;
3913
3914 let mut prompt = String::new();
3915 writeln!(&mut prompt, "```{path_str}").ok();
3916
3917 let starts_at_file_beginning =
3918 input.excerpt_start_row == Some(0) && context_range.start == 0;
3919 if starts_at_file_beginning {
3920 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3921 }
3922
3923 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3924
3925 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3926 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3927 prompt.push_str(CURSOR_MARKER);
3928 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3929 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3930
3931 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3932 write!(prompt, "\n```").ok();
3933
3934 prompt
3935 }
3936
3937 /// Cleans zeta1 model output by extracting content between editable region
3938 /// markers and converting the zeta1 cursor marker to the universal one.
3939 /// Returns `None` if the output doesn't contain the expected markers.
3940 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3941 let content = output.replace(CURSOR_MARKER, "");
3942
3943 let content_start = content
3944 .find(EDITABLE_REGION_START_MARKER)
3945 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3946 .map(|pos| {
3947 if content.as_bytes().get(pos) == Some(&b'\n') {
3948 pos + 1
3949 } else {
3950 pos
3951 }
3952 })
3953 .unwrap_or(0);
3954
3955 let content_end = content
3956 .find(EDITABLE_REGION_END_MARKER)
3957 .map(|pos| {
3958 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3959 pos - 1
3960 } else {
3961 pos
3962 }
3963 })
3964 .unwrap_or(content.len());
3965
3966 if content_start > content_end {
3967 return Some(String::new());
3968 }
3969
3970 let extracted = &content[content_start..content_end];
3971
3972 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3973 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3974 let text_before_cursor = text_before_cursor
3975 .find(EDITABLE_REGION_START_MARKER)
3976 .map(|pos| {
3977 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3978 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3979 after_marker + 1
3980 } else {
3981 after_marker
3982 }
3983 })
3984 .unwrap_or(0);
3985 let offset_in_extracted = zeta1_cursor_pos
3986 .saturating_sub(text_before_cursor)
3987 .min(extracted.len());
3988 offset_in_extracted
3989 });
3990
3991 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3992 if let Some(offset) = cursor_offset {
3993 result.push_str(&extracted[..offset]);
3994 result.push_str(super::CURSOR_MARKER);
3995 result.push_str(&extracted[offset..]);
3996 } else {
3997 result.push_str(extracted);
3998 }
3999
4000 Some(result)
4001 }
4002}
4003
4004#[cfg(test)]
4005mod tests {
4006 use super::*;
4007 use indoc::indoc;
4008
4009 fn make_input(
4010 cursor_excerpt: &str,
4011 editable_range: Range<usize>,
4012 cursor_offset: usize,
4013 events: Vec<Event>,
4014 related_files: Vec<RelatedFile>,
4015 ) -> ZetaPromptInput {
4016 let context_range = 0..cursor_excerpt.len();
4017 ZetaPromptInput {
4018 cursor_path: Path::new("test.rs").into(),
4019 cursor_excerpt: cursor_excerpt.into(),
4020 cursor_offset_in_excerpt: cursor_offset,
4021 excerpt_start_row: None,
4022 events: events.into_iter().map(Arc::new).collect(),
4023 related_files: Some(related_files),
4024 active_buffer_diagnostics: vec![],
4025 excerpt_ranges: ExcerptRanges {
4026 editable_150: editable_range.clone(),
4027 editable_180: editable_range.clone(),
4028 editable_350: editable_range,
4029 editable_150_context_350: context_range.clone(),
4030 editable_180_context_350: context_range.clone(),
4031 editable_350_context_150: context_range,
4032 ..Default::default()
4033 },
4034 syntax_ranges: None,
4035 experiment: None,
4036 in_open_source_repo: false,
4037 can_collect_data: false,
4038 repo_url: None,
4039 }
4040 }
4041
4042 fn make_input_with_context_range(
4043 excerpt: &str,
4044 editable_range: Range<usize>,
4045 context_range: Range<usize>,
4046 cursor_offset: usize,
4047 ) -> ZetaPromptInput {
4048 ZetaPromptInput {
4049 cursor_path: Path::new("test.rs").into(),
4050 cursor_excerpt: excerpt.into(),
4051 cursor_offset_in_excerpt: cursor_offset,
4052 excerpt_start_row: None,
4053 events: vec![],
4054 related_files: Some(vec![]),
4055 active_buffer_diagnostics: vec![],
4056 excerpt_ranges: ExcerptRanges {
4057 editable_150: editable_range.clone(),
4058 editable_180: editable_range.clone(),
4059 editable_350: editable_range,
4060 editable_150_context_350: context_range.clone(),
4061 editable_180_context_350: context_range.clone(),
4062 editable_350_context_150: context_range,
4063 ..Default::default()
4064 },
4065 syntax_ranges: None,
4066 experiment: None,
4067 in_open_source_repo: false,
4068 can_collect_data: false,
4069 repo_url: None,
4070 }
4071 }
4072
4073 fn make_event(path: &str, diff: &str) -> Event {
4074 Event::BufferChange {
4075 path: Path::new(path).into(),
4076 old_path: Path::new(path).into(),
4077 diff: diff.to_string(),
4078 predicted: false,
4079 in_open_source_repo: false,
4080 }
4081 }
4082
4083 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4084 RelatedFile {
4085 path: Path::new(path).into(),
4086 max_row: content.lines().count() as u32,
4087 excerpts: vec![RelatedExcerpt {
4088 row_range: 0..content.lines().count() as u32,
4089 text: content.into(),
4090 order: 0,
4091 }],
4092 in_open_source_repo: false,
4093 }
4094 }
4095
4096 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4097 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4098 }
4099
4100 #[test]
4101 fn test_no_truncation_when_within_budget() {
4102 let input = make_input(
4103 "prefix\neditable\nsuffix",
4104 7..15,
4105 10,
4106 vec![make_event("a.rs", "-old\n+new\n")],
4107 vec![make_related_file("related.rs", "fn helper() {}\n")],
4108 );
4109
4110 assert_eq!(
4111 format_with_budget(&input, 10000).unwrap(),
4112 indoc! {r#"
4113 <|file_sep|>related.rs
4114 fn helper() {}
4115 <|file_sep|>edit history
4116 --- a/a.rs
4117 +++ b/a.rs
4118 -old
4119 +new
4120 <|file_sep|>test.rs
4121 <|fim_prefix|>
4122 prefix
4123 <|fim_middle|>current
4124 edi<|user_cursor|>table
4125 <|fim_suffix|>
4126
4127 suffix
4128 <|fim_middle|>updated
4129 "#}
4130 .to_string()
4131 );
4132 }
4133
4134 #[test]
4135 fn test_truncation_drops_edit_history_when_budget_tight() {
4136 let input = make_input(
4137 "code",
4138 0..4,
4139 2,
4140 vec![make_event("a.rs", "-x\n+y\n")],
4141 vec![
4142 make_related_file("r1.rs", "aaaaaaa\n"),
4143 make_related_file("r2.rs", "bbbbbbb\n"),
4144 ],
4145 );
4146
4147 assert_eq!(
4148 format_with_budget(&input, 10000).unwrap(),
4149 indoc! {r#"
4150 <|file_sep|>r1.rs
4151 aaaaaaa
4152 <|file_sep|>r2.rs
4153 bbbbbbb
4154 <|file_sep|>edit history
4155 --- a/a.rs
4156 +++ b/a.rs
4157 -x
4158 +y
4159 <|file_sep|>test.rs
4160 <|fim_prefix|>
4161 <|fim_middle|>current
4162 co<|user_cursor|>de
4163 <|fim_suffix|>
4164 <|fim_middle|>updated
4165 "#}
4166 .to_string()
4167 );
4168
4169 assert_eq!(
4170 format_with_budget(&input, 55),
4171 Some(
4172 indoc! {r#"
4173 <|file_sep|>edit history
4174 --- a/a.rs
4175 +++ b/a.rs
4176 -x
4177 +y
4178 <|file_sep|>test.rs
4179 <|fim_prefix|>
4180 <|fim_middle|>current
4181 co<|user_cursor|>de
4182 <|fim_suffix|>
4183 <|fim_middle|>updated
4184 "#}
4185 .to_string()
4186 )
4187 );
4188 }
4189
4190 #[test]
4191 fn test_truncation_includes_partial_excerpts() {
4192 let input = make_input(
4193 "x",
4194 0..1,
4195 0,
4196 vec![],
4197 vec![RelatedFile {
4198 path: Path::new("big.rs").into(),
4199 max_row: 30,
4200 in_open_source_repo: false,
4201 excerpts: vec![
4202 RelatedExcerpt {
4203 row_range: 0..10,
4204 text: "first excerpt\n".into(),
4205 order: 0,
4206 },
4207 RelatedExcerpt {
4208 row_range: 10..20,
4209 text: "second excerpt\n".into(),
4210 order: 0,
4211 },
4212 RelatedExcerpt {
4213 row_range: 20..30,
4214 text: "third excerpt\n".into(),
4215 order: 0,
4216 },
4217 ],
4218 }],
4219 );
4220
4221 assert_eq!(
4222 format_with_budget(&input, 10000).unwrap(),
4223 indoc! {r#"
4224 <|file_sep|>big.rs
4225 first excerpt
4226 ...
4227 second excerpt
4228 ...
4229 third excerpt
4230 <|file_sep|>test.rs
4231 <|fim_prefix|>
4232 <|fim_middle|>current
4233 <|user_cursor|>x
4234 <|fim_suffix|>
4235 <|fim_middle|>updated
4236 "#}
4237 .to_string()
4238 );
4239
4240 assert_eq!(
4241 format_with_budget(&input, 50).unwrap(),
4242 indoc! {r#"
4243 <|file_sep|>big.rs
4244 first excerpt
4245 ...
4246 <|file_sep|>test.rs
4247 <|fim_prefix|>
4248 <|fim_middle|>current
4249 <|user_cursor|>x
4250 <|fim_suffix|>
4251 <|fim_middle|>updated
4252 "#}
4253 .to_string()
4254 );
4255 }
4256
4257 #[test]
4258 fn test_truncation_prioritizes_lower_order_excerpts() {
4259 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4260 // With tight budget, only the lower-order excerpt from file_b should be included.
4261 let input = make_input(
4262 "x",
4263 0..1,
4264 0,
4265 vec![],
4266 vec![
4267 RelatedFile {
4268 path: Path::new("file_a.rs").into(),
4269 max_row: 10,
4270 in_open_source_repo: false,
4271 excerpts: vec![RelatedExcerpt {
4272 row_range: 0..10,
4273 text: "low priority content\n".into(),
4274 order: 5,
4275 }],
4276 },
4277 RelatedFile {
4278 path: Path::new("file_b.rs").into(),
4279 max_row: 10,
4280 in_open_source_repo: false,
4281 excerpts: vec![RelatedExcerpt {
4282 row_range: 0..10,
4283 text: "high priority content\n".into(),
4284 order: 1,
4285 }],
4286 },
4287 ],
4288 );
4289
4290 // With large budget, both files included; rendered in stable lexicographic order.
4291 assert_eq!(
4292 format_with_budget(&input, 10000).unwrap(),
4293 indoc! {r#"
4294 <|file_sep|>file_a.rs
4295 low priority content
4296 <|file_sep|>file_b.rs
4297 high priority content
4298 <|file_sep|>test.rs
4299 <|fim_prefix|>
4300 <|fim_middle|>current
4301 <|user_cursor|>x
4302 <|fim_suffix|>
4303 <|fim_middle|>updated
4304 "#}
4305 .to_string()
4306 );
4307
4308 // With tight budget, only file_b (lower order) fits.
4309 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4310 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4311 // file_a would need another 14 tokens, which doesn't fit.
4312 assert_eq!(
4313 format_with_budget(&input, 52).unwrap(),
4314 indoc! {r#"
4315 <|file_sep|>file_b.rs
4316 high priority content
4317 <|file_sep|>test.rs
4318 <|fim_prefix|>
4319 <|fim_middle|>current
4320 <|user_cursor|>x
4321 <|fim_suffix|>
4322 <|fim_middle|>updated
4323 "#}
4324 .to_string()
4325 );
4326 }
4327
4328 #[test]
4329 fn test_truncation_drops_high_order_excerpts_within_file() {
4330 // A single file has excerpts at order 1 and order 3. With a tight budget,
4331 // only the order-1 excerpts are included while the order-3 excerpt is
4332 // dropped — even though they belong to the same file. This also preserves
4333 // the parent invariant: parent outline items have order ≤ their best
4334 // child, so they're always included when any child is.
4335 let input = make_input(
4336 "x",
4337 0..1,
4338 0,
4339 vec![],
4340 vec![RelatedFile {
4341 path: Path::new("mod.rs").into(),
4342 max_row: 30,
4343 in_open_source_repo: false,
4344 excerpts: vec![
4345 RelatedExcerpt {
4346 row_range: 0..5,
4347 text: "mod header\n".into(),
4348 order: 1,
4349 },
4350 RelatedExcerpt {
4351 row_range: 5..15,
4352 text: "important fn\n".into(),
4353 order: 1,
4354 },
4355 RelatedExcerpt {
4356 row_range: 15..30,
4357 text: "less important fn\n".into(),
4358 order: 3,
4359 },
4360 ],
4361 }],
4362 );
4363
4364 // With large budget, all three excerpts included.
4365 assert_eq!(
4366 format_with_budget(&input, 10000).unwrap(),
4367 indoc! {r#"
4368 <|file_sep|>mod.rs
4369 mod header
4370 ...
4371 important fn
4372 ...
4373 less important fn
4374 <|file_sep|>test.rs
4375 <|fim_prefix|>
4376 <|fim_middle|>current
4377 <|user_cursor|>x
4378 <|fim_suffix|>
4379 <|fim_middle|>updated
4380 "#}
4381 .to_string()
4382 );
4383
4384 // With tight budget, only order<=1 excerpts included (header + important fn).
4385 assert_eq!(
4386 format_with_budget(&input, 55).unwrap(),
4387 indoc! {r#"
4388 <|file_sep|>mod.rs
4389 mod header
4390 ...
4391 important fn
4392 ...
4393 <|file_sep|>test.rs
4394 <|fim_prefix|>
4395 <|fim_middle|>current
4396 <|user_cursor|>x
4397 <|fim_suffix|>
4398 <|fim_middle|>updated
4399 "#}
4400 .to_string()
4401 );
4402 }
4403
4404 #[test]
4405 fn test_truncation_drops_older_events_first() {
4406 let input = make_input(
4407 "x",
4408 0..1,
4409 0,
4410 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4411 vec![],
4412 );
4413
4414 assert_eq!(
4415 format_with_budget(&input, 10000).unwrap(),
4416 indoc! {r#"
4417 <|file_sep|>edit history
4418 --- a/old.rs
4419 +++ b/old.rs
4420 -1
4421 --- a/new.rs
4422 +++ b/new.rs
4423 -2
4424 <|file_sep|>test.rs
4425 <|fim_prefix|>
4426 <|fim_middle|>current
4427 <|user_cursor|>x
4428 <|fim_suffix|>
4429 <|fim_middle|>updated
4430 "#}
4431 .to_string()
4432 );
4433
4434 assert_eq!(
4435 format_with_budget(&input, 60).unwrap(),
4436 indoc! {r#"
4437 <|file_sep|>edit history
4438 --- a/new.rs
4439 +++ b/new.rs
4440 -2
4441 <|file_sep|>test.rs
4442 <|fim_prefix|>
4443 <|fim_middle|>current
4444 <|user_cursor|>x
4445 <|fim_suffix|>
4446 <|fim_middle|>updated
4447 "#}
4448 .to_string()
4449 );
4450 }
4451
4452 #[test]
4453 fn test_cursor_excerpt_always_included_with_minimal_budget() {
4454 let input = make_input(
4455 "fn main() {}",
4456 0..12,
4457 3,
4458 vec![make_event("a.rs", "-old\n+new\n")],
4459 vec![make_related_file("related.rs", "helper\n")],
4460 );
4461
4462 assert!(format_with_budget(&input, 30).is_none())
4463 }
4464
4465 #[track_caller]
4466 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4467 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4468 .expect("seed coder prompt formatting should succeed")
4469 }
4470
4471 #[track_caller]
4472 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4473 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4474 .expect("seed coder prompt formatting should succeed")
4475 }
4476
4477 #[test]
4478 fn test_seed_coder_basic_format() {
4479 let input = make_input(
4480 "prefix\neditable\nsuffix",
4481 7..15,
4482 10,
4483 vec![make_event("a.rs", "-old\n+new\n")],
4484 vec![make_related_file("related.rs", "fn helper() {}\n")],
4485 );
4486
4487 assert_eq!(
4488 format_seed_coder(&input),
4489 indoc! {r#"
4490 <[fim-suffix]>
4491 suffix
4492 <[fim-prefix]><filename>related.rs
4493 fn helper() {}
4494
4495 <filename>edit_history
4496 --- a/a.rs
4497 +++ b/a.rs
4498 -old
4499 +new
4500
4501 <filename>test.rs
4502 prefix
4503 <<<<<<< CURRENT
4504 edi<|user_cursor|>table
4505 =======
4506 <[fim-middle]>"#}
4507 );
4508 }
4509
4510 #[test]
4511 fn test_seed_coder_no_context() {
4512 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4513
4514 assert_eq!(
4515 format_seed_coder(&input),
4516 indoc! {r#"
4517 <[fim-suffix]>
4518 after
4519 <[fim-prefix]><filename>test.rs
4520 before
4521 <<<<<<< CURRENT
4522 mid<|user_cursor|>dle
4523 =======
4524 <[fim-middle]>"#}
4525 );
4526 }
4527
4528 #[test]
4529 fn test_seed_coder_truncation_drops_context() {
4530 let input = make_input(
4531 "code",
4532 0..4,
4533 2,
4534 vec![make_event("a.rs", "-x\n+y\n")],
4535 vec![make_related_file("r1.rs", "content\n")],
4536 );
4537
4538 // With large budget, everything is included
4539 assert_eq!(
4540 format_seed_coder(&input),
4541 indoc! {r#"
4542 <[fim-suffix]>
4543 <[fim-prefix]><filename>r1.rs
4544 content
4545
4546 <filename>edit_history
4547 --- a/a.rs
4548 +++ b/a.rs
4549 -x
4550 +y
4551
4552 <filename>test.rs
4553 <<<<<<< CURRENT
4554 co<|user_cursor|>de
4555 =======
4556 <[fim-middle]>"#}
4557 );
4558
4559 assert_eq!(
4560 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4561 None
4562 );
4563
4564 assert_eq!(
4565 format_seed_coder_with_budget(&input, 40),
4566 indoc! {r#"
4567 <[fim-suffix]>
4568 <[fim-prefix]><filename>test.rs
4569 <<<<<<< CURRENT
4570 co<|user_cursor|>de
4571 =======
4572 <[fim-middle]>"#
4573 }
4574 )
4575 }
4576
4577 #[test]
4578 fn test_seed_coder_truncation_prioritizes_lower_order() {
4579 let input = make_input(
4580 "code",
4581 0..4,
4582 2,
4583 vec![],
4584 vec![
4585 RelatedFile {
4586 path: Path::new("low_prio.rs").into(),
4587 max_row: 5,
4588 in_open_source_repo: false,
4589 excerpts: vec![RelatedExcerpt {
4590 row_range: 0..5,
4591 text: "low prio\n".into(),
4592 order: 10,
4593 }],
4594 },
4595 RelatedFile {
4596 path: Path::new("high_prio.rs").into(),
4597 max_row: 5,
4598 in_open_source_repo: false,
4599 excerpts: vec![RelatedExcerpt {
4600 row_range: 0..5,
4601 text: "high prio\n".into(),
4602 order: 1,
4603 }],
4604 },
4605 ],
4606 );
4607
4608 // With large budget, both included; rendered in stable lexicographic order.
4609 assert_eq!(
4610 format_seed_coder(&input),
4611 indoc! {r#"
4612 <[fim-suffix]>
4613 <[fim-prefix]><filename>low_prio.rs
4614 low prio
4615 <filename>high_prio.rs
4616 high prio
4617
4618 <filename>test.rs
4619 <<<<<<< CURRENT
4620 co<|user_cursor|>de
4621 =======
4622 <[fim-middle]>"#}
4623 );
4624
4625 // With tight budget under the generic heuristic, context is dropped but the
4626 // minimal cursor section still fits.
4627 assert_eq!(
4628 format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4629 Some(
4630 indoc! {r#"
4631 <[fim-suffix]>
4632 <[fim-prefix]><filename>test.rs
4633 <<<<<<< CURRENT
4634 co<|user_cursor|>de
4635 =======
4636 <[fim-middle]>"#}
4637 .to_string()
4638 )
4639 );
4640 }
4641
4642 #[test]
4643 fn test_format_zeta1_from_input_basic() {
4644 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
4645 let input = ZetaPromptInput {
4646 cursor_path: Path::new("src/main.rs").into(),
4647 cursor_excerpt: excerpt.into(),
4648 cursor_offset_in_excerpt: 30,
4649 excerpt_start_row: Some(0),
4650 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4651 related_files: Some(vec![]),
4652 active_buffer_diagnostics: vec![],
4653 excerpt_ranges: ExcerptRanges {
4654 editable_150: 15..41,
4655 editable_180: 15..41,
4656 editable_350: 15..41,
4657 editable_150_context_350: 0..excerpt.len(),
4658 editable_180_context_350: 0..excerpt.len(),
4659 editable_350_context_150: 0..excerpt.len(),
4660 ..Default::default()
4661 },
4662 syntax_ranges: None,
4663 experiment: None,
4664 in_open_source_repo: false,
4665 can_collect_data: false,
4666 repo_url: None,
4667 };
4668
4669 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4670
4671 assert_eq!(
4672 prompt,
4673 concat!(
4674 "### Instruction:\n",
4675 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4676 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4677 "into account the cursor location.\n",
4678 "\n",
4679 "### User Edits:\n",
4680 "\n",
4681 "User edited other.rs:\n",
4682 "```diff\n",
4683 "-old\n",
4684 "+new\n",
4685 "\n",
4686 "```\n",
4687 "\n",
4688 "### User Excerpt:\n",
4689 "\n",
4690 "```src/main.rs\n",
4691 "<|start_of_file|>\n",
4692 "fn before() {}\n",
4693 "<|editable_region_start|>\n",
4694 "fn foo() {\n",
4695 " <|user_cursor_is_here|>let x = 1;\n",
4696 "\n",
4697 "<|editable_region_end|>}\n",
4698 "fn after() {}\n",
4699 "\n",
4700 "```\n",
4701 "\n",
4702 "### Response:\n",
4703 ),
4704 );
4705 }
4706
4707 #[test]
4708 fn test_format_zeta1_from_input_no_start_of_file() {
4709 let excerpt = "fn foo() {\n let x = 1;\n}\n";
4710 let input = ZetaPromptInput {
4711 cursor_path: Path::new("src/main.rs").into(),
4712 cursor_excerpt: excerpt.into(),
4713 cursor_offset_in_excerpt: 15,
4714 excerpt_start_row: Some(10),
4715 events: vec![],
4716 related_files: Some(vec![]),
4717 active_buffer_diagnostics: vec![],
4718 excerpt_ranges: ExcerptRanges {
4719 editable_150: 0..28,
4720 editable_180: 0..28,
4721 editable_350: 0..28,
4722 editable_150_context_350: 0..28,
4723 editable_180_context_350: 0..28,
4724 editable_350_context_150: 0..28,
4725 ..Default::default()
4726 },
4727 syntax_ranges: None,
4728 experiment: None,
4729 in_open_source_repo: false,
4730 can_collect_data: false,
4731 repo_url: None,
4732 };
4733
4734 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4735
4736 assert_eq!(
4737 prompt,
4738 concat!(
4739 "### Instruction:\n",
4740 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4741 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4742 "into account the cursor location.\n",
4743 "\n",
4744 "### User Edits:\n",
4745 "\n",
4746 "\n",
4747 "\n",
4748 "### User Excerpt:\n",
4749 "\n",
4750 "```src/main.rs\n",
4751 "<|editable_region_start|>\n",
4752 "fn foo() {\n",
4753 " <|user_cursor_is_here|>let x = 1;\n",
4754 "}\n",
4755 "\n",
4756 "<|editable_region_end|>\n",
4757 "```\n",
4758 "\n",
4759 "### Response:\n",
4760 ),
4761 );
4762 }
4763
4764 #[test]
4765 fn test_format_zeta1_from_input_with_sub_ranges() {
4766 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
4767 let editable_range = 10..37;
4768 let context_range = 0..excerpt.len();
4769
4770 let input = ZetaPromptInput {
4771 cursor_path: Path::new("test.rs").into(),
4772 cursor_excerpt: excerpt.into(),
4773 cursor_offset_in_excerpt: 25,
4774 excerpt_start_row: Some(0),
4775 events: vec![],
4776 related_files: Some(vec![]),
4777 active_buffer_diagnostics: vec![],
4778 excerpt_ranges: ExcerptRanges {
4779 editable_150: editable_range.clone(),
4780 editable_180: editable_range.clone(),
4781 editable_350: editable_range.clone(),
4782 editable_150_context_350: context_range.clone(),
4783 editable_180_context_350: context_range.clone(),
4784 editable_350_context_150: context_range.clone(),
4785 ..Default::default()
4786 },
4787 syntax_ranges: None,
4788 experiment: None,
4789 in_open_source_repo: false,
4790 can_collect_data: false,
4791 repo_url: None,
4792 };
4793
4794 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4795
4796 assert_eq!(
4797 prompt,
4798 concat!(
4799 "### Instruction:\n",
4800 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4801 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4802 "into account the cursor location.\n",
4803 "\n",
4804 "### User Edits:\n",
4805 "\n",
4806 "\n",
4807 "\n",
4808 "### User Excerpt:\n",
4809 "\n",
4810 "```test.rs\n",
4811 "<|start_of_file|>\n",
4812 "// prefix\n",
4813 "<|editable_region_start|>\n",
4814 "fn foo() {\n",
4815 " <|user_cursor_is_here|>let x = 1;\n",
4816 "}\n",
4817 "<|editable_region_end|>\n",
4818 "// suffix\n",
4819 "\n",
4820 "```\n",
4821 "\n",
4822 "### Response:\n",
4823 ),
4824 );
4825 }
4826
4827 #[test]
4828 fn test_max_event_count() {
4829 fn make_numbered_event(index: usize) -> Event {
4830 return make_event(
4831 &format!("event-{index}.rs"),
4832 &format!("-old-{index}\n+new-{index}\n"),
4833 );
4834 }
4835 let input = make_input(
4836 "x",
4837 0..1,
4838 0,
4839 (0..3).map(make_numbered_event).collect(),
4840 vec![],
4841 );
4842
4843 let edit_history_section = format_edit_history_within_budget(
4844 &input.events,
4845 "<|file_sep|>",
4846 "edit history",
4847 usize::MAX,
4848 5,
4849 );
4850
4851 assert_eq!(
4852 &edit_history_section,
4853 indoc!(
4854 "
4855 <|file_sep|>edit history
4856 --- a/event-0.rs
4857 +++ b/event-0.rs
4858 -old-0
4859 +new-0
4860 --- a/event-1.rs
4861 +++ b/event-1.rs
4862 -old-1
4863 +new-1
4864 --- a/event-2.rs
4865 +++ b/event-2.rs
4866 -old-2
4867 +new-2
4868 "
4869 )
4870 );
4871
4872 let edit_history_section = format_edit_history_within_budget(
4873 &input.events,
4874 "<|file_sep|>",
4875 "edit history",
4876 usize::MAX,
4877 2,
4878 );
4879
4880 assert_eq!(
4881 &edit_history_section,
4882 indoc!(
4883 "
4884 <|file_sep|>edit history
4885 --- a/event-1.rs
4886 +++ b/event-1.rs
4887 -old-1
4888 +new-1
4889 --- a/event-2.rs
4890 +++ b/event-2.rs
4891 -old-2
4892 +new-2
4893 "
4894 )
4895 );
4896
4897 let edit_history_section = format_edit_history_within_budget(
4898 &input.events,
4899 "<|file_sep|>",
4900 "edit history",
4901 usize::MAX,
4902 0,
4903 );
4904
4905 assert_eq!(&edit_history_section, "");
4906 }
4907
4908 #[test]
4909 fn test_clean_zeta1_model_output_basic() {
4910 let output = indoc! {"
4911 <|editable_region_start|>
4912 fn main() {
4913 println!(\"hello\");
4914 }
4915 <|editable_region_end|>
4916 "};
4917
4918 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4919 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
4920 }
4921
4922 #[test]
4923 fn test_clean_zeta1_model_output_with_cursor() {
4924 let output = indoc! {"
4925 <|editable_region_start|>
4926 fn main() {
4927 <|user_cursor_is_here|>println!(\"hello\");
4928 }
4929 <|editable_region_end|>
4930 "};
4931
4932 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4933 assert_eq!(
4934 cleaned,
4935 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
4936 );
4937 }
4938
4939 #[test]
4940 fn test_clean_zeta1_model_output_no_markers() {
4941 let output = "fn main() {}\n";
4942 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4943 assert_eq!(cleaned, "fn main() {}\n");
4944 }
4945
4946 #[test]
4947 fn test_clean_zeta1_model_output_empty_region() {
4948 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4949 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4950 assert_eq!(cleaned, "");
4951 }
4952
4953 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4954 let mut result = excerpt.to_string();
4955 result.replace_range(
4956 parsed_output.range_in_excerpt.clone(),
4957 &parsed_output.new_editable_region,
4958 );
4959 result
4960 }
4961
4962 #[test]
4963 fn test_parse_zeta2_model_output() {
4964 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4965 let context_start = excerpt.find("ctx start").unwrap();
4966 let context_end = excerpt.find("after ctx").unwrap();
4967 let editable_start = excerpt.find("editable old").unwrap();
4968 let editable_end = editable_start + "editable old\n".len();
4969 let input = make_input_with_context_range(
4970 excerpt,
4971 editable_start..editable_end,
4972 context_start..context_end,
4973 editable_start,
4974 );
4975
4976 let output = parse_zeta2_model_output(
4977 "editable new\n>>>>>>> UPDATED\n",
4978 ZetaFormat::V0131GitMergeMarkersPrefix,
4979 &input,
4980 )
4981 .unwrap();
4982
4983 assert_eq!(
4984 apply_edit(excerpt, &output),
4985 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4986 );
4987 }
4988
4989 #[test]
4990 fn test_parse_zeta2_model_output_identity() {
4991 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4992 let editable_start = excerpt.find("bbb").unwrap();
4993 let editable_end = excerpt.find("ddd").unwrap();
4994 let input = make_input_with_context_range(
4995 excerpt,
4996 editable_start..editable_end,
4997 0..excerpt.len(),
4998 editable_start,
4999 );
5000
5001 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5002 let output =
5003 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5004
5005 assert_eq!(apply_edit(excerpt, &output), excerpt);
5006 }
5007
5008 #[test]
5009 fn test_parse_zeta2_model_output_strips_end_marker() {
5010 let excerpt = "hello\nworld\n";
5011 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5012
5013 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5014 let output1 =
5015 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5016 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5017
5018 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5019 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5020 }
5021}