1use anyhow::{Result, anyhow};
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker text inserted into the cursor excerpt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when assembling a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for `bytes` bytes of text: three bytes per token,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens of
    /// additional context (by analogy with the fields above) — TODO confirm.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens of
    /// additional context (by analogy with the fields above) — TODO confirm.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget — TODO confirm.
    pub context_4096: Option<Range<usize>>,
    /// Used by `ZetaFormat::V0304VariableEdit` as both the editable and the context
    /// range; when absent, that format falls back to `editable_350_context_150`.
    /// Presumably computed with an 8192-token budget — TODO confirm.
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render an edit-prediction prompt and to map the model's
/// output back onto the cursor excerpt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the file header of the cursor excerpt section.
    pub cursor_path: Arc<Path>,
    /// Text that the ranges in `excerpt_ranges` index into.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to rows computed inside `cursor_excerpt` so that
    /// related-file excerpts already covered by the cursor context can be dropped.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit-history events rendered into the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files to render into the prompt; `None` is treated as empty.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // The three fields below are not read by the prompt-formatting code in this
    // file; they are carried along with the request.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
68
/// Identifies which prompt layout (and matching output encoding) to use.
///
/// The per-format functions in this file (`special_tokens_for_format`,
/// `excerpt_ranges_for_format`, `write_cursor_excerpt_section_for_format`, ...)
/// dispatch on this enum. The default is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, hence the lint exemption.
// NOTE(review): the variant names are what `Display`/`parse` expose (and presumably
// what serde emits), so renaming it would likely be a breaking change — confirm.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131}
132
/// An edit-history event that can be rendered into the prompt via `write_event`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered as a unified-diff-style entry.
    BufferChange {
        /// Path written on the `+++ b` line.
        path: Arc<Path>,
        /// Path written on the `--- a` line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two path lines.
        diff: String,
        /// When true, the entry is prefixed with `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written to the prompt.
        in_open_source_repo: bool,
    },
}
144
145impl Event {
146 pub fn in_open_source_repo(&self) -> bool {
147 match self {
148 Event::BufferChange {
149 in_open_source_repo,
150 ..
151 } => *in_open_source_repo,
152 }
153 }
154}
155
156pub fn write_event(prompt: &mut String, event: &Event) {
157 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
158 for component in path.components() {
159 prompt.push('/');
160 write!(prompt, "{}", component.as_os_str().display()).ok();
161 }
162 }
163 match event {
164 Event::BufferChange {
165 path,
166 old_path,
167 diff,
168 predicted,
169 in_open_source_repo: _,
170 } => {
171 if *predicted {
172 prompt.push_str("// User accepted prediction:\n");
173 }
174 prompt.push_str("--- a");
175 write_path_as_unix_str(prompt, old_path.as_ref());
176 prompt.push_str("\n+++ b");
177 write_path_as_unix_str(prompt, path.as_ref());
178 prompt.push('\n');
179 prompt.push_str(diff);
180 }
181 }
182}
183
/// A file related to the cursor location, with the excerpts to show from it.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Compared against each excerpt's end row: an excerpt ending before this row
    /// is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Not read by the rendering code in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
192
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// Excerpt text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority under a token budget: excerpts with a lower `order` are
    /// considered first. Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
200
201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
202 special_tokens_for_format(format)
203 .iter()
204 .any(|token| input.cursor_excerpt.contains(token))
205}
206
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
210
211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
212 match format {
213 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
214 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
215 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
216 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
217 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
218 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
219 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
220 ZetaFormat::v0226Hashline => hashline::special_tokens(),
221 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
222 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
223 }
224}
225
226pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
227 match format {
228 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
229 ZetaFormat::V0112MiddleAtEnd
230 | ZetaFormat::V0113Ordered
231 | ZetaFormat::V0114180EditableRegion
232 | ZetaFormat::V0120GitMergeMarkers
233 | ZetaFormat::V0131GitMergeMarkersPrefix
234 | ZetaFormat::V0211Prefill
235 | ZetaFormat::V0211SeedCoder
236 | ZetaFormat::V0304VariableEdit
237 | ZetaFormat::V0304SeedNoEdits => &[],
238 }
239}
240
241pub fn excerpt_ranges_for_format(
242 format: ZetaFormat,
243 ranges: &ExcerptRanges,
244) -> (Range<usize>, Range<usize>) {
245 match format {
246 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
247 ranges.editable_150.clone(),
248 ranges.editable_150_context_350.clone(),
249 ),
250 ZetaFormat::V0114180EditableRegion => (
251 ranges.editable_180.clone(),
252 ranges.editable_180_context_350.clone(),
253 ),
254 ZetaFormat::V0120GitMergeMarkers
255 | ZetaFormat::V0131GitMergeMarkersPrefix
256 | ZetaFormat::V0211Prefill
257 | ZetaFormat::V0211SeedCoder
258 | ZetaFormat::v0226Hashline
259 | ZetaFormat::V0304SeedNoEdits => (
260 ranges.editable_350.clone(),
261 ranges.editable_350_context_150.clone(),
262 ),
263 ZetaFormat::V0304VariableEdit => {
264 let context = ranges
265 .context_8192
266 .clone()
267 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
268 (context.clone(), context)
269 }
270 }
271}
272
/// Appends the cursor-excerpt section of the prompt for `format` to `prompt`.
///
/// `context` is the context text, `editable_range` the editable byte range within
/// it, and `cursor_offset` the cursor's byte offset within it. The call is
/// forwarded to the writer of the matching format module.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180EditableRegion shares the V0113 layout.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0211Prefill shares the V0131 layout.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer takes no editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
    }
}
335
/// Converts a byte range within `text` into a half-open range of 0-based rows.
///
/// The start row is the number of newlines before `range.start`. The end row is
/// exclusive: it is bumped by one when the text up to `range.end` does not end
/// with a newline, so that a partially covered last row is included.
///
/// Panics if the range bounds are out of bounds or not on char boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let count_newlines = |s: &str| s.matches('\n').count() as u32;

    let first_row = count_newlines(&text[0..range.start]);
    let rows_spanned = count_newlines(&text[range.clone()]);
    let ends_mid_row = !text[..range.end].ends_with('\n');

    first_row..(first_row + rows_spanned + u32::from(ends_mid_row))
}
344
/// Renders the full prompt for `input` in `format`, keeping the estimated token
/// count of the optional sections within `max_tokens`.
///
/// The seed-coder formats delegate to `seed_coder::format_prompt_with_budget`.
/// For all other formats the cursor section is always emitted in full; what is
/// left of the budget goes first to the edit history and then to related files.
/// The sections are concatenated as: related files, edit history, cursor section.
pub fn format_prompt_with_budget_for_format(
    input: &ZetaPromptInput,
    format: ZetaFormat,
    max_tokens: usize,
) -> String {
    let (context, editable_range, context_range, cursor_offset) =
        resolve_cursor_region(input, format);
    let path = &*input.cursor_path;

    let empty_files = Vec::new();
    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
    // When the excerpt's starting row is known, drop related excerpts of the cursor
    // file that lie entirely inside the rows already shown as cursor context.
    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
        // Shift excerpt-relative rows by the excerpt's starting row.
        let row_range = relative_row_range.start + cursor_excerpt_start_row
            ..relative_row_range.end + cursor_excerpt_start_row;
        &filter_redundant_excerpts(
            input_related_files.to_vec(),
            input.cursor_path.as_ref(),
            row_range,
        )
    } else {
        input_related_files
    };

    match format {
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::format_prompt_with_budget(
                path,
                context,
                &editable_range,
                cursor_offset,
                &input.events,
                related_files,
                max_tokens,
            )
        }
        _ => {
            let mut cursor_section = String::new();
            write_cursor_excerpt_section_for_format(
                format,
                &mut cursor_section,
                path,
                context,
                &editable_range,
                cursor_offset,
            );

            // The cursor section is never truncated; it only reduces what is left
            // for the other sections (saturating at zero).
            let cursor_tokens = estimate_tokens(cursor_section.len());
            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);

            let edit_history_section = format_edit_history_within_budget(
                &input.events,
                "<|file_sep|>",
                "edit history",
                budget_after_cursor,
            );
            let edit_history_tokens = estimate_tokens(edit_history_section.len());
            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);

            let related_files_section = format_related_files_within_budget(
                &related_files,
                "<|file_sep|>",
                "",
                budget_after_edit_history,
            );

            // Emission order differs from budgeting order.
            let mut prompt = String::new();
            prompt.push_str(&related_files_section);
            prompt.push_str(&edit_history_section);
            prompt.push_str(&cursor_section);
            prompt
        }
    }
}
419
420pub fn filter_redundant_excerpts(
421 mut related_files: Vec<RelatedFile>,
422 cursor_path: &Path,
423 cursor_row_range: Range<u32>,
424) -> Vec<RelatedFile> {
425 for file in &mut related_files {
426 if file.path.as_ref() == cursor_path {
427 file.excerpts.retain(|excerpt| {
428 excerpt.row_range.start < cursor_row_range.start
429 || excerpt.row_range.end > cursor_row_range.end
430 });
431 }
432 }
433 related_files.retain(|file| !file.excerpts.is_empty());
434 related_files
435}
436
437pub fn get_prefill_for_format(
438 format: ZetaFormat,
439 context: &str,
440 editable_range: &Range<usize>,
441) -> String {
442 match format {
443 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
444 ZetaFormat::V0112MiddleAtEnd
445 | ZetaFormat::V0113Ordered
446 | ZetaFormat::V0114180EditableRegion
447 | ZetaFormat::V0120GitMergeMarkers
448 | ZetaFormat::V0131GitMergeMarkersPrefix
449 | ZetaFormat::V0211SeedCoder
450 | ZetaFormat::v0226Hashline
451 | ZetaFormat::V0304VariableEdit => String::new(),
452 ZetaFormat::V0304SeedNoEdits => String::new(),
453 }
454}
455
456pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
457 match format {
458 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
459 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
460 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
461 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
462 ZetaFormat::V0112MiddleAtEnd
463 | ZetaFormat::V0113Ordered
464 | ZetaFormat::V0114180EditableRegion
465 | ZetaFormat::v0226Hashline
466 | ZetaFormat::V0304VariableEdit => None,
467 }
468}
469
470pub fn encode_patch_as_output_for_format(
471 format: ZetaFormat,
472 old_editable_region: &str,
473 patch: &str,
474 cursor_offset: Option<usize>,
475) -> Result<Option<String>> {
476 match format {
477 ZetaFormat::v0226Hashline => {
478 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
479 }
480 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
481 old_editable_region,
482 patch,
483 cursor_offset,
484 )
485 .map(Some),
486 ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
487 _ => Ok(None),
488 }
489}
490
/// Result of `parse_zeta2_model_output`: the replacement text and the part of
/// the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
497
/// Parse model output for the given zeta format
///
/// Strips the format's end marker when the output ends with it, decodes the
/// output per format, and returns the replacement text with the byte range of
/// `prompt_inputs.cursor_excerpt` it applies to.
///
/// # Errors
///
/// Propagates the error from `v0304_variable_edit::apply_variable_edit` for the
/// variable-edit format.
pub fn parse_zeta2_model_output(
    output: &str,
    format: ZetaFormat,
    prompt_inputs: &ZetaPromptInput,
) -> Result<ParsedOutput> {
    // Output without the expected end marker is used unchanged.
    let output = match output_end_marker_for_format(format) {
        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
        None => output,
    };

    let (context, editable_range_in_context, context_range, _) =
        resolve_cursor_region(prompt_inputs, format);
    let context_start = context_range.start;
    let old_editable_region = &context[editable_range_in_context.clone()];

    let (range_in_context, output) = match format {
        // Hashline output is either a list of edit commands applied to the old
        // region, or the replacement text itself.
        ZetaFormat::v0226Hashline => (
            editable_range_in_context,
            if hashline::output_has_edit_commands(output) {
                hashline::apply_edit_commands(old_editable_region, output)
            } else {
                output.to_string()
            },
        ),
        // The variable-edit decoder determines both the range and the text.
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
        // A leading "no edits" marker means the old region is kept as is.
        ZetaFormat::V0304SeedNoEdits => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                output.to_string()
            },
        ),
        // All remaining formats emit the replacement text directly.
        _ => (editable_range_in_context, output.to_string()),
    };

    // Translate from context-relative to excerpt-relative offsets.
    let range_in_excerpt =
        range_in_context.start + context_start..range_in_context.end + context_start;

    Ok(ParsedOutput {
        new_editable_region: output,
        range_in_excerpt,
    })
}
543
/// Singular-named alias of `excerpt_ranges_for_format`; returns the same
/// `(editable_range, context_range)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
550
551pub fn resolve_cursor_region(
552 input: &ZetaPromptInput,
553 format: ZetaFormat,
554) -> (&str, Range<usize>, Range<usize>, usize) {
555 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
556 let context_start = context_range.start;
557 let context_text = &input.cursor_excerpt[context_range.clone()];
558 let adjusted_editable =
559 (editable_range.start - context_start)..(editable_range.end - context_start);
560 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
561
562 (
563 context_text,
564 adjusted_editable,
565 context_range,
566 adjusted_cursor,
567 )
568}
569
/// Returns the prefill text for `input` in `format`: resolves the cursor region
/// and forwards its context and editable range to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
574
575fn format_edit_history_within_budget(
576 events: &[Arc<Event>],
577 file_marker: &str,
578 edit_history_name: &str,
579 max_tokens: usize,
580) -> String {
581 let header = format!("{}{}\n", file_marker, edit_history_name);
582 let header_tokens = estimate_tokens(header.len());
583 if header_tokens >= max_tokens {
584 return String::new();
585 }
586
587 let mut event_strings: Vec<String> = Vec::new();
588 let mut total_tokens = header_tokens;
589
590 for event in events.iter().rev() {
591 let mut event_str = String::new();
592 write_event(&mut event_str, event);
593 let event_tokens = estimate_tokens(event_str.len());
594
595 if total_tokens + event_tokens > max_tokens {
596 break;
597 }
598 total_tokens += event_tokens;
599 event_strings.push(event_str);
600 }
601
602 if event_strings.is_empty() {
603 return String::new();
604 }
605
606 let mut result = header;
607 for event_str in event_strings.iter().rev() {
608 result.push_str(event_str);
609 }
610 result
611}
612
613fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
614 let needs_newline = !excerpt.text.ends_with('\n');
615 let needs_ellipsis = excerpt.row_range.end < file_max_row;
616 let len = excerpt.text.len()
617 + if needs_newline { "\n".len() } else { 0 }
618 + if needs_ellipsis { "...\n".len() } else { 0 };
619 estimate_tokens(len)
620}
621
/// Renders as many related-file excerpts as fit within an estimated budget of
/// `max_tokens`.
///
/// Excerpts from all files are ranked by `(order, file index, excerpt index)` and
/// taken greedily until the first one that does not fit. The selected excerpts
/// are then rendered grouped by file, in the original file and excerpt order.
/// Each file starts with a header line of `file_prefix` followed by its path.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by its position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged only for the first excerpt of a file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; later, smaller excerpts
        // are not considered.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the selected prefix, then restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // NOTE(review): `file_suffix` is written only between files, never after
            // the last one, although the budget above charges it for every file —
            // confirm whether a trailing suffix is intended.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark excerpts that stop before the file's `max_row`.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
710
711pub fn write_related_files(
712 prompt: &mut String,
713 related_files: &[RelatedFile],
714) -> Vec<Range<usize>> {
715 let mut ranges = Vec::new();
716 for file in related_files {
717 let start = prompt.len();
718 let path_str = file.path.to_string_lossy();
719 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
720 for excerpt in &file.excerpts {
721 prompt.push_str(&excerpt.text);
722 if !prompt.ends_with('\n') {
723 prompt.push('\n');
724 }
725 if excerpt.row_range.end < file.max_row {
726 prompt.push_str("...\n");
727 }
728 }
729 let end = prompt.len();
730 ranges.push(start..end);
731 }
732 ranges
733}
734
735mod v0112_middle_at_end {
736 use super::*;
737
738 pub fn special_tokens() -> &'static [&'static str] {
739 &[
740 "<|fim_prefix|>",
741 "<|fim_suffix|>",
742 "<|fim_middle|>",
743 "<|file_sep|>",
744 CURSOR_MARKER,
745 ]
746 }
747
748 pub fn write_cursor_excerpt_section(
749 prompt: &mut String,
750 path: &Path,
751 context: &str,
752 editable_range: &Range<usize>,
753 cursor_offset: usize,
754 ) {
755 let path_str = path.to_string_lossy();
756 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
757
758 prompt.push_str("<|fim_prefix|>\n");
759 prompt.push_str(&context[..editable_range.start]);
760
761 prompt.push_str("<|fim_suffix|>\n");
762 prompt.push_str(&context[editable_range.end..]);
763 if !prompt.ends_with('\n') {
764 prompt.push('\n');
765 }
766
767 prompt.push_str("<|fim_middle|>current\n");
768 prompt.push_str(&context[editable_range.start..cursor_offset]);
769 prompt.push_str(CURSOR_MARKER);
770 prompt.push_str(&context[cursor_offset..editable_range.end]);
771 if !prompt.ends_with('\n') {
772 prompt.push('\n');
773 }
774
775 prompt.push_str("<|fim_middle|>updated\n");
776 }
777}
778
779mod v0113_ordered {
780 use super::*;
781
782 pub fn special_tokens() -> &'static [&'static str] {
783 &[
784 "<|fim_prefix|>",
785 "<|fim_suffix|>",
786 "<|fim_middle|>",
787 "<|file_sep|>",
788 CURSOR_MARKER,
789 ]
790 }
791
792 pub fn write_cursor_excerpt_section(
793 prompt: &mut String,
794 path: &Path,
795 context: &str,
796 editable_range: &Range<usize>,
797 cursor_offset: usize,
798 ) {
799 let path_str = path.to_string_lossy();
800 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
801
802 prompt.push_str("<|fim_prefix|>\n");
803 prompt.push_str(&context[..editable_range.start]);
804 if !prompt.ends_with('\n') {
805 prompt.push('\n');
806 }
807
808 prompt.push_str("<|fim_middle|>current\n");
809 prompt.push_str(&context[editable_range.start..cursor_offset]);
810 prompt.push_str(CURSOR_MARKER);
811 prompt.push_str(&context[cursor_offset..editable_range.end]);
812 if !prompt.ends_with('\n') {
813 prompt.push('\n');
814 }
815
816 prompt.push_str("<|fim_suffix|>\n");
817 prompt.push_str(&context[editable_range.end..]);
818 if !prompt.ends_with('\n') {
819 prompt.push('\n');
820 }
821
822 prompt.push_str("<|fim_middle|>updated\n");
823 }
824}
825
mod v0114180_editable_region {
    //! This format has no writer of its own: the dispatcher renders it with
    //! `v0113_ordered::write_cursor_excerpt_section`, and it differs from
    //! `V0113Ordered` in the excerpt ranges it selects (the 180-token ones).

    use super::*;

    /// Same special tokens as `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
833
834pub mod v0120_git_merge_markers {
835 //! A prompt that uses git-style merge conflict markers to represent the editable region.
836 //!
837 //! Example prompt:
838 //!
839 //! <|file_sep|>path/to/target_file.py
840 //! <|fim_prefix|>
841 //! code before editable region
842 //! <|fim_suffix|>
843 //! code after editable region
844 //! <|fim_middle|>
845 //! <<<<<<< CURRENT
846 //! code that
847 //! needs to<|user_cursor|>
848 //! be rewritten
849 //! =======
850 //!
851 //! Expected output (should be generated by the model):
852 //!
853 //! updated
854 //! code with
855 //! changes applied
856 //! >>>>>>> UPDATED
857
858 use super::*;
859
860 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
861 pub const SEPARATOR: &str = "=======\n";
862 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
863
864 pub fn special_tokens() -> &'static [&'static str] {
865 &[
866 "<|fim_prefix|>",
867 "<|fim_suffix|>",
868 "<|fim_middle|>",
869 "<|file_sep|>",
870 START_MARKER,
871 SEPARATOR,
872 END_MARKER,
873 CURSOR_MARKER,
874 ]
875 }
876
877 pub fn write_cursor_excerpt_section(
878 prompt: &mut String,
879 path: &Path,
880 context: &str,
881 editable_range: &Range<usize>,
882 cursor_offset: usize,
883 ) {
884 let path_str = path.to_string_lossy();
885 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
886
887 prompt.push_str("<|fim_prefix|>");
888 prompt.push_str(&context[..editable_range.start]);
889
890 prompt.push_str("<|fim_suffix|>");
891 prompt.push_str(&context[editable_range.end..]);
892 if !prompt.ends_with('\n') {
893 prompt.push('\n');
894 }
895
896 prompt.push_str("<|fim_middle|>");
897 prompt.push_str(START_MARKER);
898 prompt.push_str(&context[editable_range.start..cursor_offset]);
899 prompt.push_str(CURSOR_MARKER);
900 prompt.push_str(&context[cursor_offset..editable_range.end]);
901 if !prompt.ends_with('\n') {
902 prompt.push('\n');
903 }
904 prompt.push_str(SEPARATOR);
905 }
906}
907
908pub mod v0131_git_merge_markers_prefix {
909 //! A prompt that uses git-style merge conflict markers to represent the editable region.
910 //!
911 //! Example prompt:
912 //!
913 //! <|file_sep|>path/to/target_file.py
914 //! <|fim_prefix|>
915 //! code before editable region
916 //! <<<<<<< CURRENT
917 //! code that
918 //! needs to<|user_cursor|>
919 //! be rewritten
920 //! =======
921 //! <|fim_suffix|>
922 //! code after editable region
923 //! <|fim_middle|>
924 //!
925 //! Expected output (should be generated by the model):
926 //!
927 //! updated
928 //! code with
929 //! changes applied
930 //! >>>>>>> UPDATED
931
932 use super::*;
933
934 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
935 pub const SEPARATOR: &str = "=======\n";
936 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
937
938 pub fn special_tokens() -> &'static [&'static str] {
939 &[
940 "<|fim_prefix|>",
941 "<|fim_suffix|>",
942 "<|fim_middle|>",
943 "<|file_sep|>",
944 START_MARKER,
945 SEPARATOR,
946 END_MARKER,
947 CURSOR_MARKER,
948 ]
949 }
950
951 pub fn write_cursor_excerpt_section(
952 prompt: &mut String,
953 path: &Path,
954 context: &str,
955 editable_range: &Range<usize>,
956 cursor_offset: usize,
957 ) {
958 let path_str = path.to_string_lossy();
959 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
960
961 prompt.push_str("<|fim_prefix|>");
962 prompt.push_str(&context[..editable_range.start]);
963 prompt.push_str(START_MARKER);
964 prompt.push_str(&context[editable_range.start..cursor_offset]);
965 prompt.push_str(CURSOR_MARKER);
966 prompt.push_str(&context[cursor_offset..editable_range.end]);
967 if !prompt.ends_with('\n') {
968 prompt.push('\n');
969 }
970 prompt.push_str(SEPARATOR);
971
972 prompt.push_str("<|fim_suffix|>");
973 prompt.push_str(&context[editable_range.end..]);
974 if !prompt.ends_with('\n') {
975 prompt.push('\n');
976 }
977
978 prompt.push_str("<|fim_middle|>");
979 }
980}
981
982pub mod v0211_prefill {
983 use super::*;
984
985 pub fn special_tokens() -> &'static [&'static str] {
986 v0131_git_merge_markers_prefix::special_tokens()
987 }
988
989 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
990 let editable_region = &context[editable_range.start..editable_range.end];
991
992 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
993 let prefill_len = editable_region.floor_char_boundary(prefill_len);
994
995 // Find a token boundary to avoid splitting tokens in the prefill.
996 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
997 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
998 // the \n and consume any consecutive \n characters after it.
999 let prefill = &editable_region[..prefill_len];
1000 match prefill.rfind('\n') {
1001 Some(pos) => {
1002 let mut end = pos + 1;
1003 while end < editable_region.len()
1004 && editable_region.as_bytes().get(end) == Some(&b'\n')
1005 {
1006 end += 1;
1007 }
1008 editable_region[..end].to_string()
1009 }
1010 // No newline found. Fall back to splitting before the last space
1011 // (word-level boundary)
1012 None => match prefill.rfind(' ') {
1013 Some(pos) => prefill[..pos].to_string(),
1014 None => prefill.to_string(),
1015 },
1016 }
1017 }
1018}
1019
1020pub mod hashline {
1021
1022 use std::fmt::Display;
1023
 /// Written as the last line of the hashline cursor excerpt section.
 pub const END_MARKER: &str = "<|fim_middle|>updated";
 /// Written immediately before the hashline-encoded editable region.
 pub const START_MARKER: &str = "<|fim_middle|>current";

 use super::*;

 // Command markers; all three are listed among this format's special tokens.
 const SET_COMMAND_MARKER: &str = "<|set|>";
 const INSERT_COMMAND_MARKER: &str = "<|insert|>";
 /// Also used as the stop token for the hashline format.
 pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1032
1033 pub fn special_tokens() -> &'static [&'static str] {
1034 return &[
1035 SET_COMMAND_MARKER,
1036 "<|set_range|>",
1037 INSERT_COMMAND_MARKER,
1038 NO_EDITS_COMMAND_MARKER,
1039 CURSOR_MARKER,
1040 "<|file_sep|>",
1041 "<|fim_prefix|>",
1042 "<|fim_suffix|>",
1043 "<|fim_middle|>",
1044 ];
1045 }
1046
1047 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1048 #[derive(Debug, Clone, PartialEq, Eq)]
1049 struct LineRef {
1050 index: usize,
1051 hash: u8,
1052 }
1053
1054 impl Display for LineRef {
1055 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1056 write!(f, "{}:{:02x}", self.index, self.hash)
1057 }
1058 }
1059
1060 pub fn hash_line(line: &[u8]) -> u8 {
1061 let mut h: u8 = 0;
1062 for &byte in line {
1063 h = h.wrapping_add(byte);
1064 }
1065 return h;
1066 }
1067
1068 /// Write the hashline-encoded editable region into `out`. Each line of
1069 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1070 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1071 /// to the start of `editable_text`).
1072 pub fn write_hashline_editable_region(
1073 out: &mut String,
1074 editable_text: &str,
1075 cursor_offset_in_editable: usize,
1076 ) {
1077 let mut offset = 0;
1078 for (i, line) in editable_text.lines().enumerate() {
1079 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1080 && cursor_offset_in_editable < offset + line.len()
1081 {
1082 (
1083 &line[..cursor_offset_in_editable - offset],
1084 CURSOR_MARKER,
1085 &line[cursor_offset_in_editable - offset..],
1086 )
1087 } else {
1088 (line, "", "")
1089 };
1090 write!(
1091 out,
1092 "\n{}|{head}{cursor}{tail}",
1093 LineRef {
1094 index: i,
1095 hash: hash_line(line.as_bytes())
1096 }
1097 )
1098 .unwrap();
1099 offset += line.len() + 1;
1100 }
1101 }
1102
1103 pub fn write_cursor_excerpt_section(
1104 prompt: &mut String,
1105 path: &Path,
1106 context: &str,
1107 editable_range: &Range<usize>,
1108 cursor_offset: usize,
1109 ) {
1110 let path_str = path.to_string_lossy();
1111 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1112
1113 prompt.push_str("<|fim_prefix|>\n");
1114 prompt.push_str(&context[..editable_range.start]);
1115 prompt.push_str(START_MARKER);
1116
1117 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1118 let editable_region = &context[editable_range.clone()];
1119 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1120
1121 if !prompt.ends_with('\n') {
1122 prompt.push('\n');
1123 }
1124
1125 prompt.push_str("<|fim_suffix|>\n");
1126 prompt.push_str(&context[editable_range.end..]);
1127 if !prompt.ends_with('\n') {
1128 prompt.push('\n');
1129 }
1130
1131 prompt.push_str(END_MARKER);
1132 prompt.push('\n');
1133 }
1134
    /// A single edit command parsed from the model output.
    ///
    /// `content` is the command's body: a slice borrowed from the model output
    /// covering the lines between this command line and the next one.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1152
1153 /// Parse a line reference like `3:c3` into a `LineRef`.
1154 fn parse_line_ref(s: &str) -> Option<LineRef> {
1155 let (idx_str, hash_str) = s.split_once(':')?;
1156 let index = idx_str.parse::<usize>().ok()?;
1157 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1158 Some(LineRef { index, hash })
1159 }
1160
1161 /// Parse the model output into a list of `EditCommand`s.
1162 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1163 let mut commands = Vec::new();
1164 let mut offset = 0usize;
1165
1166 while offset < model_output.len() {
1167 let next_nl = model_output[offset..]
1168 .find('\n')
1169 .map(|i| offset + i)
1170 .unwrap_or(model_output.len());
1171 let line = &model_output[offset..next_nl];
1172 let line_end = if next_nl < model_output.len() {
1173 next_nl + 1
1174 } else {
1175 next_nl
1176 };
1177
1178 let trimmed = line.trim();
1179 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1180 (true, spec)
1181 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1182 (false, spec)
1183 } else {
1184 offset = line_end;
1185 continue;
1186 };
1187
1188 let mut content_end = line_end;
1189 let mut scan = line_end;
1190
1191 while scan < model_output.len() {
1192 let body_nl = model_output[scan..]
1193 .find('\n')
1194 .map(|i| scan + i)
1195 .unwrap_or(model_output.len());
1196 let body_line = &model_output[scan..body_nl];
1197 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1198 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1199 {
1200 break;
1201 }
1202 scan = if body_nl < model_output.len() {
1203 body_nl + 1
1204 } else {
1205 body_nl
1206 };
1207 content_end = scan;
1208 }
1209
1210 let content = &model_output[line_end..content_end];
1211
1212 if is_set {
1213 if let Some((start_str, end_str)) = specifier.split_once('-') {
1214 if let (Some(start), Some(end)) =
1215 (parse_line_ref(start_str), parse_line_ref(end_str))
1216 {
1217 commands.push(EditCommand::Set {
1218 start,
1219 end,
1220 content,
1221 });
1222 }
1223 } else if let Some(target) = parse_line_ref(specifier) {
1224 commands.push(EditCommand::Set {
1225 start: target.clone(),
1226 end: target,
1227 content,
1228 });
1229 }
1230 } else {
1231 let after = parse_line_ref(specifier);
1232 commands.push(EditCommand::Insert { after, content });
1233 }
1234
1235 offset = scan;
1236 }
1237
1238 commands
1239 }
1240
1241 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1242 /// (as opposed to being a plain full-replacement output).
1243 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1244 /// editable region, returning the plain text content.
1245 pub fn strip_hashline_prefixes(region: &str) -> String {
1246 let mut decoded: String = region
1247 .lines()
1248 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1249 .collect::<Vec<_>>()
1250 .join("\n");
1251 if region.ends_with('\n') {
1252 decoded.push('\n');
1253 }
1254 decoded
1255 }
1256
1257 pub fn output_has_edit_commands(model_output: &str) -> bool {
1258 model_output.contains(SET_COMMAND_MARKER)
1259 || model_output.contains(INSERT_COMMAND_MARKER)
1260 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1261 }
1262
1263 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1264 /// original editable region text.
1265 ///
1266 /// `editable_region` is the original text of the editable region (without hash
1267 /// prefixes). `model_output` is the raw model response containing edit commands.
1268 ///
1269 /// Returns the full replacement text for the editable region.
1270 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1271 if model_output
1272 .trim_start()
1273 .starts_with(NO_EDITS_COMMAND_MARKER)
1274 {
1275 return editable_region.to_string();
1276 }
1277
1278 let original_lines: Vec<&str> = editable_region.lines().collect();
1279 let old_hashes: Vec<u8> = original_lines
1280 .iter()
1281 .map(|line| hash_line(line.as_bytes()))
1282 .collect();
1283
1284 let commands = parse_edit_commands(model_output);
1285
1286 // For set operations: indexed by start line → Some((end line index, content))
1287 // For insert operations: indexed by line index → vec of content to insert after
1288 // Insert-before-first is tracked separately.
1289 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1290 let mut insert_before_first: Vec<&str> = Vec::new();
1291 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1292
1293 for command in &commands {
1294 match command {
1295 EditCommand::Set {
1296 start,
1297 end,
1298 content,
1299 } => {
1300 if start.index < old_hashes.len()
1301 && end.index < old_hashes.len()
1302 && start.index <= end.index
1303 && old_hashes[start.index] == start.hash
1304 && old_hashes[end.index] == end.hash
1305 {
1306 set_ops[start.index] = Some((end.index, *content));
1307 }
1308 }
1309 EditCommand::Insert { after, content } => match after {
1310 None => insert_before_first.push(*content),
1311 Some(line_ref) => {
1312 if line_ref.index < old_hashes.len()
1313 && old_hashes[line_ref.index] == line_ref.hash
1314 {
1315 insert_after[line_ref.index].push(*content);
1316 }
1317 }
1318 },
1319 }
1320 }
1321
1322 let mut result = String::new();
1323
1324 // Emit any insertions before the first line
1325 for content in &insert_before_first {
1326 result.push_str(content);
1327 if !content.ends_with('\n') {
1328 result.push('\n');
1329 }
1330 }
1331
1332 let mut i = 0;
1333 while i < original_lines.len() {
1334 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1335 // Replace lines i..=end_index with the replacement content
1336 result.push_str(replacement);
1337 if !replacement.is_empty() && !replacement.ends_with('\n') {
1338 result.push('\n');
1339 }
1340 // Emit any insertions after the end of this set range
1341 if *end_index < insert_after.len() {
1342 for content in &insert_after[*end_index] {
1343 result.push_str(content);
1344 if !content.ends_with('\n') {
1345 result.push('\n');
1346 }
1347 }
1348 }
1349 i = end_index + 1;
1350 } else {
1351 // Keep the original line
1352 result.push_str(original_lines[i]);
1353 result.push('\n');
1354 // Emit any insertions after this line
1355 for content in &insert_after[i] {
1356 result.push_str(content);
1357 if !content.ends_with('\n') {
1358 result.push('\n');
1359 }
1360 }
1361 i += 1;
1362 }
1363 }
1364
1365 // Preserve trailing newline behavior: if the original ended with a
1366 // newline the result already has one; if it didn't, trim the extra one
1367 // we added.
1368 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1369 result.pop();
1370 }
1371
1372 result
1373 }
1374
1375 /// Convert a unified diff patch into hashline edit commands.
1376 ///
1377 /// Parses the unified diff `patch` directly to determine which lines of
1378 /// `old_text` are deleted/replaced and what new lines are added, then emits
1379 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1380 /// `{index}:{hash}` identifiers.
1381 ///
1382 /// `cursor_offset` is an optional byte offset into the first hunk's new
1383 /// text (context + additions) where the cursor marker should be placed.
1384 pub fn patch_to_edit_commands(
1385 old_text: &str,
1386 patch: &str,
1387 cursor_offset: Option<usize>,
1388 ) -> Result<String> {
1389 let old_lines: Vec<&str> = old_text.lines().collect();
1390 let old_hashes: Vec<u8> = old_lines
1391 .iter()
1392 .map(|line| hash_line(line.as_bytes()))
1393 .collect();
1394
1395 let mut result = String::new();
1396 let mut first_hunk = true;
1397
1398 struct Hunk<'a> {
1399 line_range: Range<usize>,
1400 new_text_lines: Vec<&'a str>,
1401 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1402 }
1403
1404 // Parse the patch line by line. We only care about hunk headers,
1405 // context, deletions, and additions.
1406 let mut old_line_index: usize = 0;
1407 let mut current_hunk: Option<Hunk> = None;
1408 // Byte offset tracking within the hunk's new text for cursor placement.
1409 let mut new_text_byte_offset: usize = 0;
1410 // The line index of the last old line seen before/in the current hunk
1411 // (used for insert-after reference).
1412 let mut last_old_line_before_hunk: Option<usize> = None;
1413
1414 fn flush_hunk(
1415 hunk: Hunk,
1416 last_old_line: Option<usize>,
1417 result: &mut String,
1418 old_hashes: &[u8],
1419 ) {
1420 if hunk.line_range.is_empty() {
1421 // Pure insertion — reference the old line to insert after when in bounds.
1422 if let Some(after) = last_old_line
1423 && let Some(&hash) = old_hashes.get(after)
1424 {
1425 write!(
1426 result,
1427 "{INSERT_COMMAND_MARKER}{}\n",
1428 LineRef { index: after, hash }
1429 )
1430 .unwrap();
1431 } else {
1432 result.push_str(INSERT_COMMAND_MARKER);
1433 result.push('\n');
1434 }
1435 } else {
1436 let start = hunk.line_range.start;
1437 let end_exclusive = hunk.line_range.end;
1438 let deleted_line_count = end_exclusive.saturating_sub(start);
1439
1440 if deleted_line_count == 1 {
1441 if let Some(&hash) = old_hashes.get(start) {
1442 write!(
1443 result,
1444 "{SET_COMMAND_MARKER}{}\n",
1445 LineRef { index: start, hash }
1446 )
1447 .unwrap();
1448 } else {
1449 result.push_str(SET_COMMAND_MARKER);
1450 result.push('\n');
1451 }
1452 } else {
1453 let end_inclusive = end_exclusive - 1;
1454 match (
1455 old_hashes.get(start).copied(),
1456 old_hashes.get(end_inclusive).copied(),
1457 ) {
1458 (Some(start_hash), Some(end_hash)) => {
1459 write!(
1460 result,
1461 "{SET_COMMAND_MARKER}{}-{}\n",
1462 LineRef {
1463 index: start,
1464 hash: start_hash
1465 },
1466 LineRef {
1467 index: end_inclusive,
1468 hash: end_hash
1469 }
1470 )
1471 .unwrap();
1472 }
1473 _ => {
1474 result.push_str(SET_COMMAND_MARKER);
1475 result.push('\n');
1476 }
1477 }
1478 }
1479 }
1480 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1481 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1482 && line_offset == cursor_line_offset
1483 {
1484 result.push_str(&line[..char_offset]);
1485 result.push_str(CURSOR_MARKER);
1486 result.push_str(&line[char_offset..]);
1487 continue;
1488 }
1489
1490 result.push_str(line);
1491 }
1492 }
1493
1494 for raw_line in patch.split_inclusive('\n') {
1495 if raw_line.starts_with("@@") {
1496 // Flush any pending change hunk from a previous patch hunk.
1497 if let Some(hunk) = current_hunk.take() {
1498 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1499 }
1500
1501 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1502 // We intentionally do not trust old_start as a direct local index into `old_text`,
1503 // because some patches are produced against a larger file region and carry
1504 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1505 if first_hunk {
1506 new_text_byte_offset = 0;
1507 first_hunk = false;
1508 }
1509 continue;
1510 }
1511
1512 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1513 continue;
1514 }
1515 if raw_line.starts_with("\\ No newline") {
1516 continue;
1517 }
1518
1519 if raw_line.starts_with('-') {
1520 // Extend or start a change hunk with this deleted old line.
1521 match &mut current_hunk {
1522 Some(Hunk {
1523 line_range: range, ..
1524 }) => range.end = old_line_index + 1,
1525 None => {
1526 current_hunk = Some(Hunk {
1527 line_range: old_line_index..old_line_index + 1,
1528 new_text_lines: Vec::new(),
1529 cursor_line_offset_in_new_text: None,
1530 });
1531 }
1532 }
1533 old_line_index += 1;
1534 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1535 // Place cursor marker if cursor_offset falls within this line.
1536 let mut cursor_line_offset = None;
1537 if let Some(cursor_off) = cursor_offset
1538 && (first_hunk
1539 || cursor_off >= new_text_byte_offset
1540 && cursor_off <= new_text_byte_offset + added_content.len())
1541 {
1542 let line_offset = added_content.floor_char_boundary(
1543 cursor_off
1544 .saturating_sub(new_text_byte_offset)
1545 .min(added_content.len()),
1546 );
1547 cursor_line_offset = Some(line_offset);
1548 }
1549
1550 new_text_byte_offset += added_content.len();
1551
1552 let hunk = current_hunk.get_or_insert(Hunk {
1553 line_range: old_line_index..old_line_index,
1554 new_text_lines: vec![],
1555 cursor_line_offset_in_new_text: None,
1556 });
1557 hunk.new_text_lines.push(added_content);
1558 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1559 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1560 } else {
1561 // Context line (starts with ' ' or is empty).
1562 if let Some(hunk) = current_hunk.take() {
1563 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1564 }
1565 last_old_line_before_hunk = Some(old_line_index);
1566 old_line_index += 1;
1567 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1568 new_text_byte_offset += content.len();
1569 }
1570 }
1571
1572 // Flush final group.
1573 if let Some(hunk) = current_hunk.take() {
1574 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1575 }
1576
1577 // Trim a single trailing newline.
1578 if result.ends_with('\n') {
1579 result.pop();
1580 }
1581
1582 if result.is_empty() {
1583 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1584 }
1585
1586 Ok(result)
1587 }
1588
1589 #[cfg(test)]
1590 mod tests {
1591 use super::*;
1592 use indoc::indoc;
1593
1594 #[test]
1595 fn test_format_cursor_region() {
1596 struct Case {
1597 name: &'static str,
1598 context: &'static str,
1599 editable_range: Range<usize>,
1600 cursor_offset: usize,
1601 expected: &'static str,
1602 }
1603
1604 let cases = [
1605 Case {
1606 name: "basic_cursor_placement",
1607 context: "hello world\n",
1608 editable_range: 0..12,
1609 cursor_offset: 5,
1610 expected: indoc! {"
1611 <|file_sep|>test.rs
1612 <|fim_prefix|>
1613 <|fim_middle|>current
1614 0:5c|hello<|user_cursor|> world
1615 <|fim_suffix|>
1616 <|fim_middle|>updated
1617 "},
1618 },
1619 Case {
1620 name: "multiline_cursor_on_second_line",
1621 context: "aaa\nbbb\nccc\n",
1622 editable_range: 0..12,
1623 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1624 expected: indoc! {"
1625 <|file_sep|>test.rs
1626 <|fim_prefix|>
1627 <|fim_middle|>current
1628 0:23|aaa
1629 1:26|b<|user_cursor|>bb
1630 2:29|ccc
1631 <|fim_suffix|>
1632 <|fim_middle|>updated
1633 "},
1634 },
1635 Case {
1636 name: "no_trailing_newline_in_context",
1637 context: "line1\nline2",
1638 editable_range: 0..11,
1639 cursor_offset: 3,
1640 expected: indoc! {"
1641 <|file_sep|>test.rs
1642 <|fim_prefix|>
1643 <|fim_middle|>current
1644 0:d9|lin<|user_cursor|>e1
1645 1:da|line2
1646 <|fim_suffix|>
1647 <|fim_middle|>updated
1648 "},
1649 },
1650 Case {
1651 name: "leading_newline_in_editable_region",
1652 context: "\nabc\n",
1653 editable_range: 0..5,
1654 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1655 expected: indoc! {"
1656 <|file_sep|>test.rs
1657 <|fim_prefix|>
1658 <|fim_middle|>current
1659 0:00|
1660 1:26|a<|user_cursor|>bc
1661 <|fim_suffix|>
1662 <|fim_middle|>updated
1663 "},
1664 },
1665 Case {
1666 name: "with_suffix",
1667 context: "abc\ndef",
1668 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1669 cursor_offset: 2,
1670 expected: indoc! {"
1671 <|file_sep|>test.rs
1672 <|fim_prefix|>
1673 <|fim_middle|>current
1674 0:26|ab<|user_cursor|>c
1675 <|fim_suffix|>
1676 def
1677 <|fim_middle|>updated
1678 "},
1679 },
1680 Case {
1681 name: "unicode_two_byte_chars",
1682 context: "héllo\n",
1683 editable_range: 0..7,
1684 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1685 expected: indoc! {"
1686 <|file_sep|>test.rs
1687 <|fim_prefix|>
1688 <|fim_middle|>current
1689 0:1b|hé<|user_cursor|>llo
1690 <|fim_suffix|>
1691 <|fim_middle|>updated
1692 "},
1693 },
1694 Case {
1695 name: "unicode_three_byte_chars",
1696 context: "日本語\n",
1697 editable_range: 0..10,
1698 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1699 expected: indoc! {"
1700 <|file_sep|>test.rs
1701 <|fim_prefix|>
1702 <|fim_middle|>current
1703 0:80|日本<|user_cursor|>語
1704 <|fim_suffix|>
1705 <|fim_middle|>updated
1706 "},
1707 },
1708 Case {
1709 name: "unicode_four_byte_chars",
1710 context: "a🌍b\n",
1711 editable_range: 0..7,
1712 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1713 expected: indoc! {"
1714 <|file_sep|>test.rs
1715 <|fim_prefix|>
1716 <|fim_middle|>current
1717 0:6b|a🌍<|user_cursor|>b
1718 <|fim_suffix|>
1719 <|fim_middle|>updated
1720 "},
1721 },
1722 Case {
1723 name: "cursor_at_start_of_region_not_placed",
1724 context: "abc\n",
1725 editable_range: 0..4,
1726 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1727 expected: indoc! {"
1728 <|file_sep|>test.rs
1729 <|fim_prefix|>
1730 <|fim_middle|>current
1731 0:26|abc
1732 <|fim_suffix|>
1733 <|fim_middle|>updated
1734 "},
1735 },
1736 Case {
1737 name: "cursor_at_end_of_line_not_placed",
1738 context: "abc\ndef\n",
1739 editable_range: 0..8,
1740 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1741 expected: indoc! {"
1742 <|file_sep|>test.rs
1743 <|fim_prefix|>
1744 <|fim_middle|>current
1745 0:26|abc
1746 1:2f|def
1747 <|fim_suffix|>
1748 <|fim_middle|>updated
1749 "},
1750 },
1751 Case {
1752 name: "cursor_offset_relative_to_context_not_editable_region",
1753 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1754 // write_cursor_excerpt_section must subtract it before comparing against
1755 // per-line offsets within the editable region.
1756 context: "pre\naaa\nbbb\nsuf\n",
1757 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1758 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1759 expected: indoc! {"
1760 <|file_sep|>test.rs
1761 <|fim_prefix|>
1762 pre
1763 <|fim_middle|>current
1764 0:23|aaa
1765 1:26|b<|user_cursor|>bb
1766 <|fim_suffix|>
1767 suf
1768 <|fim_middle|>updated
1769 "},
1770 },
1771 ];
1772
1773 for case in &cases {
1774 let mut prompt = String::new();
1775 hashline::write_cursor_excerpt_section(
1776 &mut prompt,
1777 Path::new("test.rs"),
1778 case.context,
1779 &case.editable_range,
1780 case.cursor_offset,
1781 );
1782 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1783 }
1784 }
1785
1786 #[test]
1787 fn test_apply_edit_commands() {
1788 struct Case {
1789 name: &'static str,
1790 original: &'static str,
1791 model_output: &'static str,
1792 expected: &'static str,
1793 }
1794
1795 let cases = vec![
1796 Case {
1797 name: "set_single_line",
1798 original: indoc! {"
1799 let mut total = 0;
1800 for product in products {
1801 total += ;
1802 }
1803 total
1804 "},
1805 model_output: indoc! {"
1806 <|set|>2:87
1807 total += product.price;
1808 "},
1809 expected: indoc! {"
1810 let mut total = 0;
1811 for product in products {
1812 total += product.price;
1813 }
1814 total
1815 "},
1816 },
1817 Case {
1818 name: "set_range",
1819 original: indoc! {"
1820 fn foo() {
1821 let x = 1;
1822 let y = 2;
1823 let z = 3;
1824 }
1825 "},
1826 model_output: indoc! {"
1827 <|set|>1:46-3:4a
1828 let sum = 6;
1829 "},
1830 expected: indoc! {"
1831 fn foo() {
1832 let sum = 6;
1833 }
1834 "},
1835 },
1836 Case {
1837 name: "insert_after_line",
1838 original: indoc! {"
1839 fn main() {
1840 let x = 1;
1841 }
1842 "},
1843 model_output: indoc! {"
1844 <|insert|>1:46
1845 let y = 2;
1846 "},
1847 expected: indoc! {"
1848 fn main() {
1849 let x = 1;
1850 let y = 2;
1851 }
1852 "},
1853 },
1854 Case {
1855 name: "insert_before_first",
1856 original: indoc! {"
1857 let x = 1;
1858 let y = 2;
1859 "},
1860 model_output: indoc! {"
1861 <|insert|>
1862 use std::io;
1863 "},
1864 expected: indoc! {"
1865 use std::io;
1866 let x = 1;
1867 let y = 2;
1868 "},
1869 },
1870 Case {
1871 name: "set_with_cursor_marker",
1872 original: indoc! {"
1873 fn main() {
1874 println!();
1875 }
1876 "},
1877 model_output: indoc! {"
1878 <|set|>1:34
1879 eprintln!(\"<|user_cursor|>\");
1880 "},
1881 expected: indoc! {"
1882 fn main() {
1883 eprintln!(\"<|user_cursor|>\");
1884 }
1885 "},
1886 },
1887 Case {
1888 name: "multiple_set_commands",
1889 original: indoc! {"
1890 aaa
1891 bbb
1892 ccc
1893 ddd
1894 "},
1895 model_output: indoc! {"
1896 <|set|>0:23
1897 AAA
1898 <|set|>2:29
1899 CCC
1900 "},
1901 expected: indoc! {"
1902 AAA
1903 bbb
1904 CCC
1905 ddd
1906 "},
1907 },
1908 Case {
1909 name: "set_range_multiline_replacement",
1910 original: indoc! {"
1911 fn handle_submit() {
1912 }
1913
1914 fn handle_keystroke() {
1915 "},
1916 model_output: indoc! {"
1917 <|set|>0:3f-1:7d
1918 fn handle_submit(modal_state: &mut ModalState) {
1919 <|user_cursor|>
1920 }
1921 "},
1922 expected: indoc! {"
1923 fn handle_submit(modal_state: &mut ModalState) {
1924 <|user_cursor|>
1925 }
1926
1927 fn handle_keystroke() {
1928 "},
1929 },
1930 Case {
1931 name: "no_edit_commands_returns_original",
1932 original: indoc! {"
1933 hello
1934 world
1935 "},
1936 model_output: "some random text with no commands",
1937 expected: indoc! {"
1938 hello
1939 world
1940 "},
1941 },
1942 Case {
1943 name: "no_edits_command_returns_original",
1944 original: indoc! {"
1945 hello
1946 world
1947 "},
1948 model_output: "<|no_edits|>",
1949 expected: indoc! {"
1950 hello
1951 world
1952 "},
1953 },
1954 Case {
1955 name: "wrong_hash_set_ignored",
1956 original: indoc! {"
1957 aaa
1958 bbb
1959 "},
1960 model_output: indoc! {"
1961 <|set|>0:ff
1962 ZZZ
1963 "},
1964 expected: indoc! {"
1965 aaa
1966 bbb
1967 "},
1968 },
1969 Case {
1970 name: "insert_and_set_combined",
1971 original: indoc! {"
1972 alpha
1973 beta
1974 gamma
1975 "},
1976 model_output: indoc! {"
1977 <|set|>0:06
1978 ALPHA
1979 <|insert|>1:9c
1980 beta_extra
1981 "},
1982 expected: indoc! {"
1983 ALPHA
1984 beta
1985 beta_extra
1986 gamma
1987 "},
1988 },
1989 Case {
1990 name: "no_trailing_newline_preserved",
1991 original: "hello\nworld",
1992 model_output: indoc! {"
1993 <|set|>0:14
1994 HELLO
1995 "},
1996 expected: "HELLO\nworld",
1997 },
1998 Case {
1999 name: "set_range_hash_mismatch_in_end_bound",
2000 original: indoc! {"
2001 one
2002 two
2003 three
2004 "},
2005 model_output: indoc! {"
2006 <|set|>0:42-2:ff
2007 ONE_TWO_THREE
2008 "},
2009 expected: indoc! {"
2010 one
2011 two
2012 three
2013 "},
2014 },
2015 Case {
2016 name: "set_range_start_greater_than_end_ignored",
2017 original: indoc! {"
2018 a
2019 b
2020 c
2021 "},
2022 model_output: indoc! {"
2023 <|set|>2:63-1:62
2024 X
2025 "},
2026 expected: indoc! {"
2027 a
2028 b
2029 c
2030 "},
2031 },
2032 Case {
2033 name: "insert_out_of_bounds_ignored",
2034 original: indoc! {"
2035 x
2036 y
2037 "},
2038 model_output: indoc! {"
2039 <|insert|>99:aa
2040 z
2041 "},
2042 expected: indoc! {"
2043 x
2044 y
2045 "},
2046 },
2047 Case {
2048 name: "set_out_of_bounds_ignored",
2049 original: indoc! {"
2050 x
2051 y
2052 "},
2053 model_output: indoc! {"
2054 <|set|>99:aa
2055 z
2056 "},
2057 expected: indoc! {"
2058 x
2059 y
2060 "},
2061 },
2062 Case {
2063 name: "malformed_set_command_ignored",
2064 original: indoc! {"
2065 alpha
2066 beta
2067 "},
2068 model_output: indoc! {"
2069 <|set|>not-a-line-ref
2070 UPDATED
2071 "},
2072 expected: indoc! {"
2073 alpha
2074 beta
2075 "},
2076 },
2077 Case {
2078 name: "malformed_insert_hash_treated_as_before_first",
2079 original: indoc! {"
2080 alpha
2081 beta
2082 "},
2083 model_output: indoc! {"
2084 <|insert|>1:nothex
2085 preamble
2086 "},
2087 expected: indoc! {"
2088 preamble
2089 alpha
2090 beta
2091 "},
2092 },
2093 Case {
2094 name: "set_then_insert_same_target_orders_insert_after_replacement",
2095 original: indoc! {"
2096 cat
2097 dog
2098 "},
2099 model_output: indoc! {"
2100 <|set|>0:38
2101 CAT
2102 <|insert|>0:38
2103 TAIL
2104 "},
2105 expected: indoc! {"
2106 CAT
2107 TAIL
2108 dog
2109 "},
2110 },
2111 Case {
2112 name: "overlapping_set_ranges_last_wins",
2113 original: indoc! {"
2114 a
2115 b
2116 c
2117 d
2118 "},
2119 model_output: indoc! {"
2120 <|set|>0:61-2:63
2121 FIRST
2122 <|set|>1:62-3:64
2123 SECOND
2124 "},
2125 expected: indoc! {"
2126 FIRST
2127 d
2128 "},
2129 },
2130 Case {
2131 name: "insert_before_first_and_after_line",
2132 original: indoc! {"
2133 a
2134 b
2135 "},
2136 model_output: indoc! {"
2137 <|insert|>
2138 HEAD
2139 <|insert|>0:61
2140 MID
2141 "},
2142 expected: indoc! {"
2143 HEAD
2144 a
2145 MID
2146 b
2147 "},
2148 },
2149 ];
2150
2151 for case in &cases {
2152 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2153 assert_eq!(result, case.expected, "failed case: {}", case.name);
2154 }
2155 }
2156
2157 #[test]
2158 fn test_output_has_edit_commands() {
2159 assert!(hashline::output_has_edit_commands(&format!(
2160 "{}0:ab\nnew",
2161 SET_COMMAND_MARKER
2162 )));
2163 assert!(hashline::output_has_edit_commands(&format!(
2164 "{}0:ab\nnew",
2165 INSERT_COMMAND_MARKER
2166 )));
2167 assert!(hashline::output_has_edit_commands(&format!(
2168 "some text\n{}1:cd\nstuff",
2169 SET_COMMAND_MARKER
2170 )));
2171 assert!(!hashline::output_has_edit_commands("just plain text"));
2172 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2173 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2174 }
2175
2176 // ---- hashline::patch_to_edit_commands round-trip tests ----
2177
2178 #[test]
2179 fn test_patch_to_edit_commands() {
2180 struct Case {
2181 name: &'static str,
2182 old: &'static str,
2183 patch: &'static str,
2184 expected_new: &'static str,
2185 }
2186
2187 let cases = [
2188 Case {
2189 name: "single_line_replacement",
2190 old: indoc! {"
2191 let mut total = 0;
2192 for product in products {
2193 total += ;
2194 }
2195 total
2196 "},
2197 patch: indoc! {"
2198 @@ -1,5 +1,5 @@
2199 let mut total = 0;
2200 for product in products {
2201 - total += ;
2202 + total += product.price;
2203 }
2204 total
2205 "},
2206 expected_new: indoc! {"
2207 let mut total = 0;
2208 for product in products {
2209 total += product.price;
2210 }
2211 total
2212 "},
2213 },
2214 Case {
2215 name: "multiline_replacement",
2216 old: indoc! {"
2217 fn foo() {
2218 let x = 1;
2219 let y = 2;
2220 let z = 3;
2221 }
2222 "},
2223 patch: indoc! {"
2224 @@ -1,5 +1,3 @@
2225 fn foo() {
2226 - let x = 1;
2227 - let y = 2;
2228 - let z = 3;
2229 + let sum = 1 + 2 + 3;
2230 }
2231 "},
2232 expected_new: indoc! {"
2233 fn foo() {
2234 let sum = 1 + 2 + 3;
2235 }
2236 "},
2237 },
2238 Case {
2239 name: "insertion",
2240 old: indoc! {"
2241 fn main() {
2242 let x = 1;
2243 }
2244 "},
2245 patch: indoc! {"
2246 @@ -1,3 +1,4 @@
2247 fn main() {
2248 let x = 1;
2249 + let y = 2;
2250 }
2251 "},
2252 expected_new: indoc! {"
2253 fn main() {
2254 let x = 1;
2255 let y = 2;
2256 }
2257 "},
2258 },
2259 Case {
2260 name: "insertion_before_first",
2261 old: indoc! {"
2262 let x = 1;
2263 let y = 2;
2264 "},
2265 patch: indoc! {"
2266 @@ -1,2 +1,3 @@
2267 +use std::io;
2268 let x = 1;
2269 let y = 2;
2270 "},
2271 expected_new: indoc! {"
2272 use std::io;
2273 let x = 1;
2274 let y = 2;
2275 "},
2276 },
2277 Case {
2278 name: "deletion",
2279 old: indoc! {"
2280 aaa
2281 bbb
2282 ccc
2283 ddd
2284 "},
2285 patch: indoc! {"
2286 @@ -1,4 +1,2 @@
2287 aaa
2288 -bbb
2289 -ccc
2290 ddd
2291 "},
2292 expected_new: indoc! {"
2293 aaa
2294 ddd
2295 "},
2296 },
2297 Case {
2298 name: "multiple_changes",
2299 old: indoc! {"
2300 alpha
2301 beta
2302 gamma
2303 delta
2304 epsilon
2305 "},
2306 patch: indoc! {"
2307 @@ -1,5 +1,5 @@
2308 -alpha
2309 +ALPHA
2310 beta
2311 gamma
2312 -delta
2313 +DELTA
2314 epsilon
2315 "},
2316 expected_new: indoc! {"
2317 ALPHA
2318 beta
2319 gamma
2320 DELTA
2321 epsilon
2322 "},
2323 },
2324 Case {
2325 name: "replace_with_insertion",
2326 old: indoc! {r#"
2327 fn handle() {
2328 modal_state.close();
2329 modal_state.dismiss();
2330 "#},
2331 patch: indoc! {r#"
2332 @@ -1,3 +1,4 @@
2333 fn handle() {
2334 modal_state.close();
2335 + eprintln!("");
2336 modal_state.dismiss();
2337 "#},
2338 expected_new: indoc! {r#"
2339 fn handle() {
2340 modal_state.close();
2341 eprintln!("");
2342 modal_state.dismiss();
2343 "#},
2344 },
2345 Case {
2346 name: "complete_replacement",
2347 old: indoc! {"
2348 aaa
2349 bbb
2350 ccc
2351 "},
2352 patch: indoc! {"
2353 @@ -1,3 +1,3 @@
2354 -aaa
2355 -bbb
2356 -ccc
2357 +xxx
2358 +yyy
2359 +zzz
2360 "},
2361 expected_new: indoc! {"
2362 xxx
2363 yyy
2364 zzz
2365 "},
2366 },
2367 Case {
2368 name: "add_function_body",
2369 old: indoc! {"
2370 fn foo() {
2371 modal_state.dismiss();
2372 }
2373
2374 fn
2375
2376 fn handle_keystroke() {
2377 "},
2378 patch: indoc! {"
2379 @@ -1,6 +1,8 @@
2380 fn foo() {
2381 modal_state.dismiss();
2382 }
2383
2384 -fn
2385 +fn handle_submit() {
2386 + todo()
2387 +}
2388
2389 fn handle_keystroke() {
2390 "},
2391 expected_new: indoc! {"
2392 fn foo() {
2393 modal_state.dismiss();
2394 }
2395
2396 fn handle_submit() {
2397 todo()
2398 }
2399
2400 fn handle_keystroke() {
2401 "},
2402 },
2403 Case {
2404 name: "with_cursor_offset",
2405 old: indoc! {r#"
2406 fn main() {
2407 println!();
2408 }
2409 "#},
2410 patch: indoc! {r#"
2411 @@ -1,3 +1,3 @@
2412 fn main() {
2413 - println!();
2414 + eprintln!("");
2415 }
2416 "#},
2417 expected_new: indoc! {r#"
2418 fn main() {
2419 eprintln!("<|user_cursor|>");
2420 }
2421 "#},
2422 },
2423 Case {
2424 name: "non_local_hunk_header_pure_insertion_repro",
2425 old: indoc! {"
2426 aaa
2427 bbb
2428 "},
2429 patch: indoc! {"
2430 @@ -20,2 +20,3 @@
2431 aaa
2432 +xxx
2433 bbb
2434 "},
2435 expected_new: indoc! {"
2436 aaa
2437 xxx
2438 bbb
2439 "},
2440 },
2441 Case {
2442 name: "empty_patch_produces_no_edits_marker",
2443 old: indoc! {"
2444 aaa
2445 bbb
2446 "},
2447 patch: "@@ -20,2 +20,3 @@\n",
2448 expected_new: indoc! {"
2449 aaa
2450 bbb
2451 "},
2452 },
2453 ];
2454
2455 for case in &cases {
2456 // The cursor_offset for patch_to_edit_commands is relative to
2457 // the first hunk's new text (context + additions). We compute
2458 // it by finding where the marker sits in the expected output
2459 // (which mirrors the new text of the hunk).
2460 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2461
2462 let commands =
2463 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2464 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2465
2466 assert!(
2467 hashline::output_has_edit_commands(&commands),
2468 "case {}: expected edit commands, got: {commands:?}",
2469 case.name,
2470 );
2471
2472 let applied = hashline::apply_edit_commands(case.old, &commands);
2473 assert_eq!(applied, case.expected_new, "case {}", case.name);
2474 }
2475 }
2476 }
2477}
2478
2479pub mod seed_coder {
2480 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2481 //!
2482 //! Seed-Coder uses different FIM tokens and order than Qwen:
2483 //! - SPM order: suffix comes FIRST, then prefix, then middle
2484 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2485 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2486 //!
2487 //! All context (related files, edit history) goes in the PREFIX section.
2488 //! The suffix contains only code after the editable region.
2489 //!
2490 //! Example prompt:
2491 //!
2492 //! <[fim-suffix]>
2493 //! code after editable region
2494 //! <[fim-prefix]><filename>related/file.py
2495 //! related file content
2496 //!
2497 //! <filename>edit_history
2498 //! --- a/some_file.py
2499 //! +++ b/some_file.py
2500 //! -old
2501 //! +new
2502 //!
2503 //! <filename>path/to/target_file.py
2504 //! code before editable region
2505 //! <<<<<<< CURRENT
2506 //! code that
2507 //! needs to<|user_cursor|>
2508 //! be rewritten
2509 //! =======
2510 //! <[fim-middle]>
2511 //!
2512 //! Expected output (model generates):
2513 //!
2514 //! updated
2515 //! code with
2516 //! changes applied
2517 //! >>>>>>> UPDATED
2518
2519 use super::*;
2520
    /// Token that opens the suffix section; the suffix is written first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, cursor file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token written last in the prompt; per the module docs the model output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written in front of a file path (and of the `edit_history` section name).
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written directly before the editable region of the cursor file.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written directly after the editable region of the cursor file.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the expected model output (see the module docs).
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used by `no_edits` (followed by `END_MARKER`) for an empty patch.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2531
2532 pub fn special_tokens() -> &'static [&'static str] {
2533 &[
2534 FIM_SUFFIX,
2535 FIM_PREFIX,
2536 FIM_MIDDLE,
2537 FILE_MARKER,
2538 START_MARKER,
2539 SEPARATOR,
2540 END_MARKER,
2541 CURSOR_MARKER,
2542 ]
2543 }
2544
2545 pub fn write_cursor_excerpt_section(
2546 prompt: &mut String,
2547 path: &Path,
2548 context: &str,
2549 editable_range: &Range<usize>,
2550 cursor_offset: usize,
2551 ) {
2552 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2553 prompt.push_str(§ion);
2554 }
2555
2556 pub fn format_prompt_with_budget(
2557 path: &Path,
2558 context: &str,
2559 editable_range: &Range<usize>,
2560 cursor_offset: usize,
2561 events: &[Arc<Event>],
2562 related_files: &[RelatedFile],
2563 max_tokens: usize,
2564 ) -> String {
2565 let suffix_section = build_suffix_section(context, editable_range);
2566 let cursor_prefix_section =
2567 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2568
2569 let suffix_tokens = estimate_tokens(suffix_section.len());
2570 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2571 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2572
2573 let edit_history_section = super::format_edit_history_within_budget(
2574 events,
2575 FILE_MARKER,
2576 "edit_history",
2577 budget_after_cursor,
2578 );
2579 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2580 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2581
2582 let related_files_section = super::format_related_files_within_budget(
2583 related_files,
2584 FILE_MARKER,
2585 "",
2586 budget_after_edit_history,
2587 );
2588
2589 let mut prompt = String::new();
2590 prompt.push_str(&suffix_section);
2591 prompt.push_str(FIM_PREFIX);
2592 prompt.push_str(&related_files_section);
2593 if !related_files_section.is_empty() {
2594 prompt.push('\n');
2595 }
2596 prompt.push_str(&edit_history_section);
2597 if !edit_history_section.is_empty() {
2598 prompt.push('\n');
2599 }
2600 prompt.push_str(&cursor_prefix_section);
2601 prompt.push_str(FIM_MIDDLE);
2602 prompt
2603 }
2604
2605 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2606 let mut section = String::new();
2607 section.push_str(FIM_SUFFIX);
2608 section.push_str(&context[editable_range.end..]);
2609 if !section.ends_with('\n') {
2610 section.push('\n');
2611 }
2612 section
2613 }
2614
2615 fn build_cursor_prefix_section(
2616 path: &Path,
2617 context: &str,
2618 editable_range: &Range<usize>,
2619 cursor_offset: usize,
2620 ) -> String {
2621 let mut section = String::new();
2622 let path_str = path.to_string_lossy();
2623 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2624
2625 section.push_str(&context[..editable_range.start]);
2626 section.push_str(START_MARKER);
2627 section.push_str(&context[editable_range.start..cursor_offset]);
2628 section.push_str(CURSOR_MARKER);
2629 section.push_str(&context[cursor_offset..editable_range.end]);
2630 if !section.ends_with('\n') {
2631 section.push('\n');
2632 }
2633 section.push_str(SEPARATOR);
2634 section
2635 }
2636
2637 /// Format patch as containing no changes if it's empty; otherwise return None.
2638 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2639 // Count lines in the patch
2640 let empty_patch = patch.lines().count() <= 3;
2641 if empty_patch {
2642 Some(format!("{NO_EDITS}{END_MARKER}"))
2643 } else {
2644 None
2645 }
2646 }
2647}
2648
2649pub mod v0304_variable_edit {
2650 //! A prompt format with no fixed editable region. The entire context is shown
2651 //! to the model, and it chooses which text to replace by outputting surrounding
2652 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2653 //! text.
2654 //!
2655 //! Example prompt:
2656 //!
2657 //! <|file_sep|>path/to/file.py
2658 //! zero
2659 //! one
2660 //! two
2661 //! three<|user_cursor|>
2662 //! four
2663 //! five
2664 //! <|fim_prefix|>
    //!
2666 //! Expected output (model generates):
2667 //!
2668 //! two
2669 //! <|fim_middle|>
2670 //! THREE
2671 //! <|fim_suffix|>
2672 //! four
2673 //!
2674 //! The output means: find "two\n...\nfour" in the context, and replace
2675 //! everything between "two\n" and "four" with "THREE\n".
2676
2677 use super::*;
2678
2679 pub fn special_tokens() -> &'static [&'static str] {
2680 &[
2681 "<|fim_prefix|>",
2682 "<|fim_suffix|>",
2683 "<|fim_middle|>",
2684 "<|file_sep|>",
2685 CURSOR_MARKER,
2686 ]
2687 }
2688
2689 pub fn write_cursor_excerpt_section(
2690 prompt: &mut String,
2691 path: &Path,
2692 context: &str,
2693 cursor_offset: usize,
2694 ) {
2695 let path_str = path.to_string_lossy();
2696 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2697
2698 prompt.push_str(&context[..cursor_offset]);
2699 prompt.push_str(CURSOR_MARKER);
2700 prompt.push_str(&context[cursor_offset..]);
2701 if !prompt.ends_with('\n') {
2702 prompt.push('\n');
2703 }
2704 prompt.push_str("<|fim_prefix|>\n")
2705 }
2706
2707 /// Apply a variable-edit model output to the original context text.
2708 ///
2709 /// The model output has the form:
2710 ///
2711 /// - prefix context lines
2712 /// - `<|fim_middle|>`
2713 /// - new text
2714 /// - `<|fim_suffix|>`
2715 /// - suffix context lines
2716 ///
2717 /// We locate the prefix/suffix context lines in the original text and replace
2718 /// everything between them with the new text.
2719 pub fn apply_variable_edit(
2720 context: &str,
2721 model_output: &str,
2722 ) -> Result<(Range<usize>, String)> {
2723 let (prefix_context, rest) = model_output
2724 .split_once("<|fim_middle|>\n")
2725 .or_else(|| model_output.split_once("<|fim_middle|>"))
2726 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2727
2728 let (new_text, suffix_context) = rest
2729 .split_once("<|fim_suffix|>\n")
2730 .or_else(|| rest.split_once("<|fim_suffix|>"))
2731 .unwrap_or((rest, ""));
2732
2733 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2734 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2735 } else {
2736 suffix_context
2737 };
2738
2739 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2740 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2741 + prefix_context.len();
2742 let suffix_offset = if suffix_context.is_empty() {
2743 context.len()
2744 } else {
2745 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2746 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2747 + prefix_offset
2748 };
2749
2750 let edit_range = prefix_offset..suffix_offset;
2751 return Ok((edit_range, new_text.to_string()));
2752 }
2753
2754 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2755 if needle.is_empty() {
2756 return Some(0);
2757 }
2758
2759 haystack.match_indices(needle).find_map(|(offset, _)| {
2760 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2761 matched_line_start.then_some(offset)
2762 })
2763 }
2764
2765 /// Convert a unified diff patch into the variable-edit output format.
2766 ///
2767 /// Parses `patch` as a unified diff against `old_text` and produces model
2768 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2769 /// delimiters. The diff is resolved by content matching rather than line
2770 /// numbers.
2771 pub fn patch_to_variable_edit_output(
2772 old_text: &str,
2773 patch: &str,
2774 cursor_offset: Option<usize>,
2775 ) -> Result<String> {
2776 // Parse the unified diff into hunks. Each hunk has an `old_context`
2777 // string (context + deleted lines interleaved in order) and a list of
2778 // edits expressed as byte ranges within that context plus replacement
2779 // text.
2780 let hunks = parse_hunks(patch);
2781 if hunks.is_empty() {
2782 return Ok(String::new());
2783 }
2784
2785 // Apply each hunk by finding its old_context in the text and
2786 // performing the edits. We search forward from where the previous
2787 // hunk ended so that hunks are applied in order.
2788 let mut new_text = old_text.to_string();
2789 let mut search_from: usize = 0;
2790 let mut first_hunk_pos: Option<usize> = None;
2791
2792 for hunk in &hunks {
2793 let context_pos = new_text[search_from..]
2794 .find(&hunk.old_context)
2795 .map(|pos| pos + search_from)
2796 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2797
2798 if first_hunk_pos.is_none() {
2799 first_hunk_pos = Some(context_pos);
2800 }
2801
2802 // Apply edits in reverse order so byte offsets remain valid.
2803 for edit in hunk.edits.iter().rev() {
2804 let abs_start = context_pos + edit.range.start;
2805 let abs_end = context_pos + edit.range.end;
2806 new_text.replace_range(abs_start..abs_end, &edit.text);
2807 }
2808
2809 // Advance past this hunk's region in the (now modified) text.
2810 let new_region_len: usize =
2811 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2812 len + edit.text.len() - (edit.range.end - edit.range.start)
2813 });
2814 search_from = context_pos + new_region_len;
2815 }
2816
2817 // Now we have old_text and new_text. Find the changed line range by
2818 // comparing them.
2819 let old_lines: Vec<&str> = old_text.lines().collect();
2820 let new_lines: Vec<&str> = new_text.lines().collect();
2821
2822 // Find first differing line.
2823 let first_changed_row = old_lines
2824 .iter()
2825 .zip(new_lines.iter())
2826 .position(|(a, b)| a != b)
2827 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2828
2829 // Find last differing line (from the end).
2830 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2831 let common_suffix = old_lines
2832 .iter()
2833 .rev()
2834 .zip(new_lines.iter().rev())
2835 .take(max_suffix)
2836 .take_while(|(a, b)| a == b)
2837 .count();
2838
2839 let old_end = old_lines.len() - common_suffix;
2840 let new_end = new_lines.len() - common_suffix;
2841
2842 if first_changed_row == old_end && first_changed_row == new_end {
2843 return Ok(String::new());
2844 }
2845
2846 // Build the replacement text from new_lines[first_diff..new_end].
2847 let mut merged_new_text = String::new();
2848 for line in &new_lines[first_changed_row..new_end] {
2849 merged_new_text.push_str(line);
2850 merged_new_text.push('\n');
2851 }
2852
2853 // cursor_offset is relative to the first hunk's new content in
2854 // new_text. Translate it to an offset within merged_new_text, which
2855 // only contains lines first_diff..new_end of new_text.
2856 if let Some(hunk_offset) = cursor_offset {
2857 let hunk_start = first_hunk_pos.unwrap_or(0);
2858 let absolute_pos = hunk_start + hunk_offset;
2859
2860 // Byte offset where first_diff starts in new_text.
2861 let merged_start: usize = new_lines[..first_changed_row]
2862 .iter()
2863 .map(|line| line.len() + 1)
2864 .sum();
2865
2866 if absolute_pos >= merged_start {
2867 let relative_offset = absolute_pos - merged_start;
2868 if relative_offset <= merged_new_text.len() {
2869 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2870 }
2871 }
2872 }
2873
2874 // Build output with 2 lines of context above and below.
2875 let context_lines_count = 2;
2876 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2877 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2878
2879 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2880 let pattern = &lines[line_range];
2881 let pattern_len = pattern.len();
2882
2883 let mut count = 0;
2884 for offset in 0..=lines.len() - pattern_len {
2885 if &lines[offset..offset + pattern_len] == pattern {
2886 count += 1;
2887 }
2888 }
2889 count
2890 }
2891
2892 // Expand prefix and suffix until they are unique
2893 while prefix_start > 0 {
2894 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2895 prefix_start -= 1;
2896 } else {
2897 break;
2898 }
2899 }
2900 while suffix_end < old_lines.len() {
2901 if count_matches(old_end..suffix_end, &old_lines) > 1 {
2902 suffix_end += 1;
2903 } else {
2904 break;
2905 }
2906 }
2907
2908 let mut output = String::new();
2909 for line in &old_lines[prefix_start..first_changed_row] {
2910 output.push_str(line);
2911 output.push('\n');
2912 }
2913 output.push_str("<|fim_middle|>\n");
2914 output.push_str(&merged_new_text);
2915 output.push_str("<|fim_suffix|>\n");
2916 for line in &old_lines[old_end..suffix_end] {
2917 output.push_str(line);
2918 output.push('\n');
2919 }
2920
2921 Ok(output)
2922 }
2923
2924 struct ParsedHunk {
2925 old_context: String,
2926 edits: Vec<ParsedEdit>,
2927 }
2928
2929 struct ParsedEdit {
2930 range: Range<usize>,
2931 text: String,
2932 }
2933
2934 /// Parse a unified diff into content-based hunks. Each hunk contains an
2935 /// `old_context` string (context lines + deleted lines, which together
2936 /// form the text that should be found in the original) and a list of edits
2937 /// expressed as byte ranges within that context.
2938 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2939 let mut hunks = Vec::new();
2940 let mut current: Option<ParsedHunk> = None;
2941
2942 for line in patch.lines() {
2943 if line.starts_with("@@") {
2944 if let Some(hunk) = current.take() {
2945 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2946 hunks.push(hunk);
2947 }
2948 }
2949 current = Some(ParsedHunk {
2950 old_context: String::new(),
2951 edits: Vec::new(),
2952 });
2953 } else if line.starts_with("---") || line.starts_with("+++") {
2954 continue;
2955 } else if let Some(hunk) = &mut current {
2956 if let Some(added) = line.strip_prefix('+') {
2957 let pos = hunk.old_context.len();
2958 if let Some(last_edit) = hunk.edits.last_mut() {
2959 if last_edit.range.end == pos {
2960 writeln!(&mut last_edit.text, "{added}").ok();
2961 continue;
2962 }
2963 }
2964 hunk.edits.push(ParsedEdit {
2965 range: pos..pos,
2966 text: format!("{added}\n"),
2967 });
2968 } else if let Some(removed) = line.strip_prefix('-') {
2969 let start = hunk.old_context.len();
2970 writeln!(&mut hunk.old_context, "{removed}").ok();
2971 let end = hunk.old_context.len();
2972 if let Some(last_edit) = hunk.edits.last_mut() {
2973 if last_edit.range.end == start {
2974 last_edit.range.end = end;
2975 continue;
2976 }
2977 }
2978 hunk.edits.push(ParsedEdit {
2979 range: start..end,
2980 text: String::new(),
2981 });
2982 } else {
2983 let ctx = line.strip_prefix(' ').unwrap_or(line);
2984 writeln!(&mut hunk.old_context, "{ctx}").ok();
2985 }
2986 }
2987 }
2988
2989 if let Some(hunk) = current {
2990 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2991 hunks.push(hunk);
2992 }
2993 }
2994
2995 hunks
2996 }
2997
2998 #[cfg(test)]
2999 mod tests {
3000 use super::*;
3001 use indoc::indoc;
3002
3003 #[test]
3004 fn test_apply_variable_edit() {
3005 struct Case {
3006 name: &'static str,
3007 original: &'static str,
3008 model_output: &'static str,
3009 expected: &'static str,
3010 }
3011
3012 let cases = [
3013 Case {
3014 name: "simple_single_line_replacement",
3015 original: indoc! {"
3016 zero
3017 one
3018 two
3019 three
3020 four
3021 five
3022 "},
3023 model_output: indoc! {"
3024 two
3025 <|fim_middle|>
3026 THREE
3027 <|fim_suffix|>
3028 four
3029 "},
3030 expected: indoc! {"
3031 zero
3032 one
3033 two
3034 THREE
3035 four
3036 five
3037 "},
3038 },
3039 Case {
3040 name: "multi_line_replacement",
3041 original: indoc! {"
3042 a
3043 b
3044 c
3045 d
3046 e
3047 "},
3048 model_output: indoc! {"
3049 a
3050 <|fim_middle|>
3051 B
3052 C
3053 D
3054 <|fim_suffix|>
3055 e
3056 "},
3057 expected: indoc! {"
3058 a
3059 B
3060 C
3061 D
3062 e
3063 "},
3064 },
3065 Case {
3066 name: "insertion_between_existing_lines",
3067 original: indoc! {"
3068 a
3069 b
3070 c
3071 "},
3072 model_output: indoc! {"
3073 a
3074 <|fim_middle|>
3075 X
3076 <|fim_suffix|>
3077 b
3078 "},
3079 expected: indoc! {"
3080 a
3081 X
3082 b
3083 c
3084 "},
3085 },
3086 Case {
3087 name: "deletion",
3088 original: indoc! {"
3089 a
3090 b
3091 c
3092 d
3093 "},
3094 model_output: indoc! {"
3095 a
3096 <|fim_middle|>
3097 <|fim_suffix|>
3098 c
3099 "},
3100 expected: indoc! {"
3101 a
3102 c
3103 d
3104 "},
3105 },
3106 Case {
3107 name: "replacement_at_start_no_prefix_context",
3108 original: indoc! {"
3109 a
3110 b
3111 c
3112 "},
3113 model_output: indoc! {"
3114 <|fim_middle|>
3115 X
3116 <|fim_suffix|>
3117 b
3118 "},
3119 expected: indoc! {"
3120 X
3121 b
3122 c
3123 "},
3124 },
3125 Case {
3126 name: "replacement_at_end_no_suffix_context",
3127 original: indoc! {"
3128 a
3129 b
3130 c
3131 "},
3132 model_output: indoc! {"
3133 b
3134 <|fim_middle|>
3135 Z
3136 <|fim_suffix|>
3137 "},
3138 expected: indoc! {"
3139 a
3140 b
3141 Z
3142 "},
3143 },
3144 Case {
3145 name: "context_with_trailing_newline_is_preserved",
3146 original: indoc! {"
3147 a
3148 b
3149 c
3150 "},
3151 model_output: indoc! {"
3152 a
3153 <|fim_middle|>
3154 B
3155 <|fim_suffix|>
3156 c
3157 "},
3158 expected: indoc! {"
3159 a
3160 B
3161 c
3162 "},
3163 },
3164 Case {
3165 name: "cursor_marker_passes_through_untouched",
3166 original: indoc! {"
3167 a
3168 b
3169 c
3170 "},
3171 model_output: indoc! {"
3172 a
3173 <|fim_middle|>
3174 B<|user_cursor|>B
3175 <|fim_suffix|>
3176 c
3177 "},
3178 expected: indoc! {"
3179 a
3180 B<|user_cursor|>B
3181 c
3182 "},
3183 },
3184 Case {
3185 name: "multiple_prefix_context_lines",
3186 original: indoc! {"
3187 a
3188 b
3189 c
3190 d
3191 e
3192 "},
3193 model_output: indoc! {"
3194 b
3195 c
3196 <|fim_middle|>
3197 D
3198 <|fim_suffix|>
3199 e
3200 "},
3201 expected: indoc! {"
3202 a
3203 b
3204 c
3205 D
3206 e
3207 "},
3208 },
3209 ];
3210
3211 for case in cases {
3212 let (edit_range, replacement) =
3213 apply_variable_edit(case.original, case.model_output).unwrap();
3214 let mut edited = case.original.to_string();
3215 edited.replace_range(edit_range, &replacement);
3216 assert_eq!(edited, case.expected, "{}", case.name);
3217 }
3218 }
3219
3220 #[test]
3221 fn test_patch_to_variable_edit() {
3222 struct Case {
3223 name: &'static str,
3224 old: &'static str,
3225 patch: &'static str,
3226 cursor_offset: Option<usize>,
3227 expected_variable_edit: &'static str,
3228 expected_after_apply: &'static str,
3229 }
3230
3231 let cases = [
3232 Case {
3233 name: "simple_replacement",
3234 old: indoc! {"
3235 zero
3236 one
3237 two
3238 three
3239 four
3240 five
3241 "},
3242 patch: indoc! {"
3243 @@ -3,3 +3,3 @@
3244 two
3245 -three
3246 +THREE
3247 four
3248 "},
3249 cursor_offset: None,
3250 expected_variable_edit: indoc! {"
3251 one
3252 two
3253 <|fim_middle|>
3254 THREE
3255 <|fim_suffix|>
3256 four
3257 five
3258 "},
3259 expected_after_apply: indoc! {"
3260 zero
3261 one
3262 two
3263 THREE
3264 four
3265 five
3266 "},
3267 },
3268 Case {
3269 name: "insertion",
3270 old: indoc! {"
3271 a
3272 b
3273 c
3274 d
3275 e
3276 "},
3277 patch: indoc! {"
3278 @@ -2,0 +3,1 @@
3279 b
3280 +X
3281 c
3282 "},
3283 cursor_offset: None,
3284 expected_variable_edit: indoc! {"
3285 a
3286 b
3287 <|fim_middle|>
3288 X
3289 <|fim_suffix|>
3290 c
3291 d
3292 "},
3293 expected_after_apply: indoc! {"
3294 a
3295 b
3296 X
3297 c
3298 d
3299 e
3300 "},
3301 },
3302 Case {
3303 name: "deletion",
3304 old: indoc! {"
3305 a
3306 b
3307 c
3308 d
3309 e
3310 "},
3311 patch: indoc! {"
3312 @@ -2,3 +2,2 @@
3313 b
3314 -c
3315 d
3316 "},
3317 cursor_offset: None,
3318 expected_variable_edit: indoc! {"
3319 a
3320 b
3321 <|fim_middle|>
3322 <|fim_suffix|>
3323 d
3324 e
3325 "},
3326 expected_after_apply: indoc! {"
3327 a
3328 b
3329 d
3330 e
3331 "},
3332 },
3333 Case {
3334 name: "edit_near_start",
3335 old: indoc! {"
3336 first
3337 second
3338 third
3339 fourth
3340 "},
3341 patch: indoc! {"
3342 @@ -1,1 +1,1 @@
3343 -first
3344 +FIRST
3345 "},
3346 cursor_offset: None,
3347 expected_variable_edit: indoc! {"
3348 <|fim_middle|>
3349 FIRST
3350 <|fim_suffix|>
3351 second
3352 third
3353 "},
3354 expected_after_apply: indoc! {"
3355 FIRST
3356 second
3357 third
3358 fourth
3359 "},
3360 },
3361 Case {
3362 name: "edit_near_end",
3363 old: indoc! {"
3364 first
3365 second
3366 third
3367 fourth
3368 "},
3369 patch: indoc! {"
3370 @@ -4,1 +4,1 @@
3371 -fourth
3372 +FOURTH
3373 "},
3374 cursor_offset: None,
3375 expected_variable_edit: indoc! {"
3376 second
3377 third
3378 <|fim_middle|>
3379 FOURTH
3380 <|fim_suffix|>
3381 "},
3382 expected_after_apply: indoc! {"
3383 first
3384 second
3385 third
3386 FOURTH
3387 "},
3388 },
3389 Case {
3390 name: "cursor_at_start_of_replacement",
3391 old: indoc! {"
3392 zero
3393 one
3394 two
3395 three
3396 four
3397 five
3398 "},
3399 patch: indoc! {"
3400 @@ -3,3 +3,3 @@
3401 two
3402 -three
3403 +THREE
3404 four
3405 "},
3406 cursor_offset: Some(4),
3407 expected_variable_edit: indoc! {"
3408 one
3409 two
3410 <|fim_middle|>
3411 <|user_cursor|>THREE
3412 <|fim_suffix|>
3413 four
3414 five
3415 "},
3416 expected_after_apply: indoc! {"
3417 zero
3418 one
3419 two
3420 <|user_cursor|>THREE
3421 four
3422 five
3423 "},
3424 },
3425 Case {
3426 name: "cursor_in_middle_of_replacement",
3427 old: indoc! {"
3428 zero
3429 one
3430 two
3431 three
3432 four
3433 five
3434 "},
3435 patch: indoc! {"
3436 @@ -3,3 +3,3 @@
3437 two
3438 -three
3439 +THREE
3440 four
3441 "},
3442 cursor_offset: Some(6),
3443 expected_variable_edit: indoc! {"
3444 one
3445 two
3446 <|fim_middle|>
3447 TH<|user_cursor|>REE
3448 <|fim_suffix|>
3449 four
3450 five
3451 "},
3452 expected_after_apply: indoc! {"
3453 zero
3454 one
3455 two
3456 TH<|user_cursor|>REE
3457 four
3458 five
3459 "},
3460 },
3461 Case {
3462 name: "expands_context_when_two_lines_not_unique_before_and_after",
3463 old: indoc! {"
3464 one
3465 a
3466 b
3467 c
3468 d
3469 two
3470 a
3471 b
3472 c
3473 d
3474 three
3475 a
3476 b
3477 c
3478 d
3479 four
3480 "},
3481 patch: indoc! {"
3482 @@ -4,5 +4,5 @@
3483 two
3484 a
3485 b
3486 -c
3487 +C
3488 d
3489 three
3490 "},
3491 cursor_offset: None,
3492 expected_variable_edit: indoc! {"
3493 two
3494 a
3495 b
3496 <|fim_middle|>
3497 C
3498 <|fim_suffix|>
3499 d
3500 three
3501 "},
3502 expected_after_apply: indoc! {"
3503 one
3504 a
3505 b
3506 c
3507 d
3508 two
3509 a
3510 b
3511 C
3512 d
3513 three
3514 a
3515 b
3516 c
3517 d
3518 four
3519 "},
3520 },
3521 Case {
3522 name: "expands_context_when_two_lines_not_unique_before_and_after",
3523 old: indoc! {"
3524 {
3525 {
3526 one();
3527 }
3528 }
3529 {
3530 {
3531 two();
3532 }
3533 }
3534 {
3535 {
3536 three();
3537 }
3538 }
3539 {
3540 {
3541 four();
3542 }
3543 }
3544 "},
3545 patch: indoc! {"
3546 @@ -4,5 +4,5 @@
3547 {
3548 - two();
3549 + TWO();
3550 }
3551 "},
3552 cursor_offset: None,
3553 expected_variable_edit: indoc! {"
3554 one();
3555 }
3556 }
3557 {
3558 {
3559 <|fim_middle|>
3560 TWO();
3561 <|fim_suffix|>
3562 }
3563 }
3564 {
3565 {
3566 three();
3567 "},
3568 expected_after_apply: indoc! {"
3569 {
3570 {
3571 one();
3572 }
3573 }
3574 {
3575 {
3576 TWO();
3577 }
3578 }
3579 {
3580 {
3581 three();
3582 }
3583 }
3584 {
3585 {
3586 four();
3587 }
3588 }
3589 "},
3590 },
3591 ];
3592
3593 for case in cases {
3594 let output =
3595 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3596 .unwrap_or_else(|error| {
3597 panic!("failed converting patch for {}: {error}", case.name)
3598 });
3599 assert_eq!(
3600 output, case.expected_variable_edit,
3601 "patch->variable_edit mismatch for {}",
3602 case.name
3603 );
3604
3605 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3606 .unwrap_or_else(|error| {
3607 panic!("failed applying variable_edit for {}: {error}", case.name)
3608 });
3609 let mut edited_by_variable_edit = case.old.to_string();
3610 edited_by_variable_edit.replace_range(edit_range, &replacement);
3611 assert_eq!(
3612 edited_by_variable_edit, case.expected_after_apply,
3613 "variable_edit apply mismatch for {}",
3614 case.name
3615 );
3616
3617 let (expected_edit_range, expected_replacement) =
3618 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3619 |error| {
3620 panic!(
3621 "failed applying expected variable_edit for {}: {error}",
3622 case.name
3623 )
3624 },
3625 );
3626 let mut edited_by_expected_variable_edit = case.old.to_string();
3627 edited_by_expected_variable_edit
3628 .replace_range(expected_edit_range, &expected_replacement);
3629 assert_eq!(
3630 edited_by_expected_variable_edit, case.expected_after_apply,
3631 "expected variable_edit apply mismatch for {}",
3632 case.name
3633 );
3634 }
3635 }
3636
3637 #[test]
3638 fn test_write_cursor_excerpt_section() {
3639 let path = Path::new("test.rs");
3640 let context = "fn main() {\n hello();\n}\n";
3641 let cursor_offset = 17;
3642 let mut prompt = String::new();
3643 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3644 assert_eq!(
3645 prompt,
3646 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3647 );
3648 }
3649 }
3650}
3651
3652/// The zeta1 prompt format
3653pub mod zeta1 {
3654 use super::*;
3655 use std::fmt::Write;
3656
    /// Cursor marker of the zeta1 format; distinct from the crate-level `CURSOR_MARKER`.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Line written at the top of the excerpt when it starts at the beginning of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Line written directly before the editable region of the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker written (on its own line) directly after the editable region of the excerpt.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    // Fixed text that opens every zeta1 prompt; the formatted events follow it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    // Heading written between the events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    // Heading that ends the prompt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3671
3672 /// Formats a complete zeta1 prompt from the input events and excerpt.
3673 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3674 let mut prompt = String::with_capacity(
3675 INSTRUCTION_HEADER.len()
3676 + input_events.len()
3677 + EXCERPT_HEADER.len()
3678 + input_excerpt.len()
3679 + RESPONSE_HEADER.len(),
3680 );
3681 prompt.push_str(INSTRUCTION_HEADER);
3682 prompt.push_str(input_events);
3683 prompt.push_str(EXCERPT_HEADER);
3684 prompt.push_str(input_excerpt);
3685 prompt.push_str(RESPONSE_HEADER);
3686 prompt
3687 }
3688
3689 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3690 /// editable and context byte-offset ranges within `cursor_excerpt`.
3691 pub fn format_zeta1_from_input(
3692 input: &ZetaPromptInput,
3693 editable_range: Range<usize>,
3694 context_range: Range<usize>,
3695 ) -> String {
3696 let events = format_zeta1_events(&input.events);
3697 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3698 format_zeta1_prompt(&events, &excerpt)
3699 }
3700
3701 /// Formats events in zeta1 style (oldest first).
3702 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3703 let mut result = String::new();
3704 for event in events {
3705 let event_string = format_zeta1_event(event);
3706 if event_string.is_empty() {
3707 continue;
3708 }
3709 if !result.is_empty() {
3710 result.push_str("\n\n");
3711 }
3712 result.push_str(&event_string);
3713 }
3714 result
3715 }
3716
3717 fn format_zeta1_event(event: &Event) -> String {
3718 match event {
3719 Event::BufferChange {
3720 path,
3721 old_path,
3722 diff,
3723 ..
3724 } => {
3725 let mut prompt = String::new();
3726 if old_path != path {
3727 writeln!(
3728 prompt,
3729 "User renamed {} to {}\n",
3730 old_path.display(),
3731 path.display()
3732 )
3733 .ok();
3734 }
3735 if !diff.is_empty() {
3736 write!(
3737 prompt,
3738 "User edited {}:\n```diff\n{}\n```",
3739 path.display(),
3740 diff
3741 )
3742 .ok();
3743 }
3744 prompt
3745 }
3746 }
3747 }
3748
3749 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3750 /// within `cursor_excerpt`.
3751 fn format_zeta1_excerpt(
3752 input: &ZetaPromptInput,
3753 editable_range: Range<usize>,
3754 context_range: Range<usize>,
3755 ) -> String {
3756 let path_str = input.cursor_path.to_string_lossy();
3757 let excerpt = &*input.cursor_excerpt;
3758 let cursor_offset = input.cursor_offset_in_excerpt;
3759
3760 let mut prompt = String::new();
3761 writeln!(&mut prompt, "```{path_str}").ok();
3762
3763 let starts_at_file_beginning =
3764 input.excerpt_start_row == Some(0) && context_range.start == 0;
3765 if starts_at_file_beginning {
3766 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3767 }
3768
3769 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3770
3771 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3772 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3773 prompt.push_str(CURSOR_MARKER);
3774 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3775 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3776
3777 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3778 write!(prompt, "\n```").ok();
3779
3780 prompt
3781 }
3782
3783 /// Cleans zeta1 model output by extracting content between editable region
3784 /// markers and converting the zeta1 cursor marker to the universal one.
3785 /// Returns `None` if the output doesn't contain the expected markers.
3786 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3787 let content = output.replace(CURSOR_MARKER, "");
3788
3789 let content_start = content
3790 .find(EDITABLE_REGION_START_MARKER)
3791 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3792 .map(|pos| {
3793 if content.as_bytes().get(pos) == Some(&b'\n') {
3794 pos + 1
3795 } else {
3796 pos
3797 }
3798 })
3799 .unwrap_or(0);
3800
3801 let content_end = content
3802 .find(EDITABLE_REGION_END_MARKER)
3803 .map(|pos| {
3804 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3805 pos - 1
3806 } else {
3807 pos
3808 }
3809 })
3810 .unwrap_or(content.len());
3811
3812 if content_start > content_end {
3813 return Some(String::new());
3814 }
3815
3816 let extracted = &content[content_start..content_end];
3817
3818 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3819 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3820 let text_before_cursor = text_before_cursor
3821 .find(EDITABLE_REGION_START_MARKER)
3822 .map(|pos| {
3823 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3824 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3825 after_marker + 1
3826 } else {
3827 after_marker
3828 }
3829 })
3830 .unwrap_or(0);
3831 let offset_in_extracted = zeta1_cursor_pos
3832 .saturating_sub(text_before_cursor)
3833 .min(extracted.len());
3834 offset_in_extracted
3835 });
3836
3837 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3838 if let Some(offset) = cursor_offset {
3839 result.push_str(&extracted[..offset]);
3840 result.push_str(super::CURSOR_MARKER);
3841 result.push_str(&extracted[offset..]);
3842 } else {
3843 result.push_str(extracted);
3844 }
3845
3846 Some(result)
3847 }
3848}
3849
3850#[cfg(test)]
3851mod tests {
3852 use super::*;
3853 use indoc::indoc;
3854
3855 fn make_input(
3856 cursor_excerpt: &str,
3857 editable_range: Range<usize>,
3858 cursor_offset: usize,
3859 events: Vec<Event>,
3860 related_files: Vec<RelatedFile>,
3861 ) -> ZetaPromptInput {
3862 let context_range = 0..cursor_excerpt.len();
3863 ZetaPromptInput {
3864 cursor_path: Path::new("test.rs").into(),
3865 cursor_excerpt: cursor_excerpt.into(),
3866 cursor_offset_in_excerpt: cursor_offset,
3867 excerpt_start_row: None,
3868 events: events.into_iter().map(Arc::new).collect(),
3869 related_files: Some(related_files),
3870 excerpt_ranges: ExcerptRanges {
3871 editable_150: editable_range.clone(),
3872 editable_180: editable_range.clone(),
3873 editable_350: editable_range,
3874 editable_150_context_350: context_range.clone(),
3875 editable_180_context_350: context_range.clone(),
3876 editable_350_context_150: context_range,
3877 ..Default::default()
3878 },
3879 experiment: None,
3880 in_open_source_repo: false,
3881 can_collect_data: false,
3882 repo_url: None,
3883 }
3884 }
3885
3886 fn make_input_with_context_range(
3887 excerpt: &str,
3888 editable_range: Range<usize>,
3889 context_range: Range<usize>,
3890 cursor_offset: usize,
3891 ) -> ZetaPromptInput {
3892 ZetaPromptInput {
3893 cursor_path: Path::new("test.rs").into(),
3894 cursor_excerpt: excerpt.into(),
3895 cursor_offset_in_excerpt: cursor_offset,
3896 excerpt_start_row: None,
3897 events: vec![],
3898 related_files: Some(vec![]),
3899 excerpt_ranges: ExcerptRanges {
3900 editable_150: editable_range.clone(),
3901 editable_180: editable_range.clone(),
3902 editable_350: editable_range,
3903 editable_150_context_350: context_range.clone(),
3904 editable_180_context_350: context_range.clone(),
3905 editable_350_context_150: context_range,
3906 ..Default::default()
3907 },
3908 experiment: None,
3909 in_open_source_repo: false,
3910 can_collect_data: false,
3911 repo_url: None,
3912 }
3913 }
3914
3915 fn make_event(path: &str, diff: &str) -> Event {
3916 Event::BufferChange {
3917 path: Path::new(path).into(),
3918 old_path: Path::new(path).into(),
3919 diff: diff.to_string(),
3920 predicted: false,
3921 in_open_source_repo: false,
3922 }
3923 }
3924
3925 fn make_related_file(path: &str, content: &str) -> RelatedFile {
3926 RelatedFile {
3927 path: Path::new(path).into(),
3928 max_row: content.lines().count() as u32,
3929 excerpts: vec![RelatedExcerpt {
3930 row_range: 0..content.lines().count() as u32,
3931 text: content.into(),
3932 order: 0,
3933 }],
3934 in_open_source_repo: false,
3935 }
3936 }
3937
3938 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3939 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3940 }
3941
// With a budget far larger than the prompt, nothing is dropped: the related
// file, the edit history, and the cursor file section are all rendered.
#[test]
fn test_no_truncation_when_within_budget() {
    let input = make_input(
        "prefix\neditable\nsuffix",
        7..15,
        10,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "fn helper() {}\n")],
    );

    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>related.rs
            fn helper() {}
            <|file_sep|>edit history
            --- a/a.rs
            +++ b/a.rs
            -old
            +new
            <|file_sep|>test.rs
            <|fim_prefix|>
            prefix
            <|fim_middle|>current
            edi<|user_cursor|>table
            <|fim_suffix|>

            suffix
            <|fim_middle|>updated
        "#}
    );
}
3974
// When the budget shrinks from 10000 to 50, the edit history section
// disappears while both related files and the cursor section remain.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![
            make_related_file("r1.rs", "a\n"),
            make_related_file("r2.rs", "b\n"),
        ],
    );

    // Generous budget: everything is rendered.
    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>r1.rs
            a
            <|file_sep|>r2.rs
            b
            <|file_sep|>edit history
            --- a/a.rs
            +++ b/a.rs
            -x
            +y
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            co<|user_cursor|>de
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );

    // Tight budget: the edit history is the section that gets dropped.
    assert_eq!(
        format_with_budget(&input, 50),
        indoc! {r#"
            <|file_sep|>r1.rs
            a
            <|file_sep|>r2.rs
            b
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            co<|user_cursor|>de
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4025
// A related file with three equal-order excerpts can be included partially:
// under a tight budget only the first excerpt is rendered, followed by a
// `...` line.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![RelatedFile {
            path: Path::new("big.rs").into(),
            max_row: 30,
            in_open_source_repo: false,
            excerpts: vec![
                RelatedExcerpt {
                    row_range: 0..10,
                    text: "first excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 10..20,
                    text: "second excerpt\n".into(),
                    order: 0,
                },
                RelatedExcerpt {
                    row_range: 20..30,
                    text: "third excerpt\n".into(),
                    order: 0,
                },
            ],
        }],
    );

    // Generous budget: all three excerpts, separated by `...` lines.
    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>big.rs
            first excerpt
            ...
            second excerpt
            ...
            third excerpt
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );

    // Tight budget: only the first excerpt is kept.
    assert_eq!(
        format_with_budget(&input, 50),
        indoc! {r#"
            <|file_sep|>big.rs
            first excerpt
            ...
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4090
// Across files, excerpts with a lower `order` value are kept first when the
// budget cannot fit every related file.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Two files: file_a has a high-order excerpt, file_b has a low-order one.
    // With tight budget, only the lower-order excerpt from file_b should be included.
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![
            RelatedFile {
                path: Path::new("file_a.rs").into(),
                max_row: 10,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..10,
                    text: "low priority content\n".into(),
                    order: 5,
                }],
            },
            RelatedFile {
                path: Path::new("file_b.rs").into(),
                max_row: 10,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..10,
                    text: "high priority content\n".into(),
                    order: 1,
                }],
            },
        ],
    );

    // With large budget, both files included; rendered in stable lexicographic order.
    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>file_a.rs
            low priority content
            <|file_sep|>file_b.rs
            high priority content
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    assert_eq!(
        format_with_budget(&input, 52),
        indoc! {r#"
            <|file_sep|>file_b.rs
            high priority content
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4159
// Within a single file, truncation works per excerpt: the order-3 excerpt is
// dropped under a tight budget while the two order-1 excerpts stay.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // A single file has excerpts at order 1 and order 3. With a tight budget,
    // only the order-1 excerpts are included while the order-3 excerpt is
    // dropped — even though they belong to the same file. This also preserves
    // the parent invariant: parent outline items have order ≤ their best
    // child, so they're always included when any child is.
    let input = make_input(
        "x",
        0..1,
        0,
        vec![],
        vec![RelatedFile {
            path: Path::new("mod.rs").into(),
            max_row: 30,
            in_open_source_repo: false,
            excerpts: vec![
                RelatedExcerpt {
                    row_range: 0..5,
                    text: "mod header\n".into(),
                    order: 1,
                },
                RelatedExcerpt {
                    row_range: 5..15,
                    text: "important fn\n".into(),
                    order: 1,
                },
                RelatedExcerpt {
                    row_range: 15..30,
                    text: "less important fn\n".into(),
                    order: 3,
                },
            ],
        }],
    );

    // With large budget, all three excerpts included.
    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>mod.rs
            mod header
            ...
            important fn
            ...
            less important fn
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );

    // With tight budget, only order<=1 excerpts included (header + important fn).
    assert_eq!(
        format_with_budget(&input, 55),
        indoc! {r#"
            <|file_sep|>mod.rs
            mod header
            ...
            important fn
            ...
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4233
// When the edit history does not fit, the earlier event in the list (old.rs)
// is dropped and the later one (new.rs) is kept.
#[test]
fn test_truncation_drops_older_events_first() {
    let input = make_input(
        "x",
        0..1,
        0,
        vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
        vec![],
    );

    // Generous budget: both events are rendered, in list order.
    assert_eq!(
        format_with_budget(&input, 10000),
        indoc! {r#"
            <|file_sep|>edit history
            --- a/old.rs
            +++ b/old.rs
            -1
            --- a/new.rs
            +++ b/new.rs
            -2
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );

    // Tight budget: only the last event survives.
    assert_eq!(
        format_with_budget(&input, 55),
        indoc! {r#"
            <|file_sep|>edit history
            --- a/new.rs
            +++ b/new.rs
            -2
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            <|user_cursor|>x
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4279
// With a budget of 30 the related file and the edit history are both dropped,
// but the cursor file section is still rendered in full.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let input = make_input(
        "fn main() {}",
        0..12,
        3,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "helper\n")],
    );

    assert_eq!(
        format_with_budget(&input, 30),
        indoc! {r#"
            <|file_sep|>test.rs
            <|fim_prefix|>
            <|fim_middle|>current
            fn <|user_cursor|>main() {}
            <|fim_suffix|>
            <|fim_middle|>updated
        "#}
    );
}
4302
4303 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4304 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4305 }
4306
4307 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4308 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4309 }
4310
// Pins the full seed-coder layout: suffix section first, then the prefix
// section holding related files, edit history and the cursor file with the
// editable region between `<<<<<<< CURRENT` and `=======`. The prompt ends
// with the fim-middle tag and no trailing newline.
#[test]
fn test_seed_coder_basic_format() {
    let input = make_input(
        "prefix\neditable\nsuffix",
        7..15,
        10,
        vec![make_event("a.rs", "-old\n+new\n")],
        vec![make_related_file("related.rs", "fn helper() {}\n")],
    );

    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
            <[fim-suffix]>
            suffix
            <[fim-prefix]><filename>related.rs
            fn helper() {}

            <filename>edit_history
            --- a/a.rs
            +++ b/a.rs
            -old
            +new

            <filename>test.rs
            prefix
            <<<<<<< CURRENT
            edi<|user_cursor|>table
            =======
            <[fim-middle]>"#}
    );
}
4343
// With no events and no related files, the seed-coder prompt contains only
// the suffix section and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
            <[fim-suffix]>
            after
            <[fim-prefix]><filename>test.rs
            before
            <<<<<<< CURRENT
            mid<|user_cursor|>dle
            =======
            <[fim-middle]>"#}
    );
}
4361
// Under a tight budget the seed-coder format drops the related file and the
// edit history, but keeps the cursor file section.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let input = make_input(
        "code",
        0..4,
        2,
        vec![make_event("a.rs", "-x\n+y\n")],
        vec![make_related_file("r1.rs", "content\n")],
    );

    // With large budget, everything is included
    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
            <[fim-suffix]>
            <[fim-prefix]><filename>r1.rs
            content

            <filename>edit_history
            --- a/a.rs
            +++ b/a.rs
            -x
            +y

            <filename>test.rs
            <<<<<<< CURRENT
            co<|user_cursor|>de
            =======
            <[fim-middle]>"#}
    );

    // With tight budget, context is dropped but cursor section remains
    assert_eq!(
        format_seed_coder_with_budget(&input, 30),
        indoc! {r#"
            <[fim-suffix]>
            <[fim-prefix]><filename>test.rs
            <<<<<<< CURRENT
            co<|user_cursor|>de
            =======
            <[fim-middle]>"#}
    );
}
4405
// In the seed-coder format, the related file whose excerpt has the lower
// `order` value is the one kept when only one file fits the budget.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let input = make_input(
        "code",
        0..4,
        2,
        vec![],
        vec![
            RelatedFile {
                path: Path::new("low_prio.rs").into(),
                max_row: 5,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..5,
                    text: "low prio\n".into(),
                    order: 10,
                }],
            },
            RelatedFile {
                path: Path::new("high_prio.rs").into(),
                max_row: 5,
                in_open_source_repo: false,
                excerpts: vec![RelatedExcerpt {
                    row_range: 0..5,
                    text: "high prio\n".into(),
                    order: 1,
                }],
            },
        ],
    );

    // With large budget, both included; rendered in the order the files were
    // passed in (low_prio.rs before high_prio.rs), not sorted by name or order.
    assert_eq!(
        format_seed_coder(&input),
        indoc! {r#"
            <[fim-suffix]>
            <[fim-prefix]><filename>low_prio.rs
            low prio
            <filename>high_prio.rs
            high prio

            <filename>test.rs
            <<<<<<< CURRENT
            co<|user_cursor|>de
            =======
            <[fim-middle]>"#}
    );

    // With tight budget, only high_prio included.
    // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
    // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
    assert_eq!(
        format_seed_coder_with_budget(&input, 44),
        indoc! {r#"
            <[fim-suffix]>
            <[fim-prefix]><filename>high_prio.rs
            high prio

            <filename>test.rs
            <<<<<<< CURRENT
            co<|user_cursor|>de
            =======
            <[fim-middle]>"#}
    );
}
4471
// Pins the complete zeta1 prompt for an excerpt that starts at the beginning
// of the file: instruction header, one user edit, and the fenced excerpt with
// start-of-file, editable-region and cursor markers.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The byte offsets below (cursor 30, editable 15..41) rely on the body
    // line being indented by exactly four spaces:
    //   "fn before() {}\n" = 0..15, "fn foo() {\n" = 15..26,
    //   "    let x = 1;\n" = 26..41, so the cursor at 30 sits right before `let`.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        excerpt_ranges: ExcerptRanges {
            editable_150: 15..41,
            editable_180: 15..41,
            editable_350: 15..41,
            editable_150_context_350: 0..excerpt.len(),
            editable_180_context_350: 0..excerpt.len(),
            editable_350_context_150: 0..excerpt.len(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4534
// When the excerpt does not start at row 0, the zeta1 prompt must not contain
// the start-of-file marker. Also covers an empty edit history.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The excerpt must be exactly 28 bytes long to match the 0..28 ranges, and
    // the cursor at 15 must sit right before `let`; both require the body line
    // to be indented by four spaces.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4589
// The editable range is a strict sub-range of the context range, so context
// text must appear both before the start marker and after the end marker.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Offsets assume a four-space indent on the body line:
    //   "// prefix\n" = 0..10, "fn foo() {\n" = 10..21, cursor 25 is right
    //   before `let`, and the editable range ends at 37, just after `}`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4650
// The text between the editable-region markers is returned without the
// marker lines and without the newline that precedes the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is indented by four spaces relative to `fn main`,
    // matching the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
4664
// The zeta1 cursor marker inside the editable region is replaced by the
// universal `<|user_cursor|>` marker at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is indented by four spaces relative to `fn main`,
    // matching the expected string below.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
4681
// Output without any editable-region markers is returned unchanged (still
// wrapped in `Some`).
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
4688
// A start marker immediately followed by the end marker yields an empty
// string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "");
}
4695
4696 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4697 let mut result = excerpt.to_string();
4698 result.replace_range(
4699 parsed_output.range_in_excerpt.clone(),
4700 &parsed_output.new_editable_region,
4701 );
4702 result
4703 }
4704
// Parsing model output for the git-merge-markers format and applying it
// replaces only the editable line; the `>>>>>>> UPDATED` end marker does not
// end up in the edited text, and the surrounding context is untouched.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    // The context range is narrower than the excerpt, and the editable range
    // is narrower than the context range.
    let context_start = excerpt.find("ctx start").unwrap();
    let context_end = excerpt.find("after ctx").unwrap();
    let editable_start = excerpt.find("editable old").unwrap();
    let editable_end = editable_start + "editable old\n".len();
    let input = make_input_with_context_range(
        excerpt,
        editable_start..editable_end,
        context_start..context_end,
        editable_start,
    );

    let output = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    assert_eq!(
        apply_edit(excerpt, &output),
        "before ctx\nctx start\neditable old\nctx end\nafter ctx\n"
            .replace("editable old", "editable new")
    );
}
4731
// If the model echoes the editable region verbatim, applying the parsed
// output leaves the excerpt unchanged.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    // Editable region is "bbb\nccc\n".
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_end = excerpt.find("ddd").unwrap();
    let input = make_input_with_context_range(
        excerpt,
        editable_start..editable_end,
        0..excerpt.len(),
        editable_start,
    );

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let output =
        parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();

    assert_eq!(apply_edit(excerpt, &output), excerpt);
}
4750
// Model output with and without the trailing `>>>>>>> UPDATED` marker must
// produce the same edit.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let output1 =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
    assert_eq!(apply_edit(excerpt, &output1), "new content\n");
}
4764}