1use anyhow::{Result, anyhow};
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker string written into prompts at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that [`format_zeta_prompt`] passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Roughly estimates the number of tokens in `bytes` bytes of text,
/// assuming three bytes per token (integer division, rounding down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget (optional).
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens
    /// of additional context — TODO confirm against the producer of these ranges.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens
    /// of additional context — TODO confirm against the producer of these ranges.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget — TODO confirm.
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget — TODO confirm.
    /// `excerpt_ranges_for_format` uses it (when present) as both the editable
    /// and the context range for `ZetaFormat::V0304VariableEdit`.
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to build a prompt: the excerpt around the cursor, the
/// recent edit events, related file excerpts, and request metadata.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written in the header of the cursor excerpt section.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor; the ranges in `excerpt_ranges` index into it.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which `cursor_excerpt` starts. When present, related excerpts of
    /// the cursor file that fall inside the context rows are filtered out.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the edit-history section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations rendered into the related-files section.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read anywhere in the visible
    // part of this file; their meaning is defined by callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
67
/// Identifies which prompt/output format to use. The format selects the
/// special tokens, excerpt ranges, cursor-section writer, prefill and end
/// marker. `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
}
95
96impl std::fmt::Display for ZetaFormat {
97 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 write!(f, "{}", <&'static str>::from(self))
99 }
100}
101
102impl ZetaFormat {
103 pub fn parse(format_name: &str) -> Result<Self> {
104 let mut results = ZetaFormat::iter().filter(|version| {
105 <&'static str>::from(version)
106 .to_lowercase()
107 .contains(&format_name.to_lowercase())
108 });
109 let Some(result) = results.next() else {
110 anyhow::bail!(
111 "`{format_name}` did not match any of:\n{}",
112 Self::options_as_string()
113 );
114 };
115 if results.next().is_some() {
116 anyhow::bail!(
117 "`{format_name}` matched more than one of:\n{}",
118 Self::options_as_string()
119 );
120 }
121 Ok(result)
122 }
123
124 pub fn options_as_string() -> String {
125 ZetaFormat::iter()
126 .map(|format| format!("- {}\n", <&'static str>::from(format)))
127 .collect::<Vec<_>>()
128 .concat()
129 }
130}
131
/// An edit-history event. Serialized with an `"event"` tag field.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a<old_path>` / `+++ b<path>` headers.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Flag exposed through `Event::in_open_source_repo`; not rendered.
        in_open_source_repo: bool,
    },
}
143
144impl Event {
145 pub fn in_open_source_repo(&self) -> bool {
146 match self {
147 Event::BufferChange {
148 in_open_source_repo,
149 ..
150 } => *in_open_source_repo,
151 }
152 }
153}
154
155pub fn write_event(prompt: &mut String, event: &Event) {
156 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
157 for component in path.components() {
158 prompt.push('/');
159 write!(prompt, "{}", component.as_os_str().display()).ok();
160 }
161 }
162 match event {
163 Event::BufferChange {
164 path,
165 old_path,
166 diff,
167 predicted,
168 in_open_source_repo: _,
169 } => {
170 if *predicted {
171 prompt.push_str("// User accepted prediction:\n");
172 }
173 prompt.push_str("--- a");
174 write_path_as_unix_str(prompt, old_path.as_ref());
175 prompt.push_str("\n+++ b");
176 write_path_as_unix_str(prompt, path.as_ref());
177 prompt.push('\n');
178 prompt.push_str(diff);
179 }
180 }
181}
182
/// A file with one or more excerpts to include in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
191
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with
    /// a lower `order` are considered first. Defaults to 0 when absent.
    #[serde(default)]
    pub order: usize,
}
199
200pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
201 special_tokens_for_format(format)
202 .iter()
203 .any(|token| input.cursor_excerpt.contains(token))
204}
205
/// Builds the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
209
210pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
211 match format {
212 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
213 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
214 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
215 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
216 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
217 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
218 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
219 ZetaFormat::v0226Hashline => hashline::special_tokens(),
220 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
221 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
222 }
223}
224
225pub fn excerpt_ranges_for_format(
226 format: ZetaFormat,
227 ranges: &ExcerptRanges,
228) -> (Range<usize>, Range<usize>) {
229 match format {
230 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
231 ranges.editable_150.clone(),
232 ranges.editable_150_context_350.clone(),
233 ),
234 ZetaFormat::V0114180EditableRegion => (
235 ranges.editable_180.clone(),
236 ranges.editable_180_context_350.clone(),
237 ),
238 ZetaFormat::V0120GitMergeMarkers
239 | ZetaFormat::V0131GitMergeMarkersPrefix
240 | ZetaFormat::V0211Prefill
241 | ZetaFormat::V0211SeedCoder
242 | ZetaFormat::v0226Hashline
243 | ZetaFormat::V0304SeedNoEdits => (
244 ranges.editable_350.clone(),
245 ranges.editable_350_context_150.clone(),
246 ),
247 ZetaFormat::V0304VariableEdit => {
248 let context = ranges
249 .context_8192
250 .clone()
251 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
252 (context.clone(), context)
253 }
254 }
255}
256
257pub fn write_cursor_excerpt_section_for_format(
258 format: ZetaFormat,
259 prompt: &mut String,
260 path: &Path,
261 context: &str,
262 editable_range: &Range<usize>,
263 cursor_offset: usize,
264) {
265 match format {
266 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
267 prompt,
268 path,
269 context,
270 editable_range,
271 cursor_offset,
272 ),
273 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
274 v0113_ordered::write_cursor_excerpt_section(
275 prompt,
276 path,
277 context,
278 editable_range,
279 cursor_offset,
280 )
281 }
282 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
283 prompt,
284 path,
285 context,
286 editable_range,
287 cursor_offset,
288 ),
289 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
290 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
291 prompt,
292 path,
293 context,
294 editable_range,
295 cursor_offset,
296 )
297 }
298 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
299 seed_coder::write_cursor_excerpt_section(
300 prompt,
301 path,
302 context,
303 editable_range,
304 cursor_offset,
305 )
306 }
307 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
308 prompt,
309 path,
310 context,
311 editable_range,
312 cursor_offset,
313 ),
314 ZetaFormat::V0304VariableEdit => {
315 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
316 }
317 }
318}
319
/// Converts a byte range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline (a trailing partial row).
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |slice: &str| slice.matches('\n').count() as u32;

    let first_row = newline_count(&text[..range.start]);
    let spanned_rows = newline_count(&text[range.start..range.end]);
    let trailing_partial_row = u32::from(!text[..range.end].ends_with('\n'));

    first_row..first_row + spanned_rows + trailing_partial_row
}
328
329pub fn format_prompt_with_budget_for_format(
330 input: &ZetaPromptInput,
331 format: ZetaFormat,
332 max_tokens: usize,
333) -> String {
334 let (context, editable_range, context_range, cursor_offset) =
335 resolve_cursor_region(input, format);
336 let path = &*input.cursor_path;
337
338 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
339 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
340 let row_range = relative_row_range.start + cursor_excerpt_start_row
341 ..relative_row_range.end + cursor_excerpt_start_row;
342 &filter_redundant_excerpts(
343 input.related_files.clone(),
344 input.cursor_path.as_ref(),
345 row_range,
346 )
347 } else {
348 &input.related_files
349 };
350
351 match format {
352 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
353 seed_coder::format_prompt_with_budget(
354 path,
355 context,
356 &editable_range,
357 cursor_offset,
358 &input.events,
359 related_files,
360 max_tokens,
361 )
362 }
363 _ => {
364 let mut cursor_section = String::new();
365 write_cursor_excerpt_section_for_format(
366 format,
367 &mut cursor_section,
368 path,
369 context,
370 &editable_range,
371 cursor_offset,
372 );
373
374 let cursor_tokens = estimate_tokens(cursor_section.len());
375 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
376
377 let edit_history_section = format_edit_history_within_budget(
378 &input.events,
379 "<|file_sep|>",
380 "edit history",
381 budget_after_cursor,
382 );
383 let edit_history_tokens = estimate_tokens(edit_history_section.len());
384 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
385
386 let related_files_section = format_related_files_within_budget(
387 &related_files,
388 "<|file_sep|>",
389 "",
390 budget_after_edit_history,
391 );
392
393 let mut prompt = String::new();
394 prompt.push_str(&related_files_section);
395 prompt.push_str(&edit_history_section);
396 prompt.push_str(&cursor_section);
397 prompt
398 }
399 }
400}
401
402pub fn filter_redundant_excerpts(
403 mut related_files: Vec<RelatedFile>,
404 cursor_path: &Path,
405 cursor_row_range: Range<u32>,
406) -> Vec<RelatedFile> {
407 for file in &mut related_files {
408 if file.path.as_ref() == cursor_path {
409 file.excerpts.retain(|excerpt| {
410 excerpt.row_range.start < cursor_row_range.start
411 || excerpt.row_range.end > cursor_row_range.end
412 });
413 }
414 }
415 related_files.retain(|file| !file.excerpts.is_empty());
416 related_files
417}
418
419pub fn get_prefill_for_format(
420 format: ZetaFormat,
421 context: &str,
422 editable_range: &Range<usize>,
423) -> String {
424 match format {
425 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
426 ZetaFormat::V0112MiddleAtEnd
427 | ZetaFormat::V0113Ordered
428 | ZetaFormat::V0114180EditableRegion
429 | ZetaFormat::V0120GitMergeMarkers
430 | ZetaFormat::V0131GitMergeMarkersPrefix
431 | ZetaFormat::V0211SeedCoder
432 | ZetaFormat::v0226Hashline
433 | ZetaFormat::V0304VariableEdit => String::new(),
434 ZetaFormat::V0304SeedNoEdits => String::new(),
435 }
436}
437
438pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
439 match format {
440 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
441 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
442 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
443 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
444 ZetaFormat::V0112MiddleAtEnd
445 | ZetaFormat::V0113Ordered
446 | ZetaFormat::V0114180EditableRegion
447 | ZetaFormat::v0226Hashline
448 | ZetaFormat::V0304VariableEdit => None,
449 }
450}
451
452pub fn encode_patch_as_output_for_format(
453 format: ZetaFormat,
454 old_editable_region: &str,
455 patch: &str,
456 cursor_offset: Option<usize>,
457) -> Result<Option<String>> {
458 match format {
459 ZetaFormat::v0226Hashline => {
460 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
461 }
462 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
463 old_editable_region,
464 patch,
465 cursor_offset,
466 )
467 .map(Some),
468 ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
469 _ => Ok(None),
470 }
471}
472
/// Result of parsing model output: the replacement text and where it applies.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
479
480/// Parse model output for the given zeta format
481pub fn parse_zeta2_model_output(
482 output: &str,
483 format: ZetaFormat,
484 prompt_inputs: &ZetaPromptInput,
485) -> Result<ParsedOutput> {
486 let output = match output_end_marker_for_format(format) {
487 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
488 None => output,
489 };
490
491 let (context, editable_range_in_context, context_range, _) =
492 resolve_cursor_region(prompt_inputs, format);
493 let context_start = context_range.start;
494 let old_editable_region = &context[editable_range_in_context.clone()];
495
496 let (range_in_context, output) = match format {
497 ZetaFormat::v0226Hashline => (
498 editable_range_in_context,
499 if hashline::output_has_edit_commands(output) {
500 hashline::apply_edit_commands(old_editable_region, output)
501 } else {
502 output.to_string()
503 },
504 ),
505 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
506 ZetaFormat::V0304SeedNoEdits => (
507 editable_range_in_context,
508 if output.starts_with(seed_coder::NO_EDITS) {
509 old_editable_region.to_string()
510 } else {
511 output.to_string()
512 },
513 ),
514 _ => (editable_range_in_context, output.to_string()),
515 };
516
517 let range_in_excerpt =
518 range_in_context.start + context_start..range_in_context.end + context_start;
519
520 Ok(ParsedOutput {
521 new_editable_region: output,
522 range_in_excerpt,
523 })
524}
525
/// Returns the `(editable_range, context_range)` pair for `format`.
/// Forwards directly to [`excerpt_ranges_for_format`].
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
532
533pub fn resolve_cursor_region(
534 input: &ZetaPromptInput,
535 format: ZetaFormat,
536) -> (&str, Range<usize>, Range<usize>, usize) {
537 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
538 let context_start = context_range.start;
539 let context_text = &input.cursor_excerpt[context_range.clone()];
540 let adjusted_editable =
541 (editable_range.start - context_start)..(editable_range.end - context_start);
542 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
543
544 (
545 context_text,
546 adjusted_editable,
547 context_range,
548 adjusted_cursor,
549 )
550}
551
/// Returns the prefill text for `input` in `format`: resolves the context and
/// editable range for the format, then defers to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
556
557fn format_edit_history_within_budget(
558 events: &[Arc<Event>],
559 file_marker: &str,
560 edit_history_name: &str,
561 max_tokens: usize,
562) -> String {
563 let header = format!("{}{}\n", file_marker, edit_history_name);
564 let header_tokens = estimate_tokens(header.len());
565 if header_tokens >= max_tokens {
566 return String::new();
567 }
568
569 let mut event_strings: Vec<String> = Vec::new();
570 let mut total_tokens = header_tokens;
571
572 for event in events.iter().rev() {
573 let mut event_str = String::new();
574 write_event(&mut event_str, event);
575 let event_tokens = estimate_tokens(event_str.len());
576
577 if total_tokens + event_tokens > max_tokens {
578 break;
579 }
580 total_tokens += event_tokens;
581 event_strings.push(event_str);
582 }
583
584 if event_strings.is_empty() {
585 return String::new();
586 }
587
588 let mut result = header;
589 for event_str in event_strings.iter().rev() {
590 result.push_str(event_str);
591 }
592 result
593}
594
595fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
596 let needs_newline = !excerpt.text.ends_with('\n');
597 let needs_ellipsis = excerpt.row_range.end < file_max_row;
598 let len = excerpt.text.len()
599 + if needs_newline { "\n".len() } else { 0 }
600 + if needs_ellipsis { "...\n".len() } else { 0 };
601 estimate_tokens(len)
602}
603
604pub fn format_related_files_within_budget(
605 related_files: &[RelatedFile],
606 file_prefix: &str,
607 file_suffix: &str,
608 max_tokens: usize,
609) -> String {
610 struct ExcerptCandidate {
611 file_ix: usize,
612 excerpt_ix: usize,
613 order: usize,
614 }
615
616 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
617 .iter()
618 .enumerate()
619 .flat_map(|(file_ix, file)| {
620 file.excerpts
621 .iter()
622 .enumerate()
623 .map(move |(excerpt_ix, e)| ExcerptCandidate {
624 file_ix,
625 excerpt_ix,
626 order: e.order,
627 })
628 })
629 .collect();
630
631 // Pre-compute file header strings and their token costs.
632 let file_headers: Vec<String> = related_files
633 .iter()
634 .map(|file| {
635 let path_str = file.path.to_string_lossy();
636 format!("{}{}\n", file_prefix, path_str)
637 })
638 .collect();
639
640 // Sort the excerpts by their order and determine how many fit within the budget.
641 let mut total_tokens = 0;
642 let mut included_excerpt_count = 0_usize;
643 let mut included_file_indices = vec![false; related_files.len()];
644 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
645 for candidate in &excerpt_candidates {
646 let file = &related_files[candidate.file_ix];
647 let excerpt = &file.excerpts[candidate.excerpt_ix];
648 let file_already_included = included_file_indices[candidate.file_ix];
649 let header_cost = if file_already_included {
650 0
651 } else {
652 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
653 };
654 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
655 if total_tokens + header_cost + excerpt_cost > max_tokens {
656 break;
657 }
658 total_tokens += header_cost + excerpt_cost;
659 if !file_already_included {
660 included_file_indices[candidate.file_ix] = true;
661 }
662 included_excerpt_count += 1;
663 }
664
665 excerpt_candidates.truncate(included_excerpt_count);
666 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
667
668 // Render all of the files that fit within the token budget, in the original order.
669 let mut result = String::new();
670 let mut last_file_ix = None;
671 for candidate in &excerpt_candidates {
672 if last_file_ix != Some(candidate.file_ix) {
673 if last_file_ix.is_some() {
674 result.push_str(file_suffix);
675 }
676 result.push_str(&file_headers[candidate.file_ix]);
677 last_file_ix = Some(candidate.file_ix);
678 }
679 let file = &related_files[candidate.file_ix];
680 let excerpt = &file.excerpts[candidate.excerpt_ix];
681 result.push_str(&excerpt.text);
682 if !result.ends_with('\n') {
683 result.push('\n');
684 }
685 if excerpt.row_range.end < file.max_row {
686 result.push_str("...\n");
687 }
688 }
689
690 result
691}
692
693pub fn write_related_files(
694 prompt: &mut String,
695 related_files: &[RelatedFile],
696) -> Vec<Range<usize>> {
697 let mut ranges = Vec::new();
698 for file in related_files {
699 let start = prompt.len();
700 let path_str = file.path.to_string_lossy();
701 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
702 for excerpt in &file.excerpts {
703 prompt.push_str(&excerpt.text);
704 if !prompt.ends_with('\n') {
705 prompt.push('\n');
706 }
707 if excerpt.row_range.end < file.max_row {
708 prompt.push_str("...\n");
709 }
710 }
711 let end = prompt.len();
712 ranges.push(start..end);
713 }
714 ranges
715}
716
717mod v0112_middle_at_end {
718 use super::*;
719
720 pub fn special_tokens() -> &'static [&'static str] {
721 &[
722 "<|fim_prefix|>",
723 "<|fim_suffix|>",
724 "<|fim_middle|>",
725 "<|file_sep|>",
726 CURSOR_MARKER,
727 ]
728 }
729
730 pub fn write_cursor_excerpt_section(
731 prompt: &mut String,
732 path: &Path,
733 context: &str,
734 editable_range: &Range<usize>,
735 cursor_offset: usize,
736 ) {
737 let path_str = path.to_string_lossy();
738 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
739
740 prompt.push_str("<|fim_prefix|>\n");
741 prompt.push_str(&context[..editable_range.start]);
742
743 prompt.push_str("<|fim_suffix|>\n");
744 prompt.push_str(&context[editable_range.end..]);
745 if !prompt.ends_with('\n') {
746 prompt.push('\n');
747 }
748
749 prompt.push_str("<|fim_middle|>current\n");
750 prompt.push_str(&context[editable_range.start..cursor_offset]);
751 prompt.push_str(CURSOR_MARKER);
752 prompt.push_str(&context[cursor_offset..editable_range.end]);
753 if !prompt.ends_with('\n') {
754 prompt.push('\n');
755 }
756
757 prompt.push_str("<|fim_middle|>updated\n");
758 }
759}
760
761mod v0113_ordered {
762 use super::*;
763
764 pub fn special_tokens() -> &'static [&'static str] {
765 &[
766 "<|fim_prefix|>",
767 "<|fim_suffix|>",
768 "<|fim_middle|>",
769 "<|file_sep|>",
770 CURSOR_MARKER,
771 ]
772 }
773
774 pub fn write_cursor_excerpt_section(
775 prompt: &mut String,
776 path: &Path,
777 context: &str,
778 editable_range: &Range<usize>,
779 cursor_offset: usize,
780 ) {
781 let path_str = path.to_string_lossy();
782 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
783
784 prompt.push_str("<|fim_prefix|>\n");
785 prompt.push_str(&context[..editable_range.start]);
786 if !prompt.ends_with('\n') {
787 prompt.push('\n');
788 }
789
790 prompt.push_str("<|fim_middle|>current\n");
791 prompt.push_str(&context[editable_range.start..cursor_offset]);
792 prompt.push_str(CURSOR_MARKER);
793 prompt.push_str(&context[cursor_offset..editable_range.end]);
794 if !prompt.ends_with('\n') {
795 prompt.push('\n');
796 }
797
798 prompt.push_str("<|fim_suffix|>\n");
799 prompt.push_str(&context[editable_range.end..]);
800 if !prompt.ends_with('\n') {
801 prompt.push('\n');
802 }
803
804 prompt.push_str("<|fim_middle|>updated\n");
805 }
806}
807
808mod v0114180_editable_region {
809 use super::*;
810
811 pub fn special_tokens() -> &'static [&'static str] {
812 v0113_ordered::special_tokens()
813 }
814}
815
816pub mod v0120_git_merge_markers {
817 //! A prompt that uses git-style merge conflict markers to represent the editable region.
818 //!
819 //! Example prompt:
820 //!
821 //! <|file_sep|>path/to/target_file.py
822 //! <|fim_prefix|>
823 //! code before editable region
824 //! <|fim_suffix|>
825 //! code after editable region
826 //! <|fim_middle|>
827 //! <<<<<<< CURRENT
828 //! code that
829 //! needs to<|user_cursor|>
830 //! be rewritten
831 //! =======
832 //!
833 //! Expected output (should be generated by the model):
834 //!
835 //! updated
836 //! code with
837 //! changes applied
838 //! >>>>>>> UPDATED
839
840 use super::*;
841
842 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
843 pub const SEPARATOR: &str = "=======\n";
844 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
845
846 pub fn special_tokens() -> &'static [&'static str] {
847 &[
848 "<|fim_prefix|>",
849 "<|fim_suffix|>",
850 "<|fim_middle|>",
851 "<|file_sep|>",
852 START_MARKER,
853 SEPARATOR,
854 END_MARKER,
855 CURSOR_MARKER,
856 ]
857 }
858
859 pub fn write_cursor_excerpt_section(
860 prompt: &mut String,
861 path: &Path,
862 context: &str,
863 editable_range: &Range<usize>,
864 cursor_offset: usize,
865 ) {
866 let path_str = path.to_string_lossy();
867 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
868
869 prompt.push_str("<|fim_prefix|>");
870 prompt.push_str(&context[..editable_range.start]);
871
872 prompt.push_str("<|fim_suffix|>");
873 prompt.push_str(&context[editable_range.end..]);
874 if !prompt.ends_with('\n') {
875 prompt.push('\n');
876 }
877
878 prompt.push_str("<|fim_middle|>");
879 prompt.push_str(START_MARKER);
880 prompt.push_str(&context[editable_range.start..cursor_offset]);
881 prompt.push_str(CURSOR_MARKER);
882 prompt.push_str(&context[cursor_offset..editable_range.end]);
883 if !prompt.ends_with('\n') {
884 prompt.push('\n');
885 }
886 prompt.push_str(SEPARATOR);
887 }
888}
889
890pub mod v0131_git_merge_markers_prefix {
891 //! A prompt that uses git-style merge conflict markers to represent the editable region.
892 //!
893 //! Example prompt:
894 //!
895 //! <|file_sep|>path/to/target_file.py
896 //! <|fim_prefix|>
897 //! code before editable region
898 //! <<<<<<< CURRENT
899 //! code that
900 //! needs to<|user_cursor|>
901 //! be rewritten
902 //! =======
903 //! <|fim_suffix|>
904 //! code after editable region
905 //! <|fim_middle|>
906 //!
907 //! Expected output (should be generated by the model):
908 //!
909 //! updated
910 //! code with
911 //! changes applied
912 //! >>>>>>> UPDATED
913
914 use super::*;
915
916 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
917 pub const SEPARATOR: &str = "=======\n";
918 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
919
920 pub fn special_tokens() -> &'static [&'static str] {
921 &[
922 "<|fim_prefix|>",
923 "<|fim_suffix|>",
924 "<|fim_middle|>",
925 "<|file_sep|>",
926 START_MARKER,
927 SEPARATOR,
928 END_MARKER,
929 CURSOR_MARKER,
930 ]
931 }
932
933 pub fn write_cursor_excerpt_section(
934 prompt: &mut String,
935 path: &Path,
936 context: &str,
937 editable_range: &Range<usize>,
938 cursor_offset: usize,
939 ) {
940 let path_str = path.to_string_lossy();
941 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
942
943 prompt.push_str("<|fim_prefix|>");
944 prompt.push_str(&context[..editable_range.start]);
945 prompt.push_str(START_MARKER);
946 prompt.push_str(&context[editable_range.start..cursor_offset]);
947 prompt.push_str(CURSOR_MARKER);
948 prompt.push_str(&context[cursor_offset..editable_range.end]);
949 if !prompt.ends_with('\n') {
950 prompt.push('\n');
951 }
952 prompt.push_str(SEPARATOR);
953
954 prompt.push_str("<|fim_suffix|>");
955 prompt.push_str(&context[editable_range.end..]);
956 if !prompt.ends_with('\n') {
957 prompt.push('\n');
958 }
959
960 prompt.push_str("<|fim_middle|>");
961 }
962}
963
964pub mod v0211_prefill {
965 use super::*;
966
967 pub fn special_tokens() -> &'static [&'static str] {
968 v0131_git_merge_markers_prefix::special_tokens()
969 }
970
971 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
972 let editable_region = &context[editable_range.start..editable_range.end];
973
974 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
975 let prefill_len = editable_region.floor_char_boundary(prefill_len);
976
977 // Find a token boundary to avoid splitting tokens in the prefill.
978 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
979 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
980 // the \n and consume any consecutive \n characters after it.
981 let prefill = &editable_region[..prefill_len];
982 match prefill.rfind('\n') {
983 Some(pos) => {
984 let mut end = pos + 1;
985 while end < editable_region.len()
986 && editable_region.as_bytes().get(end) == Some(&b'\n')
987 {
988 end += 1;
989 }
990 editable_region[..end].to_string()
991 }
992 // No newline found. Fall back to splitting before the last space
993 // (word-level boundary)
994 None => match prefill.rfind(' ') {
995 Some(pos) => prefill[..pos].to_string(),
996 None => prefill.to_string(),
997 },
998 }
999 }
1000}
1001
1002pub mod hashline {
1003
    use std::fmt::Display;

    /// Marker written at the very end of the cursor excerpt section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix that identifies a "set" command line in model output.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix that identifies an "insert" command line in model output.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1013
1014 pub fn special_tokens() -> &'static [&'static str] {
1015 return &[
1016 SET_COMMAND_MARKER,
1017 "<|set_range|>",
1018 INSERT_COMMAND_MARKER,
1019 CURSOR_MARKER,
1020 "<|file_sep|>",
1021 "<|fim_prefix|>",
1022 "<|fim_suffix|>",
1023 "<|fim_middle|>",
1024 ];
1025 }
1026
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Line index (the number before the colon).
        index: usize,
        /// One-byte line hash (the hex digits after the colon).
        hash: u8,
    }
1033
1034 impl Display for LineRef {
1035 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1036 write!(f, "{}:{:02x}", self.index, self.hash)
1037 }
1038 }
1039
1040 pub fn hash_line(line: &[u8]) -> u8 {
1041 let mut h: u8 = 0;
1042 for &byte in line {
1043 h = h.wrapping_add(byte);
1044 }
1045 return h;
1046 }
1047
1048 /// Write the hashline-encoded editable region into `out`. Each line of
1049 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1050 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1051 /// to the start of `editable_text`).
1052 pub fn write_hashline_editable_region(
1053 out: &mut String,
1054 editable_text: &str,
1055 cursor_offset_in_editable: usize,
1056 ) {
1057 let mut offset = 0;
1058 for (i, line) in editable_text.lines().enumerate() {
1059 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1060 && cursor_offset_in_editable < offset + line.len()
1061 {
1062 (
1063 &line[..cursor_offset_in_editable - offset],
1064 CURSOR_MARKER,
1065 &line[cursor_offset_in_editable - offset..],
1066 )
1067 } else {
1068 (line, "", "")
1069 };
1070 write!(
1071 out,
1072 "\n{}|{head}{cursor}{tail}",
1073 LineRef {
1074 index: i,
1075 hash: hash_line(line.as_bytes())
1076 }
1077 )
1078 .unwrap();
1079 offset += line.len() + 1;
1080 }
1081 }
1082
1083 pub fn write_cursor_excerpt_section(
1084 prompt: &mut String,
1085 path: &Path,
1086 context: &str,
1087 editable_range: &Range<usize>,
1088 cursor_offset: usize,
1089 ) {
1090 let path_str = path.to_string_lossy();
1091 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1092
1093 prompt.push_str("<|fim_prefix|>\n");
1094 prompt.push_str(&context[..editable_range.start]);
1095 prompt.push_str(START_MARKER);
1096
1097 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1098 let editable_region = &context[editable_range.clone()];
1099 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1100
1101 if !prompt.ends_with('\n') {
1102 prompt.push('\n');
1103 }
1104
1105 prompt.push_str("<|fim_suffix|>\n");
1106 prompt.push_str(&context[editable_range.end..]);
1107 if !prompt.ends_with('\n') {
1108 prompt.push('\n');
1109 }
1110
1111 prompt.push_str(END_MARKER);
1112 }
1113
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of either variant borrows directly from the model output
    /// string the command was parsed from.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the range being replaced.
            start: LineRef,
            /// Last line of the range being replaced (inclusive).
            end: LineRef,
            /// Replacement text taken from the lines following the command.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            /// Text to insert, taken from the lines following the command.
            content: &'a str,
        },
    }
1131
1132 /// Parse a line reference like `3:c3` into a `LineRef`.
1133 fn parse_line_ref(s: &str) -> Option<LineRef> {
1134 let (idx_str, hash_str) = s.split_once(':')?;
1135 let index = idx_str.parse::<usize>().ok()?;
1136 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1137 Some(LineRef { index, hash })
1138 }
1139
1140 /// Parse the model output into a list of `EditCommand`s.
1141 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1142 let mut commands = Vec::new();
1143 let mut offset = 0usize;
1144
1145 while offset < model_output.len() {
1146 let next_nl = model_output[offset..]
1147 .find('\n')
1148 .map(|i| offset + i)
1149 .unwrap_or(model_output.len());
1150 let line = &model_output[offset..next_nl];
1151 let line_end = if next_nl < model_output.len() {
1152 next_nl + 1
1153 } else {
1154 next_nl
1155 };
1156
1157 let trimmed = line.trim();
1158 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1159 (true, spec)
1160 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1161 (false, spec)
1162 } else {
1163 offset = line_end;
1164 continue;
1165 };
1166
1167 let mut content_end = line_end;
1168 let mut scan = line_end;
1169
1170 while scan < model_output.len() {
1171 let body_nl = model_output[scan..]
1172 .find('\n')
1173 .map(|i| scan + i)
1174 .unwrap_or(model_output.len());
1175 let body_line = &model_output[scan..body_nl];
1176 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1177 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1178 {
1179 break;
1180 }
1181 scan = if body_nl < model_output.len() {
1182 body_nl + 1
1183 } else {
1184 body_nl
1185 };
1186 content_end = scan;
1187 }
1188
1189 let content = &model_output[line_end..content_end];
1190
1191 if is_set {
1192 if let Some((start_str, end_str)) = specifier.split_once('-') {
1193 if let (Some(start), Some(end)) =
1194 (parse_line_ref(start_str), parse_line_ref(end_str))
1195 {
1196 commands.push(EditCommand::Set {
1197 start,
1198 end,
1199 content,
1200 });
1201 }
1202 } else if let Some(target) = parse_line_ref(specifier) {
1203 commands.push(EditCommand::Set {
1204 start: target.clone(),
1205 end: target,
1206 content,
1207 });
1208 }
1209 } else {
1210 let after = parse_line_ref(specifier);
1211 commands.push(EditCommand::Insert { after, content });
1212 }
1213
1214 offset = scan;
1215 }
1216
1217 commands
1218 }
1219
1220 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1221 /// (as opposed to being a plain full-replacement output).
1222 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1223 /// editable region, returning the plain text content.
1224 pub fn strip_hashline_prefixes(region: &str) -> String {
1225 let mut decoded: String = region
1226 .lines()
1227 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1228 .collect::<Vec<_>>()
1229 .join("\n");
1230 if region.ends_with('\n') {
1231 decoded.push('\n');
1232 }
1233 decoded
1234 }
1235
1236 pub fn output_has_edit_commands(model_output: &str) -> bool {
1237 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1238 }
1239
1240 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1241 /// original editable region text.
1242 ///
1243 /// `editable_region` is the original text of the editable region (without hash
1244 /// prefixes). `model_output` is the raw model response containing edit commands.
1245 ///
1246 /// Returns the full replacement text for the editable region.
1247 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1248 let original_lines: Vec<&str> = editable_region.lines().collect();
1249 let old_hashes: Vec<u8> = original_lines
1250 .iter()
1251 .map(|line| hash_line(line.as_bytes()))
1252 .collect();
1253
1254 let commands = parse_edit_commands(model_output);
1255
1256 // For set operations: indexed by start line → Some((end line index, content))
1257 // For insert operations: indexed by line index → vec of content to insert after
1258 // Insert-before-first is tracked separately.
1259 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1260 let mut insert_before_first: Vec<&str> = Vec::new();
1261 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1262
1263 for command in &commands {
1264 match command {
1265 EditCommand::Set {
1266 start,
1267 end,
1268 content,
1269 } => {
1270 if start.index < old_hashes.len()
1271 && end.index < old_hashes.len()
1272 && start.index <= end.index
1273 && old_hashes[start.index] == start.hash
1274 && old_hashes[end.index] == end.hash
1275 {
1276 set_ops[start.index] = Some((end.index, *content));
1277 }
1278 }
1279 EditCommand::Insert { after, content } => match after {
1280 None => insert_before_first.push(*content),
1281 Some(line_ref) => {
1282 if line_ref.index < old_hashes.len()
1283 && old_hashes[line_ref.index] == line_ref.hash
1284 {
1285 insert_after[line_ref.index].push(*content);
1286 }
1287 }
1288 },
1289 }
1290 }
1291
1292 let mut result = String::new();
1293
1294 // Emit any insertions before the first line
1295 for content in &insert_before_first {
1296 result.push_str(content);
1297 if !content.ends_with('\n') {
1298 result.push('\n');
1299 }
1300 }
1301
1302 let mut i = 0;
1303 while i < original_lines.len() {
1304 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1305 // Replace lines i..=end_index with the replacement content
1306 result.push_str(replacement);
1307 if !replacement.is_empty() && !replacement.ends_with('\n') {
1308 result.push('\n');
1309 }
1310 // Emit any insertions after the end of this set range
1311 if *end_index < insert_after.len() {
1312 for content in &insert_after[*end_index] {
1313 result.push_str(content);
1314 if !content.ends_with('\n') {
1315 result.push('\n');
1316 }
1317 }
1318 }
1319 i = end_index + 1;
1320 } else {
1321 // Keep the original line
1322 result.push_str(original_lines[i]);
1323 result.push('\n');
1324 // Emit any insertions after this line
1325 for content in &insert_after[i] {
1326 result.push_str(content);
1327 if !content.ends_with('\n') {
1328 result.push('\n');
1329 }
1330 }
1331 i += 1;
1332 }
1333 }
1334
1335 // Preserve trailing newline behavior: if the original ended with a
1336 // newline the result already has one; if it didn't, trim the extra one
1337 // we added.
1338 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1339 result.pop();
1340 }
1341
1342 result
1343 }
1344
1345 /// Convert a unified diff patch into hashline edit commands.
1346 ///
1347 /// Parses the unified diff `patch` directly to determine which lines of
1348 /// `old_text` are deleted/replaced and what new lines are added, then emits
1349 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1350 /// `{index}:{hash}` identifiers.
1351 ///
1352 /// `cursor_offset` is an optional byte offset into the first hunk's new
1353 /// text (context + additions) where the cursor marker should be placed.
1354 pub fn patch_to_edit_commands(
1355 old_text: &str,
1356 patch: &str,
1357 cursor_offset: Option<usize>,
1358 ) -> Result<String> {
1359 let old_lines: Vec<&str> = old_text.lines().collect();
1360 let old_hashes: Vec<u8> = old_lines
1361 .iter()
1362 .map(|line| hash_line(line.as_bytes()))
1363 .collect();
1364
1365 let mut result = String::new();
1366 let mut first_hunk = true;
1367
1368 struct Hunk<'a> {
1369 line_range: Range<usize>,
1370 new_text_lines: Vec<&'a str>,
1371 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1372 }
1373
1374 // Parse the patch line by line. We only care about hunk headers,
1375 // context, deletions, and additions.
1376 let mut old_line_index: usize = 0;
1377 let mut current_hunk: Option<Hunk> = None;
1378 // Byte offset tracking within the hunk's new text for cursor placement.
1379 let mut new_text_byte_offset: usize = 0;
1380 // The line index of the last old line seen before/in the current hunk
1381 // (used for insert-after reference).
1382 let mut last_old_line_before_hunk: Option<usize> = None;
1383
1384 fn flush_hunk(
1385 hunk: Hunk,
1386 last_old_line: Option<usize>,
1387 result: &mut String,
1388 old_hashes: &[u8],
1389 ) {
1390 if hunk.line_range.is_empty() {
1391 // Pure insertion — reference the old line to insert after when in bounds.
1392 if let Some(after) = last_old_line
1393 && let Some(&hash) = old_hashes.get(after)
1394 {
1395 write!(
1396 result,
1397 "{INSERT_COMMAND_MARKER}{}\n",
1398 LineRef { index: after, hash }
1399 )
1400 .unwrap();
1401 } else {
1402 result.push_str(INSERT_COMMAND_MARKER);
1403 result.push('\n');
1404 }
1405 } else {
1406 let start = hunk.line_range.start;
1407 let end_exclusive = hunk.line_range.end;
1408 let deleted_line_count = end_exclusive.saturating_sub(start);
1409
1410 if deleted_line_count == 1 {
1411 if let Some(&hash) = old_hashes.get(start) {
1412 write!(
1413 result,
1414 "{SET_COMMAND_MARKER}{}\n",
1415 LineRef { index: start, hash }
1416 )
1417 .unwrap();
1418 } else {
1419 result.push_str(SET_COMMAND_MARKER);
1420 result.push('\n');
1421 }
1422 } else {
1423 let end_inclusive = end_exclusive - 1;
1424 match (
1425 old_hashes.get(start).copied(),
1426 old_hashes.get(end_inclusive).copied(),
1427 ) {
1428 (Some(start_hash), Some(end_hash)) => {
1429 write!(
1430 result,
1431 "{SET_COMMAND_MARKER}{}-{}\n",
1432 LineRef {
1433 index: start,
1434 hash: start_hash
1435 },
1436 LineRef {
1437 index: end_inclusive,
1438 hash: end_hash
1439 }
1440 )
1441 .unwrap();
1442 }
1443 _ => {
1444 result.push_str(SET_COMMAND_MARKER);
1445 result.push('\n');
1446 }
1447 }
1448 }
1449 }
1450 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1451 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1452 && line_offset == cursor_line_offset
1453 {
1454 result.push_str(&line[..char_offset]);
1455 result.push_str(CURSOR_MARKER);
1456 result.push_str(&line[char_offset..]);
1457 continue;
1458 }
1459
1460 result.push_str(line);
1461 }
1462 }
1463
1464 for raw_line in patch.split_inclusive('\n') {
1465 if raw_line.starts_with("@@") {
1466 // Flush any pending change hunk from a previous patch hunk.
1467 if let Some(hunk) = current_hunk.take() {
1468 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1469 }
1470
1471 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1472 // We intentionally do not trust old_start as a direct local index into `old_text`,
1473 // because some patches are produced against a larger file region and carry
1474 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1475 if first_hunk {
1476 new_text_byte_offset = 0;
1477 first_hunk = false;
1478 }
1479 continue;
1480 }
1481
1482 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1483 continue;
1484 }
1485 if raw_line.starts_with("\\ No newline") {
1486 continue;
1487 }
1488
1489 if raw_line.starts_with('-') {
1490 // Extend or start a change hunk with this deleted old line.
1491 match &mut current_hunk {
1492 Some(Hunk {
1493 line_range: range, ..
1494 }) => range.end = old_line_index + 1,
1495 None => {
1496 current_hunk = Some(Hunk {
1497 line_range: old_line_index..old_line_index + 1,
1498 new_text_lines: Vec::new(),
1499 cursor_line_offset_in_new_text: None,
1500 });
1501 }
1502 }
1503 old_line_index += 1;
1504 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1505 // Place cursor marker if cursor_offset falls within this line.
1506 let mut cursor_line_offset = None;
1507 if let Some(cursor_off) = cursor_offset
1508 && (first_hunk
1509 || cursor_off >= new_text_byte_offset
1510 && cursor_off <= new_text_byte_offset + added_content.len())
1511 {
1512 let line_offset = added_content.floor_char_boundary(
1513 cursor_off
1514 .saturating_sub(new_text_byte_offset)
1515 .min(added_content.len()),
1516 );
1517 cursor_line_offset = Some(line_offset);
1518 }
1519
1520 new_text_byte_offset += added_content.len();
1521
1522 let hunk = current_hunk.get_or_insert(Hunk {
1523 line_range: old_line_index..old_line_index,
1524 new_text_lines: vec![],
1525 cursor_line_offset_in_new_text: None,
1526 });
1527 hunk.new_text_lines.push(added_content);
1528 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1529 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1530 } else {
1531 // Context line (starts with ' ' or is empty).
1532 if let Some(hunk) = current_hunk.take() {
1533 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1534 }
1535 last_old_line_before_hunk = Some(old_line_index);
1536 old_line_index += 1;
1537 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1538 new_text_byte_offset += content.len();
1539 }
1540 }
1541
1542 // Flush final group.
1543 if let Some(hunk) = current_hunk.take() {
1544 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1545 }
1546
1547 // Trim a single trailing newline.
1548 if result.ends_with('\n') {
1549 result.pop();
1550 }
1551
1552 Ok(result)
1553 }
1554
1555 #[cfg(test)]
1556 mod tests {
1557 use super::*;
1558 use indoc::indoc;
1559
1560 #[test]
1561 fn test_format_cursor_region() {
1562 struct Case {
1563 name: &'static str,
1564 context: &'static str,
1565 editable_range: Range<usize>,
1566 cursor_offset: usize,
1567 expected: &'static str,
1568 }
1569
1570 let cases = [
1571 Case {
1572 name: "basic_cursor_placement",
1573 context: "hello world\n",
1574 editable_range: 0..12,
1575 cursor_offset: 5,
1576 expected: indoc! {"
1577 <|file_sep|>test.rs
1578 <|fim_prefix|>
1579 <|fim_middle|>current
1580 0:5c|hello<|user_cursor|> world
1581 <|fim_suffix|>
1582 <|fim_middle|>updated"},
1583 },
1584 Case {
1585 name: "multiline_cursor_on_second_line",
1586 context: "aaa\nbbb\nccc\n",
1587 editable_range: 0..12,
1588 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1589 expected: indoc! {"
1590 <|file_sep|>test.rs
1591 <|fim_prefix|>
1592 <|fim_middle|>current
1593 0:23|aaa
1594 1:26|b<|user_cursor|>bb
1595 2:29|ccc
1596 <|fim_suffix|>
1597 <|fim_middle|>updated"},
1598 },
1599 Case {
1600 name: "no_trailing_newline_in_context",
1601 context: "line1\nline2",
1602 editable_range: 0..11,
1603 cursor_offset: 3,
1604 expected: indoc! {"
1605 <|file_sep|>test.rs
1606 <|fim_prefix|>
1607 <|fim_middle|>current
1608 0:d9|lin<|user_cursor|>e1
1609 1:da|line2
1610 <|fim_suffix|>
1611 <|fim_middle|>updated"},
1612 },
1613 Case {
1614 name: "leading_newline_in_editable_region",
1615 context: "\nabc\n",
1616 editable_range: 0..5,
1617 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1618 expected: indoc! {"
1619 <|file_sep|>test.rs
1620 <|fim_prefix|>
1621 <|fim_middle|>current
1622 0:00|
1623 1:26|a<|user_cursor|>bc
1624 <|fim_suffix|>
1625 <|fim_middle|>updated"},
1626 },
1627 Case {
1628 name: "with_suffix",
1629 context: "abc\ndef",
1630 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1631 cursor_offset: 2,
1632 expected: indoc! {"
1633 <|file_sep|>test.rs
1634 <|fim_prefix|>
1635 <|fim_middle|>current
1636 0:26|ab<|user_cursor|>c
1637 <|fim_suffix|>
1638 def
1639 <|fim_middle|>updated"},
1640 },
1641 Case {
1642 name: "unicode_two_byte_chars",
1643 context: "héllo\n",
1644 editable_range: 0..7,
1645 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1646 expected: indoc! {"
1647 <|file_sep|>test.rs
1648 <|fim_prefix|>
1649 <|fim_middle|>current
1650 0:1b|hé<|user_cursor|>llo
1651 <|fim_suffix|>
1652 <|fim_middle|>updated"},
1653 },
1654 Case {
1655 name: "unicode_three_byte_chars",
1656 context: "日本語\n",
1657 editable_range: 0..10,
1658 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1659 expected: indoc! {"
1660 <|file_sep|>test.rs
1661 <|fim_prefix|>
1662 <|fim_middle|>current
1663 0:80|日本<|user_cursor|>語
1664 <|fim_suffix|>
1665 <|fim_middle|>updated"},
1666 },
1667 Case {
1668 name: "unicode_four_byte_chars",
1669 context: "a🌍b\n",
1670 editable_range: 0..7,
1671 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1672 expected: indoc! {"
1673 <|file_sep|>test.rs
1674 <|fim_prefix|>
1675 <|fim_middle|>current
1676 0:6b|a🌍<|user_cursor|>b
1677 <|fim_suffix|>
1678 <|fim_middle|>updated"},
1679 },
1680 Case {
1681 name: "cursor_at_start_of_region_not_placed",
1682 context: "abc\n",
1683 editable_range: 0..4,
1684 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1685 expected: indoc! {"
1686 <|file_sep|>test.rs
1687 <|fim_prefix|>
1688 <|fim_middle|>current
1689 0:26|abc
1690 <|fim_suffix|>
1691 <|fim_middle|>updated"},
1692 },
1693 Case {
1694 name: "cursor_at_end_of_line_not_placed",
1695 context: "abc\ndef\n",
1696 editable_range: 0..8,
1697 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1698 expected: indoc! {"
1699 <|file_sep|>test.rs
1700 <|fim_prefix|>
1701 <|fim_middle|>current
1702 0:26|abc
1703 1:2f|def
1704 <|fim_suffix|>
1705 <|fim_middle|>updated"},
1706 },
1707 Case {
1708 name: "cursor_offset_relative_to_context_not_editable_region",
1709 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1710 // write_cursor_excerpt_section must subtract it before comparing against
1711 // per-line offsets within the editable region.
1712 context: "pre\naaa\nbbb\nsuf\n",
1713 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1714 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1715 expected: indoc! {"
1716 <|file_sep|>test.rs
1717 <|fim_prefix|>
1718 pre
1719 <|fim_middle|>current
1720 0:23|aaa
1721 1:26|b<|user_cursor|>bb
1722 <|fim_suffix|>
1723 suf
1724 <|fim_middle|>updated"},
1725 },
1726 ];
1727
1728 for case in &cases {
1729 let mut prompt = String::new();
1730 hashline::write_cursor_excerpt_section(
1731 &mut prompt,
1732 Path::new("test.rs"),
1733 case.context,
1734 &case.editable_range,
1735 case.cursor_offset,
1736 );
1737 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1738 }
1739 }
1740
1741 #[test]
1742 fn test_apply_edit_commands() {
1743 struct Case {
1744 name: &'static str,
1745 original: &'static str,
1746 model_output: &'static str,
1747 expected: &'static str,
1748 }
1749
1750 let cases = vec![
1751 Case {
1752 name: "set_single_line",
1753 original: indoc! {"
1754 let mut total = 0;
1755 for product in products {
1756 total += ;
1757 }
1758 total
1759 "},
1760 model_output: indoc! {"
1761 <|set|>2:87
1762 total += product.price;
1763 "},
1764 expected: indoc! {"
1765 let mut total = 0;
1766 for product in products {
1767 total += product.price;
1768 }
1769 total
1770 "},
1771 },
1772 Case {
1773 name: "set_range",
1774 original: indoc! {"
1775 fn foo() {
1776 let x = 1;
1777 let y = 2;
1778 let z = 3;
1779 }
1780 "},
1781 model_output: indoc! {"
1782 <|set|>1:46-3:4a
1783 let sum = 6;
1784 "},
1785 expected: indoc! {"
1786 fn foo() {
1787 let sum = 6;
1788 }
1789 "},
1790 },
1791 Case {
1792 name: "insert_after_line",
1793 original: indoc! {"
1794 fn main() {
1795 let x = 1;
1796 }
1797 "},
1798 model_output: indoc! {"
1799 <|insert|>1:46
1800 let y = 2;
1801 "},
1802 expected: indoc! {"
1803 fn main() {
1804 let x = 1;
1805 let y = 2;
1806 }
1807 "},
1808 },
1809 Case {
1810 name: "insert_before_first",
1811 original: indoc! {"
1812 let x = 1;
1813 let y = 2;
1814 "},
1815 model_output: indoc! {"
1816 <|insert|>
1817 use std::io;
1818 "},
1819 expected: indoc! {"
1820 use std::io;
1821 let x = 1;
1822 let y = 2;
1823 "},
1824 },
1825 Case {
1826 name: "set_with_cursor_marker",
1827 original: indoc! {"
1828 fn main() {
1829 println!();
1830 }
1831 "},
1832 model_output: indoc! {"
1833 <|set|>1:34
1834 eprintln!(\"<|user_cursor|>\");
1835 "},
1836 expected: indoc! {"
1837 fn main() {
1838 eprintln!(\"<|user_cursor|>\");
1839 }
1840 "},
1841 },
1842 Case {
1843 name: "multiple_set_commands",
1844 original: indoc! {"
1845 aaa
1846 bbb
1847 ccc
1848 ddd
1849 "},
1850 model_output: indoc! {"
1851 <|set|>0:23
1852 AAA
1853 <|set|>2:29
1854 CCC
1855 "},
1856 expected: indoc! {"
1857 AAA
1858 bbb
1859 CCC
1860 ddd
1861 "},
1862 },
1863 Case {
1864 name: "set_range_multiline_replacement",
1865 original: indoc! {"
1866 fn handle_submit() {
1867 }
1868
1869 fn handle_keystroke() {
1870 "},
1871 model_output: indoc! {"
1872 <|set|>0:3f-1:7d
1873 fn handle_submit(modal_state: &mut ModalState) {
1874 <|user_cursor|>
1875 }
1876 "},
1877 expected: indoc! {"
1878 fn handle_submit(modal_state: &mut ModalState) {
1879 <|user_cursor|>
1880 }
1881
1882 fn handle_keystroke() {
1883 "},
1884 },
1885 Case {
1886 name: "no_edit_commands_returns_original",
1887 original: indoc! {"
1888 hello
1889 world
1890 "},
1891 model_output: "some random text with no commands",
1892 expected: indoc! {"
1893 hello
1894 world
1895 "},
1896 },
1897 Case {
1898 name: "wrong_hash_set_ignored",
1899 original: indoc! {"
1900 aaa
1901 bbb
1902 "},
1903 model_output: indoc! {"
1904 <|set|>0:ff
1905 ZZZ
1906 "},
1907 expected: indoc! {"
1908 aaa
1909 bbb
1910 "},
1911 },
1912 Case {
1913 name: "insert_and_set_combined",
1914 original: indoc! {"
1915 alpha
1916 beta
1917 gamma
1918 "},
1919 model_output: indoc! {"
1920 <|set|>0:06
1921 ALPHA
1922 <|insert|>1:9c
1923 beta_extra
1924 "},
1925 expected: indoc! {"
1926 ALPHA
1927 beta
1928 beta_extra
1929 gamma
1930 "},
1931 },
1932 Case {
1933 name: "no_trailing_newline_preserved",
1934 original: "hello\nworld",
1935 model_output: indoc! {"
1936 <|set|>0:14
1937 HELLO
1938 "},
1939 expected: "HELLO\nworld",
1940 },
1941 Case {
1942 name: "set_range_hash_mismatch_in_end_bound",
1943 original: indoc! {"
1944 one
1945 two
1946 three
1947 "},
1948 model_output: indoc! {"
1949 <|set|>0:42-2:ff
1950 ONE_TWO_THREE
1951 "},
1952 expected: indoc! {"
1953 one
1954 two
1955 three
1956 "},
1957 },
1958 Case {
1959 name: "set_range_start_greater_than_end_ignored",
1960 original: indoc! {"
1961 a
1962 b
1963 c
1964 "},
1965 model_output: indoc! {"
1966 <|set|>2:63-1:62
1967 X
1968 "},
1969 expected: indoc! {"
1970 a
1971 b
1972 c
1973 "},
1974 },
1975 Case {
1976 name: "insert_out_of_bounds_ignored",
1977 original: indoc! {"
1978 x
1979 y
1980 "},
1981 model_output: indoc! {"
1982 <|insert|>99:aa
1983 z
1984 "},
1985 expected: indoc! {"
1986 x
1987 y
1988 "},
1989 },
1990 Case {
1991 name: "set_out_of_bounds_ignored",
1992 original: indoc! {"
1993 x
1994 y
1995 "},
1996 model_output: indoc! {"
1997 <|set|>99:aa
1998 z
1999 "},
2000 expected: indoc! {"
2001 x
2002 y
2003 "},
2004 },
2005 Case {
2006 name: "malformed_set_command_ignored",
2007 original: indoc! {"
2008 alpha
2009 beta
2010 "},
2011 model_output: indoc! {"
2012 <|set|>not-a-line-ref
2013 UPDATED
2014 "},
2015 expected: indoc! {"
2016 alpha
2017 beta
2018 "},
2019 },
2020 Case {
2021 name: "malformed_insert_hash_treated_as_before_first",
2022 original: indoc! {"
2023 alpha
2024 beta
2025 "},
2026 model_output: indoc! {"
2027 <|insert|>1:nothex
2028 preamble
2029 "},
2030 expected: indoc! {"
2031 preamble
2032 alpha
2033 beta
2034 "},
2035 },
2036 Case {
2037 name: "set_then_insert_same_target_orders_insert_after_replacement",
2038 original: indoc! {"
2039 cat
2040 dog
2041 "},
2042 model_output: indoc! {"
2043 <|set|>0:38
2044 CAT
2045 <|insert|>0:38
2046 TAIL
2047 "},
2048 expected: indoc! {"
2049 CAT
2050 TAIL
2051 dog
2052 "},
2053 },
2054 Case {
2055 name: "overlapping_set_ranges_last_wins",
2056 original: indoc! {"
2057 a
2058 b
2059 c
2060 d
2061 "},
2062 model_output: indoc! {"
2063 <|set|>0:61-2:63
2064 FIRST
2065 <|set|>1:62-3:64
2066 SECOND
2067 "},
2068 expected: indoc! {"
2069 FIRST
2070 d
2071 "},
2072 },
2073 Case {
2074 name: "insert_before_first_and_after_line",
2075 original: indoc! {"
2076 a
2077 b
2078 "},
2079 model_output: indoc! {"
2080 <|insert|>
2081 HEAD
2082 <|insert|>0:61
2083 MID
2084 "},
2085 expected: indoc! {"
2086 HEAD
2087 a
2088 MID
2089 b
2090 "},
2091 },
2092 ];
2093
2094 for case in &cases {
2095 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2096 assert_eq!(result, case.expected, "failed case: {}", case.name);
2097 }
2098 }
2099
2100 #[test]
2101 fn test_output_has_edit_commands() {
2102 assert!(hashline::output_has_edit_commands(&format!(
2103 "{}0:ab\nnew",
2104 SET_COMMAND_MARKER
2105 )));
2106 assert!(hashline::output_has_edit_commands(&format!(
2107 "{}0:ab\nnew",
2108 INSERT_COMMAND_MARKER
2109 )));
2110 assert!(hashline::output_has_edit_commands(&format!(
2111 "some text\n{}1:cd\nstuff",
2112 SET_COMMAND_MARKER
2113 )));
2114 assert!(!hashline::output_has_edit_commands("just plain text"));
2115 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2116 }
2117
2118 // ---- hashline::patch_to_edit_commands round-trip tests ----
2119
2120 #[test]
2121 fn test_patch_to_edit_commands() {
2122 struct Case {
2123 name: &'static str,
2124 old: &'static str,
2125 patch: &'static str,
2126 expected_new: &'static str,
2127 }
2128
2129 let cases = [
2130 Case {
2131 name: "single_line_replacement",
2132 old: indoc! {"
2133 let mut total = 0;
2134 for product in products {
2135 total += ;
2136 }
2137 total
2138 "},
2139 patch: indoc! {"
2140 @@ -1,5 +1,5 @@
2141 let mut total = 0;
2142 for product in products {
2143 - total += ;
2144 + total += product.price;
2145 }
2146 total
2147 "},
2148 expected_new: indoc! {"
2149 let mut total = 0;
2150 for product in products {
2151 total += product.price;
2152 }
2153 total
2154 "},
2155 },
2156 Case {
2157 name: "multiline_replacement",
2158 old: indoc! {"
2159 fn foo() {
2160 let x = 1;
2161 let y = 2;
2162 let z = 3;
2163 }
2164 "},
2165 patch: indoc! {"
2166 @@ -1,5 +1,3 @@
2167 fn foo() {
2168 - let x = 1;
2169 - let y = 2;
2170 - let z = 3;
2171 + let sum = 1 + 2 + 3;
2172 }
2173 "},
2174 expected_new: indoc! {"
2175 fn foo() {
2176 let sum = 1 + 2 + 3;
2177 }
2178 "},
2179 },
2180 Case {
2181 name: "insertion",
2182 old: indoc! {"
2183 fn main() {
2184 let x = 1;
2185 }
2186 "},
2187 patch: indoc! {"
2188 @@ -1,3 +1,4 @@
2189 fn main() {
2190 let x = 1;
2191 + let y = 2;
2192 }
2193 "},
2194 expected_new: indoc! {"
2195 fn main() {
2196 let x = 1;
2197 let y = 2;
2198 }
2199 "},
2200 },
2201 Case {
2202 name: "insertion_before_first",
2203 old: indoc! {"
2204 let x = 1;
2205 let y = 2;
2206 "},
2207 patch: indoc! {"
2208 @@ -1,2 +1,3 @@
2209 +use std::io;
2210 let x = 1;
2211 let y = 2;
2212 "},
2213 expected_new: indoc! {"
2214 use std::io;
2215 let x = 1;
2216 let y = 2;
2217 "},
2218 },
2219 Case {
2220 name: "deletion",
2221 old: indoc! {"
2222 aaa
2223 bbb
2224 ccc
2225 ddd
2226 "},
2227 patch: indoc! {"
2228 @@ -1,4 +1,2 @@
2229 aaa
2230 -bbb
2231 -ccc
2232 ddd
2233 "},
2234 expected_new: indoc! {"
2235 aaa
2236 ddd
2237 "},
2238 },
2239 Case {
2240 name: "multiple_changes",
2241 old: indoc! {"
2242 alpha
2243 beta
2244 gamma
2245 delta
2246 epsilon
2247 "},
2248 patch: indoc! {"
2249 @@ -1,5 +1,5 @@
2250 -alpha
2251 +ALPHA
2252 beta
2253 gamma
2254 -delta
2255 +DELTA
2256 epsilon
2257 "},
2258 expected_new: indoc! {"
2259 ALPHA
2260 beta
2261 gamma
2262 DELTA
2263 epsilon
2264 "},
2265 },
2266 Case {
2267 name: "replace_with_insertion",
2268 old: indoc! {r#"
2269 fn handle() {
2270 modal_state.close();
2271 modal_state.dismiss();
2272 "#},
2273 patch: indoc! {r#"
2274 @@ -1,3 +1,4 @@
2275 fn handle() {
2276 modal_state.close();
2277 + eprintln!("");
2278 modal_state.dismiss();
2279 "#},
2280 expected_new: indoc! {r#"
2281 fn handle() {
2282 modal_state.close();
2283 eprintln!("");
2284 modal_state.dismiss();
2285 "#},
2286 },
2287 Case {
2288 name: "complete_replacement",
2289 old: indoc! {"
2290 aaa
2291 bbb
2292 ccc
2293 "},
2294 patch: indoc! {"
2295 @@ -1,3 +1,3 @@
2296 -aaa
2297 -bbb
2298 -ccc
2299 +xxx
2300 +yyy
2301 +zzz
2302 "},
2303 expected_new: indoc! {"
2304 xxx
2305 yyy
2306 zzz
2307 "},
2308 },
2309 Case {
2310 name: "add_function_body",
2311 old: indoc! {"
2312 fn foo() {
2313 modal_state.dismiss();
2314 }
2315
2316 fn
2317
2318 fn handle_keystroke() {
2319 "},
2320 patch: indoc! {"
2321 @@ -1,6 +1,8 @@
2322 fn foo() {
2323 modal_state.dismiss();
2324 }
2325
2326 -fn
2327 +fn handle_submit() {
2328 + todo()
2329 +}
2330
2331 fn handle_keystroke() {
2332 "},
2333 expected_new: indoc! {"
2334 fn foo() {
2335 modal_state.dismiss();
2336 }
2337
2338 fn handle_submit() {
2339 todo()
2340 }
2341
2342 fn handle_keystroke() {
2343 "},
2344 },
2345 Case {
2346 name: "with_cursor_offset",
2347 old: indoc! {r#"
2348 fn main() {
2349 println!();
2350 }
2351 "#},
2352 patch: indoc! {r#"
2353 @@ -1,3 +1,3 @@
2354 fn main() {
2355 - println!();
2356 + eprintln!("");
2357 }
2358 "#},
2359 expected_new: indoc! {r#"
2360 fn main() {
2361 eprintln!("<|user_cursor|>");
2362 }
2363 "#},
2364 },
2365 Case {
2366 name: "non_local_hunk_header_pure_insertion_repro",
2367 old: indoc! {"
2368 aaa
2369 bbb
2370 "},
2371 patch: indoc! {"
2372 @@ -20,2 +20,3 @@
2373 aaa
2374 +xxx
2375 bbb
2376 "},
2377 expected_new: indoc! {"
2378 aaa
2379 xxx
2380 bbb
2381 "},
2382 },
2383 ];
2384
2385 for case in &cases {
2386 // The cursor_offset for patch_to_edit_commands is relative to
2387 // the first hunk's new text (context + additions). We compute
2388 // it by finding where the marker sits in the expected output
2389 // (which mirrors the new text of the hunk).
2390 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2391
2392 let commands =
2393 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2394 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2395
2396 assert!(
2397 hashline::output_has_edit_commands(&commands),
2398 "case {}: expected edit commands, got: {commands:?}",
2399 case.name,
2400 );
2401
2402 let applied = hashline::apply_edit_commands(case.old, &commands);
2403 assert_eq!(applied, case.expected_new, "case {}", case.name);
2404 }
2405 }
2406 }
2407}
2408
2409pub mod seed_coder {
2410 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2411 //!
2412 //! Seed-Coder uses different FIM tokens and order than Qwen:
2413 //! - SPM order: suffix comes FIRST, then prefix, then middle
2414 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2415 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2416 //!
2417 //! All context (related files, edit history) goes in the PREFIX section.
2418 //! The suffix contains only code after the editable region.
2419 //!
2420 //! Example prompt:
2421 //!
2422 //! <[fim-suffix]>
2423 //! code after editable region
2424 //! <[fim-prefix]><filename>related/file.py
2425 //! related file content
2426 //!
2427 //! <filename>edit_history
2428 //! --- a/some_file.py
2429 //! +++ b/some_file.py
2430 //! -old
2431 //! +new
2432 //!
2433 //! <filename>path/to/target_file.py
2434 //! code before editable region
2435 //! <<<<<<< CURRENT
2436 //! code that
2437 //! needs to<|user_cursor|>
2438 //! be rewritten
2439 //! =======
2440 //! <[fim-middle]>
2441 //!
2442 //! Expected output (model generates):
2443 //!
2444 //! updated
2445 //! code with
2446 //! changes applied
2447 //! >>>>>>> UPDATED
2448
2449 use super::*;
2450
2451 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2452 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2453 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2454 pub const FILE_MARKER: &str = "<filename>";
2455
2456 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2457 pub const SEPARATOR: &str = "=======\n";
2458 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2459
2460 pub const NO_EDITS: &str = "NO_EDITS\n";
2461
2462 pub fn special_tokens() -> &'static [&'static str] {
2463 &[
2464 FIM_SUFFIX,
2465 FIM_PREFIX,
2466 FIM_MIDDLE,
2467 FILE_MARKER,
2468 START_MARKER,
2469 SEPARATOR,
2470 END_MARKER,
2471 CURSOR_MARKER,
2472 ]
2473 }
2474
2475 pub fn write_cursor_excerpt_section(
2476 prompt: &mut String,
2477 path: &Path,
2478 context: &str,
2479 editable_range: &Range<usize>,
2480 cursor_offset: usize,
2481 ) {
2482 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2483 prompt.push_str(§ion);
2484 }
2485
2486 pub fn format_prompt_with_budget(
2487 path: &Path,
2488 context: &str,
2489 editable_range: &Range<usize>,
2490 cursor_offset: usize,
2491 events: &[Arc<Event>],
2492 related_files: &[RelatedFile],
2493 max_tokens: usize,
2494 ) -> String {
2495 let suffix_section = build_suffix_section(context, editable_range);
2496 let cursor_prefix_section =
2497 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2498
2499 let suffix_tokens = estimate_tokens(suffix_section.len());
2500 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2501 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2502
2503 let edit_history_section = super::format_edit_history_within_budget(
2504 events,
2505 FILE_MARKER,
2506 "edit_history",
2507 budget_after_cursor,
2508 );
2509 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2510 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2511
2512 let related_files_section = super::format_related_files_within_budget(
2513 related_files,
2514 FILE_MARKER,
2515 "",
2516 budget_after_edit_history,
2517 );
2518
2519 let mut prompt = String::new();
2520 prompt.push_str(&suffix_section);
2521 prompt.push_str(FIM_PREFIX);
2522 prompt.push_str(&related_files_section);
2523 if !related_files_section.is_empty() {
2524 prompt.push('\n');
2525 }
2526 prompt.push_str(&edit_history_section);
2527 if !edit_history_section.is_empty() {
2528 prompt.push('\n');
2529 }
2530 prompt.push_str(&cursor_prefix_section);
2531 prompt.push_str(FIM_MIDDLE);
2532 prompt
2533 }
2534
2535 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2536 let mut section = String::new();
2537 section.push_str(FIM_SUFFIX);
2538 section.push_str(&context[editable_range.end..]);
2539 if !section.ends_with('\n') {
2540 section.push('\n');
2541 }
2542 section
2543 }
2544
2545 fn build_cursor_prefix_section(
2546 path: &Path,
2547 context: &str,
2548 editable_range: &Range<usize>,
2549 cursor_offset: usize,
2550 ) -> String {
2551 let mut section = String::new();
2552 let path_str = path.to_string_lossy();
2553 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2554
2555 section.push_str(&context[..editable_range.start]);
2556 section.push_str(START_MARKER);
2557 section.push_str(&context[editable_range.start..cursor_offset]);
2558 section.push_str(CURSOR_MARKER);
2559 section.push_str(&context[cursor_offset..editable_range.end]);
2560 if !section.ends_with('\n') {
2561 section.push('\n');
2562 }
2563 section.push_str(SEPARATOR);
2564 section
2565 }
2566
2567 /// Format patch as containing no changes if it's empty; otherwise return None.
2568 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2569 // Count lines in the patch
2570 let empty_patch = patch.lines().count() <= 3;
2571 if empty_patch {
2572 Some(format!("{NO_EDITS}{END_MARKER}"))
2573 } else {
2574 None
2575 }
2576 }
2577}
2578
2579pub mod v0304_variable_edit {
2580 //! A prompt format with no fixed editable region. The entire context is shown
2581 //! to the model, and it chooses which text to replace by outputting surrounding
2582 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2583 //! text.
2584 //!
2585 //! Example prompt:
2586 //!
2587 //! <|file_sep|>path/to/file.py
2588 //! zero
2589 //! one
2590 //! two
2591 //! three<|user_cursor|>
2592 //! four
2593 //! five
2594 //! <|fim_prefix|>
    //!
2596 //! Expected output (model generates):
2597 //!
2598 //! two
2599 //! <|fim_middle|>
2600 //! THREE
2601 //! <|fim_suffix|>
2602 //! four
2603 //!
2604 //! The output means: find "two\n...\nfour" in the context, and replace
2605 //! everything between "two\n" and "four" with "THREE\n".
2606
2607 use super::*;
2608
2609 pub fn special_tokens() -> &'static [&'static str] {
2610 &[
2611 "<|fim_prefix|>",
2612 "<|fim_suffix|>",
2613 "<|fim_middle|>",
2614 "<|file_sep|>",
2615 CURSOR_MARKER,
2616 ]
2617 }
2618
2619 pub fn write_cursor_excerpt_section(
2620 prompt: &mut String,
2621 path: &Path,
2622 context: &str,
2623 cursor_offset: usize,
2624 ) {
2625 let path_str = path.to_string_lossy();
2626 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2627
2628 prompt.push_str(&context[..cursor_offset]);
2629 prompt.push_str(CURSOR_MARKER);
2630 prompt.push_str(&context[cursor_offset..]);
2631 if !prompt.ends_with('\n') {
2632 prompt.push('\n');
2633 }
2634 prompt.push_str("<|fim_prefix|>\n")
2635 }
2636
2637 /// Apply a variable-edit model output to the original context text.
2638 ///
2639 /// The model output has the form:
2640 ///
2641 /// - prefix context lines
2642 /// - `<|fim_middle|>`
2643 /// - new text
2644 /// - `<|fim_suffix|>`
2645 /// - suffix context lines
2646 ///
2647 /// We locate the prefix/suffix context lines in the original text and replace
2648 /// everything between them with the new text.
2649 pub fn apply_variable_edit(
2650 context: &str,
2651 model_output: &str,
2652 ) -> Result<(Range<usize>, String)> {
2653 let (prefix_context, rest) = model_output
2654 .split_once("<|fim_middle|>\n")
2655 .or_else(|| model_output.split_once("<|fim_middle|>"))
2656 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2657
2658 let (new_text, suffix_context) = rest
2659 .split_once("<|fim_suffix|>\n")
2660 .or_else(|| rest.split_once("<|fim_suffix|>"))
2661 .unwrap_or((rest, ""));
2662
2663 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2664 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2665 } else {
2666 suffix_context
2667 };
2668
2669 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2670 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2671 + prefix_context.len();
2672 let suffix_offset = if suffix_context.is_empty() {
2673 context.len()
2674 } else {
2675 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2676 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2677 + prefix_offset
2678 };
2679
2680 let edit_range = prefix_offset..suffix_offset;
2681 return Ok((edit_range, new_text.to_string()));
2682 }
2683
2684 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2685 if needle.is_empty() {
2686 return Some(0);
2687 }
2688
2689 haystack.match_indices(needle).find_map(|(offset, _)| {
2690 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2691 matched_line_start.then_some(offset)
2692 })
2693 }
2694
2695 /// Convert a unified diff patch into the variable-edit output format.
2696 ///
2697 /// Parses `patch` as a unified diff against `old_text` and produces model
2698 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2699 /// delimiters. The diff is resolved by content matching rather than line
2700 /// numbers.
2701 pub fn patch_to_variable_edit_output(
2702 old_text: &str,
2703 patch: &str,
2704 cursor_offset: Option<usize>,
2705 ) -> Result<String> {
2706 // Parse the unified diff into hunks. Each hunk has an `old_context`
2707 // string (context + deleted lines interleaved in order) and a list of
2708 // edits expressed as byte ranges within that context plus replacement
2709 // text.
2710 let hunks = parse_hunks(patch);
2711 if hunks.is_empty() {
2712 return Ok(String::new());
2713 }
2714
2715 // Apply each hunk by finding its old_context in the text and
2716 // performing the edits. We search forward from where the previous
2717 // hunk ended so that hunks are applied in order.
2718 let mut new_text = old_text.to_string();
2719 let mut search_from: usize = 0;
2720 let mut first_hunk_pos: Option<usize> = None;
2721
2722 for hunk in &hunks {
2723 let context_pos = new_text[search_from..]
2724 .find(&hunk.old_context)
2725 .map(|pos| pos + search_from)
2726 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2727
2728 if first_hunk_pos.is_none() {
2729 first_hunk_pos = Some(context_pos);
2730 }
2731
2732 // Apply edits in reverse order so byte offsets remain valid.
2733 for edit in hunk.edits.iter().rev() {
2734 let abs_start = context_pos + edit.range.start;
2735 let abs_end = context_pos + edit.range.end;
2736 new_text.replace_range(abs_start..abs_end, &edit.text);
2737 }
2738
2739 // Advance past this hunk's region in the (now modified) text.
2740 let new_region_len: usize =
2741 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2742 len + edit.text.len() - (edit.range.end - edit.range.start)
2743 });
2744 search_from = context_pos + new_region_len;
2745 }
2746
2747 // Now we have old_text and new_text. Find the changed line range by
2748 // comparing them.
2749 let old_lines: Vec<&str> = old_text.lines().collect();
2750 let new_lines: Vec<&str> = new_text.lines().collect();
2751
2752 // Find first differing line.
2753 let first_changed_row = old_lines
2754 .iter()
2755 .zip(new_lines.iter())
2756 .position(|(a, b)| a != b)
2757 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2758
2759 // Find last differing line (from the end).
2760 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2761 let common_suffix = old_lines
2762 .iter()
2763 .rev()
2764 .zip(new_lines.iter().rev())
2765 .take(max_suffix)
2766 .take_while(|(a, b)| a == b)
2767 .count();
2768
2769 let old_end = old_lines.len() - common_suffix;
2770 let new_end = new_lines.len() - common_suffix;
2771
2772 if first_changed_row == old_end && first_changed_row == new_end {
2773 return Ok(String::new());
2774 }
2775
2776 // Build the replacement text from new_lines[first_diff..new_end].
2777 let mut merged_new_text = String::new();
2778 for line in &new_lines[first_changed_row..new_end] {
2779 merged_new_text.push_str(line);
2780 merged_new_text.push('\n');
2781 }
2782
2783 // cursor_offset is relative to the first hunk's new content in
2784 // new_text. Translate it to an offset within merged_new_text, which
2785 // only contains lines first_diff..new_end of new_text.
2786 if let Some(hunk_offset) = cursor_offset {
2787 let hunk_start = first_hunk_pos.unwrap_or(0);
2788 let absolute_pos = hunk_start + hunk_offset;
2789
2790 // Byte offset where first_diff starts in new_text.
2791 let merged_start: usize = new_lines[..first_changed_row]
2792 .iter()
2793 .map(|line| line.len() + 1)
2794 .sum();
2795
2796 if absolute_pos >= merged_start {
2797 let relative_offset = absolute_pos - merged_start;
2798 if relative_offset <= merged_new_text.len() {
2799 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2800 }
2801 }
2802 }
2803
2804 // Build output with 2 lines of context above and below.
2805 let context_lines_count = 2;
2806 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2807 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2808
2809 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2810 let pattern = &lines[line_range];
2811 let pattern_len = pattern.len();
2812
2813 let mut count = 0;
2814 for offset in 0..=lines.len() - pattern_len {
2815 if &lines[offset..offset + pattern_len] == pattern {
2816 count += 1;
2817 }
2818 }
2819 count
2820 }
2821
2822 // Expand prefix and suffix until they are unique
2823 while prefix_start > 0 {
2824 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2825 prefix_start -= 1;
2826 } else {
2827 break;
2828 }
2829 }
2830 while suffix_end < old_lines.len() {
2831 if count_matches(old_end..suffix_end, &old_lines) > 1 {
2832 suffix_end += 1;
2833 } else {
2834 break;
2835 }
2836 }
2837
2838 let mut output = String::new();
2839 for line in &old_lines[prefix_start..first_changed_row] {
2840 output.push_str(line);
2841 output.push('\n');
2842 }
2843 output.push_str("<|fim_middle|>\n");
2844 output.push_str(&merged_new_text);
2845 output.push_str("<|fim_suffix|>\n");
2846 for line in &old_lines[old_end..suffix_end] {
2847 output.push_str(line);
2848 output.push('\n');
2849 }
2850
2851 Ok(output)
2852 }
2853
    /// One hunk of a unified diff, expressed by content rather than by line
    /// numbers.
    struct ParsedHunk {
        /// The hunk's context and deleted lines, in order, each terminated by
        /// `'\n'`. This is the text searched for in the original.
        old_context: String,
        /// Replacements to perform, as byte ranges within `old_context`.
        edits: Vec<ParsedEdit>,
    }
2858
    /// A single replacement inside a [`ParsedHunk`].
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` to replace. Empty for a
        /// pure insertion.
        range: Range<usize>,
        /// Text to put in place of `range`. Empty for a pure deletion.
        text: String,
    }
2863
2864 /// Parse a unified diff into content-based hunks. Each hunk contains an
2865 /// `old_context` string (context lines + deleted lines, which together
2866 /// form the text that should be found in the original) and a list of edits
2867 /// expressed as byte ranges within that context.
2868 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2869 let mut hunks = Vec::new();
2870 let mut current: Option<ParsedHunk> = None;
2871
2872 for line in patch.lines() {
2873 if line.starts_with("@@") {
2874 if let Some(hunk) = current.take() {
2875 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2876 hunks.push(hunk);
2877 }
2878 }
2879 current = Some(ParsedHunk {
2880 old_context: String::new(),
2881 edits: Vec::new(),
2882 });
2883 } else if line.starts_with("---") || line.starts_with("+++") {
2884 continue;
2885 } else if let Some(hunk) = &mut current {
2886 if let Some(added) = line.strip_prefix('+') {
2887 let pos = hunk.old_context.len();
2888 if let Some(last_edit) = hunk.edits.last_mut() {
2889 if last_edit.range.end == pos {
2890 writeln!(&mut last_edit.text, "{added}").ok();
2891 continue;
2892 }
2893 }
2894 hunk.edits.push(ParsedEdit {
2895 range: pos..pos,
2896 text: format!("{added}\n"),
2897 });
2898 } else if let Some(removed) = line.strip_prefix('-') {
2899 let start = hunk.old_context.len();
2900 writeln!(&mut hunk.old_context, "{removed}").ok();
2901 let end = hunk.old_context.len();
2902 if let Some(last_edit) = hunk.edits.last_mut() {
2903 if last_edit.range.end == start {
2904 last_edit.range.end = end;
2905 continue;
2906 }
2907 }
2908 hunk.edits.push(ParsedEdit {
2909 range: start..end,
2910 text: String::new(),
2911 });
2912 } else {
2913 let ctx = line.strip_prefix(' ').unwrap_or(line);
2914 writeln!(&mut hunk.old_context, "{ctx}").ok();
2915 }
2916 }
2917 }
2918
2919 if let Some(hunk) = current {
2920 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2921 hunks.push(hunk);
2922 }
2923 }
2924
2925 hunks
2926 }
2927
2928 #[cfg(test)]
2929 mod tests {
2930 use super::*;
2931 use indoc::indoc;
2932
        /// Table-driven test for `apply_variable_edit`: each case applies the
        /// returned range/replacement to `original` and compares the result
        /// with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label printed when the assertion fails.
                name: &'static str,
                // Text the edit is resolved against.
                original: &'static str,
                // Model output in variable-edit format.
                model_output: &'static str,
                // `original` after applying the resolved edit.
                expected: &'static str,
            }

            let cases = [
                // One line replaced, one context line on each side.
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                // Several lines between the context lines are replaced.
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                // Prefix and suffix context are adjacent: pure insertion.
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                // Empty new text: the lines between the contexts are removed.
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                // No prefix context: the edit starts at the top of the text.
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                // No suffix context: the edit runs to the end of the text.
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                // The cursor marker inside the new text is left as-is.
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                // More than one line of prefix context.
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3149
3150 #[test]
3151 fn test_patch_to_variable_edit() {
3152 struct Case {
3153 name: &'static str,
3154 old: &'static str,
3155 patch: &'static str,
3156 cursor_offset: Option<usize>,
3157 expected_variable_edit: &'static str,
3158 expected_after_apply: &'static str,
3159 }
3160
3161 let cases = [
3162 Case {
3163 name: "simple_replacement",
3164 old: indoc! {"
3165 zero
3166 one
3167 two
3168 three
3169 four
3170 five
3171 "},
3172 patch: indoc! {"
3173 @@ -3,3 +3,3 @@
3174 two
3175 -three
3176 +THREE
3177 four
3178 "},
3179 cursor_offset: None,
3180 expected_variable_edit: indoc! {"
3181 one
3182 two
3183 <|fim_middle|>
3184 THREE
3185 <|fim_suffix|>
3186 four
3187 five
3188 "},
3189 expected_after_apply: indoc! {"
3190 zero
3191 one
3192 two
3193 THREE
3194 four
3195 five
3196 "},
3197 },
3198 Case {
3199 name: "insertion",
3200 old: indoc! {"
3201 a
3202 b
3203 c
3204 d
3205 e
3206 "},
3207 patch: indoc! {"
3208 @@ -2,0 +3,1 @@
3209 b
3210 +X
3211 c
3212 "},
3213 cursor_offset: None,
3214 expected_variable_edit: indoc! {"
3215 a
3216 b
3217 <|fim_middle|>
3218 X
3219 <|fim_suffix|>
3220 c
3221 d
3222 "},
3223 expected_after_apply: indoc! {"
3224 a
3225 b
3226 X
3227 c
3228 d
3229 e
3230 "},
3231 },
3232 Case {
3233 name: "deletion",
3234 old: indoc! {"
3235 a
3236 b
3237 c
3238 d
3239 e
3240 "},
3241 patch: indoc! {"
3242 @@ -2,3 +2,2 @@
3243 b
3244 -c
3245 d
3246 "},
3247 cursor_offset: None,
3248 expected_variable_edit: indoc! {"
3249 a
3250 b
3251 <|fim_middle|>
3252 <|fim_suffix|>
3253 d
3254 e
3255 "},
3256 expected_after_apply: indoc! {"
3257 a
3258 b
3259 d
3260 e
3261 "},
3262 },
3263 Case {
3264 name: "edit_near_start",
3265 old: indoc! {"
3266 first
3267 second
3268 third
3269 fourth
3270 "},
3271 patch: indoc! {"
3272 @@ -1,1 +1,1 @@
3273 -first
3274 +FIRST
3275 "},
3276 cursor_offset: None,
3277 expected_variable_edit: indoc! {"
3278 <|fim_middle|>
3279 FIRST
3280 <|fim_suffix|>
3281 second
3282 third
3283 "},
3284 expected_after_apply: indoc! {"
3285 FIRST
3286 second
3287 third
3288 fourth
3289 "},
3290 },
3291 Case {
3292 name: "edit_near_end",
3293 old: indoc! {"
3294 first
3295 second
3296 third
3297 fourth
3298 "},
3299 patch: indoc! {"
3300 @@ -4,1 +4,1 @@
3301 -fourth
3302 +FOURTH
3303 "},
3304 cursor_offset: None,
3305 expected_variable_edit: indoc! {"
3306 second
3307 third
3308 <|fim_middle|>
3309 FOURTH
3310 <|fim_suffix|>
3311 "},
3312 expected_after_apply: indoc! {"
3313 first
3314 second
3315 third
3316 FOURTH
3317 "},
3318 },
3319 Case {
3320 name: "cursor_at_start_of_replacement",
3321 old: indoc! {"
3322 zero
3323 one
3324 two
3325 three
3326 four
3327 five
3328 "},
3329 patch: indoc! {"
3330 @@ -3,3 +3,3 @@
3331 two
3332 -three
3333 +THREE
3334 four
3335 "},
3336 cursor_offset: Some(4),
3337 expected_variable_edit: indoc! {"
3338 one
3339 two
3340 <|fim_middle|>
3341 <|user_cursor|>THREE
3342 <|fim_suffix|>
3343 four
3344 five
3345 "},
3346 expected_after_apply: indoc! {"
3347 zero
3348 one
3349 two
3350 <|user_cursor|>THREE
3351 four
3352 five
3353 "},
3354 },
3355 Case {
3356 name: "cursor_in_middle_of_replacement",
3357 old: indoc! {"
3358 zero
3359 one
3360 two
3361 three
3362 four
3363 five
3364 "},
3365 patch: indoc! {"
3366 @@ -3,3 +3,3 @@
3367 two
3368 -three
3369 +THREE
3370 four
3371 "},
3372 cursor_offset: Some(6),
3373 expected_variable_edit: indoc! {"
3374 one
3375 two
3376 <|fim_middle|>
3377 TH<|user_cursor|>REE
3378 <|fim_suffix|>
3379 four
3380 five
3381 "},
3382 expected_after_apply: indoc! {"
3383 zero
3384 one
3385 two
3386 TH<|user_cursor|>REE
3387 four
3388 five
3389 "},
3390 },
3391 Case {
3392 name: "expands_context_when_two_lines_not_unique_before_and_after",
3393 old: indoc! {"
3394 one
3395 a
3396 b
3397 c
3398 d
3399 two
3400 a
3401 b
3402 c
3403 d
3404 three
3405 a
3406 b
3407 c
3408 d
3409 four
3410 "},
3411 patch: indoc! {"
3412 @@ -4,5 +4,5 @@
3413 two
3414 a
3415 b
3416 -c
3417 +C
3418 d
3419 three
3420 "},
3421 cursor_offset: None,
3422 expected_variable_edit: indoc! {"
3423 two
3424 a
3425 b
3426 <|fim_middle|>
3427 C
3428 <|fim_suffix|>
3429 d
3430 three
3431 "},
3432 expected_after_apply: indoc! {"
3433 one
3434 a
3435 b
3436 c
3437 d
3438 two
3439 a
3440 b
3441 C
3442 d
3443 three
3444 a
3445 b
3446 c
3447 d
3448 four
3449 "},
3450 },
3451 Case {
3452 name: "expands_context_when_two_lines_not_unique_before_and_after",
3453 old: indoc! {"
3454 {
3455 {
3456 one();
3457 }
3458 }
3459 {
3460 {
3461 two();
3462 }
3463 }
3464 {
3465 {
3466 three();
3467 }
3468 }
3469 {
3470 {
3471 four();
3472 }
3473 }
3474 "},
3475 patch: indoc! {"
3476 @@ -4,5 +4,5 @@
3477 {
3478 - two();
3479 + TWO();
3480 }
3481 "},
3482 cursor_offset: None,
3483 expected_variable_edit: indoc! {"
3484 one();
3485 }
3486 }
3487 {
3488 {
3489 <|fim_middle|>
3490 TWO();
3491 <|fim_suffix|>
3492 }
3493 }
3494 {
3495 {
3496 three();
3497 "},
3498 expected_after_apply: indoc! {"
3499 {
3500 {
3501 one();
3502 }
3503 }
3504 {
3505 {
3506 TWO();
3507 }
3508 }
3509 {
3510 {
3511 three();
3512 }
3513 }
3514 {
3515 {
3516 four();
3517 }
3518 }
3519 "},
3520 },
3521 ];
3522
3523 for case in cases {
3524 let output =
3525 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3526 .unwrap_or_else(|error| {
3527 panic!("failed converting patch for {}: {error}", case.name)
3528 });
3529 assert_eq!(
3530 output, case.expected_variable_edit,
3531 "patch->variable_edit mismatch for {}",
3532 case.name
3533 );
3534
3535 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3536 .unwrap_or_else(|error| {
3537 panic!("failed applying variable_edit for {}: {error}", case.name)
3538 });
3539 let mut edited_by_variable_edit = case.old.to_string();
3540 edited_by_variable_edit.replace_range(edit_range, &replacement);
3541 assert_eq!(
3542 edited_by_variable_edit, case.expected_after_apply,
3543 "variable_edit apply mismatch for {}",
3544 case.name
3545 );
3546
3547 let (expected_edit_range, expected_replacement) =
3548 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3549 |error| {
3550 panic!(
3551 "failed applying expected variable_edit for {}: {error}",
3552 case.name
3553 )
3554 },
3555 );
3556 let mut edited_by_expected_variable_edit = case.old.to_string();
3557 edited_by_expected_variable_edit
3558 .replace_range(expected_edit_range, &expected_replacement);
3559 assert_eq!(
3560 edited_by_expected_variable_edit, case.expected_after_apply,
3561 "expected variable_edit apply mismatch for {}",
3562 case.name
3563 );
3564 }
3565 }
3566
3567 #[test]
3568 fn test_write_cursor_excerpt_section() {
3569 let path = Path::new("test.rs");
3570 let context = "fn main() {\n hello();\n}\n";
3571 let cursor_offset = 17;
3572 let mut prompt = String::new();
3573 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3574 assert_eq!(
3575 prompt,
3576 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3577 );
3578 }
3579 }
3580}
3581
3582/// The zeta1 prompt format
3583pub mod zeta1 {
3584 use super::*;
3585 use std::fmt::Write;
3586
3587 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3588 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3589 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3590 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3591
3592 const INSTRUCTION_HEADER: &str = concat!(
3593 "### Instruction:\n",
3594 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3595 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3596 "into account the cursor location.\n\n",
3597 "### User Edits:\n\n"
3598 );
3599 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3600 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3601
3602 /// Formats a complete zeta1 prompt from the input events and excerpt.
3603 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3604 let mut prompt = String::with_capacity(
3605 INSTRUCTION_HEADER.len()
3606 + input_events.len()
3607 + EXCERPT_HEADER.len()
3608 + input_excerpt.len()
3609 + RESPONSE_HEADER.len(),
3610 );
3611 prompt.push_str(INSTRUCTION_HEADER);
3612 prompt.push_str(input_events);
3613 prompt.push_str(EXCERPT_HEADER);
3614 prompt.push_str(input_excerpt);
3615 prompt.push_str(RESPONSE_HEADER);
3616 prompt
3617 }
3618
3619 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3620 /// editable and context byte-offset ranges within `cursor_excerpt`.
3621 pub fn format_zeta1_from_input(
3622 input: &ZetaPromptInput,
3623 editable_range: Range<usize>,
3624 context_range: Range<usize>,
3625 ) -> String {
3626 let events = format_zeta1_events(&input.events);
3627 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3628 format_zeta1_prompt(&events, &excerpt)
3629 }
3630
3631 /// Formats events in zeta1 style (oldest first).
3632 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3633 let mut result = String::new();
3634 for event in events {
3635 let event_string = format_zeta1_event(event);
3636 if event_string.is_empty() {
3637 continue;
3638 }
3639 if !result.is_empty() {
3640 result.push_str("\n\n");
3641 }
3642 result.push_str(&event_string);
3643 }
3644 result
3645 }
3646
3647 fn format_zeta1_event(event: &Event) -> String {
3648 match event {
3649 Event::BufferChange {
3650 path,
3651 old_path,
3652 diff,
3653 ..
3654 } => {
3655 let mut prompt = String::new();
3656 if old_path != path {
3657 writeln!(
3658 prompt,
3659 "User renamed {} to {}\n",
3660 old_path.display(),
3661 path.display()
3662 )
3663 .ok();
3664 }
3665 if !diff.is_empty() {
3666 write!(
3667 prompt,
3668 "User edited {}:\n```diff\n{}\n```",
3669 path.display(),
3670 diff
3671 )
3672 .ok();
3673 }
3674 prompt
3675 }
3676 }
3677 }
3678
3679 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3680 /// within `cursor_excerpt`.
3681 fn format_zeta1_excerpt(
3682 input: &ZetaPromptInput,
3683 editable_range: Range<usize>,
3684 context_range: Range<usize>,
3685 ) -> String {
3686 let path_str = input.cursor_path.to_string_lossy();
3687 let excerpt = &*input.cursor_excerpt;
3688 let cursor_offset = input.cursor_offset_in_excerpt;
3689
3690 let mut prompt = String::new();
3691 writeln!(&mut prompt, "```{path_str}").ok();
3692
3693 let starts_at_file_beginning =
3694 input.excerpt_start_row == Some(0) && context_range.start == 0;
3695 if starts_at_file_beginning {
3696 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3697 }
3698
3699 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3700
3701 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3702 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3703 prompt.push_str(CURSOR_MARKER);
3704 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3705 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3706
3707 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3708 write!(prompt, "\n```").ok();
3709
3710 prompt
3711 }
3712
3713 /// Cleans zeta1 model output by extracting content between editable region
3714 /// markers and converting the zeta1 cursor marker to the universal one.
3715 /// Returns `None` if the output doesn't contain the expected markers.
3716 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3717 let content = output.replace(CURSOR_MARKER, "");
3718
3719 let content_start = content
3720 .find(EDITABLE_REGION_START_MARKER)
3721 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3722 .map(|pos| {
3723 if content.as_bytes().get(pos) == Some(&b'\n') {
3724 pos + 1
3725 } else {
3726 pos
3727 }
3728 })
3729 .unwrap_or(0);
3730
3731 let content_end = content
3732 .find(EDITABLE_REGION_END_MARKER)
3733 .map(|pos| {
3734 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3735 pos - 1
3736 } else {
3737 pos
3738 }
3739 })
3740 .unwrap_or(content.len());
3741
3742 if content_start > content_end {
3743 return Some(String::new());
3744 }
3745
3746 let extracted = &content[content_start..content_end];
3747
3748 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3749 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3750 let text_before_cursor = text_before_cursor
3751 .find(EDITABLE_REGION_START_MARKER)
3752 .map(|pos| {
3753 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3754 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3755 after_marker + 1
3756 } else {
3757 after_marker
3758 }
3759 })
3760 .unwrap_or(0);
3761 let offset_in_extracted = zeta1_cursor_pos
3762 .saturating_sub(text_before_cursor)
3763 .min(extracted.len());
3764 offset_in_extracted
3765 });
3766
3767 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3768 if let Some(offset) = cursor_offset {
3769 result.push_str(&extracted[..offset]);
3770 result.push_str(super::CURSOR_MARKER);
3771 result.push_str(&extracted[offset..]);
3772 } else {
3773 result.push_str(extracted);
3774 }
3775
3776 Some(result)
3777 }
3778}
3779
3780#[cfg(test)]
3781mod tests {
3782 use super::*;
3783 use indoc::indoc;
3784
3785 fn make_input(
3786 cursor_excerpt: &str,
3787 editable_range: Range<usize>,
3788 cursor_offset: usize,
3789 events: Vec<Event>,
3790 related_files: Vec<RelatedFile>,
3791 ) -> ZetaPromptInput {
3792 let context_range = 0..cursor_excerpt.len();
3793 ZetaPromptInput {
3794 cursor_path: Path::new("test.rs").into(),
3795 cursor_excerpt: cursor_excerpt.into(),
3796 cursor_offset_in_excerpt: cursor_offset,
3797 excerpt_start_row: None,
3798 events: events.into_iter().map(Arc::new).collect(),
3799 related_files,
3800 excerpt_ranges: ExcerptRanges {
3801 editable_150: editable_range.clone(),
3802 editable_180: editable_range.clone(),
3803 editable_350: editable_range,
3804 editable_150_context_350: context_range.clone(),
3805 editable_180_context_350: context_range.clone(),
3806 editable_350_context_150: context_range,
3807 ..Default::default()
3808 },
3809 experiment: None,
3810 in_open_source_repo: false,
3811 can_collect_data: false,
3812 repo_url: None,
3813 }
3814 }
3815
3816 fn make_input_with_context_range(
3817 excerpt: &str,
3818 editable_range: Range<usize>,
3819 context_range: Range<usize>,
3820 cursor_offset: usize,
3821 ) -> ZetaPromptInput {
3822 ZetaPromptInput {
3823 cursor_path: Path::new("test.rs").into(),
3824 cursor_excerpt: excerpt.into(),
3825 cursor_offset_in_excerpt: cursor_offset,
3826 excerpt_start_row: None,
3827 events: vec![],
3828 related_files: vec![],
3829 excerpt_ranges: ExcerptRanges {
3830 editable_150: editable_range.clone(),
3831 editable_180: editable_range.clone(),
3832 editable_350: editable_range,
3833 editable_150_context_350: context_range.clone(),
3834 editable_180_context_350: context_range.clone(),
3835 editable_350_context_150: context_range,
3836 ..Default::default()
3837 },
3838 experiment: None,
3839 in_open_source_repo: false,
3840 can_collect_data: false,
3841 repo_url: None,
3842 }
3843 }
3844
3845 fn make_event(path: &str, diff: &str) -> Event {
3846 Event::BufferChange {
3847 path: Path::new(path).into(),
3848 old_path: Path::new(path).into(),
3849 diff: diff.to_string(),
3850 predicted: false,
3851 in_open_source_repo: false,
3852 }
3853 }
3854
3855 fn make_related_file(path: &str, content: &str) -> RelatedFile {
3856 RelatedFile {
3857 path: Path::new(path).into(),
3858 max_row: content.lines().count() as u32,
3859 excerpts: vec![RelatedExcerpt {
3860 row_range: 0..content.lines().count() as u32,
3861 text: content.into(),
3862 order: 0,
3863 }],
3864 in_open_source_repo: false,
3865 }
3866 }
3867
3868 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3869 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3870 }
3871
    /// With a budget much larger than the prompt, the related file, the edit
    /// history and the cursor excerpt are all expected in the output.
    #[test]
    fn test_no_truncation_when_within_budget() {
        // Editable range 7..15 is the word "editable"; offset 10 sits after "edi".
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
        );
    }
3904
    /// Formats the same input under a generous and a tight budget; under the
    /// tight budget the edit-history section is expected to disappear while
    /// both related files and the cursor excerpt remain.
    #[test]
    fn test_truncation_drops_edit_history_when_budget_tight() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![
                make_related_file("r1.rs", "a\n"),
                make_related_file("r2.rs", "b\n"),
            ],
        );

        // Generous budget: related files, edit history and cursor excerpt are all present.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Budget of 50: the expected output no longer contains the edit history.
        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
3955
    /// A related file with three equal-order excerpts: with a generous budget
    /// all three are rendered (separated by `...` lines); with a budget of 50
    /// only the first excerpt, followed by `...`, is expected.
    #[test]
    fn test_truncation_includes_partial_excerpts() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("big.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..10,
                        text: "first excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 10..20,
                        text: "second excerpt\n".into(),
                        order: 0,
                    },
                    RelatedExcerpt {
                        row_range: 20..30,
                        text: "third excerpt\n".into(),
                        order: 0,
                    },
                ],
            }],
        );

        // Generous budget: every excerpt of big.rs is present.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                second excerpt
                ...
                third excerpt
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Tight budget: the file is kept but only its first excerpt is rendered.
        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
4020
    /// When the budget only leaves room for one related file, the excerpt with
    /// the lower `order` value (file_b.rs, order 1) is expected to be kept over
    /// the one with the higher value (file_a.rs, order 5).
    #[test]
    fn test_truncation_prioritizes_lower_order_excerpts() {
        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
        // With tight budget, only the lower-order excerpt from file_b should be included.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("file_a.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "low priority content\n".into(),
                        order: 5,
                    }],
                },
                RelatedFile {
                    path: Path::new("file_b.rs").into(),
                    max_row: 10,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..10,
                        text: "high priority content\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both files included; rendered in stable lexicographic order.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>file_a.rs
                low priority content
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // With tight budget, only file_b (lower order) fits.
        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
        // file_a would need another 14 tokens, which doesn't fit.
        assert_eq!(
            format_with_budget(&input, 52),
            indoc! {r#"
                <|file_sep|>file_b.rs
                high priority content
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
4089
    /// Within a single related file, a tight budget is expected to keep the
    /// order-1 excerpts and drop the order-3 excerpt.
    #[test]
    fn test_truncation_drops_high_order_excerpts_within_file() {
        // A single file has excerpts at order 1 and order 3. With a tight budget,
        // only the order-1 excerpts are included while the order-3 excerpt is
        // dropped — even though they belong to the same file. This also preserves
        // the parent invariant: parent outline items have order ≤ their best
        // child, so they're always included when any child is.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("mod.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..5,
                        text: "mod header\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 5..15,
                        text: "important fn\n".into(),
                        order: 1,
                    },
                    RelatedExcerpt {
                        row_range: 15..30,
                        text: "less important fn\n".into(),
                        order: 3,
                    },
                ],
            }],
        );

        // With large budget, all three excerpts included.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                less important fn
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // With tight budget, only order<=1 excerpts included (header + important fn).
        // The trailing `...` line still appears where the dropped excerpt would follow.
        assert_eq!(
            format_with_budget(&input, 55),
            indoc! {r#"
                <|file_sep|>mod.rs
                mod header
                ...
                important fn
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
4163
    /// With two events and a tight budget, the event listed first (old.rs) is
    /// expected to be dropped while the one listed last (new.rs) is kept.
    #[test]
    fn test_truncation_drops_older_events_first() {
        // Events are passed oldest-first: old.rs, then new.rs.
        let input = make_input(
            "x",
            0..1,
            0,
            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
            vec![],
        );

        // Generous budget: both events appear, in the order given.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/old.rs
                +++ b/old.rs
                -1
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        // Budget of 55: only the new.rs event remains in the edit history.
        assert_eq!(
            format_with_budget(&input, 55),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
4209
    /// With a budget of 30, both the edit history and the related file are
    /// expected to be dropped, leaving only the cursor-excerpt section.
    #[test]
    fn test_cursor_excerpt_always_included_with_minimal_budget() {
        // The whole excerpt (0..12) is editable; offset 3 sits just before "main".
        let input = make_input(
            "fn main() {}",
            0..12,
            3,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "helper\n")],
        );

        assert_eq!(
            format_with_budget(&input, 30),
            indoc! {r#"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                fn <|user_cursor|>main() {}
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }
4232
4233 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4234 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4235 }
4236
4237 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4238 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4239 }
4240
    /// Pins the full SeedCoder prompt layout for an input with a related file,
    /// one edit event, and text both before and after the editable region.
    #[test]
    fn test_seed_coder_basic_format() {
        // Editable range 7..15 is the word "editable"; offset 10 sits after "edi".
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        // The expected prompt ends right after `<[fim-middle]>` with no trailing newline.
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                suffix
                <[fim-prefix]><filename>related.rs
                fn helper() {}

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new

                <filename>test.rs
                prefix
                <<<<<<< CURRENT
                edi<|user_cursor|>table
                =======
                <[fim-middle]>"#}
        );
    }
4273
    /// With no events and no related files, the SeedCoder prompt is expected
    /// to contain only the suffix section and the cursor file section.
    #[test]
    fn test_seed_coder_no_context() {
        // Editable range 7..13 is the word "middle"; offset 10 sits after "mid".
        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                after
                <[fim-prefix]><filename>test.rs
                before
                <<<<<<< CURRENT
                mid<|user_cursor|>dle
                =======
                <[fim-middle]>"#}
        );
    }
4291
    /// Under a tight budget the SeedCoder prompt is expected to lose both the
    /// related file and the edit history while keeping the cursor section.
    #[test]
    fn test_seed_coder_truncation_drops_context() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![make_related_file("r1.rs", "content\n")],
        );

        // With large budget, everything is included
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>r1.rs
                content

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget, context is dropped but cursor section remains
        assert_eq!(
            format_seed_coder_with_budget(&input, 30),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );
    }
4335
    /// When the budget only leaves room for one related file, the SeedCoder
    /// prompt is expected to keep the file whose excerpt has the lower `order`
    /// value (high_prio.rs, order 1) over the other (low_prio.rs, order 10).
    #[test]
    fn test_seed_coder_truncation_prioritizes_lower_order() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![],
            vec![
                RelatedFile {
                    path: Path::new("low_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "low prio\n".into(),
                        order: 10,
                    }],
                },
                RelatedFile {
                    path: Path::new("high_prio.rs").into(),
                    max_row: 5,
                    in_open_source_repo: false,
                    excerpts: vec![RelatedExcerpt {
                        row_range: 0..5,
                        text: "high prio\n".into(),
                        order: 1,
                    }],
                },
            ],
        );

        // With large budget, both included. The expected output lists them in the
        // order they were supplied above (low_prio.rs before high_prio.rs), i.e.
        // neither sorted by path nor by priority.
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>low_prio.rs
                low prio
                <filename>high_prio.rs
                high prio

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget, only high_prio included.
        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
        assert_eq!(
            format_seed_coder_with_budget(&input, 44),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>high_prio.rs
                high prio

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );
    }
4401
    /// Pins the complete zeta1 prompt for an excerpt that starts at row 0, has
    /// one edit event, and whose editable region is a sub-range of the excerpt.
    #[test]
    fn test_format_zeta1_from_input_basic() {
        // The cursor offset is expected to land immediately before `let`, and the
        // editable range is expected to end just before the closing `}` (see the
        // expected prompt below).
        // NOTE(review): the hard-coded offsets (30, 15..41) depend on the exact
        // indentation width inside this literal — confirm they still agree.
        let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
        let input = ZetaPromptInput {
            cursor_path: Path::new("src/main.rs").into(),
            cursor_excerpt: excerpt.into(),
            cursor_offset_in_excerpt: 30,
            excerpt_start_row: Some(0),
            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
            related_files: vec![],
            excerpt_ranges: ExcerptRanges {
                editable_150: 15..41,
                editable_180: 15..41,
                editable_350: 15..41,
                editable_150_context_350: 0..excerpt.len(),
                editable_180_context_350: 0..excerpt.len(),
                editable_350_context_150: 0..excerpt.len(),
                ..Default::default()
            },
            experiment: None,
            in_open_source_repo: false,
            can_collect_data: false,
            repo_url: None,
        };

        // Editable and context ranges are passed explicitly, matching the ranges above.
        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

        // Start row is Some(0) and the expected excerpt opens with `<|start_of_file|>`.
        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "User edited other.rs:\n",
                "```diff\n",
                "-old\n",
                "+new\n",
                "\n",
                "```\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```src/main.rs\n",
                "<|start_of_file|>\n",
                "fn before() {}\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                " <|user_cursor_is_here|>let x = 1;\n",
                "\n",
                "<|editable_region_end|>}\n",
                "fn after() {}\n",
                "\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }
4464
    /// Pins the zeta1 prompt for an excerpt whose start row is 10: the expected
    /// output contains no `<|start_of_file|>` marker and, with no events, an
    /// empty "User Edits" section.
    #[test]
    fn test_format_zeta1_from_input_no_start_of_file() {
        // The cursor offset is expected to land immediately before `let` (see the
        // expected prompt below).
        // NOTE(review): the hard-coded offsets (15, 0..28) depend on the exact
        // indentation width inside this literal — confirm they still agree.
        let excerpt = "fn foo() {\n let x = 1;\n}\n";
        let input = ZetaPromptInput {
            cursor_path: Path::new("src/main.rs").into(),
            cursor_excerpt: excerpt.into(),
            cursor_offset_in_excerpt: 15,
            excerpt_start_row: Some(10),
            events: vec![],
            related_files: vec![],
            excerpt_ranges: ExcerptRanges {
                editable_150: 0..28,
                editable_180: 0..28,
                editable_350: 0..28,
                editable_150_context_350: 0..28,
                editable_180_context_350: 0..28,
                editable_350_context_150: 0..28,
                ..Default::default()
            },
            experiment: None,
            in_open_source_repo: false,
            can_collect_data: false,
            repo_url: None,
        };

        // Editable and context ranges are identical here.
        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```src/main.rs\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                " <|user_cursor_is_here|>let x = 1;\n",
                "}\n",
                "\n",
                "<|editable_region_end|>\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }
4519
    /// Pins the zeta1 prompt when the editable range is a strict sub-range of
    /// the context range: text before and after the editable region is expected
    /// outside the editable-region markers.
    #[test]
    fn test_format_zeta1_from_input_with_sub_ranges() {
        // NOTE(review): the hard-coded offsets (cursor 25, editable 10..37) depend
        // on the exact indentation width inside this literal — confirm they still
        // agree with the expected prompt below.
        let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
        let editable_range = 10..37;
        let context_range = 0..excerpt.len();

        let input = ZetaPromptInput {
            cursor_path: Path::new("test.rs").into(),
            cursor_excerpt: excerpt.into(),
            cursor_offset_in_excerpt: 25,
            excerpt_start_row: Some(0),
            events: vec![],
            related_files: vec![],
            excerpt_ranges: ExcerptRanges {
                editable_150: editable_range.clone(),
                editable_180: editable_range.clone(),
                editable_350: editable_range.clone(),
                editable_150_context_350: context_range.clone(),
                editable_180_context_350: context_range.clone(),
                editable_350_context_150: context_range.clone(),
                ..Default::default()
            },
            experiment: None,
            in_open_source_repo: false,
            can_collect_data: false,
            repo_url: None,
        };

        // The same ranges stored on the input are also passed explicitly.
        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```test.rs\n",
                "<|start_of_file|>\n",
                "// prefix\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                " <|user_cursor_is_here|>let x = 1;\n",
                "}\n",
                "<|editable_region_end|>\n",
                "// suffix\n",
                "\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }
4580
    /// Output wrapped in editable-region markers is expected to be reduced to
    /// the text between them, without the newline after the start marker or
    /// the newline before the end marker.
    #[test]
    fn test_clean_zeta1_model_output_basic() {
        // NOTE(review): the expected string below keeps indentation before
        // `println!`, so this `indoc!` input must carry the same relative
        // indentation on that line — confirm the whitespace matches.
        let output = indoc! {"
            <|editable_region_start|>
            fn main() {
            println!(\"hello\");
            }
            <|editable_region_end|>
        "};

        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
    }
4594
    /// A `<|user_cursor_is_here|>` marker inside the editable region is
    /// expected to come back as `<|user_cursor|>` at the same position in the
    /// cleaned text.
    #[test]
    fn test_clean_zeta1_model_output_with_cursor() {
        // NOTE(review): the expected string below keeps indentation before the
        // cursor marker, so this `indoc!` input must carry the same relative
        // indentation on that line — confirm the whitespace matches.
        let output = indoc! {"
            <|editable_region_start|>
            fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
            }
            <|editable_region_end|>
        "};

        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(
            cleaned,
            "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
        );
    }
4611
4612 #[test]
4613 fn test_clean_zeta1_model_output_no_markers() {
4614 let output = "fn main() {}\n";
4615 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4616 assert_eq!(cleaned, "fn main() {}\n");
4617 }
4618
4619 #[test]
4620 fn test_clean_zeta1_model_output_empty_region() {
4621 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4622 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4623 assert_eq!(cleaned, "");
4624 }
4625
4626 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4627 let mut result = excerpt.to_string();
4628 result.replace_range(
4629 parsed_output.range_in_excerpt.clone(),
4630 &parsed_output.new_editable_region,
4631 );
4632 result
4633 }
4634
4635 #[test]
4636 fn test_parse_zeta2_model_output() {
4637 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4638 let context_start = excerpt.find("ctx start").unwrap();
4639 let context_end = excerpt.find("after ctx").unwrap();
4640 let editable_start = excerpt.find("editable old").unwrap();
4641 let editable_end = editable_start + "editable old\n".len();
4642 let input = make_input_with_context_range(
4643 excerpt,
4644 editable_start..editable_end,
4645 context_start..context_end,
4646 editable_start,
4647 );
4648
4649 let output = parse_zeta2_model_output(
4650 "editable new\n>>>>>>> UPDATED\n",
4651 ZetaFormat::V0131GitMergeMarkersPrefix,
4652 &input,
4653 )
4654 .unwrap();
4655
4656 assert_eq!(
4657 apply_edit(excerpt, &output),
4658 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4659 );
4660 }
4661
4662 #[test]
4663 fn test_parse_zeta2_model_output_identity() {
4664 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4665 let editable_start = excerpt.find("bbb").unwrap();
4666 let editable_end = excerpt.find("ddd").unwrap();
4667 let input = make_input_with_context_range(
4668 excerpt,
4669 editable_start..editable_end,
4670 0..excerpt.len(),
4671 editable_start,
4672 );
4673
4674 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4675 let output =
4676 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4677
4678 assert_eq!(apply_edit(excerpt, &output), excerpt);
4679 }
4680
4681 #[test]
4682 fn test_parse_zeta2_model_output_strips_end_marker() {
4683 let excerpt = "hello\nworld\n";
4684 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4685
4686 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4687 let output1 =
4688 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4689 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4690
4691 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4692 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4693 }
4694}