1pub mod excerpt_ranges;
2pub mod multi_region;
3
4use anyhow::{Result, anyhow};
5use serde::{Deserialize, Serialize};
6use std::fmt::Write;
7use std::ops::Range;
8use std::path::Path;
9use std::sync::Arc;
10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
11
12pub use crate::excerpt_ranges::{
13 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
14};
15
/// Marker text inserted into the rendered cursor excerpt at the cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
23
/// Approximates a token count from a byte length using integer division
/// (three bytes per token, rounded down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
27
/// Input from which a prompt is rendered and against which model output is
/// parsed (see `format_zeta_prompt` and `parse_zeta2_model_output`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the cursor
    /// section header of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The editable and context ranges are byte
    /// ranges into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to rows computed relative to `cursor_excerpt` so
    /// they can be compared with related-file excerpt row ranges.
    /// NOTE(review): presumably the file row at which the excerpt starts — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events rendered into the edit-history section of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Additional files whose excerpts may be included in the prompt, budget
    /// permitting. `None` is treated the same as an empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    // NOTE(review): not read anywhere in the visible part of this file.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read in the visible part
    // of this file; their meaning should be confirmed against callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
58
/// Identifies which prompt/output format to use.
///
/// The variant name doubles as the format's string name: `Display` and
/// `ZetaFormat::parse` both go through the `IntoStaticStr` derive.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
}
87
88impl std::fmt::Display for ZetaFormat {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(f, "{}", <&'static str>::from(self))
91 }
92}
93
94impl ZetaFormat {
95 pub fn parse(format_name: &str) -> Result<Self> {
96 let mut results = ZetaFormat::iter().filter(|version| {
97 <&'static str>::from(version)
98 .to_lowercase()
99 .contains(&format_name.to_lowercase())
100 });
101 let Some(result) = results.next() else {
102 anyhow::bail!(
103 "`{format_name}` did not match any of:\n{}",
104 Self::options_as_string()
105 );
106 };
107 if results.next().is_some() {
108 anyhow::bail!(
109 "`{format_name}` matched more than one of:\n{}",
110 Self::options_as_string()
111 );
112 }
113 Ok(result)
114 }
115
116 pub fn options_as_string() -> String {
117 ZetaFormat::iter()
118 .map(|format| format!("- {}\n", <&'static str>::from(format)))
119 .collect::<Vec<_>>()
120 .concat()
121 }
122}
123
/// An entry in the edit history that is rendered into the prompt by
/// `write_event`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    BufferChange {
        /// Path rendered on the `+++ b` line of the event.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line of the event.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the event is prefixed with a
        /// `// User accepted prediction:` line.
        predicted: bool,
        // NOTE(review): not rendered into the prompt; exposed through
        // `Event::in_open_source_repo`.
        in_open_source_repo: bool,
    },
}
135
136impl Event {
137 pub fn in_open_source_repo(&self) -> bool {
138 match self {
139 Event::BufferChange {
140 in_open_source_repo,
141 ..
142 } => *in_open_source_repo,
143 }
144 }
145}
146
147pub fn write_event(prompt: &mut String, event: &Event) {
148 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
149 for component in path.components() {
150 prompt.push('/');
151 write!(prompt, "{}", component.as_os_str().display()).ok();
152 }
153 }
154 match event {
155 Event::BufferChange {
156 path,
157 old_path,
158 diff,
159 predicted,
160 in_open_source_repo: _,
161 } => {
162 if *predicted {
163 prompt.push_str("// User accepted prediction:\n");
164 }
165 prompt.push_str("--- a");
166 write_path_as_unix_str(prompt, old_path.as_ref());
167 prompt.push_str("\n+++ b");
168 write_path_as_unix_str(prompt, path.as_ref());
169 prompt.push('\n');
170 prompt.push_str(diff);
171 }
172 }
173}
174
/// A diagnostic attached to the prompt input via
/// `ZetaPromptInput::active_buffer_diagnostics`.
///
/// NOTE(review): nothing in the visible part of this file reads these fields;
/// the descriptions below are inferred from names and types only — confirm
/// against the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    // presumably a numeric severity level; encoding not visible here
    pub severity: Option<i32>,
    pub message: String,
    pub snippet: String,
    // presumably the buffer rows covered by `snippet`
    pub snippet_buffer_row_range: Range<u32>,
    // presumably an offset range into `snippet`
    pub diagnostic_range_in_snippet: Range<usize>,
}
183
/// A file whose excerpts may be included in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker when the file is rendered.
    pub path: Arc<Path>,
    /// An excerpt whose `row_range.end` is below this value is followed by a
    /// `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file; a file left with no excerpts is dropped by
    /// `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read in the visible part of this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
192
/// A contiguous piece of text from a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`; compared against the cursor's row
    /// range and against `RelatedFile::max_row`.
    pub row_range: Range<u32>,
    /// The excerpt text appended to the prompt.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into the token budget: excerpts
    /// with a lower `order` are considered first.
    #[serde(default)]
    pub order: usize,
}
200
201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
202 special_tokens_for_format(format)
203 .iter()
204 .any(|token| input.cursor_excerpt.contains(token))
205}
206
207pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
208 format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
209}
210
211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
212 match format {
213 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
214 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
215 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
216 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
217 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
218 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
219 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
220 ZetaFormat::v0226Hashline => hashline::special_tokens(),
221 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
222 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
223 ZetaFormat::V0306SeedMultiRegions => {
224 static TOKENS: &[&str] = &[
225 seed_coder::FIM_SUFFIX,
226 seed_coder::FIM_PREFIX,
227 seed_coder::FIM_MIDDLE,
228 seed_coder::FILE_MARKER,
229 seed_coder::START_MARKER,
230 seed_coder::SEPARATOR,
231 seed_coder::END_MARKER,
232 CURSOR_MARKER,
233 multi_region::MARKER_TAG_PREFIX,
234 ];
235 TOKENS
236 }
237 }
238}
239
240/// Returns the (editable_token_limit, context_token_limit) for a given format.
241pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
242 match format {
243 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
244 ZetaFormat::V0114180EditableRegion => (180, 350),
245 ZetaFormat::V0120GitMergeMarkers
246 | ZetaFormat::V0131GitMergeMarkersPrefix
247 | ZetaFormat::V0211Prefill
248 | ZetaFormat::V0211SeedCoder
249 | ZetaFormat::v0226Hashline
250 | ZetaFormat::V0306SeedMultiRegions
251 | ZetaFormat::V0304SeedNoEdits => (350, 150),
252 ZetaFormat::V0304VariableEdit => (1024, 0),
253 }
254}
255
256pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
257 match format {
258 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
259 ZetaFormat::V0112MiddleAtEnd
260 | ZetaFormat::V0113Ordered
261 | ZetaFormat::V0114180EditableRegion
262 | ZetaFormat::V0120GitMergeMarkers
263 | ZetaFormat::V0131GitMergeMarkersPrefix
264 | ZetaFormat::V0211Prefill
265 | ZetaFormat::V0211SeedCoder
266 | ZetaFormat::V0304VariableEdit
267 | ZetaFormat::V0306SeedMultiRegions
268 | ZetaFormat::V0304SeedNoEdits => &[],
269 }
270}
271
272pub fn excerpt_ranges_for_format(
273 format: ZetaFormat,
274 ranges: &ExcerptRanges,
275) -> (Range<usize>, Range<usize>) {
276 match format {
277 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
278 ranges.editable_150.clone(),
279 ranges.editable_150_context_350.clone(),
280 ),
281 ZetaFormat::V0114180EditableRegion => (
282 ranges.editable_180.clone(),
283 ranges.editable_180_context_350.clone(),
284 ),
285 ZetaFormat::V0120GitMergeMarkers
286 | ZetaFormat::V0131GitMergeMarkersPrefix
287 | ZetaFormat::V0211Prefill
288 | ZetaFormat::V0211SeedCoder
289 | ZetaFormat::v0226Hashline
290 | ZetaFormat::V0304SeedNoEdits
291 | ZetaFormat::V0306SeedMultiRegions => (
292 ranges.editable_350.clone(),
293 ranges.editable_350_context_150.clone(),
294 ),
295 ZetaFormat::V0304VariableEdit => {
296 let context = ranges
297 .editable_350_context_1024
298 .clone()
299 .or(ranges.editable_350_context_512.clone())
300 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
301 (context.clone(), context)
302 }
303 }
304}
305
306pub fn write_cursor_excerpt_section_for_format(
307 format: ZetaFormat,
308 prompt: &mut String,
309 path: &Path,
310 context: &str,
311 editable_range: &Range<usize>,
312 cursor_offset: usize,
313) {
314 match format {
315 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
316 prompt,
317 path,
318 context,
319 editable_range,
320 cursor_offset,
321 ),
322 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
323 v0113_ordered::write_cursor_excerpt_section(
324 prompt,
325 path,
326 context,
327 editable_range,
328 cursor_offset,
329 )
330 }
331 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
332 prompt,
333 path,
334 context,
335 editable_range,
336 cursor_offset,
337 ),
338 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
339 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
340 prompt,
341 path,
342 context,
343 editable_range,
344 cursor_offset,
345 )
346 }
347 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
348 seed_coder::write_cursor_excerpt_section(
349 prompt,
350 path,
351 context,
352 editable_range,
353 cursor_offset,
354 )
355 }
356 ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
357 prompt,
358 path,
359 context,
360 editable_range,
361 cursor_offset,
362 ),
363 ZetaFormat::V0304VariableEdit => {
364 v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
365 }
366 ZetaFormat::V0306SeedMultiRegions => {
367 prompt.push_str(&build_v0306_cursor_prefix(
368 path,
369 context,
370 editable_range,
371 cursor_offset,
372 ));
373 }
374 }
375}
376
377fn build_v0306_cursor_prefix(
378 path: &Path,
379 context: &str,
380 editable_range: &Range<usize>,
381 cursor_offset: usize,
382) -> String {
383 let mut section = String::new();
384 let path_str = path.to_string_lossy();
385 write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
386
387 section.push_str(&context[..editable_range.start]);
388 section.push_str(seed_coder::START_MARKER);
389
390 let editable_text = &context[editable_range.clone()];
391 let cursor_in_editable = cursor_offset - editable_range.start;
392 multi_region::write_editable_with_markers(
393 &mut section,
394 editable_text,
395 cursor_in_editable,
396 CURSOR_MARKER,
397 );
398
399 if !section.ends_with('\n') {
400 section.push('\n');
401 }
402 section.push_str(seed_coder::SEPARATOR);
403 section
404}
405
/// Converts a byte range within `text` into a row range.
///
/// The start row is the number of newlines before `range.start`. The end row
/// adds the newlines inside the range, plus one more when the text up to
/// `range.end` does not end with a newline.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let count_newlines = |s: &str| s.bytes().filter(|&b| b == b'\n').count() as u32;

    let first_row = count_newlines(&text[..range.start]);
    let rows_inside = count_newlines(&text[range.clone()]);
    let ends_mid_line = !text[..range.end].ends_with('\n');

    first_row..first_row + rows_inside + u32::from(ends_mid_line)
}
414
415pub fn format_prompt_with_budget_for_format(
416 input: &ZetaPromptInput,
417 format: ZetaFormat,
418 max_tokens: usize,
419) -> String {
420 let (context, editable_range, context_range, cursor_offset) =
421 resolve_cursor_region(input, format);
422 let path = &*input.cursor_path;
423
424 let empty_files = Vec::new();
425 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
426 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
427 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
428 let row_range = relative_row_range.start + cursor_excerpt_start_row
429 ..relative_row_range.end + cursor_excerpt_start_row;
430 &filter_redundant_excerpts(
431 input_related_files.to_vec(),
432 input.cursor_path.as_ref(),
433 row_range,
434 )
435 } else {
436 input_related_files
437 };
438
439 match format {
440 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
441 seed_coder::format_prompt_with_budget(
442 path,
443 context,
444 &editable_range,
445 cursor_offset,
446 &input.events,
447 related_files,
448 max_tokens,
449 )
450 }
451 ZetaFormat::V0306SeedMultiRegions => {
452 let cursor_prefix =
453 build_v0306_cursor_prefix(path, context, &editable_range, cursor_offset);
454 seed_coder::assemble_fim_prompt(
455 context,
456 &editable_range,
457 &cursor_prefix,
458 &input.events,
459 related_files,
460 max_tokens,
461 )
462 }
463 _ => {
464 let mut cursor_section = String::new();
465 write_cursor_excerpt_section_for_format(
466 format,
467 &mut cursor_section,
468 path,
469 context,
470 &editable_range,
471 cursor_offset,
472 );
473
474 let cursor_tokens = estimate_tokens(cursor_section.len());
475 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
476
477 let edit_history_section = format_edit_history_within_budget(
478 &input.events,
479 "<|file_sep|>",
480 "edit history",
481 budget_after_cursor,
482 );
483 let edit_history_tokens = estimate_tokens(edit_history_section.len());
484 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
485
486 let related_files_section = format_related_files_within_budget(
487 &related_files,
488 "<|file_sep|>",
489 "",
490 budget_after_edit_history,
491 );
492
493 let mut prompt = String::new();
494 prompt.push_str(&related_files_section);
495 prompt.push_str(&edit_history_section);
496 prompt.push_str(&cursor_section);
497 prompt
498 }
499 }
500}
501
502pub fn filter_redundant_excerpts(
503 mut related_files: Vec<RelatedFile>,
504 cursor_path: &Path,
505 cursor_row_range: Range<u32>,
506) -> Vec<RelatedFile> {
507 for file in &mut related_files {
508 if file.path.as_ref() == cursor_path {
509 file.excerpts.retain(|excerpt| {
510 excerpt.row_range.start < cursor_row_range.start
511 || excerpt.row_range.end > cursor_row_range.end
512 });
513 }
514 }
515 related_files.retain(|file| !file.excerpts.is_empty());
516 related_files
517}
518
519pub fn get_prefill_for_format(
520 format: ZetaFormat,
521 context: &str,
522 editable_range: &Range<usize>,
523) -> String {
524 match format {
525 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
526 ZetaFormat::V0112MiddleAtEnd
527 | ZetaFormat::V0113Ordered
528 | ZetaFormat::V0114180EditableRegion
529 | ZetaFormat::V0120GitMergeMarkers
530 | ZetaFormat::V0131GitMergeMarkersPrefix
531 | ZetaFormat::V0211SeedCoder
532 | ZetaFormat::v0226Hashline
533 | ZetaFormat::V0304VariableEdit => String::new(),
534 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
535 }
536}
537
538pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
539 match format {
540 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
541 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
542 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
543 ZetaFormat::V0211SeedCoder
544 | ZetaFormat::V0304SeedNoEdits
545 | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
546 ZetaFormat::V0112MiddleAtEnd
547 | ZetaFormat::V0113Ordered
548 | ZetaFormat::V0114180EditableRegion
549 | ZetaFormat::v0226Hashline
550 | ZetaFormat::V0304VariableEdit => None,
551 }
552}
553
554pub fn encode_patch_as_output_for_format(
555 format: ZetaFormat,
556 old_editable_region: &str,
557 patch: &str,
558 cursor_offset: Option<usize>,
559) -> Result<Option<String>> {
560 match format {
561 ZetaFormat::v0226Hashline => {
562 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
563 }
564 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
565 old_editable_region,
566 patch,
567 cursor_offset,
568 )
569 .map(Some),
570 ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
571 Ok(seed_coder::no_edits(patch))
572 }
573 _ => Ok(None),
574 }
575}
576
/// Result of `parse_zeta2_model_output`: replacement text together with the
/// range of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
583
584/// Parse model output for the given zeta format
585pub fn parse_zeta2_model_output(
586 output: &str,
587 format: ZetaFormat,
588 prompt_inputs: &ZetaPromptInput,
589) -> Result<ParsedOutput> {
590 let output = match output_end_marker_for_format(format) {
591 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
592 None => output,
593 };
594
595 let (context, editable_range_in_context, context_range, _) =
596 resolve_cursor_region(prompt_inputs, format);
597 let context_start = context_range.start;
598 let old_editable_region = &context[editable_range_in_context.clone()];
599
600 let (range_in_context, output) = match format {
601 ZetaFormat::v0226Hashline => (
602 editable_range_in_context,
603 if hashline::output_has_edit_commands(output) {
604 hashline::apply_edit_commands(old_editable_region, output)
605 } else {
606 output.to_string()
607 },
608 ),
609 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
610 ZetaFormat::V0304SeedNoEdits => (
611 editable_range_in_context,
612 if output.starts_with(seed_coder::NO_EDITS) {
613 old_editable_region.to_string()
614 } else {
615 output.to_string()
616 },
617 ),
618 ZetaFormat::V0306SeedMultiRegions => (
619 editable_range_in_context,
620 if output.starts_with(seed_coder::NO_EDITS) {
621 old_editable_region.to_string()
622 } else {
623 multi_region::apply_marker_span(old_editable_region, output)?
624 },
625 ),
626 _ => (editable_range_in_context, output.to_string()),
627 };
628
629 let range_in_excerpt =
630 range_in_context.start + context_start..range_in_context.end + context_start;
631
632 Ok(ParsedOutput {
633 new_editable_region: output,
634 range_in_excerpt,
635 })
636}
637
638pub fn excerpt_range_for_format(
639 format: ZetaFormat,
640 ranges: &ExcerptRanges,
641) -> (Range<usize>, Range<usize>) {
642 excerpt_ranges_for_format(format, ranges)
643}
644
645pub fn resolve_cursor_region(
646 input: &ZetaPromptInput,
647 format: ZetaFormat,
648) -> (&str, Range<usize>, Range<usize>, usize) {
649 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
650 let (editable_tokens, context_tokens) = token_limits_for_format(format);
651 compute_editable_and_context_ranges(
652 &input.cursor_excerpt,
653 input.cursor_offset_in_excerpt,
654 syntax_ranges,
655 editable_tokens,
656 context_tokens,
657 )
658 } else {
659 excerpt_range_for_format(format, &input.excerpt_ranges)
660 };
661 let context_start = context_range.start;
662 let context_text = &input.cursor_excerpt[context_range.clone()];
663 let adjusted_editable =
664 (editable_range.start - context_start)..(editable_range.end - context_start);
665 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
666
667 (
668 context_text,
669 adjusted_editable,
670 context_range,
671 adjusted_cursor,
672 )
673}
674
675pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
676 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
677 get_prefill_for_format(format, context, &editable_range)
678}
679
680fn format_edit_history_within_budget(
681 events: &[Arc<Event>],
682 file_marker: &str,
683 edit_history_name: &str,
684 max_tokens: usize,
685) -> String {
686 let header = format!("{}{}\n", file_marker, edit_history_name);
687 let header_tokens = estimate_tokens(header.len());
688 if header_tokens >= max_tokens {
689 return String::new();
690 }
691
692 let mut event_strings: Vec<String> = Vec::new();
693 let mut total_tokens = header_tokens;
694
695 for event in events.iter().rev() {
696 let mut event_str = String::new();
697 write_event(&mut event_str, event);
698 let event_tokens = estimate_tokens(event_str.len());
699
700 if total_tokens + event_tokens > max_tokens {
701 break;
702 }
703 total_tokens += event_tokens;
704 event_strings.push(event_str);
705 }
706
707 if event_strings.is_empty() {
708 return String::new();
709 }
710
711 let mut result = header;
712 for event_str in event_strings.iter().rev() {
713 result.push_str(event_str);
714 }
715 result
716}
717
718fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
719 let needs_newline = !excerpt.text.ends_with('\n');
720 let needs_ellipsis = excerpt.row_range.end < file_max_row;
721 let len = excerpt.text.len()
722 + if needs_newline { "\n".len() } else { 0 }
723 + if needs_ellipsis { "...\n".len() } else { 0 };
724 estimate_tokens(len)
725}
726
727pub fn format_related_files_within_budget(
728 related_files: &[RelatedFile],
729 file_prefix: &str,
730 file_suffix: &str,
731 max_tokens: usize,
732) -> String {
733 struct ExcerptCandidate {
734 file_ix: usize,
735 excerpt_ix: usize,
736 order: usize,
737 }
738
739 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
740 .iter()
741 .enumerate()
742 .flat_map(|(file_ix, file)| {
743 file.excerpts
744 .iter()
745 .enumerate()
746 .map(move |(excerpt_ix, e)| ExcerptCandidate {
747 file_ix,
748 excerpt_ix,
749 order: e.order,
750 })
751 })
752 .collect();
753
754 // Pre-compute file header strings and their token costs.
755 let file_headers: Vec<String> = related_files
756 .iter()
757 .map(|file| {
758 let path_str = file.path.to_string_lossy();
759 format!("{}{}\n", file_prefix, path_str)
760 })
761 .collect();
762
763 // Sort the excerpts by their order and determine how many fit within the budget.
764 let mut total_tokens = 0;
765 let mut included_excerpt_count = 0_usize;
766 let mut included_file_indices = vec![false; related_files.len()];
767 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
768 for candidate in &excerpt_candidates {
769 let file = &related_files[candidate.file_ix];
770 let excerpt = &file.excerpts[candidate.excerpt_ix];
771 let file_already_included = included_file_indices[candidate.file_ix];
772 let header_cost = if file_already_included {
773 0
774 } else {
775 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
776 };
777 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
778 if total_tokens + header_cost + excerpt_cost > max_tokens {
779 break;
780 }
781 total_tokens += header_cost + excerpt_cost;
782 if !file_already_included {
783 included_file_indices[candidate.file_ix] = true;
784 }
785 included_excerpt_count += 1;
786 }
787
788 excerpt_candidates.truncate(included_excerpt_count);
789 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
790
791 // Render all of the files that fit within the token budget, in the original order.
792 let mut result = String::new();
793 let mut last_file_ix = None;
794 for candidate in &excerpt_candidates {
795 if last_file_ix != Some(candidate.file_ix) {
796 if last_file_ix.is_some() {
797 result.push_str(file_suffix);
798 }
799 result.push_str(&file_headers[candidate.file_ix]);
800 last_file_ix = Some(candidate.file_ix);
801 }
802 let file = &related_files[candidate.file_ix];
803 let excerpt = &file.excerpts[candidate.excerpt_ix];
804 result.push_str(&excerpt.text);
805 if !result.ends_with('\n') {
806 result.push('\n');
807 }
808 if excerpt.row_range.end < file.max_row {
809 result.push_str("...\n");
810 }
811 }
812
813 result
814}
815
816pub fn write_related_files(
817 prompt: &mut String,
818 related_files: &[RelatedFile],
819) -> Vec<Range<usize>> {
820 let mut ranges = Vec::new();
821 for file in related_files {
822 let start = prompt.len();
823 let path_str = file.path.to_string_lossy();
824 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
825 for excerpt in &file.excerpts {
826 prompt.push_str(&excerpt.text);
827 if !prompt.ends_with('\n') {
828 prompt.push('\n');
829 }
830 if excerpt.row_range.end < file.max_row {
831 prompt.push_str("...\n");
832 }
833 }
834 let end = prompt.len();
835 ranges.push(start..end);
836 }
837 ranges
838}
839
840mod v0112_middle_at_end {
841 use super::*;
842
843 pub fn special_tokens() -> &'static [&'static str] {
844 &[
845 "<|fim_prefix|>",
846 "<|fim_suffix|>",
847 "<|fim_middle|>",
848 "<|file_sep|>",
849 CURSOR_MARKER,
850 ]
851 }
852
853 pub fn write_cursor_excerpt_section(
854 prompt: &mut String,
855 path: &Path,
856 context: &str,
857 editable_range: &Range<usize>,
858 cursor_offset: usize,
859 ) {
860 let path_str = path.to_string_lossy();
861 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
862
863 prompt.push_str("<|fim_prefix|>\n");
864 prompt.push_str(&context[..editable_range.start]);
865
866 prompt.push_str("<|fim_suffix|>\n");
867 prompt.push_str(&context[editable_range.end..]);
868 if !prompt.ends_with('\n') {
869 prompt.push('\n');
870 }
871
872 prompt.push_str("<|fim_middle|>current\n");
873 prompt.push_str(&context[editable_range.start..cursor_offset]);
874 prompt.push_str(CURSOR_MARKER);
875 prompt.push_str(&context[cursor_offset..editable_range.end]);
876 if !prompt.ends_with('\n') {
877 prompt.push('\n');
878 }
879
880 prompt.push_str("<|fim_middle|>updated\n");
881 }
882}
883
884mod v0113_ordered {
885 use super::*;
886
887 pub fn special_tokens() -> &'static [&'static str] {
888 &[
889 "<|fim_prefix|>",
890 "<|fim_suffix|>",
891 "<|fim_middle|>",
892 "<|file_sep|>",
893 CURSOR_MARKER,
894 ]
895 }
896
897 pub fn write_cursor_excerpt_section(
898 prompt: &mut String,
899 path: &Path,
900 context: &str,
901 editable_range: &Range<usize>,
902 cursor_offset: usize,
903 ) {
904 let path_str = path.to_string_lossy();
905 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
906
907 prompt.push_str("<|fim_prefix|>\n");
908 prompt.push_str(&context[..editable_range.start]);
909 if !prompt.ends_with('\n') {
910 prompt.push('\n');
911 }
912
913 prompt.push_str("<|fim_middle|>current\n");
914 prompt.push_str(&context[editable_range.start..cursor_offset]);
915 prompt.push_str(CURSOR_MARKER);
916 prompt.push_str(&context[cursor_offset..editable_range.end]);
917 if !prompt.ends_with('\n') {
918 prompt.push('\n');
919 }
920
921 prompt.push_str("<|fim_suffix|>\n");
922 prompt.push_str(&context[editable_range.end..]);
923 if !prompt.ends_with('\n') {
924 prompt.push('\n');
925 }
926
927 prompt.push_str("<|fim_middle|>updated\n");
928 }
929}
930
931mod v0114180_editable_region {
932 use super::*;
933
934 pub fn special_tokens() -> &'static [&'static str] {
935 v0113_ordered::special_tokens()
936 }
937}
938
939pub mod v0120_git_merge_markers {
940 //! A prompt that uses git-style merge conflict markers to represent the editable region.
941 //!
942 //! Example prompt:
943 //!
944 //! <|file_sep|>path/to/target_file.py
945 //! <|fim_prefix|>
946 //! code before editable region
947 //! <|fim_suffix|>
948 //! code after editable region
949 //! <|fim_middle|>
950 //! <<<<<<< CURRENT
951 //! code that
952 //! needs to<|user_cursor|>
953 //! be rewritten
954 //! =======
955 //!
956 //! Expected output (should be generated by the model):
957 //!
958 //! updated
959 //! code with
960 //! changes applied
961 //! >>>>>>> UPDATED
962
963 use super::*;
964
965 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
966 pub const SEPARATOR: &str = "=======\n";
967 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
968
969 pub fn special_tokens() -> &'static [&'static str] {
970 &[
971 "<|fim_prefix|>",
972 "<|fim_suffix|>",
973 "<|fim_middle|>",
974 "<|file_sep|>",
975 START_MARKER,
976 SEPARATOR,
977 END_MARKER,
978 CURSOR_MARKER,
979 ]
980 }
981
982 pub fn write_cursor_excerpt_section(
983 prompt: &mut String,
984 path: &Path,
985 context: &str,
986 editable_range: &Range<usize>,
987 cursor_offset: usize,
988 ) {
989 let path_str = path.to_string_lossy();
990 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
991
992 prompt.push_str("<|fim_prefix|>");
993 prompt.push_str(&context[..editable_range.start]);
994
995 prompt.push_str("<|fim_suffix|>");
996 prompt.push_str(&context[editable_range.end..]);
997 if !prompt.ends_with('\n') {
998 prompt.push('\n');
999 }
1000
1001 prompt.push_str("<|fim_middle|>");
1002 prompt.push_str(START_MARKER);
1003 prompt.push_str(&context[editable_range.start..cursor_offset]);
1004 prompt.push_str(CURSOR_MARKER);
1005 prompt.push_str(&context[cursor_offset..editable_range.end]);
1006 if !prompt.ends_with('\n') {
1007 prompt.push('\n');
1008 }
1009 prompt.push_str(SEPARATOR);
1010 }
1011}
1012
1013pub mod v0131_git_merge_markers_prefix {
1014 //! A prompt that uses git-style merge conflict markers to represent the editable region.
1015 //!
1016 //! Example prompt:
1017 //!
1018 //! <|file_sep|>path/to/target_file.py
1019 //! <|fim_prefix|>
1020 //! code before editable region
1021 //! <<<<<<< CURRENT
1022 //! code that
1023 //! needs to<|user_cursor|>
1024 //! be rewritten
1025 //! =======
1026 //! <|fim_suffix|>
1027 //! code after editable region
1028 //! <|fim_middle|>
1029 //!
1030 //! Expected output (should be generated by the model):
1031 //!
1032 //! updated
1033 //! code with
1034 //! changes applied
1035 //! >>>>>>> UPDATED
1036
1037 use super::*;
1038
1039 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1040 pub const SEPARATOR: &str = "=======\n";
1041 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1042
1043 pub fn special_tokens() -> &'static [&'static str] {
1044 &[
1045 "<|fim_prefix|>",
1046 "<|fim_suffix|>",
1047 "<|fim_middle|>",
1048 "<|file_sep|>",
1049 START_MARKER,
1050 SEPARATOR,
1051 END_MARKER,
1052 CURSOR_MARKER,
1053 ]
1054 }
1055
1056 pub fn write_cursor_excerpt_section(
1057 prompt: &mut String,
1058 path: &Path,
1059 context: &str,
1060 editable_range: &Range<usize>,
1061 cursor_offset: usize,
1062 ) {
1063 let path_str = path.to_string_lossy();
1064 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1065
1066 prompt.push_str("<|fim_prefix|>");
1067 prompt.push_str(&context[..editable_range.start]);
1068 prompt.push_str(START_MARKER);
1069 prompt.push_str(&context[editable_range.start..cursor_offset]);
1070 prompt.push_str(CURSOR_MARKER);
1071 prompt.push_str(&context[cursor_offset..editable_range.end]);
1072 if !prompt.ends_with('\n') {
1073 prompt.push('\n');
1074 }
1075 prompt.push_str(SEPARATOR);
1076
1077 prompt.push_str("<|fim_suffix|>");
1078 prompt.push_str(&context[editable_range.end..]);
1079 if !prompt.ends_with('\n') {
1080 prompt.push('\n');
1081 }
1082
1083 prompt.push_str("<|fim_middle|>");
1084 }
1085}
1086
1087pub mod v0211_prefill {
1088 use super::*;
1089
1090 pub fn special_tokens() -> &'static [&'static str] {
1091 v0131_git_merge_markers_prefix::special_tokens()
1092 }
1093
1094 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1095 let editable_region = &context[editable_range.start..editable_range.end];
1096
1097 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1098 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1099
1100 // Find a token boundary to avoid splitting tokens in the prefill.
1101 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1102 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1103 // the \n and consume any consecutive \n characters after it.
1104 let prefill = &editable_region[..prefill_len];
1105 match prefill.rfind('\n') {
1106 Some(pos) => {
1107 let mut end = pos + 1;
1108 while end < editable_region.len()
1109 && editable_region.as_bytes().get(end) == Some(&b'\n')
1110 {
1111 end += 1;
1112 }
1113 editable_region[..end].to_string()
1114 }
1115 // No newline found. Fall back to splitting before the last space
1116 // (word-level boundary)
1117 None => match prefill.rfind(' ') {
1118 Some(pos) => prefill[..pos].to_string(),
1119 None => prefill.to_string(),
1120 },
1121 }
1122 }
1123}
1124
1125pub mod hashline {
1126
1127 use std::fmt::Display;
1128
/// Last marker written by `write_cursor_excerpt_section`, followed only by a newline.
pub const END_MARKER: &str = "<|fim_middle|>updated";
/// Written immediately before the hashline-encoded editable region.
pub const START_MARKER: &str = "<|fim_middle|>current";

use super::*;

/// Prefix of a command line that replaces one old line or an inclusive range of old lines.
const SET_COMMAND_MARKER: &str = "<|set|>";
/// Prefix of a command line that inserts new lines after a referenced old
/// line, or before the first line when no valid reference follows it.
const INSERT_COMMAND_MARKER: &str = "<|insert|>";
/// Model output starting with this marker leaves the editable region unchanged.
pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1137
1138 pub fn special_tokens() -> &'static [&'static str] {
1139 return &[
1140 SET_COMMAND_MARKER,
1141 "<|set_range|>",
1142 INSERT_COMMAND_MARKER,
1143 NO_EDITS_COMMAND_MARKER,
1144 CURSOR_MARKER,
1145 "<|file_sep|>",
1146 "<|fim_prefix|>",
1147 "<|fim_suffix|>",
1148 "<|fim_middle|>",
1149 ];
1150 }
1151
/// Reference to one line of the editable region: its zero-based index paired
/// with the 8-bit hash of its content. Rendered as `{index}:{hash}` with the
/// hash as two lowercase hex digits, e.g. `3:c3`.
#[derive(Clone, Debug, Eq, PartialEq)]
struct LineRef {
    index: usize,
    hash: u8,
}

impl Display for LineRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { index, hash } = self;
        write!(f, "{index}:{hash:02x}")
    }
}
1164
/// Computes the 8-bit hash of a line: the sum of its bytes, wrapping on
/// overflow. An empty line hashes to `0`.
pub fn hash_line(line: &[u8]) -> u8 {
    line.iter().fold(0, |sum: u8, &byte| sum.wrapping_add(byte))
}
1172
1173 /// Write the hashline-encoded editable region into `out`. Each line of
1174 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1175 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1176 /// to the start of `editable_text`).
1177 pub fn write_hashline_editable_region(
1178 out: &mut String,
1179 editable_text: &str,
1180 cursor_offset_in_editable: usize,
1181 ) {
1182 let mut offset = 0;
1183 for (i, line) in editable_text.lines().enumerate() {
1184 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1185 && cursor_offset_in_editable < offset + line.len()
1186 {
1187 (
1188 &line[..cursor_offset_in_editable - offset],
1189 CURSOR_MARKER,
1190 &line[cursor_offset_in_editable - offset..],
1191 )
1192 } else {
1193 (line, "", "")
1194 };
1195 write!(
1196 out,
1197 "\n{}|{head}{cursor}{tail}",
1198 LineRef {
1199 index: i,
1200 hash: hash_line(line.as_bytes())
1201 }
1202 )
1203 .unwrap();
1204 offset += line.len() + 1;
1205 }
1206 }
1207
1208 pub fn write_cursor_excerpt_section(
1209 prompt: &mut String,
1210 path: &Path,
1211 context: &str,
1212 editable_range: &Range<usize>,
1213 cursor_offset: usize,
1214 ) {
1215 let path_str = path.to_string_lossy();
1216 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1217
1218 prompt.push_str("<|fim_prefix|>\n");
1219 prompt.push_str(&context[..editable_range.start]);
1220 prompt.push_str(START_MARKER);
1221
1222 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1223 let editable_region = &context[editable_range.clone()];
1224 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1225
1226 if !prompt.ends_with('\n') {
1227 prompt.push('\n');
1228 }
1229
1230 prompt.push_str("<|fim_suffix|>\n");
1231 prompt.push_str(&context[editable_range.end..]);
1232 if !prompt.ends_with('\n') {
1233 prompt.push('\n');
1234 }
1235
1236 prompt.push_str(END_MARKER);
1237 prompt.push('\n');
1238 }
1239
/// A single edit command parsed from the model output.
///
/// `content` borrows from the model output: it is the raw text of the lines
/// that follow the command line, up to the next command line or the end of
/// the output.
#[derive(Debug)]
enum EditCommand<'a> {
    /// Replace a range of lines (inclusive on both ends). Single-line set is
    /// represented by `start == end`.
    Set {
        // First old line to replace.
        start: LineRef,
        // Last old line to replace (inclusive).
        end: LineRef,
        // Replacement text; empty when the lines are simply removed.
        content: &'a str,
    },
    /// Insert new lines after the given line, or before the first line if
    /// `after` is `None`.
    Insert {
        // Old line to insert after; `None` means "before the first line".
        after: Option<LineRef>,
        // Text to insert.
        content: &'a str,
    },
}
1257
1258 /// Parse a line reference like `3:c3` into a `LineRef`.
1259 fn parse_line_ref(s: &str) -> Option<LineRef> {
1260 let (idx_str, hash_str) = s.split_once(':')?;
1261 let index = idx_str.parse::<usize>().ok()?;
1262 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1263 Some(LineRef { index, hash })
1264 }
1265
1266 /// Parse the model output into a list of `EditCommand`s.
1267 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1268 let mut commands = Vec::new();
1269 let mut offset = 0usize;
1270
1271 while offset < model_output.len() {
1272 let next_nl = model_output[offset..]
1273 .find('\n')
1274 .map(|i| offset + i)
1275 .unwrap_or(model_output.len());
1276 let line = &model_output[offset..next_nl];
1277 let line_end = if next_nl < model_output.len() {
1278 next_nl + 1
1279 } else {
1280 next_nl
1281 };
1282
1283 let trimmed = line.trim();
1284 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1285 (true, spec)
1286 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1287 (false, spec)
1288 } else {
1289 offset = line_end;
1290 continue;
1291 };
1292
1293 let mut content_end = line_end;
1294 let mut scan = line_end;
1295
1296 while scan < model_output.len() {
1297 let body_nl = model_output[scan..]
1298 .find('\n')
1299 .map(|i| scan + i)
1300 .unwrap_or(model_output.len());
1301 let body_line = &model_output[scan..body_nl];
1302 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1303 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1304 {
1305 break;
1306 }
1307 scan = if body_nl < model_output.len() {
1308 body_nl + 1
1309 } else {
1310 body_nl
1311 };
1312 content_end = scan;
1313 }
1314
1315 let content = &model_output[line_end..content_end];
1316
1317 if is_set {
1318 if let Some((start_str, end_str)) = specifier.split_once('-') {
1319 if let (Some(start), Some(end)) =
1320 (parse_line_ref(start_str), parse_line_ref(end_str))
1321 {
1322 commands.push(EditCommand::Set {
1323 start,
1324 end,
1325 content,
1326 });
1327 }
1328 } else if let Some(target) = parse_line_ref(specifier) {
1329 commands.push(EditCommand::Set {
1330 start: target.clone(),
1331 end: target,
1332 content,
1333 });
1334 }
1335 } else {
1336 let after = parse_line_ref(specifier);
1337 commands.push(EditCommand::Insert { after, content });
1338 }
1339
1340 offset = scan;
1341 }
1342
1343 commands
1344 }
1345
/// Strips the `{line_num}:{hash}|` prefix from each line of a
/// hashline-encoded editable region and returns the plain text content.
///
/// Everything up to and including the first `|` of a line is removed; a line
/// without `|` is kept as is. Lines are joined with `\n`, and a trailing
/// newline is kept only if `region` ends with one.
pub fn strip_hashline_prefixes(region: &str) -> String {
    let mut decoded = String::new();
    for (line_number, line) in region.lines().enumerate() {
        if line_number > 0 {
            decoded.push('\n');
        }
        let content = match line.split_once('|') {
            Some((_prefix, rest)) => rest,
            None => line,
        };
        decoded.push_str(content);
    }
    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
1361
1362 pub fn output_has_edit_commands(model_output: &str) -> bool {
1363 model_output.contains(SET_COMMAND_MARKER)
1364 || model_output.contains(INSERT_COMMAND_MARKER)
1365 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1366 }
1367
1368 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1369 /// original editable region text.
1370 ///
1371 /// `editable_region` is the original text of the editable region (without hash
1372 /// prefixes). `model_output` is the raw model response containing edit commands.
1373 ///
1374 /// Returns the full replacement text for the editable region.
1375 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1376 if model_output
1377 .trim_start()
1378 .starts_with(NO_EDITS_COMMAND_MARKER)
1379 {
1380 return editable_region.to_string();
1381 }
1382
1383 let original_lines: Vec<&str> = editable_region.lines().collect();
1384 let old_hashes: Vec<u8> = original_lines
1385 .iter()
1386 .map(|line| hash_line(line.as_bytes()))
1387 .collect();
1388
1389 let commands = parse_edit_commands(model_output);
1390
1391 // For set operations: indexed by start line → Some((end line index, content))
1392 // For insert operations: indexed by line index → vec of content to insert after
1393 // Insert-before-first is tracked separately.
1394 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1395 let mut insert_before_first: Vec<&str> = Vec::new();
1396 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1397
1398 for command in &commands {
1399 match command {
1400 EditCommand::Set {
1401 start,
1402 end,
1403 content,
1404 } => {
1405 if start.index < old_hashes.len()
1406 && end.index < old_hashes.len()
1407 && start.index <= end.index
1408 && old_hashes[start.index] == start.hash
1409 && old_hashes[end.index] == end.hash
1410 {
1411 set_ops[start.index] = Some((end.index, *content));
1412 }
1413 }
1414 EditCommand::Insert { after, content } => match after {
1415 None => insert_before_first.push(*content),
1416 Some(line_ref) => {
1417 if line_ref.index < old_hashes.len()
1418 && old_hashes[line_ref.index] == line_ref.hash
1419 {
1420 insert_after[line_ref.index].push(*content);
1421 }
1422 }
1423 },
1424 }
1425 }
1426
1427 let mut result = String::new();
1428
1429 // Emit any insertions before the first line
1430 for content in &insert_before_first {
1431 result.push_str(content);
1432 if !content.ends_with('\n') {
1433 result.push('\n');
1434 }
1435 }
1436
1437 let mut i = 0;
1438 while i < original_lines.len() {
1439 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1440 // Replace lines i..=end_index with the replacement content
1441 result.push_str(replacement);
1442 if !replacement.is_empty() && !replacement.ends_with('\n') {
1443 result.push('\n');
1444 }
1445 // Emit any insertions after the end of this set range
1446 if *end_index < insert_after.len() {
1447 for content in &insert_after[*end_index] {
1448 result.push_str(content);
1449 if !content.ends_with('\n') {
1450 result.push('\n');
1451 }
1452 }
1453 }
1454 i = end_index + 1;
1455 } else {
1456 // Keep the original line
1457 result.push_str(original_lines[i]);
1458 result.push('\n');
1459 // Emit any insertions after this line
1460 for content in &insert_after[i] {
1461 result.push_str(content);
1462 if !content.ends_with('\n') {
1463 result.push('\n');
1464 }
1465 }
1466 i += 1;
1467 }
1468 }
1469
1470 // Preserve trailing newline behavior: if the original ended with a
1471 // newline the result already has one; if it didn't, trim the extra one
1472 // we added.
1473 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1474 result.pop();
1475 }
1476
1477 result
1478 }
1479
1480 /// Convert a unified diff patch into hashline edit commands.
1481 ///
1482 /// Parses the unified diff `patch` directly to determine which lines of
1483 /// `old_text` are deleted/replaced and what new lines are added, then emits
1484 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1485 /// `{index}:{hash}` identifiers.
1486 ///
1487 /// `cursor_offset` is an optional byte offset into the first hunk's new
1488 /// text (context + additions) where the cursor marker should be placed.
1489 pub fn patch_to_edit_commands(
1490 old_text: &str,
1491 patch: &str,
1492 cursor_offset: Option<usize>,
1493 ) -> Result<String> {
1494 let old_lines: Vec<&str> = old_text.lines().collect();
1495 let old_hashes: Vec<u8> = old_lines
1496 .iter()
1497 .map(|line| hash_line(line.as_bytes()))
1498 .collect();
1499
1500 let mut result = String::new();
1501 let mut first_hunk = true;
1502
1503 struct Hunk<'a> {
1504 line_range: Range<usize>,
1505 new_text_lines: Vec<&'a str>,
1506 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1507 }
1508
1509 // Parse the patch line by line. We only care about hunk headers,
1510 // context, deletions, and additions.
1511 let mut old_line_index: usize = 0;
1512 let mut current_hunk: Option<Hunk> = None;
1513 // Byte offset tracking within the hunk's new text for cursor placement.
1514 let mut new_text_byte_offset: usize = 0;
1515 // The line index of the last old line seen before/in the current hunk
1516 // (used for insert-after reference).
1517 let mut last_old_line_before_hunk: Option<usize> = None;
1518
1519 fn flush_hunk(
1520 hunk: Hunk,
1521 last_old_line: Option<usize>,
1522 result: &mut String,
1523 old_hashes: &[u8],
1524 ) {
1525 if hunk.line_range.is_empty() {
1526 // Pure insertion — reference the old line to insert after when in bounds.
1527 if let Some(after) = last_old_line
1528 && let Some(&hash) = old_hashes.get(after)
1529 {
1530 write!(
1531 result,
1532 "{INSERT_COMMAND_MARKER}{}\n",
1533 LineRef { index: after, hash }
1534 )
1535 .unwrap();
1536 } else {
1537 result.push_str(INSERT_COMMAND_MARKER);
1538 result.push('\n');
1539 }
1540 } else {
1541 let start = hunk.line_range.start;
1542 let end_exclusive = hunk.line_range.end;
1543 let deleted_line_count = end_exclusive.saturating_sub(start);
1544
1545 if deleted_line_count == 1 {
1546 if let Some(&hash) = old_hashes.get(start) {
1547 write!(
1548 result,
1549 "{SET_COMMAND_MARKER}{}\n",
1550 LineRef { index: start, hash }
1551 )
1552 .unwrap();
1553 } else {
1554 result.push_str(SET_COMMAND_MARKER);
1555 result.push('\n');
1556 }
1557 } else {
1558 let end_inclusive = end_exclusive - 1;
1559 match (
1560 old_hashes.get(start).copied(),
1561 old_hashes.get(end_inclusive).copied(),
1562 ) {
1563 (Some(start_hash), Some(end_hash)) => {
1564 write!(
1565 result,
1566 "{SET_COMMAND_MARKER}{}-{}\n",
1567 LineRef {
1568 index: start,
1569 hash: start_hash
1570 },
1571 LineRef {
1572 index: end_inclusive,
1573 hash: end_hash
1574 }
1575 )
1576 .unwrap();
1577 }
1578 _ => {
1579 result.push_str(SET_COMMAND_MARKER);
1580 result.push('\n');
1581 }
1582 }
1583 }
1584 }
1585 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1586 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1587 && line_offset == cursor_line_offset
1588 {
1589 result.push_str(&line[..char_offset]);
1590 result.push_str(CURSOR_MARKER);
1591 result.push_str(&line[char_offset..]);
1592 continue;
1593 }
1594
1595 result.push_str(line);
1596 }
1597 }
1598
1599 for raw_line in patch.split_inclusive('\n') {
1600 if raw_line.starts_with("@@") {
1601 // Flush any pending change hunk from a previous patch hunk.
1602 if let Some(hunk) = current_hunk.take() {
1603 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1604 }
1605
1606 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1607 // We intentionally do not trust old_start as a direct local index into `old_text`,
1608 // because some patches are produced against a larger file region and carry
1609 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1610 if first_hunk {
1611 new_text_byte_offset = 0;
1612 first_hunk = false;
1613 }
1614 continue;
1615 }
1616
1617 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1618 continue;
1619 }
1620 if raw_line.starts_with("\\ No newline") {
1621 continue;
1622 }
1623
1624 if raw_line.starts_with('-') {
1625 // Extend or start a change hunk with this deleted old line.
1626 match &mut current_hunk {
1627 Some(Hunk {
1628 line_range: range, ..
1629 }) => range.end = old_line_index + 1,
1630 None => {
1631 current_hunk = Some(Hunk {
1632 line_range: old_line_index..old_line_index + 1,
1633 new_text_lines: Vec::new(),
1634 cursor_line_offset_in_new_text: None,
1635 });
1636 }
1637 }
1638 old_line_index += 1;
1639 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1640 // Place cursor marker if cursor_offset falls within this line.
1641 let mut cursor_line_offset = None;
1642 if let Some(cursor_off) = cursor_offset
1643 && (first_hunk
1644 || cursor_off >= new_text_byte_offset
1645 && cursor_off <= new_text_byte_offset + added_content.len())
1646 {
1647 let line_offset = added_content.floor_char_boundary(
1648 cursor_off
1649 .saturating_sub(new_text_byte_offset)
1650 .min(added_content.len()),
1651 );
1652 cursor_line_offset = Some(line_offset);
1653 }
1654
1655 new_text_byte_offset += added_content.len();
1656
1657 let hunk = current_hunk.get_or_insert(Hunk {
1658 line_range: old_line_index..old_line_index,
1659 new_text_lines: vec![],
1660 cursor_line_offset_in_new_text: None,
1661 });
1662 hunk.new_text_lines.push(added_content);
1663 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1664 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1665 } else {
1666 // Context line (starts with ' ' or is empty).
1667 if let Some(hunk) = current_hunk.take() {
1668 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1669 }
1670 last_old_line_before_hunk = Some(old_line_index);
1671 old_line_index += 1;
1672 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1673 new_text_byte_offset += content.len();
1674 }
1675 }
1676
1677 // Flush final group.
1678 if let Some(hunk) = current_hunk.take() {
1679 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1680 }
1681
1682 // Trim a single trailing newline.
1683 if result.ends_with('\n') {
1684 result.pop();
1685 }
1686
1687 if result.is_empty() {
1688 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1689 }
1690
1691 Ok(result)
1692 }
1693
1694 #[cfg(test)]
1695 mod tests {
1696 use super::*;
1697 use indoc::indoc;
1698
1699 #[test]
1700 fn test_format_cursor_region() {
1701 struct Case {
1702 name: &'static str,
1703 context: &'static str,
1704 editable_range: Range<usize>,
1705 cursor_offset: usize,
1706 expected: &'static str,
1707 }
1708
1709 let cases = [
1710 Case {
1711 name: "basic_cursor_placement",
1712 context: "hello world\n",
1713 editable_range: 0..12,
1714 cursor_offset: 5,
1715 expected: indoc! {"
1716 <|file_sep|>test.rs
1717 <|fim_prefix|>
1718 <|fim_middle|>current
1719 0:5c|hello<|user_cursor|> world
1720 <|fim_suffix|>
1721 <|fim_middle|>updated
1722 "},
1723 },
1724 Case {
1725 name: "multiline_cursor_on_second_line",
1726 context: "aaa\nbbb\nccc\n",
1727 editable_range: 0..12,
1728 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1729 expected: indoc! {"
1730 <|file_sep|>test.rs
1731 <|fim_prefix|>
1732 <|fim_middle|>current
1733 0:23|aaa
1734 1:26|b<|user_cursor|>bb
1735 2:29|ccc
1736 <|fim_suffix|>
1737 <|fim_middle|>updated
1738 "},
1739 },
1740 Case {
1741 name: "no_trailing_newline_in_context",
1742 context: "line1\nline2",
1743 editable_range: 0..11,
1744 cursor_offset: 3,
1745 expected: indoc! {"
1746 <|file_sep|>test.rs
1747 <|fim_prefix|>
1748 <|fim_middle|>current
1749 0:d9|lin<|user_cursor|>e1
1750 1:da|line2
1751 <|fim_suffix|>
1752 <|fim_middle|>updated
1753 "},
1754 },
1755 Case {
1756 name: "leading_newline_in_editable_region",
1757 context: "\nabc\n",
1758 editable_range: 0..5,
1759 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1760 expected: indoc! {"
1761 <|file_sep|>test.rs
1762 <|fim_prefix|>
1763 <|fim_middle|>current
1764 0:00|
1765 1:26|a<|user_cursor|>bc
1766 <|fim_suffix|>
1767 <|fim_middle|>updated
1768 "},
1769 },
1770 Case {
1771 name: "with_suffix",
1772 context: "abc\ndef",
1773 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1774 cursor_offset: 2,
1775 expected: indoc! {"
1776 <|file_sep|>test.rs
1777 <|fim_prefix|>
1778 <|fim_middle|>current
1779 0:26|ab<|user_cursor|>c
1780 <|fim_suffix|>
1781 def
1782 <|fim_middle|>updated
1783 "},
1784 },
1785 Case {
1786 name: "unicode_two_byte_chars",
1787 context: "héllo\n",
1788 editable_range: 0..7,
1789 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1790 expected: indoc! {"
1791 <|file_sep|>test.rs
1792 <|fim_prefix|>
1793 <|fim_middle|>current
1794 0:1b|hé<|user_cursor|>llo
1795 <|fim_suffix|>
1796 <|fim_middle|>updated
1797 "},
1798 },
1799 Case {
1800 name: "unicode_three_byte_chars",
1801 context: "日本語\n",
1802 editable_range: 0..10,
1803 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1804 expected: indoc! {"
1805 <|file_sep|>test.rs
1806 <|fim_prefix|>
1807 <|fim_middle|>current
1808 0:80|日本<|user_cursor|>語
1809 <|fim_suffix|>
1810 <|fim_middle|>updated
1811 "},
1812 },
1813 Case {
1814 name: "unicode_four_byte_chars",
1815 context: "a🌍b\n",
1816 editable_range: 0..7,
1817 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1818 expected: indoc! {"
1819 <|file_sep|>test.rs
1820 <|fim_prefix|>
1821 <|fim_middle|>current
1822 0:6b|a🌍<|user_cursor|>b
1823 <|fim_suffix|>
1824 <|fim_middle|>updated
1825 "},
1826 },
1827 Case {
1828 name: "cursor_at_start_of_region_not_placed",
1829 context: "abc\n",
1830 editable_range: 0..4,
1831 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1832 expected: indoc! {"
1833 <|file_sep|>test.rs
1834 <|fim_prefix|>
1835 <|fim_middle|>current
1836 0:26|abc
1837 <|fim_suffix|>
1838 <|fim_middle|>updated
1839 "},
1840 },
1841 Case {
1842 name: "cursor_at_end_of_line_not_placed",
1843 context: "abc\ndef\n",
1844 editable_range: 0..8,
1845 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1846 expected: indoc! {"
1847 <|file_sep|>test.rs
1848 <|fim_prefix|>
1849 <|fim_middle|>current
1850 0:26|abc
1851 1:2f|def
1852 <|fim_suffix|>
1853 <|fim_middle|>updated
1854 "},
1855 },
1856 Case {
1857 name: "cursor_offset_relative_to_context_not_editable_region",
1858 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1859 // write_cursor_excerpt_section must subtract it before comparing against
1860 // per-line offsets within the editable region.
1861 context: "pre\naaa\nbbb\nsuf\n",
1862 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1863 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1864 expected: indoc! {"
1865 <|file_sep|>test.rs
1866 <|fim_prefix|>
1867 pre
1868 <|fim_middle|>current
1869 0:23|aaa
1870 1:26|b<|user_cursor|>bb
1871 <|fim_suffix|>
1872 suf
1873 <|fim_middle|>updated
1874 "},
1875 },
1876 ];
1877
1878 for case in &cases {
1879 let mut prompt = String::new();
1880 hashline::write_cursor_excerpt_section(
1881 &mut prompt,
1882 Path::new("test.rs"),
1883 case.context,
1884 &case.editable_range,
1885 case.cursor_offset,
1886 );
1887 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1888 }
1889 }
1890
1891 #[test]
1892 fn test_apply_edit_commands() {
1893 struct Case {
1894 name: &'static str,
1895 original: &'static str,
1896 model_output: &'static str,
1897 expected: &'static str,
1898 }
1899
1900 let cases = vec![
1901 Case {
1902 name: "set_single_line",
1903 original: indoc! {"
1904 let mut total = 0;
1905 for product in products {
1906 total += ;
1907 }
1908 total
1909 "},
1910 model_output: indoc! {"
1911 <|set|>2:87
1912 total += product.price;
1913 "},
1914 expected: indoc! {"
1915 let mut total = 0;
1916 for product in products {
1917 total += product.price;
1918 }
1919 total
1920 "},
1921 },
1922 Case {
1923 name: "set_range",
1924 original: indoc! {"
1925 fn foo() {
1926 let x = 1;
1927 let y = 2;
1928 let z = 3;
1929 }
1930 "},
1931 model_output: indoc! {"
1932 <|set|>1:46-3:4a
1933 let sum = 6;
1934 "},
1935 expected: indoc! {"
1936 fn foo() {
1937 let sum = 6;
1938 }
1939 "},
1940 },
1941 Case {
1942 name: "insert_after_line",
1943 original: indoc! {"
1944 fn main() {
1945 let x = 1;
1946 }
1947 "},
1948 model_output: indoc! {"
1949 <|insert|>1:46
1950 let y = 2;
1951 "},
1952 expected: indoc! {"
1953 fn main() {
1954 let x = 1;
1955 let y = 2;
1956 }
1957 "},
1958 },
1959 Case {
1960 name: "insert_before_first",
1961 original: indoc! {"
1962 let x = 1;
1963 let y = 2;
1964 "},
1965 model_output: indoc! {"
1966 <|insert|>
1967 use std::io;
1968 "},
1969 expected: indoc! {"
1970 use std::io;
1971 let x = 1;
1972 let y = 2;
1973 "},
1974 },
1975 Case {
1976 name: "set_with_cursor_marker",
1977 original: indoc! {"
1978 fn main() {
1979 println!();
1980 }
1981 "},
1982 model_output: indoc! {"
1983 <|set|>1:34
1984 eprintln!(\"<|user_cursor|>\");
1985 "},
1986 expected: indoc! {"
1987 fn main() {
1988 eprintln!(\"<|user_cursor|>\");
1989 }
1990 "},
1991 },
1992 Case {
1993 name: "multiple_set_commands",
1994 original: indoc! {"
1995 aaa
1996 bbb
1997 ccc
1998 ddd
1999 "},
2000 model_output: indoc! {"
2001 <|set|>0:23
2002 AAA
2003 <|set|>2:29
2004 CCC
2005 "},
2006 expected: indoc! {"
2007 AAA
2008 bbb
2009 CCC
2010 ddd
2011 "},
2012 },
2013 Case {
2014 name: "set_range_multiline_replacement",
2015 original: indoc! {"
2016 fn handle_submit() {
2017 }
2018
2019 fn handle_keystroke() {
2020 "},
2021 model_output: indoc! {"
2022 <|set|>0:3f-1:7d
2023 fn handle_submit(modal_state: &mut ModalState) {
2024 <|user_cursor|>
2025 }
2026 "},
2027 expected: indoc! {"
2028 fn handle_submit(modal_state: &mut ModalState) {
2029 <|user_cursor|>
2030 }
2031
2032 fn handle_keystroke() {
2033 "},
2034 },
2035 Case {
2036 name: "no_edit_commands_returns_original",
2037 original: indoc! {"
2038 hello
2039 world
2040 "},
2041 model_output: "some random text with no commands",
2042 expected: indoc! {"
2043 hello
2044 world
2045 "},
2046 },
2047 Case {
2048 name: "no_edits_command_returns_original",
2049 original: indoc! {"
2050 hello
2051 world
2052 "},
2053 model_output: "<|no_edits|>",
2054 expected: indoc! {"
2055 hello
2056 world
2057 "},
2058 },
2059 Case {
2060 name: "wrong_hash_set_ignored",
2061 original: indoc! {"
2062 aaa
2063 bbb
2064 "},
2065 model_output: indoc! {"
2066 <|set|>0:ff
2067 ZZZ
2068 "},
2069 expected: indoc! {"
2070 aaa
2071 bbb
2072 "},
2073 },
2074 Case {
2075 name: "insert_and_set_combined",
2076 original: indoc! {"
2077 alpha
2078 beta
2079 gamma
2080 "},
2081 model_output: indoc! {"
2082 <|set|>0:06
2083 ALPHA
2084 <|insert|>1:9c
2085 beta_extra
2086 "},
2087 expected: indoc! {"
2088 ALPHA
2089 beta
2090 beta_extra
2091 gamma
2092 "},
2093 },
2094 Case {
2095 name: "no_trailing_newline_preserved",
2096 original: "hello\nworld",
2097 model_output: indoc! {"
2098 <|set|>0:14
2099 HELLO
2100 "},
2101 expected: "HELLO\nworld",
2102 },
2103 Case {
2104 name: "set_range_hash_mismatch_in_end_bound",
2105 original: indoc! {"
2106 one
2107 two
2108 three
2109 "},
2110 model_output: indoc! {"
2111 <|set|>0:42-2:ff
2112 ONE_TWO_THREE
2113 "},
2114 expected: indoc! {"
2115 one
2116 two
2117 three
2118 "},
2119 },
2120 Case {
2121 name: "set_range_start_greater_than_end_ignored",
2122 original: indoc! {"
2123 a
2124 b
2125 c
2126 "},
2127 model_output: indoc! {"
2128 <|set|>2:63-1:62
2129 X
2130 "},
2131 expected: indoc! {"
2132 a
2133 b
2134 c
2135 "},
2136 },
2137 Case {
2138 name: "insert_out_of_bounds_ignored",
2139 original: indoc! {"
2140 x
2141 y
2142 "},
2143 model_output: indoc! {"
2144 <|insert|>99:aa
2145 z
2146 "},
2147 expected: indoc! {"
2148 x
2149 y
2150 "},
2151 },
2152 Case {
2153 name: "set_out_of_bounds_ignored",
2154 original: indoc! {"
2155 x
2156 y
2157 "},
2158 model_output: indoc! {"
2159 <|set|>99:aa
2160 z
2161 "},
2162 expected: indoc! {"
2163 x
2164 y
2165 "},
2166 },
2167 Case {
2168 name: "malformed_set_command_ignored",
2169 original: indoc! {"
2170 alpha
2171 beta
2172 "},
2173 model_output: indoc! {"
2174 <|set|>not-a-line-ref
2175 UPDATED
2176 "},
2177 expected: indoc! {"
2178 alpha
2179 beta
2180 "},
2181 },
2182 Case {
2183 name: "malformed_insert_hash_treated_as_before_first",
2184 original: indoc! {"
2185 alpha
2186 beta
2187 "},
2188 model_output: indoc! {"
2189 <|insert|>1:nothex
2190 preamble
2191 "},
2192 expected: indoc! {"
2193 preamble
2194 alpha
2195 beta
2196 "},
2197 },
2198 Case {
2199 name: "set_then_insert_same_target_orders_insert_after_replacement",
2200 original: indoc! {"
2201 cat
2202 dog
2203 "},
2204 model_output: indoc! {"
2205 <|set|>0:38
2206 CAT
2207 <|insert|>0:38
2208 TAIL
2209 "},
2210 expected: indoc! {"
2211 CAT
2212 TAIL
2213 dog
2214 "},
2215 },
2216 Case {
2217 name: "overlapping_set_ranges_last_wins",
2218 original: indoc! {"
2219 a
2220 b
2221 c
2222 d
2223 "},
2224 model_output: indoc! {"
2225 <|set|>0:61-2:63
2226 FIRST
2227 <|set|>1:62-3:64
2228 SECOND
2229 "},
2230 expected: indoc! {"
2231 FIRST
2232 d
2233 "},
2234 },
2235 Case {
2236 name: "insert_before_first_and_after_line",
2237 original: indoc! {"
2238 a
2239 b
2240 "},
2241 model_output: indoc! {"
2242 <|insert|>
2243 HEAD
2244 <|insert|>0:61
2245 MID
2246 "},
2247 expected: indoc! {"
2248 HEAD
2249 a
2250 MID
2251 b
2252 "},
2253 },
2254 ];
2255
2256 for case in &cases {
2257 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2258 assert_eq!(result, case.expected, "failed case: {}", case.name);
2259 }
2260 }
2261
2262 #[test]
2263 fn test_output_has_edit_commands() {
2264 assert!(hashline::output_has_edit_commands(&format!(
2265 "{}0:ab\nnew",
2266 SET_COMMAND_MARKER
2267 )));
2268 assert!(hashline::output_has_edit_commands(&format!(
2269 "{}0:ab\nnew",
2270 INSERT_COMMAND_MARKER
2271 )));
2272 assert!(hashline::output_has_edit_commands(&format!(
2273 "some text\n{}1:cd\nstuff",
2274 SET_COMMAND_MARKER
2275 )));
2276 assert!(!hashline::output_has_edit_commands("just plain text"));
2277 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2278 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2279 }
2280
2281 // ---- hashline::patch_to_edit_commands round-trip tests ----
2282
2283 #[test]
2284 fn test_patch_to_edit_commands() {
2285 struct Case {
2286 name: &'static str,
2287 old: &'static str,
2288 patch: &'static str,
2289 expected_new: &'static str,
2290 }
2291
2292 let cases = [
2293 Case {
2294 name: "single_line_replacement",
2295 old: indoc! {"
2296 let mut total = 0;
2297 for product in products {
2298 total += ;
2299 }
2300 total
2301 "},
2302 patch: indoc! {"
2303 @@ -1,5 +1,5 @@
2304 let mut total = 0;
2305 for product in products {
2306 - total += ;
2307 + total += product.price;
2308 }
2309 total
2310 "},
2311 expected_new: indoc! {"
2312 let mut total = 0;
2313 for product in products {
2314 total += product.price;
2315 }
2316 total
2317 "},
2318 },
2319 Case {
2320 name: "multiline_replacement",
2321 old: indoc! {"
2322 fn foo() {
2323 let x = 1;
2324 let y = 2;
2325 let z = 3;
2326 }
2327 "},
2328 patch: indoc! {"
2329 @@ -1,5 +1,3 @@
2330 fn foo() {
2331 - let x = 1;
2332 - let y = 2;
2333 - let z = 3;
2334 + let sum = 1 + 2 + 3;
2335 }
2336 "},
2337 expected_new: indoc! {"
2338 fn foo() {
2339 let sum = 1 + 2 + 3;
2340 }
2341 "},
2342 },
2343 Case {
2344 name: "insertion",
2345 old: indoc! {"
2346 fn main() {
2347 let x = 1;
2348 }
2349 "},
2350 patch: indoc! {"
2351 @@ -1,3 +1,4 @@
2352 fn main() {
2353 let x = 1;
2354 + let y = 2;
2355 }
2356 "},
2357 expected_new: indoc! {"
2358 fn main() {
2359 let x = 1;
2360 let y = 2;
2361 }
2362 "},
2363 },
2364 Case {
2365 name: "insertion_before_first",
2366 old: indoc! {"
2367 let x = 1;
2368 let y = 2;
2369 "},
2370 patch: indoc! {"
2371 @@ -1,2 +1,3 @@
2372 +use std::io;
2373 let x = 1;
2374 let y = 2;
2375 "},
2376 expected_new: indoc! {"
2377 use std::io;
2378 let x = 1;
2379 let y = 2;
2380 "},
2381 },
2382 Case {
2383 name: "deletion",
2384 old: indoc! {"
2385 aaa
2386 bbb
2387 ccc
2388 ddd
2389 "},
2390 patch: indoc! {"
2391 @@ -1,4 +1,2 @@
2392 aaa
2393 -bbb
2394 -ccc
2395 ddd
2396 "},
2397 expected_new: indoc! {"
2398 aaa
2399 ddd
2400 "},
2401 },
2402 Case {
2403 name: "multiple_changes",
2404 old: indoc! {"
2405 alpha
2406 beta
2407 gamma
2408 delta
2409 epsilon
2410 "},
2411 patch: indoc! {"
2412 @@ -1,5 +1,5 @@
2413 -alpha
2414 +ALPHA
2415 beta
2416 gamma
2417 -delta
2418 +DELTA
2419 epsilon
2420 "},
2421 expected_new: indoc! {"
2422 ALPHA
2423 beta
2424 gamma
2425 DELTA
2426 epsilon
2427 "},
2428 },
2429 Case {
2430 name: "replace_with_insertion",
2431 old: indoc! {r#"
2432 fn handle() {
2433 modal_state.close();
2434 modal_state.dismiss();
2435 "#},
2436 patch: indoc! {r#"
2437 @@ -1,3 +1,4 @@
2438 fn handle() {
2439 modal_state.close();
2440 + eprintln!("");
2441 modal_state.dismiss();
2442 "#},
2443 expected_new: indoc! {r#"
2444 fn handle() {
2445 modal_state.close();
2446 eprintln!("");
2447 modal_state.dismiss();
2448 "#},
2449 },
2450 Case {
2451 name: "complete_replacement",
2452 old: indoc! {"
2453 aaa
2454 bbb
2455 ccc
2456 "},
2457 patch: indoc! {"
2458 @@ -1,3 +1,3 @@
2459 -aaa
2460 -bbb
2461 -ccc
2462 +xxx
2463 +yyy
2464 +zzz
2465 "},
2466 expected_new: indoc! {"
2467 xxx
2468 yyy
2469 zzz
2470 "},
2471 },
2472 Case {
2473 name: "add_function_body",
2474 old: indoc! {"
2475 fn foo() {
2476 modal_state.dismiss();
2477 }
2478
2479 fn
2480
2481 fn handle_keystroke() {
2482 "},
2483 patch: indoc! {"
2484 @@ -1,6 +1,8 @@
2485 fn foo() {
2486 modal_state.dismiss();
2487 }
2488
2489 -fn
2490 +fn handle_submit() {
2491 + todo()
2492 +}
2493
2494 fn handle_keystroke() {
2495 "},
2496 expected_new: indoc! {"
2497 fn foo() {
2498 modal_state.dismiss();
2499 }
2500
2501 fn handle_submit() {
2502 todo()
2503 }
2504
2505 fn handle_keystroke() {
2506 "},
2507 },
2508 Case {
2509 name: "with_cursor_offset",
2510 old: indoc! {r#"
2511 fn main() {
2512 println!();
2513 }
2514 "#},
2515 patch: indoc! {r#"
2516 @@ -1,3 +1,3 @@
2517 fn main() {
2518 - println!();
2519 + eprintln!("");
2520 }
2521 "#},
2522 expected_new: indoc! {r#"
2523 fn main() {
2524 eprintln!("<|user_cursor|>");
2525 }
2526 "#},
2527 },
2528 Case {
2529 name: "non_local_hunk_header_pure_insertion_repro",
2530 old: indoc! {"
2531 aaa
2532 bbb
2533 "},
2534 patch: indoc! {"
2535 @@ -20,2 +20,3 @@
2536 aaa
2537 +xxx
2538 bbb
2539 "},
2540 expected_new: indoc! {"
2541 aaa
2542 xxx
2543 bbb
2544 "},
2545 },
2546 Case {
2547 name: "empty_patch_produces_no_edits_marker",
2548 old: indoc! {"
2549 aaa
2550 bbb
2551 "},
2552 patch: "@@ -20,2 +20,3 @@\n",
2553 expected_new: indoc! {"
2554 aaa
2555 bbb
2556 "},
2557 },
2558 ];
2559
2560 for case in &cases {
2561 // The cursor_offset for patch_to_edit_commands is relative to
2562 // the first hunk's new text (context + additions). We compute
2563 // it by finding where the marker sits in the expected output
2564 // (which mirrors the new text of the hunk).
2565 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2566
2567 let commands =
2568 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2569 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2570
2571 assert!(
2572 hashline::output_has_edit_commands(&commands),
2573 "case {}: expected edit commands, got: {commands:?}",
2574 case.name,
2575 );
2576
2577 let applied = hashline::apply_edit_commands(case.old, &commands);
2578 assert_eq!(applied, case.expected_new, "case {}", case.name);
2579 }
2580 }
2581 }
2582}
2583
2584pub mod seed_coder {
2585 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2586 //!
2587 //! Seed-Coder uses different FIM tokens and order than Qwen:
2588 //! - SPM order: suffix comes FIRST, then prefix, then middle
2589 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2590 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2591 //!
2592 //! All context (related files, edit history) goes in the PREFIX section.
2593 //! The suffix contains only code after the editable region.
2594 //!
2595 //! Example prompt:
2596 //!
2597 //! <[fim-suffix]>
2598 //! code after editable region
2599 //! <[fim-prefix]><filename>related/file.py
2600 //! related file content
2601 //!
2602 //! <filename>edit_history
2603 //! --- a/some_file.py
2604 //! +++ b/some_file.py
2605 //! -old
2606 //! +new
2607 //!
2608 //! <filename>path/to/target_file.py
2609 //! code before editable region
2610 //! <<<<<<< CURRENT
2611 //! code that
2612 //! needs to<|user_cursor|>
2613 //! be rewritten
2614 //! =======
2615 //! <[fim-middle]>
2616 //!
2617 //! Expected output (model generates):
2618 //!
2619 //! updated
2620 //! code with
2621 //! changes applied
2622 //! >>>>>>> UPDATED
2623
2624 use super::*;
2625
2626 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2627 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2628 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2629 pub const FILE_MARKER: &str = "<filename>";
2630
2631 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2632 pub const SEPARATOR: &str = "=======\n";
2633 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2634
2635 pub const NO_EDITS: &str = "NO_EDITS\n";
2636
2637 pub fn special_tokens() -> &'static [&'static str] {
2638 &[
2639 FIM_SUFFIX,
2640 FIM_PREFIX,
2641 FIM_MIDDLE,
2642 FILE_MARKER,
2643 START_MARKER,
2644 SEPARATOR,
2645 END_MARKER,
2646 CURSOR_MARKER,
2647 ]
2648 }
2649
2650 pub fn write_cursor_excerpt_section(
2651 prompt: &mut String,
2652 path: &Path,
2653 context: &str,
2654 editable_range: &Range<usize>,
2655 cursor_offset: usize,
2656 ) {
2657 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2658 prompt.push_str(§ion);
2659 }
2660
2661 pub fn format_prompt_with_budget(
2662 path: &Path,
2663 context: &str,
2664 editable_range: &Range<usize>,
2665 cursor_offset: usize,
2666 events: &[Arc<Event>],
2667 related_files: &[RelatedFile],
2668 max_tokens: usize,
2669 ) -> String {
2670 let cursor_prefix_section =
2671 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2672 assemble_fim_prompt(
2673 context,
2674 editable_range,
2675 &cursor_prefix_section,
2676 events,
2677 related_files,
2678 max_tokens,
2679 )
2680 }
2681
2682 pub fn assemble_fim_prompt(
2683 context: &str,
2684 editable_range: &Range<usize>,
2685 cursor_prefix_section: &str,
2686 events: &[Arc<Event>],
2687 related_files: &[RelatedFile],
2688 max_tokens: usize,
2689 ) -> String {
2690 let suffix_section = build_suffix_section(context, editable_range);
2691
2692 let suffix_tokens = estimate_tokens(suffix_section.len());
2693 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2694 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2695
2696 let edit_history_section = super::format_edit_history_within_budget(
2697 events,
2698 FILE_MARKER,
2699 "edit_history",
2700 budget_after_cursor,
2701 );
2702 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2703 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2704
2705 let related_files_section = super::format_related_files_within_budget(
2706 related_files,
2707 FILE_MARKER,
2708 "",
2709 budget_after_edit_history,
2710 );
2711
2712 let mut prompt = String::new();
2713 prompt.push_str(&suffix_section);
2714 prompt.push_str(FIM_PREFIX);
2715 prompt.push_str(&related_files_section);
2716 if !related_files_section.is_empty() {
2717 prompt.push('\n');
2718 }
2719 prompt.push_str(&edit_history_section);
2720 if !edit_history_section.is_empty() {
2721 prompt.push('\n');
2722 }
2723 prompt.push_str(cursor_prefix_section);
2724 prompt.push_str(FIM_MIDDLE);
2725 prompt
2726 }
2727
2728 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2729 let mut section = String::new();
2730 section.push_str(FIM_SUFFIX);
2731 section.push_str(&context[editable_range.end..]);
2732 if !section.ends_with('\n') {
2733 section.push('\n');
2734 }
2735 section
2736 }
2737
2738 fn build_cursor_prefix_section(
2739 path: &Path,
2740 context: &str,
2741 editable_range: &Range<usize>,
2742 cursor_offset: usize,
2743 ) -> String {
2744 let mut section = String::new();
2745 let path_str = path.to_string_lossy();
2746 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2747
2748 section.push_str(&context[..editable_range.start]);
2749 section.push_str(START_MARKER);
2750 section.push_str(&context[editable_range.start..cursor_offset]);
2751 section.push_str(CURSOR_MARKER);
2752 section.push_str(&context[cursor_offset..editable_range.end]);
2753 if !section.ends_with('\n') {
2754 section.push('\n');
2755 }
2756 section.push_str(SEPARATOR);
2757 section
2758 }
2759
2760 /// Format patch as containing no changes if it's empty; otherwise return None.
2761 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2762 // Count lines in the patch
2763 let empty_patch = patch.lines().count() <= 3;
2764 if empty_patch {
2765 Some(format!("{NO_EDITS}{END_MARKER}"))
2766 } else {
2767 None
2768 }
2769 }
2770}
2771
2772pub mod v0304_variable_edit {
2773 //! A prompt format with no fixed editable region. The entire context is shown
2774 //! to the model, and it chooses which text to replace by outputting surrounding
2775 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2776 //! text.
2777 //!
2778 //! Example prompt:
2779 //!
2780 //! <|file_sep|>path/to/file.py
2781 //! zero
2782 //! one
2783 //! two
2784 //! three<|user_cursor|>
2785 //! four
2786 //! five
2787 //! <|fim_prefix|>
    //!
2789 //! Expected output (model generates):
2790 //!
2791 //! two
2792 //! <|fim_middle|>
2793 //! THREE
2794 //! <|fim_suffix|>
2795 //! four
2796 //!
2797 //! The output means: find "two\n...\nfour" in the context, and replace
2798 //! everything between "two\n" and "four" with "THREE\n".
2799
2800 use super::*;
2801
2802 pub fn special_tokens() -> &'static [&'static str] {
2803 &[
2804 "<|fim_prefix|>",
2805 "<|fim_suffix|>",
2806 "<|fim_middle|>",
2807 "<|file_sep|>",
2808 CURSOR_MARKER,
2809 ]
2810 }
2811
2812 pub fn write_cursor_excerpt_section(
2813 prompt: &mut String,
2814 path: &Path,
2815 context: &str,
2816 cursor_offset: usize,
2817 ) {
2818 let path_str = path.to_string_lossy();
2819 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2820
2821 prompt.push_str(&context[..cursor_offset]);
2822 prompt.push_str(CURSOR_MARKER);
2823 prompt.push_str(&context[cursor_offset..]);
2824 if !prompt.ends_with('\n') {
2825 prompt.push('\n');
2826 }
2827 prompt.push_str("<|fim_prefix|>\n")
2828 }
2829
2830 /// Apply a variable-edit model output to the original context text.
2831 ///
2832 /// The model output has the form:
2833 ///
2834 /// - prefix context lines
2835 /// - `<|fim_middle|>`
2836 /// - new text
2837 /// - `<|fim_suffix|>`
2838 /// - suffix context lines
2839 ///
2840 /// We locate the prefix/suffix context lines in the original text and replace
2841 /// everything between them with the new text.
2842 pub fn apply_variable_edit(
2843 context: &str,
2844 model_output: &str,
2845 ) -> Result<(Range<usize>, String)> {
2846 let (prefix_context, rest) = model_output
2847 .split_once("<|fim_middle|>\n")
2848 .or_else(|| model_output.split_once("<|fim_middle|>"))
2849 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2850
2851 let (new_text, suffix_context) = rest
2852 .split_once("<|fim_suffix|>\n")
2853 .or_else(|| rest.split_once("<|fim_suffix|>"))
2854 .unwrap_or((rest, ""));
2855
2856 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2857 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2858 } else {
2859 suffix_context
2860 };
2861
2862 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2863 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2864 + prefix_context.len();
2865 let suffix_offset = if suffix_context.is_empty() {
2866 context.len()
2867 } else {
2868 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2869 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2870 + prefix_offset
2871 };
2872
2873 let edit_range = prefix_offset..suffix_offset;
2874 return Ok((edit_range, new_text.to_string()));
2875 }
2876
2877 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2878 if needle.is_empty() {
2879 return Some(0);
2880 }
2881
2882 haystack.match_indices(needle).find_map(|(offset, _)| {
2883 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2884 matched_line_start.then_some(offset)
2885 })
2886 }
2887
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// All hunks are applied first, then the result is compared with
    /// `old_text` line by line, so several hunks collapse into a single
    /// replacement spanning the first to the last changed line.
    ///
    /// `cursor_offset`, when given, is a byte offset relative to the position
    /// where the first hunk's context was found; `CURSOR_MARKER` is inserted
    /// into the replacement text if that position falls inside it.
    ///
    /// Returns an empty string when the patch contains no hunks or when
    /// applying it changes no line.
    ///
    /// # Errors
    ///
    /// Fails when a hunk's old context cannot be found in the text.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            // Remember where the first hunk landed; the cursor offset is
            // interpreted relative to this position further down.
            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text.
            // The region's new length is the old context length plus, for
            // every edit, inserted bytes minus removed bytes.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, the change starts where the shorter one ends.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). The common suffix is
        // capped so that it never overlaps the common prefix.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        // Exclusive end rows of the changed region in the old and new text.
        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Both changed regions are empty: the patch changed no line.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_diff..new_end].
        // Every line is newline-terminated.
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_diff..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_diff starts in new_text.
            // NOTE(review): `+ 1` counts one byte per line terminator, i.e.
            // this assumes "\n" line endings — confirm for "\r\n" input.
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // A cursor that falls outside the replacement text is dropped.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // anywhere in `lines` (including the occurrence at `line_range`).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique within the old text,
        // or until the start / end of the text is reached.
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context, delimiter, replacement, delimiter, suffix context.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3046
    /// One hunk of a unified diff, expressed by content instead of by line
    /// numbers.
    struct ParsedHunk {
        /// The hunk's context lines and removed lines in diff order, each
        /// terminated by `'\n'`.
        old_context: String,
        /// Edits to perform on `old_context`, in the order they appear in the
        /// hunk.
        edits: Vec<ParsedEdit>,
    }
3051
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is replaced; empty
        /// for a pure insertion.
        range: Range<usize>,
        /// Replacement text made of the added lines, each terminated by
        /// `'\n'`; empty for a pure deletion.
        text: String,
    }
3056
3057 /// Parse a unified diff into content-based hunks. Each hunk contains an
3058 /// `old_context` string (context lines + deleted lines, which together
3059 /// form the text that should be found in the original) and a list of edits
3060 /// expressed as byte ranges within that context.
3061 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3062 let mut hunks = Vec::new();
3063 let mut current: Option<ParsedHunk> = None;
3064
3065 for line in patch.lines() {
3066 if line.starts_with("@@") {
3067 if let Some(hunk) = current.take() {
3068 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3069 hunks.push(hunk);
3070 }
3071 }
3072 current = Some(ParsedHunk {
3073 old_context: String::new(),
3074 edits: Vec::new(),
3075 });
3076 } else if line.starts_with("---") || line.starts_with("+++") {
3077 continue;
3078 } else if let Some(hunk) = &mut current {
3079 if let Some(added) = line.strip_prefix('+') {
3080 let pos = hunk.old_context.len();
3081 if let Some(last_edit) = hunk.edits.last_mut() {
3082 if last_edit.range.end == pos {
3083 writeln!(&mut last_edit.text, "{added}").ok();
3084 continue;
3085 }
3086 }
3087 hunk.edits.push(ParsedEdit {
3088 range: pos..pos,
3089 text: format!("{added}\n"),
3090 });
3091 } else if let Some(removed) = line.strip_prefix('-') {
3092 let start = hunk.old_context.len();
3093 writeln!(&mut hunk.old_context, "{removed}").ok();
3094 let end = hunk.old_context.len();
3095 if let Some(last_edit) = hunk.edits.last_mut() {
3096 if last_edit.range.end == start {
3097 last_edit.range.end = end;
3098 continue;
3099 }
3100 }
3101 hunk.edits.push(ParsedEdit {
3102 range: start..end,
3103 text: String::new(),
3104 });
3105 } else {
3106 let ctx = line.strip_prefix(' ').unwrap_or(line);
3107 writeln!(&mut hunk.old_context, "{ctx}").ok();
3108 }
3109 }
3110 }
3111
3112 if let Some(hunk) = current {
3113 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3114 hunks.push(hunk);
3115 }
3116 }
3117
3118 hunks
3119 }
3120
3121 #[cfg(test)]
3122 mod tests {
3123 use super::*;
3124 use indoc::indoc;
3125
3126 #[test]
3127 fn test_apply_variable_edit() {
3128 struct Case {
3129 name: &'static str,
3130 original: &'static str,
3131 model_output: &'static str,
3132 expected: &'static str,
3133 }
3134
3135 let cases = [
3136 Case {
3137 name: "simple_single_line_replacement",
3138 original: indoc! {"
3139 zero
3140 one
3141 two
3142 three
3143 four
3144 five
3145 "},
3146 model_output: indoc! {"
3147 two
3148 <|fim_middle|>
3149 THREE
3150 <|fim_suffix|>
3151 four
3152 "},
3153 expected: indoc! {"
3154 zero
3155 one
3156 two
3157 THREE
3158 four
3159 five
3160 "},
3161 },
3162 Case {
3163 name: "multi_line_replacement",
3164 original: indoc! {"
3165 a
3166 b
3167 c
3168 d
3169 e
3170 "},
3171 model_output: indoc! {"
3172 a
3173 <|fim_middle|>
3174 B
3175 C
3176 D
3177 <|fim_suffix|>
3178 e
3179 "},
3180 expected: indoc! {"
3181 a
3182 B
3183 C
3184 D
3185 e
3186 "},
3187 },
3188 Case {
3189 name: "insertion_between_existing_lines",
3190 original: indoc! {"
3191 a
3192 b
3193 c
3194 "},
3195 model_output: indoc! {"
3196 a
3197 <|fim_middle|>
3198 X
3199 <|fim_suffix|>
3200 b
3201 "},
3202 expected: indoc! {"
3203 a
3204 X
3205 b
3206 c
3207 "},
3208 },
3209 Case {
3210 name: "deletion",
3211 original: indoc! {"
3212 a
3213 b
3214 c
3215 d
3216 "},
3217 model_output: indoc! {"
3218 a
3219 <|fim_middle|>
3220 <|fim_suffix|>
3221 c
3222 "},
3223 expected: indoc! {"
3224 a
3225 c
3226 d
3227 "},
3228 },
3229 Case {
3230 name: "replacement_at_start_no_prefix_context",
3231 original: indoc! {"
3232 a
3233 b
3234 c
3235 "},
3236 model_output: indoc! {"
3237 <|fim_middle|>
3238 X
3239 <|fim_suffix|>
3240 b
3241 "},
3242 expected: indoc! {"
3243 X
3244 b
3245 c
3246 "},
3247 },
3248 Case {
3249 name: "replacement_at_end_no_suffix_context",
3250 original: indoc! {"
3251 a
3252 b
3253 c
3254 "},
3255 model_output: indoc! {"
3256 b
3257 <|fim_middle|>
3258 Z
3259 <|fim_suffix|>
3260 "},
3261 expected: indoc! {"
3262 a
3263 b
3264 Z
3265 "},
3266 },
3267 Case {
3268 name: "context_with_trailing_newline_is_preserved",
3269 original: indoc! {"
3270 a
3271 b
3272 c
3273 "},
3274 model_output: indoc! {"
3275 a
3276 <|fim_middle|>
3277 B
3278 <|fim_suffix|>
3279 c
3280 "},
3281 expected: indoc! {"
3282 a
3283 B
3284 c
3285 "},
3286 },
3287 Case {
3288 name: "cursor_marker_passes_through_untouched",
3289 original: indoc! {"
3290 a
3291 b
3292 c
3293 "},
3294 model_output: indoc! {"
3295 a
3296 <|fim_middle|>
3297 B<|user_cursor|>B
3298 <|fim_suffix|>
3299 c
3300 "},
3301 expected: indoc! {"
3302 a
3303 B<|user_cursor|>B
3304 c
3305 "},
3306 },
3307 Case {
3308 name: "multiple_prefix_context_lines",
3309 original: indoc! {"
3310 a
3311 b
3312 c
3313 d
3314 e
3315 "},
3316 model_output: indoc! {"
3317 b
3318 c
3319 <|fim_middle|>
3320 D
3321 <|fim_suffix|>
3322 e
3323 "},
3324 expected: indoc! {"
3325 a
3326 b
3327 c
3328 D
3329 e
3330 "},
3331 },
3332 ];
3333
3334 for case in cases {
3335 let (edit_range, replacement) =
3336 apply_variable_edit(case.original, case.model_output).unwrap();
3337 let mut edited = case.original.to_string();
3338 edited.replace_range(edit_range, &replacement);
3339 assert_eq!(edited, case.expected, "{}", case.name);
3340 }
3341 }
3342
3343 #[test]
3344 fn test_patch_to_variable_edit() {
3345 struct Case {
3346 name: &'static str,
3347 old: &'static str,
3348 patch: &'static str,
3349 cursor_offset: Option<usize>,
3350 expected_variable_edit: &'static str,
3351 expected_after_apply: &'static str,
3352 }
3353
3354 let cases = [
3355 Case {
3356 name: "simple_replacement",
3357 old: indoc! {"
3358 zero
3359 one
3360 two
3361 three
3362 four
3363 five
3364 "},
3365 patch: indoc! {"
3366 @@ -3,3 +3,3 @@
3367 two
3368 -three
3369 +THREE
3370 four
3371 "},
3372 cursor_offset: None,
3373 expected_variable_edit: indoc! {"
3374 one
3375 two
3376 <|fim_middle|>
3377 THREE
3378 <|fim_suffix|>
3379 four
3380 five
3381 "},
3382 expected_after_apply: indoc! {"
3383 zero
3384 one
3385 two
3386 THREE
3387 four
3388 five
3389 "},
3390 },
3391 Case {
3392 name: "insertion",
3393 old: indoc! {"
3394 a
3395 b
3396 c
3397 d
3398 e
3399 "},
3400 patch: indoc! {"
3401 @@ -2,0 +3,1 @@
3402 b
3403 +X
3404 c
3405 "},
3406 cursor_offset: None,
3407 expected_variable_edit: indoc! {"
3408 a
3409 b
3410 <|fim_middle|>
3411 X
3412 <|fim_suffix|>
3413 c
3414 d
3415 "},
3416 expected_after_apply: indoc! {"
3417 a
3418 b
3419 X
3420 c
3421 d
3422 e
3423 "},
3424 },
3425 Case {
3426 name: "deletion",
3427 old: indoc! {"
3428 a
3429 b
3430 c
3431 d
3432 e
3433 "},
3434 patch: indoc! {"
3435 @@ -2,3 +2,2 @@
3436 b
3437 -c
3438 d
3439 "},
3440 cursor_offset: None,
3441 expected_variable_edit: indoc! {"
3442 a
3443 b
3444 <|fim_middle|>
3445 <|fim_suffix|>
3446 d
3447 e
3448 "},
3449 expected_after_apply: indoc! {"
3450 a
3451 b
3452 d
3453 e
3454 "},
3455 },
3456 Case {
3457 name: "edit_near_start",
3458 old: indoc! {"
3459 first
3460 second
3461 third
3462 fourth
3463 "},
3464 patch: indoc! {"
3465 @@ -1,1 +1,1 @@
3466 -first
3467 +FIRST
3468 "},
3469 cursor_offset: None,
3470 expected_variable_edit: indoc! {"
3471 <|fim_middle|>
3472 FIRST
3473 <|fim_suffix|>
3474 second
3475 third
3476 "},
3477 expected_after_apply: indoc! {"
3478 FIRST
3479 second
3480 third
3481 fourth
3482 "},
3483 },
3484 Case {
3485 name: "edit_near_end",
3486 old: indoc! {"
3487 first
3488 second
3489 third
3490 fourth
3491 "},
3492 patch: indoc! {"
3493 @@ -4,1 +4,1 @@
3494 -fourth
3495 +FOURTH
3496 "},
3497 cursor_offset: None,
3498 expected_variable_edit: indoc! {"
3499 second
3500 third
3501 <|fim_middle|>
3502 FOURTH
3503 <|fim_suffix|>
3504 "},
3505 expected_after_apply: indoc! {"
3506 first
3507 second
3508 third
3509 FOURTH
3510 "},
3511 },
3512 Case {
3513 name: "cursor_at_start_of_replacement",
3514 old: indoc! {"
3515 zero
3516 one
3517 two
3518 three
3519 four
3520 five
3521 "},
3522 patch: indoc! {"
3523 @@ -3,3 +3,3 @@
3524 two
3525 -three
3526 +THREE
3527 four
3528 "},
3529 cursor_offset: Some(4),
3530 expected_variable_edit: indoc! {"
3531 one
3532 two
3533 <|fim_middle|>
3534 <|user_cursor|>THREE
3535 <|fim_suffix|>
3536 four
3537 five
3538 "},
3539 expected_after_apply: indoc! {"
3540 zero
3541 one
3542 two
3543 <|user_cursor|>THREE
3544 four
3545 five
3546 "},
3547 },
3548 Case {
3549 name: "cursor_in_middle_of_replacement",
3550 old: indoc! {"
3551 zero
3552 one
3553 two
3554 three
3555 four
3556 five
3557 "},
3558 patch: indoc! {"
3559 @@ -3,3 +3,3 @@
3560 two
3561 -three
3562 +THREE
3563 four
3564 "},
3565 cursor_offset: Some(6),
3566 expected_variable_edit: indoc! {"
3567 one
3568 two
3569 <|fim_middle|>
3570 TH<|user_cursor|>REE
3571 <|fim_suffix|>
3572 four
3573 five
3574 "},
3575 expected_after_apply: indoc! {"
3576 zero
3577 one
3578 two
3579 TH<|user_cursor|>REE
3580 four
3581 five
3582 "},
3583 },
3584 Case {
3585 name: "expands_context_when_two_lines_not_unique_before_and_after",
3586 old: indoc! {"
3587 one
3588 a
3589 b
3590 c
3591 d
3592 two
3593 a
3594 b
3595 c
3596 d
3597 three
3598 a
3599 b
3600 c
3601 d
3602 four
3603 "},
3604 patch: indoc! {"
3605 @@ -4,5 +4,5 @@
3606 two
3607 a
3608 b
3609 -c
3610 +C
3611 d
3612 three
3613 "},
3614 cursor_offset: None,
3615 expected_variable_edit: indoc! {"
3616 two
3617 a
3618 b
3619 <|fim_middle|>
3620 C
3621 <|fim_suffix|>
3622 d
3623 three
3624 "},
3625 expected_after_apply: indoc! {"
3626 one
3627 a
3628 b
3629 c
3630 d
3631 two
3632 a
3633 b
3634 C
3635 d
3636 three
3637 a
3638 b
3639 c
3640 d
3641 four
3642 "},
3643 },
3644 Case {
3645 name: "expands_context_when_two_lines_not_unique_before_and_after",
3646 old: indoc! {"
3647 {
3648 {
3649 one();
3650 }
3651 }
3652 {
3653 {
3654 two();
3655 }
3656 }
3657 {
3658 {
3659 three();
3660 }
3661 }
3662 {
3663 {
3664 four();
3665 }
3666 }
3667 "},
3668 patch: indoc! {"
3669 @@ -4,5 +4,5 @@
3670 {
3671 - two();
3672 + TWO();
3673 }
3674 "},
3675 cursor_offset: None,
3676 expected_variable_edit: indoc! {"
3677 one();
3678 }
3679 }
3680 {
3681 {
3682 <|fim_middle|>
3683 TWO();
3684 <|fim_suffix|>
3685 }
3686 }
3687 {
3688 {
3689 three();
3690 "},
3691 expected_after_apply: indoc! {"
3692 {
3693 {
3694 one();
3695 }
3696 }
3697 {
3698 {
3699 TWO();
3700 }
3701 }
3702 {
3703 {
3704 three();
3705 }
3706 }
3707 {
3708 {
3709 four();
3710 }
3711 }
3712 "},
3713 },
3714 ];
3715
3716 for case in cases {
3717 let output =
3718 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3719 .unwrap_or_else(|error| {
3720 panic!("failed converting patch for {}: {error}", case.name)
3721 });
3722 assert_eq!(
3723 output, case.expected_variable_edit,
3724 "patch->variable_edit mismatch for {}",
3725 case.name
3726 );
3727
3728 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3729 .unwrap_or_else(|error| {
3730 panic!("failed applying variable_edit for {}: {error}", case.name)
3731 });
3732 let mut edited_by_variable_edit = case.old.to_string();
3733 edited_by_variable_edit.replace_range(edit_range, &replacement);
3734 assert_eq!(
3735 edited_by_variable_edit, case.expected_after_apply,
3736 "variable_edit apply mismatch for {}",
3737 case.name
3738 );
3739
3740 let (expected_edit_range, expected_replacement) =
3741 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3742 |error| {
3743 panic!(
3744 "failed applying expected variable_edit for {}: {error}",
3745 case.name
3746 )
3747 },
3748 );
3749 let mut edited_by_expected_variable_edit = case.old.to_string();
3750 edited_by_expected_variable_edit
3751 .replace_range(expected_edit_range, &expected_replacement);
3752 assert_eq!(
3753 edited_by_expected_variable_edit, case.expected_after_apply,
3754 "expected variable_edit apply mismatch for {}",
3755 case.name
3756 );
3757 }
3758 }
3759
3760 #[test]
3761 fn test_write_cursor_excerpt_section() {
3762 let path = Path::new("test.rs");
3763 let context = "fn main() {\n hello();\n}\n";
3764 let cursor_offset = 17;
3765 let mut prompt = String::new();
3766 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3767 assert_eq!(
3768 prompt,
3769 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3770 );
3771 }
3772 }
3773}
3774
3775/// The zeta1 prompt format
3776pub mod zeta1 {
3777 use super::*;
3778 use std::fmt::Write;
3779
3780 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3781 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3782 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3783 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3784
3785 const INSTRUCTION_HEADER: &str = concat!(
3786 "### Instruction:\n",
3787 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3788 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3789 "into account the cursor location.\n\n",
3790 "### User Edits:\n\n"
3791 );
3792 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3793 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3794
3795 /// Formats a complete zeta1 prompt from the input events and excerpt.
3796 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3797 let mut prompt = String::with_capacity(
3798 INSTRUCTION_HEADER.len()
3799 + input_events.len()
3800 + EXCERPT_HEADER.len()
3801 + input_excerpt.len()
3802 + RESPONSE_HEADER.len(),
3803 );
3804 prompt.push_str(INSTRUCTION_HEADER);
3805 prompt.push_str(input_events);
3806 prompt.push_str(EXCERPT_HEADER);
3807 prompt.push_str(input_excerpt);
3808 prompt.push_str(RESPONSE_HEADER);
3809 prompt
3810 }
3811
3812 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3813 /// editable and context byte-offset ranges within `cursor_excerpt`.
3814 pub fn format_zeta1_from_input(
3815 input: &ZetaPromptInput,
3816 editable_range: Range<usize>,
3817 context_range: Range<usize>,
3818 ) -> String {
3819 let events = format_zeta1_events(&input.events);
3820 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3821 format_zeta1_prompt(&events, &excerpt)
3822 }
3823
3824 /// Formats events in zeta1 style (oldest first).
3825 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3826 let mut result = String::new();
3827 for event in events {
3828 let event_string = format_zeta1_event(event);
3829 if event_string.is_empty() {
3830 continue;
3831 }
3832 if !result.is_empty() {
3833 result.push_str("\n\n");
3834 }
3835 result.push_str(&event_string);
3836 }
3837 result
3838 }
3839
3840 fn format_zeta1_event(event: &Event) -> String {
3841 match event {
3842 Event::BufferChange {
3843 path,
3844 old_path,
3845 diff,
3846 ..
3847 } => {
3848 let mut prompt = String::new();
3849 if old_path != path {
3850 writeln!(
3851 prompt,
3852 "User renamed {} to {}\n",
3853 old_path.display(),
3854 path.display()
3855 )
3856 .ok();
3857 }
3858 if !diff.is_empty() {
3859 write!(
3860 prompt,
3861 "User edited {}:\n```diff\n{}\n```",
3862 path.display(),
3863 diff
3864 )
3865 .ok();
3866 }
3867 prompt
3868 }
3869 }
3870 }
3871
3872 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3873 /// within `cursor_excerpt`.
3874 fn format_zeta1_excerpt(
3875 input: &ZetaPromptInput,
3876 editable_range: Range<usize>,
3877 context_range: Range<usize>,
3878 ) -> String {
3879 let path_str = input.cursor_path.to_string_lossy();
3880 let excerpt = &*input.cursor_excerpt;
3881 let cursor_offset = input.cursor_offset_in_excerpt;
3882
3883 let mut prompt = String::new();
3884 writeln!(&mut prompt, "```{path_str}").ok();
3885
3886 let starts_at_file_beginning =
3887 input.excerpt_start_row == Some(0) && context_range.start == 0;
3888 if starts_at_file_beginning {
3889 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3890 }
3891
3892 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3893
3894 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3895 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3896 prompt.push_str(CURSOR_MARKER);
3897 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3898 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3899
3900 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3901 write!(prompt, "\n```").ok();
3902
3903 prompt
3904 }
3905
3906 /// Cleans zeta1 model output by extracting content between editable region
3907 /// markers and converting the zeta1 cursor marker to the universal one.
3908 /// Returns `None` if the output doesn't contain the expected markers.
3909 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3910 let content = output.replace(CURSOR_MARKER, "");
3911
3912 let content_start = content
3913 .find(EDITABLE_REGION_START_MARKER)
3914 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3915 .map(|pos| {
3916 if content.as_bytes().get(pos) == Some(&b'\n') {
3917 pos + 1
3918 } else {
3919 pos
3920 }
3921 })
3922 .unwrap_or(0);
3923
3924 let content_end = content
3925 .find(EDITABLE_REGION_END_MARKER)
3926 .map(|pos| {
3927 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3928 pos - 1
3929 } else {
3930 pos
3931 }
3932 })
3933 .unwrap_or(content.len());
3934
3935 if content_start > content_end {
3936 return Some(String::new());
3937 }
3938
3939 let extracted = &content[content_start..content_end];
3940
3941 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3942 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3943 let text_before_cursor = text_before_cursor
3944 .find(EDITABLE_REGION_START_MARKER)
3945 .map(|pos| {
3946 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3947 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3948 after_marker + 1
3949 } else {
3950 after_marker
3951 }
3952 })
3953 .unwrap_or(0);
3954 let offset_in_extracted = zeta1_cursor_pos
3955 .saturating_sub(text_before_cursor)
3956 .min(extracted.len());
3957 offset_in_extracted
3958 });
3959
3960 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3961 if let Some(offset) = cursor_offset {
3962 result.push_str(&extracted[..offset]);
3963 result.push_str(super::CURSOR_MARKER);
3964 result.push_str(&extracted[offset..]);
3965 } else {
3966 result.push_str(extracted);
3967 }
3968
3969 Some(result)
3970 }
3971}
3972
3973#[cfg(test)]
3974mod tests {
3975 use super::*;
3976 use indoc::indoc;
3977
3978 fn make_input(
3979 cursor_excerpt: &str,
3980 editable_range: Range<usize>,
3981 cursor_offset: usize,
3982 events: Vec<Event>,
3983 related_files: Vec<RelatedFile>,
3984 ) -> ZetaPromptInput {
3985 let context_range = 0..cursor_excerpt.len();
3986 ZetaPromptInput {
3987 cursor_path: Path::new("test.rs").into(),
3988 cursor_excerpt: cursor_excerpt.into(),
3989 cursor_offset_in_excerpt: cursor_offset,
3990 excerpt_start_row: None,
3991 events: events.into_iter().map(Arc::new).collect(),
3992 related_files: Some(related_files),
3993 active_buffer_diagnostics: vec![],
3994 excerpt_ranges: ExcerptRanges {
3995 editable_150: editable_range.clone(),
3996 editable_180: editable_range.clone(),
3997 editable_350: editable_range,
3998 editable_150_context_350: context_range.clone(),
3999 editable_180_context_350: context_range.clone(),
4000 editable_350_context_150: context_range,
4001 ..Default::default()
4002 },
4003 syntax_ranges: None,
4004 experiment: None,
4005 in_open_source_repo: false,
4006 can_collect_data: false,
4007 repo_url: None,
4008 }
4009 }
4010
4011 fn make_input_with_context_range(
4012 excerpt: &str,
4013 editable_range: Range<usize>,
4014 context_range: Range<usize>,
4015 cursor_offset: usize,
4016 ) -> ZetaPromptInput {
4017 ZetaPromptInput {
4018 cursor_path: Path::new("test.rs").into(),
4019 cursor_excerpt: excerpt.into(),
4020 cursor_offset_in_excerpt: cursor_offset,
4021 excerpt_start_row: None,
4022 events: vec![],
4023 related_files: Some(vec![]),
4024 active_buffer_diagnostics: vec![],
4025 excerpt_ranges: ExcerptRanges {
4026 editable_150: editable_range.clone(),
4027 editable_180: editable_range.clone(),
4028 editable_350: editable_range,
4029 editable_150_context_350: context_range.clone(),
4030 editable_180_context_350: context_range.clone(),
4031 editable_350_context_150: context_range,
4032 ..Default::default()
4033 },
4034 syntax_ranges: None,
4035 experiment: None,
4036 in_open_source_repo: false,
4037 can_collect_data: false,
4038 repo_url: None,
4039 }
4040 }
4041
4042 fn make_event(path: &str, diff: &str) -> Event {
4043 Event::BufferChange {
4044 path: Path::new(path).into(),
4045 old_path: Path::new(path).into(),
4046 diff: diff.to_string(),
4047 predicted: false,
4048 in_open_source_repo: false,
4049 }
4050 }
4051
4052 fn make_related_file(path: &str, content: &str) -> RelatedFile {
4053 RelatedFile {
4054 path: Path::new(path).into(),
4055 max_row: content.lines().count() as u32,
4056 excerpts: vec![RelatedExcerpt {
4057 row_range: 0..content.lines().count() as u32,
4058 text: content.into(),
4059 order: 0,
4060 }],
4061 in_open_source_repo: false,
4062 }
4063 }
4064
4065 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4066 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4067 }
4068
#[test]
fn test_no_truncation_when_within_budget() {
    // With a generous budget the related file, the edit history, and the
    // cursor section are all rendered.
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
4101
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related);

    // Large budget: related files, edit history, and cursor section.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Tight budget: the edit history is gone, the related files remain.
    let without_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), without_history);
}
4152
#[test]
fn test_truncation_includes_partial_excerpts() {
    // One related file with three equally ordered excerpts.
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..10,
                text: "first excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 10..20,
                text: "second excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 20..30,
                text: "third excerpt\n".into(),
                order: 0,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Large budget: every excerpt is rendered.
    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    // Tight budget: only the first excerpt of the file is kept.
    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), first_excerpt_only);
}
4217
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // file_a carries an order-5 excerpt, file_b an order-1 excerpt. Under a
    // tight budget only the lower-order excerpt (file_b) should survive.
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // Large budget: both files are rendered, file_a before file_b.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_files);

    // Tight budget: only file_b (lower order) fits.
    // Author's estimate: the cursor section is ~37 tokens, so a budget of 52
    // leaves ~15 for related files. file_b header (7) + excerpt (7) = 14
    // tokens, which fits; file_a would need another 14, which does not.
    let high_priority_only = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 52), high_priority_only);
}
4286
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // One file holds two order-1 excerpts and one order-3 excerpt. A tight
    // budget keeps the order-1 excerpts and drops the order-3 one even though
    // they share a file. This also preserves the parent invariant: parent
    // outline items have order ≤ their best child, so they are always
    // included when any child is.
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..5,
                text: "mod header\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 5..15,
                text: "important fn\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 15..30,
                text: "less important fn\n".into(),
                order: 3,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Large budget: all three excerpts are rendered.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    // Tight budget: only the order-1 excerpts (header + important fn) remain.
    let low_order_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), low_order_only);
}
4360
#[test]
fn test_truncation_drops_older_events_first() {
    // Events are listed oldest first: old.rs, then new.rs.
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Large budget: both events are rendered.
    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_events);

    // Tight budget: the older event is dropped, the newer one kept.
    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), newest_event_only);
}
4406
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related);

    // A budget of 30 leaves nothing but the cursor section.
    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 30), cursor_section_only);
}
4429
4430 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4431 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4432 }
4433
4434 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4435 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4436 }
4437
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related);

    // The suffix section comes first; the prefix section carries the related
    // file, the edit history, and the cursor file up to the editable region.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4470
#[test]
fn test_seed_coder_no_context() {
    // No events and no related files: only the cursor file is rendered.
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4488
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related);

    // Large budget: related file, edit history, and cursor section.
    let with_context = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), with_context);

    // Tight budget: the context is dropped, the cursor section remains.
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_seed_coder_with_budget(&input, 30),
        cursor_section_only
    );
}
4532
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // Large budget: both files are rendered, in the order they were supplied
    // (low_prio.rs before high_prio.rs).
    let both_files = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), both_files);

    // Tight budget: only high_prio is included.
    // Author's estimate: the cursor sections cost 25 tokens, so a budget of
    // 44 leaves 19 for related files. high_prio header (7) + excerpt (3) = 10
    // fits; low_prio would add 10 more = 20 > 19.
    let high_prio_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 44), high_prio_only);
}
4598
#[test]
fn test_format_zeta1_from_input_basic() {
    // The byte offsets below (editable 15..41, cursor 30) assume the body of
    // `foo` is indented with four spaces: "fn before() {}\n" is 15 bytes,
    // "fn foo() {\n" ends at 26, and the cursor sits right after the indent.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        ..make_input_with_context_range(excerpt, 15..41, 0..excerpt.len(), 30)
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    // The excerpt starts at row 0 and the context starts at offset 0, so the
    // start-of-file marker is expected. The editable region ends right after
    // "let x = 1;\n", which is why the end marker is followed by "}".
    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    assert_eq!(prompt, expected);
}
4663
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // With a four-space indent the excerpt is exactly 28 bytes, matching the
    // 0..28 ranges; the cursor offset 15 is right after that indent.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(10),
        ..make_input_with_context_range(excerpt, 0..28, 0..28, 15)
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    // The excerpt starts at row 10, so no start-of-file marker is expected,
    // and with no events the "User Edits" section is empty.
    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    assert_eq!(prompt, expected);
}
4720
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Offsets assume a four-space indent: "// prefix\n" is 10 bytes,
    // "fn foo() {\n" ends at 21, the cursor (25) follows the indent, and the
    // editable range 10..37 ends right after the closing brace.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        excerpt_start_row: Some(0),
        ..make_input_with_context_range(
            excerpt,
            editable_range.clone(),
            context_range.clone(),
            25,
        )
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    // The editable region is a strict sub-range of the context, so text is
    // expected both before the start marker and after the end marker.
    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```test.rs\n",
        "<|start_of_file|>\n",
        "// prefix\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "<|editable_region_end|>\n",
        "// suffix\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    assert_eq!(prompt, expected);
}
4783
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The body line keeps a four-space indent relative to the other lines so
    // that the input and the expected text agree on whitespace.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    // The markers and the newlines adjacent to them are stripped.
    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
4797
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line keeps a four-space indent relative to the other lines
    // so that the input and the expected text agree on whitespace.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    // The zeta1 cursor marker is replaced by the universal one in place.
    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
4814
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    // Output without region markers is passed through unchanged.
    let raw = "fn main() {}\n";
    assert_eq!(
        zeta1::clean_zeta1_model_output(raw).as_deref(),
        Some("fn main() {}\n")
    );
}
4821
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    // A region holding nothing but the separating newline cleans to "".
    let raw = "<|editable_region_start|>\n<|editable_region_end|>\n";
    assert_eq!(zeta1::clean_zeta1_model_output(raw).as_deref(), Some(""));
}
4828
4829 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4830 let mut result = excerpt.to_string();
4831 result.replace_range(
4832 parsed_output.range_in_excerpt.clone(),
4833 &parsed_output.new_editable_region,
4834 );
4835 result
4836 }
4837
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // Context spans "ctx start" up to "after ctx"; the editable region is the
    // single "editable old" line, with the cursor at its start.
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Only the editable line changes; the surrounding text is untouched.
    assert_eq!(
        apply_edit(excerpt, &parsed),
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
4864
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";

    // The editable region covers the "bbb" and "ccc" lines.
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_range = editable_start..excerpt.find("ddd").unwrap();
    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        editable_start,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    // Echoing the editable region back must leave the excerpt unchanged.
    assert_eq!(apply_edit(excerpt, &parsed), excerpt);
}
4883
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole = 0..excerpt.len();
    let input = make_input_with_context_range(excerpt, whole.clone(), whole, 0);
    let format = ZetaFormat::V0131GitMergeMarkersPrefix;

    let with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let without_marker = parse_zeta2_model_output("new content\n", format, &input).unwrap();

    // The trailing end marker must not affect the resulting edit.
    let edited_with_marker = apply_edit(excerpt, &with_marker);
    assert_eq!(edited_with_marker, apply_edit(excerpt, &without_marker));
    assert_eq!(edited_with_marker, "new content\n");
}
4897}