1//! Zeta2 prompt planning and generation code shared with cloud.
2pub mod retrieval_prompt;
3
4use anyhow::{Context as _, Result, anyhow};
5use cloud_llm_client::predict_edits_v3::{
6 self, DiffPathFmt, Event, Excerpt, IncludedFile, Line, Point, PromptFormat,
7 ReferencedDeclaration,
8};
9use indoc::indoc;
10use ordered_float::OrderedFloat;
11use rustc_hash::{FxHashMap, FxHashSet};
12use serde::Serialize;
13use std::cmp;
14use std::fmt::Write;
15use std::sync::Arc;
16use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
17use strum::{EnumIter, IntoEnumIterator};
18
/// Byte budget used by `SyntaxBasedPrompt::populate` when the request does not specify
/// `prompt_max_bytes`.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker inserted into the excerpt text at the request's cursor point.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Marker inserted at the start of the editable region for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// Marker inserted at the end of the editable region for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
26
27// TODO: use constants for markers?
28const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
29 You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
30
31 The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>. Please respond with edited code for that region.
32
33 Other code is provided for context, and `…` indicates when code has been skipped.
34
35 ## Edit History
36
37"};
38
39const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
40 You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.
41
42 Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).
43
44 The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.
45
46 Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:
47
48 <|current_section|>
49 for i in 0..16 {
50 println!("{i}");
51 }
52
53 ## Edit History
54
55"#};
56
57const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
58 # Instructions
59
60 You are an edit prediction agent in a code editor.
61 Your job is to predict the next edit that the user will make,
62 based on their last few edits and their current cursor location.
63
64 ## Output Format
65
66 You must briefly explain your understanding of the user's goal, in one
67 or two sentences, and then specify their next edit in the form of a
68 unified diff, like this:
69
70 ```
71 --- a/src/myapp/cli.py
72 +++ b/src/myapp/cli.py
73 @@ ... @@
74 import os
75 import time
76 import sys
77 +from constants import LOG_LEVEL_WARNING
78 @@ ... @@
79 config.headless()
80 config.set_interactive(false)
81 -config.set_log_level(LOG_L)
82 +config.set_log_level(LOG_LEVEL_WARNING)
83 config.set_use_color(True)
84 ```
85
86 ## Edit History
87
88"#};
89
90const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
91 You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.
92
93 ## Edit History
94
95 "#};
96
97const UNIFIED_DIFF_REMINDER: &str = indoc! {"
98 ---
99
100 Analyze the edit history and the files, then provide the unified diff for your predicted edits.
101 Do not include the cursor marker in your output.
102 Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
103 Do not include line numbers in the hunk headers, use `@@ ... @@`.
104 Removed lines begin with `-`.
105 Added lines begin with `+`.
106 Context lines begin with an extra space.
107 Context and removed lines are used to match the target edit location, so make sure to include enough of them
108 to uniquely identify it amongst all excerpts of code provided.
109"};
110
111const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
112 ---
113
114 Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
115 Do not include the cursor marker in your output.
116 If you're editing multiple files, be sure to reflect filename in the hunk's header.
117 "};
118
119const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
120 # Instructions
121
122 You are an edit prediction agent in a code editor.
123
124 Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
125 Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
126 Always continue along the user's current trajectory, rather than changing course.
127
128 ## Output Format
129
130 You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
131 along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
132
133 <edits path="my-project/src/myapp/cli.py">
134 <old_text>
135 OLD TEXT 1 HERE
136 </old_text>
137 <new_text>
138 NEW TEXT 1 HERE
139 </new_text>
140
141 <old_text>
142 OLD TEXT 1 HERE
143 </old_text>
144 <new_text>
145 NEW TEXT 1 HERE
146 </new_text>
147 </edits>
148
149 - Specify the file to edit using the `path` attribute.
150 - Use `<old_text>` and `<new_text>` tags to replace content
151 - `<old_text>` must exactly match existing file content, including indentation
152 - `<old_text>` cannot be empty
153 - Do not escape quotes, newlines, or other characters within tags
154 - Always close all tags properly
155 - Don't include the <|user_cursor|> marker in your output.
156
157 ## Edit History
158
159"#};
160
161const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
162 ---
163
164 Remember that the edits in the edit history have already been applied.
165"#};
166
167pub fn build_prompt(
168 request: &predict_edits_v3::PredictEditsRequest,
169) -> Result<(String, SectionLabels)> {
170 let mut section_labels = Default::default();
171
172 let prompt_data = PromptData {
173 events: request.events.clone(),
174 cursor_point: request.cursor_point,
175 cursor_path: request.excerpt_path.clone(),
176 included_files: request.included_files.clone(),
177 };
178 match request.prompt_format {
179 PromptFormat::MinimalQwen => {
180 return Ok((MinimalQwenPrompt.render(&prompt_data), section_labels));
181 }
182 PromptFormat::SeedCoder1120 => {
183 return Ok((SeedCoder1120Prompt.render(&prompt_data), section_labels));
184 }
185 _ => (),
186 };
187
188 let mut insertions = match request.prompt_format {
189 PromptFormat::MarkedExcerpt => vec![
190 (
191 Point {
192 line: request.excerpt_line_range.start,
193 column: 0,
194 },
195 EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
196 ),
197 (request.cursor_point, CURSOR_MARKER),
198 (
199 Point {
200 line: request.excerpt_line_range.end,
201 column: 0,
202 },
203 EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
204 ),
205 ],
206 PromptFormat::LabeledSections
207 | PromptFormat::NumLinesUniDiff
208 | PromptFormat::Minimal
209 | PromptFormat::OldTextNewText => {
210 vec![(request.cursor_point, CURSOR_MARKER)]
211 }
212 PromptFormat::OnlySnippets => vec![],
213 PromptFormat::MinimalQwen => unreachable!(),
214 PromptFormat::SeedCoder1120 => unreachable!(),
215 };
216
217 let mut prompt = match request.prompt_format {
218 PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
219 PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
220 PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
221 PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
222 PromptFormat::OnlySnippets => String::new(),
223 PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
224 PromptFormat::MinimalQwen => unreachable!(),
225 PromptFormat::SeedCoder1120 => unreachable!(),
226 };
227
228 if request.events.is_empty() {
229 prompt.push_str("(No edit history)\n\n");
230 } else {
231 let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
232 "The following are the latest edits made by the user, from earlier to later.\n\n"
233 } else {
234 "Here are the latest edits made by the user, from earlier to later.\n\n"
235 };
236 prompt.push_str(edit_preamble);
237 push_events(&mut prompt, &request.events);
238 }
239
240 let excerpts_preamble = match request.prompt_format {
241 PromptFormat::Minimal => indoc! {"
242 ## Part of the file under the cursor
243
244 (The cursor marker <|user_cursor|> indicates the current user cursor position.
245 The file is in current state, edits from edit history has been applied.
246 We only show part of the file around the cursor.
247 You can only edit exactly this part of the file.
248 We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
249 "},
250 PromptFormat::NumLinesUniDiff | PromptFormat::OldTextNewText => indoc! {"
251 ## Code Excerpts
252
253 Here is some excerpts of code that you should take into account to predict the next edit.
254
255 The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
256
257 In addition other excerpts are included to better understand what the edit will be, including the declaration
258 or references of symbols around the cursor, or other similar code snippets that may need to be updated
259 following patterns that appear in the edit history.
260
261 Consider each of them carefully in relation to the edit history, and that the user may not have navigated
262 to the next place they want to edit yet.
263
264 Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
265 "},
266 _ => indoc! {"
267 ## Code Excerpts
268
269 The cursor marker <|user_cursor|> indicates the current user cursor position.
270 The file is in current state, edits from edit history have been applied.
271 "},
272 };
273
274 prompt.push_str(excerpts_preamble);
275 prompt.push('\n');
276
277 if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
278 let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
279 section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
280 } else {
281 if request.prompt_format == PromptFormat::LabeledSections {
282 anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
283 }
284
285 let include_line_numbers = matches!(
286 request.prompt_format,
287 PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
288 );
289 for related_file in &request.included_files {
290 if request.prompt_format == PromptFormat::Minimal {
291 write_codeblock_with_filename(
292 &related_file.path,
293 &related_file.excerpts,
294 if related_file.path == request.excerpt_path {
295 &insertions
296 } else {
297 &[]
298 },
299 related_file.max_row,
300 include_line_numbers,
301 &mut prompt,
302 );
303 } else {
304 write_codeblock(
305 &related_file.path,
306 &related_file.excerpts,
307 if related_file.path == request.excerpt_path {
308 &insertions
309 } else {
310 &[]
311 },
312 related_file.max_row,
313 include_line_numbers,
314 &mut prompt,
315 );
316 }
317 }
318 }
319
320 match request.prompt_format {
321 PromptFormat::NumLinesUniDiff => {
322 prompt.push_str(UNIFIED_DIFF_REMINDER);
323 }
324 PromptFormat::OldTextNewText => {
325 prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
326 }
327 PromptFormat::Minimal => {
328 prompt.push_str(MINIMAL_PROMPT_REMINDER);
329 }
330 _ => {}
331 }
332
333 Ok((prompt, section_labels))
334}
335
336pub fn generation_params(prompt_format: PromptFormat) -> GenerationParams {
337 match prompt_format {
338 PromptFormat::SeedCoder1120 => SeedCoder1120Prompt::generation_params(),
339 _ => GenerationParams::default(),
340 }
341}
342
343pub fn write_codeblock<'a>(
344 path: &Path,
345 excerpts: impl IntoIterator<Item = &'a Excerpt>,
346 sorted_insertions: &[(Point, &str)],
347 file_line_count: Line,
348 include_line_numbers: bool,
349 output: &'a mut String,
350) {
351 writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
352
353 write_excerpts(
354 excerpts,
355 sorted_insertions,
356 file_line_count,
357 include_line_numbers,
358 output,
359 );
360 write!(output, "`````\n\n").unwrap();
361}
362
363fn write_codeblock_with_filename<'a>(
364 path: &Path,
365 excerpts: impl IntoIterator<Item = &'a Excerpt>,
366 sorted_insertions: &[(Point, &str)],
367 file_line_count: Line,
368 include_line_numbers: bool,
369 output: &'a mut String,
370) {
371 writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
372
373 write_excerpts(
374 excerpts,
375 sorted_insertions,
376 file_line_count,
377 include_line_numbers,
378 output,
379 );
380 write!(output, "`````\n\n").unwrap();
381}
382
383pub fn write_excerpts<'a>(
384 excerpts: impl IntoIterator<Item = &'a Excerpt>,
385 sorted_insertions: &[(Point, &str)],
386 file_line_count: Line,
387 include_line_numbers: bool,
388 output: &mut String,
389) {
390 let mut current_row = Line(0);
391 let mut sorted_insertions = sorted_insertions.iter().peekable();
392
393 for excerpt in excerpts {
394 if excerpt.start_line > current_row {
395 writeln!(output, "…").unwrap();
396 }
397 if excerpt.text.is_empty() {
398 return;
399 }
400
401 current_row = excerpt.start_line;
402
403 for mut line in excerpt.text.lines() {
404 if include_line_numbers {
405 write!(output, "{}|", current_row.0 + 1).unwrap();
406 }
407
408 while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
409 match current_row.cmp(&insertion_location.line) {
410 cmp::Ordering::Equal => {
411 let (prefix, suffix) = line.split_at(insertion_location.column as usize);
412 output.push_str(prefix);
413 output.push_str(insertion_marker);
414 line = suffix;
415 sorted_insertions.next();
416 }
417 cmp::Ordering::Less => break,
418 cmp::Ordering::Greater => {
419 sorted_insertions.next();
420 break;
421 }
422 }
423 }
424 output.push_str(line);
425 output.push('\n');
426 current_row.0 += 1;
427 }
428 }
429
430 if current_row < file_line_count {
431 writeln!(output, "…").unwrap();
432 }
433}
434
435pub fn push_events(output: &mut String, events: &[Arc<predict_edits_v3::Event>]) {
436 if events.is_empty() {
437 return;
438 };
439
440 writeln!(output, "`````diff").unwrap();
441 for event in events {
442 writeln!(output, "{}", event).unwrap();
443 }
444 writeln!(output, "`````\n").unwrap();
445}
446
/// Plan of the context snippets to include in a prompt, built by `populate` from the request's
/// referenced declarations and parent signatures, and rendered by `write`.
pub struct SyntaxBasedPrompt<'a> {
    /// The request this plan was built for.
    request: &'a predict_edits_v3::PredictEditsRequest,
    /// Snippets to include in the prompt. These may overlap - overlapping ranges are merged when
    /// the plan is rendered by `write`.
    snippets: Vec<PlannedSnippet<'a>>,
    /// Number of bytes of the prompt budget consumed so far (starts at the excerpt's length).
    budget_used: usize,
}
454
/// A piece of file text selected for inclusion in the prompt.
#[derive(Clone, Debug)]
pub struct PlannedSnippet<'a> {
    /// Path of the file the snippet comes from.
    path: Arc<Path>,
    /// Line range covered by the snippet.
    range: Range<Line>,
    /// Snippet text, borrowed from the request.
    text: &'a str,
    // TODO: Indicate this in the output
    #[allow(dead_code)]
    text_is_truncated: bool,
}
464
/// How much of a referenced declaration is included in the prompt.
#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
pub enum DeclarationStyle {
    /// Only the part of the declaration text covered by its `signature_range`.
    Signature,
    /// The full declaration text.
    Declaration,
}
470
/// Describes the sections written by `SyntaxBasedPrompt::write`.
#[derive(Default, Clone, Debug, Serialize)]
pub struct SectionLabels {
    /// Index into `section_ranges` of the section in which an excerpt-file insertion (such as
    /// the cursor marker) was placed; always 0 for `PromptFormat::OnlySnippets`.
    pub excerpt_index: usize,
    /// File path and line range of every section, in the order the sections were written.
    pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
}
476
477impl<'a> SyntaxBasedPrompt<'a> {
478 /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
479 ///
480 /// Initializes a priority queue by populating it with each snippet, finding the
481 /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
482 /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
483 /// the cost of upgrade.
484 ///
485 /// TODO: Implement an early halting condition. One option might be to have another priority
486 /// queue where the score is the size, and update it accordingly. Another option might be to
487 /// have some simpler heuristic like bailing after N failed insertions, or based on how much
488 /// budget is left.
489 ///
490 /// TODO: Has the current known sources of imprecision:
491 ///
492 /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
493 /// plan even though the containing struct is already included.
494 ///
495 /// * Does not consider cost of signatures when ranking snippets - this is tricky since
496 /// signatures may be shared by multiple snippets.
497 ///
498 /// * Does not include file paths / other text when considering max_bytes.
499 pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
500 let mut this = Self {
501 request,
502 snippets: Vec::new(),
503 budget_used: request.excerpt.len(),
504 };
505 let mut included_parents = FxHashSet::default();
506 let additional_parents = this.additional_parent_signatures(
507 &request.excerpt_path,
508 request.excerpt_parent,
509 &included_parents,
510 )?;
511 this.add_parents(&mut included_parents, additional_parents);
512
513 let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
514
515 if this.budget_used > max_bytes {
516 return Err(anyhow!(
517 "Excerpt + signatures size of {} already exceeds budget of {}",
518 this.budget_used,
519 max_bytes
520 ));
521 }
522
523 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
524 struct QueueEntry {
525 score_density: OrderedFloat<f32>,
526 declaration_index: usize,
527 style: DeclarationStyle,
528 }
529
530 // Initialize priority queue with the best score for each snippet.
531 let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
532 for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
533 let (style, score_density) = DeclarationStyle::iter()
534 .map(|style| {
535 (
536 style,
537 OrderedFloat(declaration_score_density(&declaration, style)),
538 )
539 })
540 .max_by_key(|(_, score_density)| *score_density)
541 .unwrap();
542 queue.push(QueueEntry {
543 score_density,
544 declaration_index,
545 style,
546 });
547 }
548
549 // Knapsack selection loop
550 while let Some(queue_entry) = queue.pop() {
551 let Some(declaration) = request
552 .referenced_declarations
553 .get(queue_entry.declaration_index)
554 else {
555 return Err(anyhow!(
556 "Invalid declaration index {}",
557 queue_entry.declaration_index
558 ));
559 };
560
561 let mut additional_bytes = declaration_size(declaration, queue_entry.style);
562 if this.budget_used + additional_bytes > max_bytes {
563 continue;
564 }
565
566 let additional_parents = this.additional_parent_signatures(
567 &declaration.path,
568 declaration.parent_index,
569 &mut included_parents,
570 )?;
571 additional_bytes += additional_parents
572 .iter()
573 .map(|(_, snippet)| snippet.text.len())
574 .sum::<usize>();
575 if this.budget_used + additional_bytes > max_bytes {
576 continue;
577 }
578
579 this.budget_used += additional_bytes;
580 this.add_parents(&mut included_parents, additional_parents);
581 let planned_snippet = match queue_entry.style {
582 DeclarationStyle::Signature => {
583 let Some(text) = declaration.text.get(declaration.signature_range.clone())
584 else {
585 return Err(anyhow!(
586 "Invalid declaration signature_range {:?} with text.len() = {}",
587 declaration.signature_range,
588 declaration.text.len()
589 ));
590 };
591 let signature_start_line = declaration.range.start
592 + Line(
593 declaration.text[..declaration.signature_range.start]
594 .lines()
595 .count() as u32,
596 );
597 let signature_end_line = signature_start_line
598 + Line(
599 declaration.text
600 [declaration.signature_range.start..declaration.signature_range.end]
601 .lines()
602 .count() as u32,
603 );
604 let range = signature_start_line..signature_end_line;
605
606 PlannedSnippet {
607 path: declaration.path.clone(),
608 range,
609 text,
610 text_is_truncated: declaration.text_is_truncated,
611 }
612 }
613 DeclarationStyle::Declaration => PlannedSnippet {
614 path: declaration.path.clone(),
615 range: declaration.range.clone(),
616 text: &declaration.text,
617 text_is_truncated: declaration.text_is_truncated,
618 },
619 };
620 this.snippets.push(planned_snippet);
621
622 // When a Signature is consumed, insert an entry for Definition style.
623 if queue_entry.style == DeclarationStyle::Signature {
624 let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
625 let declaration_size =
626 declaration_size(&declaration, DeclarationStyle::Declaration);
627 let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
628 let declaration_score =
629 declaration_score(&declaration, DeclarationStyle::Declaration);
630
631 let score_diff = declaration_score - signature_score;
632 let size_diff = declaration_size.saturating_sub(signature_size);
633 if score_diff > 0.0001 && size_diff > 0 {
634 queue.push(QueueEntry {
635 declaration_index: queue_entry.declaration_index,
636 score_density: OrderedFloat(score_diff / (size_diff as f32)),
637 style: DeclarationStyle::Declaration,
638 });
639 }
640 }
641 }
642
643 anyhow::Ok(this)
644 }
645
646 fn add_parents(
647 &mut self,
648 included_parents: &mut FxHashSet<usize>,
649 snippets: Vec<(usize, PlannedSnippet<'a>)>,
650 ) {
651 for (parent_index, snippet) in snippets {
652 included_parents.insert(parent_index);
653 self.budget_used += snippet.text.len();
654 self.snippets.push(snippet);
655 }
656 }
657
658 fn additional_parent_signatures(
659 &self,
660 path: &Arc<Path>,
661 parent_index: Option<usize>,
662 included_parents: &FxHashSet<usize>,
663 ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
664 let mut results = Vec::new();
665 self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
666 Ok(results)
667 }
668
669 fn additional_parent_signatures_impl(
670 &self,
671 path: &Arc<Path>,
672 parent_index: Option<usize>,
673 included_parents: &FxHashSet<usize>,
674 results: &mut Vec<(usize, PlannedSnippet<'a>)>,
675 ) -> Result<()> {
676 let Some(parent_index) = parent_index else {
677 return Ok(());
678 };
679 if included_parents.contains(&parent_index) {
680 return Ok(());
681 }
682 let Some(parent_signature) = self.request.signatures.get(parent_index) else {
683 return Err(anyhow!("Invalid parent index {}", parent_index));
684 };
685 results.push((
686 parent_index,
687 PlannedSnippet {
688 path: path.clone(),
689 range: parent_signature.range.clone(),
690 text: &parent_signature.text,
691 text_is_truncated: parent_signature.text_is_truncated,
692 },
693 ));
694 self.additional_parent_signatures_impl(
695 path,
696 parent_signature.parent_index,
697 included_parents,
698 results,
699 )
700 }
701
    /// Renders the planned context into `prompt`. Each file starts with a line of five
    /// backticks followed by `path=FILE_PATH` and ends with a line of five backticks, with a
    /// blank line after each file. Depending on the prompt format, disjoint chunks are
    /// separated by a `…` line or introduced by a section label.
    ///
    /// The file containing the cursor is written last, and the request's excerpt is added to
    /// it as an extra snippet. Insertions from `excerpt_file_insertions` are spliced into that
    /// file's text and removed from the vector as they are written.
    ///
    /// Returns the labels of the written sections, or an error from `push_file_snippets`.
    pub fn write(
        &'a self,
        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
        prompt: &mut String,
    ) -> Result<SectionLabels> {
        // Group the planned snippets by file.
        let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
            FxHashMap::default();
        for snippet in &self.snippets {
            file_to_snippets
                .entry(&snippet.path)
                .or_default()
                .push(snippet);
        }

        // Reorder so that file with cursor comes last
        let mut file_snippets = Vec::new();
        let mut excerpt_file_snippets = Vec::new();
        for (file_path, snippets) in file_to_snippets {
            if file_path == self.request.excerpt_path.as_ref() {
                excerpt_file_snippets = snippets;
            } else {
                file_snippets.push((file_path, snippets, false));
            }
        }
        // The excerpt itself is always part of the cursor file's snippets.
        let excerpt_snippet = PlannedSnippet {
            path: self.request.excerpt_path.clone(),
            range: self.request.excerpt_line_range.clone(),
            text: &self.request.excerpt,
            text_is_truncated: false,
        };
        excerpt_file_snippets.push(&excerpt_snippet);
        file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));

        let section_labels =
            self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;

        Ok(section_labels)
    }
743
    /// Writes the snippets of every file to `output` and records the written sections.
    ///
    /// `file_snippets` holds one `(path, snippets, is_excerpt_file)` entry per file. For each
    /// file the snippets are sorted, snippets with touching or overlapping line ranges are
    /// merged, and each resulting section is written inside a five-backtick fence labelled
    /// `path=<path>`. In the excerpt file, markers from `excerpt_file_insertions` are spliced
    /// into the text and removed from the vector once written.
    ///
    /// Returns the path and range of every section plus the index of the section that received
    /// an insertion. Fails with "bug: no snippet found for excerpt" when no section received
    /// one, unless the format is `PromptFormat::OnlySnippets`.
    fn push_file_snippets(
        &self,
        output: &mut String,
        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
        file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
    ) -> Result<SectionLabels> {
        let mut section_ranges = Vec::new();
        let mut excerpt_index = None;

        for (file_path, mut snippets, is_excerpt_file) in file_snippets {
            // Earliest start first; for equal starts the snippet reaching furthest comes first.
            snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));

            // Merge snippets whose ranges touch or overlap. The merged entry keeps the first
            // snippet and a range widened to cover the others.
            // NOTE(review): only the first snippet's text is written for a merged entry, even
            // when the widened range extends past that text - confirm this is intended.
            // TODO: What if the snippets get expanded too large to be editable?
            let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
            let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
            for snippet in snippets {
                if let Some((_, current_snippet_range)) = current_snippet.as_mut()
                    && snippet.range.start <= current_snippet_range.end
                {
                    current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
                    continue;
                }
                if let Some(current_snippet) = current_snippet.take() {
                    disjoint_snippets.push(current_snippet);
                }
                current_snippet = Some((snippet, snippet.range.clone()));
            }
            if let Some(current_snippet) = current_snippet.take() {
                disjoint_snippets.push(current_snippet);
            }

            writeln!(output, "`````path={}", file_path.display()).ok();
            // Set when the previous section was omitted, so that no `…` is written for it.
            let mut skipped_last_snippet = false;
            for (snippet, range) in disjoint_snippets {
                let section_index = section_ranges.len();

                // Section separator: a `…` line, or a section label for `LabeledSections`.
                match self.request.prompt_format {
                    PromptFormat::MarkedExcerpt
                    | PromptFormat::OnlySnippets
                    | PromptFormat::OldTextNewText
                    | PromptFormat::Minimal
                    | PromptFormat::NumLinesUniDiff => {
                        if range.start.0 > 0 && !skipped_last_snippet {
                            output.push_str("…\n");
                        }
                    }
                    PromptFormat::LabeledSections => {
                        // The section that covers the whole excerpt range is the current one.
                        if is_excerpt_file
                            && range.start <= self.request.excerpt_line_range.start
                            && range.end >= self.request.excerpt_line_range.end
                        {
                            writeln!(output, "<|current_section|>").ok();
                        } else {
                            writeln!(output, "<|section_{}|>", section_index).ok();
                        }
                    }
                    PromptFormat::MinimalQwen => unreachable!(),
                    PromptFormat::SeedCoder1120 => unreachable!(),
                }

                // Writes the snippet text without insertions: line-numbered for
                // `NumLinesUniDiff`, verbatim otherwise.
                let push_full_snippet = |output: &mut String| {
                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
                        for (i, line) in snippet.text.lines().enumerate() {
                            writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
                        }
                    } else {
                        output.push_str(&snippet.text);
                    }
                    anyhow::Ok(())
                };

                if is_excerpt_file {
                    if self.request.prompt_format == PromptFormat::OnlySnippets {
                        // Sections lying entirely inside the excerpt range are omitted.
                        if range.start >= self.request.excerpt_line_range.start
                            && range.end <= self.request.excerpt_line_range.end
                        {
                            skipped_last_snippet = true;
                        } else {
                            skipped_last_snippet = false;
                            output.push_str(snippet.text);
                        }
                    } else if !excerpt_file_insertions.is_empty() {
                        let lines = snippet.text.lines().collect::<Vec<_>>();
                        // Writes one unmodified line, line-numbered for `NumLinesUniDiff`.
                        let push_line = |output: &mut String, line_ix: usize| {
                            if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
                                write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
                            }
                            anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
                        };
                        // Index of the first line of `lines` that has not been written yet.
                        let mut last_line_ix = 0;
                        let mut insertion_ix = 0;
                        while insertion_ix < excerpt_file_insertions.len() {
                            let (point, insertion) = &excerpt_file_insertions[insertion_ix];
                            // The range end is included so that a marker placed on the line
                            // just past the section is still written.
                            let found = point.line >= range.start && point.line <= range.end;
                            if found {
                                excerpt_index = Some(section_index);
                                let insertion_line_ix = (point.line.0 - range.start.0) as usize;
                                // Write the untouched lines up to the insertion line.
                                for line_ix in last_line_ix..insertion_line_ix {
                                    push_line(output, line_ix)?;
                                }
                                if let Some(next_line) = lines.get(insertion_line_ix) {
                                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
                                        write!(
                                            output,
                                            "{}|",
                                            insertion_line_ix as u32 + range.start.0 + 1
                                        )?
                                    }
                                    // Split the line at the insertion column (a byte offset).
                                    // NOTE(review): a second insertion on the same line writes
                                    // that line again - confirm callers never produce this.
                                    output.push_str(&next_line[..point.column as usize]);
                                    output.push_str(insertion);
                                    writeln!(output, "{}", &next_line[point.column as usize..])?;
                                } else {
                                    // The insertion line lies past the snippet text, so the
                                    // marker is written on a line of its own.
                                    writeln!(output, "{}", insertion)?;
                                }
                                last_line_ix = insertion_line_ix + 1;
                                // Consumed: removing the entry shifts the next one into
                                // `insertion_ix`, hence no increment.
                                excerpt_file_insertions.remove(insertion_ix);
                                continue;
                            }
                            insertion_ix += 1;
                        }
                        skipped_last_snippet = false;
                        // Write the lines that follow the last insertion.
                        for line_ix in last_line_ix..lines.len() {
                            push_line(output, line_ix)?;
                        }
                    } else {
                        skipped_last_snippet = false;
                        push_full_snippet(output)?;
                    }
                } else {
                    skipped_last_snippet = false;
                    push_full_snippet(output)?;
                }

                section_ranges.push((snippet.path.clone(), range));
            }

            output.push_str("`````\n\n");
        }

        Ok(SectionLabels {
            // TODO: Clean this up
            excerpt_index: match self.request.prompt_format {
                PromptFormat::OnlySnippets => 0,
                _ => excerpt_index.context("bug: no snippet found for excerpt")?,
            },
            section_ranges,
        })
    }
892}
893
894fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
895 declaration_score(declaration, style) / declaration_size(declaration, style) as f32
896}
897
898fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
899 match style {
900 DeclarationStyle::Signature => declaration.signature_score,
901 DeclarationStyle::Declaration => declaration.declaration_score,
902 }
903}
904
905fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
906 match style {
907 DeclarationStyle::Signature => declaration.signature_range.len(),
908 DeclarationStyle::Declaration => declaration.text.len(),
909 }
910}
911
/// Owned copy of the request data that a `PromptFormatter` renders.
struct PromptData {
    /// Edit history events, written by `push_events`.
    events: Vec<Arc<Event>>,
    /// Position of the cursor within the file at `cursor_path`.
    cursor_point: Point,
    /// Path of the file containing the cursor.
    cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
    /// Files whose excerpts are included in the prompt.
    included_files: Vec<IncludedFile>,
}
918
/// Optional generation parameters associated with a prompt format; every field defaults to
/// `None`.
#[derive(Default)]
pub struct GenerationParams {
    /// Sampling temperature — presumably forwarded to the model request; confirm at call site.
    pub temperature: Option<f32>,
    /// Top-p value — presumably forwarded to the model request; confirm at call site.
    pub top_p: Option<f32>,
    /// Stop sequences — presumably forwarded to the model request; confirm at call site.
    pub stop: Option<Vec<String>>,
}
925
926trait PromptFormatter {
927 fn render(&self, data: &PromptData) -> String;
928
929 fn generation_params() -> GenerationParams {
930 return GenerationParams::default();
931 }
932}
933
934struct MinimalQwenPrompt;
935
936impl PromptFormatter for MinimalQwenPrompt {
937 fn render(&self, data: &PromptData) -> String {
938 let edit_history = self.fmt_edit_history(data);
939 let context = self.fmt_context(data);
940
941 format!(
942 "{instructions}\n\n{edit_history}\n\n{context}",
943 instructions = MinimalQwenPrompt::INSTRUCTIONS,
944 edit_history = edit_history,
945 context = context
946 )
947 }
948}
949
950impl MinimalQwenPrompt {
951 const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
952
953 fn fmt_edit_history(&self, data: &PromptData) -> String {
954 if data.events.is_empty() {
955 "(No edit history)\n\n".to_string()
956 } else {
957 let mut events_str = String::new();
958 push_events(&mut events_str, &data.events);
959 format!(
960 "The following are the latest edits made by the user, from earlier to later.\n\n{}",
961 events_str
962 )
963 }
964 }
965
966 fn fmt_context(&self, data: &PromptData) -> String {
967 let mut context = String::new();
968 let include_line_numbers = true;
969
970 for related_file in &data.included_files {
971 writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
972
973 if related_file.path == data.cursor_path {
974 write!(context, "<|fim_prefix|>").unwrap();
975 write_excerpts(
976 &related_file.excerpts,
977 &[(data.cursor_point, "<|fim_suffix|>")],
978 related_file.max_row,
979 include_line_numbers,
980 &mut context,
981 );
982 writeln!(context, "<|fim_middle|>").unwrap();
983 } else {
984 write_excerpts(
985 &related_file.excerpts,
986 &[],
987 related_file.max_row,
988 include_line_numbers,
989 &mut context,
990 );
991 }
992 }
993 context
994 }
995}
996
997struct SeedCoder1120Prompt;
998
999impl PromptFormatter for SeedCoder1120Prompt {
1000 fn render(&self, data: &PromptData) -> String {
1001 let edit_history = self.fmt_edit_history(data);
1002 let context = self.fmt_context(data);
1003
1004 format!(
1005 "# Edit History:\n{edit_history}\n\n{context}",
1006 edit_history = edit_history,
1007 context = context
1008 )
1009 }
1010
1011 fn generation_params() -> GenerationParams {
1012 GenerationParams {
1013 temperature: Some(0.2),
1014 top_p: Some(0.9),
1015 stop: Some(vec!["<[end_of_sentence]>".into()]),
1016 }
1017 }
1018}
1019
1020impl SeedCoder1120Prompt {
1021 fn fmt_edit_history(&self, data: &PromptData) -> String {
1022 if data.events.is_empty() {
1023 "(No edit history)\n\n".to_string()
1024 } else {
1025 let mut events_str = String::new();
1026 push_events(&mut events_str, &data.events);
1027 events_str
1028 }
1029 }
1030
1031 fn fmt_context(&self, data: &PromptData) -> String {
1032 let mut context = String::new();
1033 let include_line_numbers = true;
1034
1035 for related_file in &data.included_files {
1036 writeln!(context, "# Path: {}\n", DiffPathFmt(&related_file.path)).unwrap();
1037
1038 if related_file.path == data.cursor_path {
1039 let fim_prompt = self.fmt_fim(&related_file, data.cursor_point);
1040 context.push_str(&fim_prompt);
1041 } else {
1042 write_excerpts(
1043 &related_file.excerpts,
1044 &[],
1045 related_file.max_row,
1046 include_line_numbers,
1047 &mut context,
1048 );
1049 }
1050 }
1051 context
1052 }
1053
1054 fn fmt_fim(&self, file: &IncludedFile, cursor_point: Point) -> String {
1055 let mut buf = String::new();
1056 const FIM_SUFFIX: &str = "<[fim-suffix]>";
1057 const FIM_PREFIX: &str = "<[fim-prefix]>";
1058 const FIM_MIDDLE: &str = "<[fim-middle]>";
1059 write!(buf, "{}", FIM_PREFIX).unwrap();
1060 write_excerpts(
1061 &file.excerpts,
1062 &[(cursor_point, FIM_SUFFIX)],
1063 file.max_row,
1064 true,
1065 &mut buf,
1066 );
1067
1068 // Swap prefix and suffix parts
1069 let index = buf.find(FIM_SUFFIX).unwrap();
1070 let prefix = &buf[..index];
1071 let suffix = &buf[index..];
1072
1073 format!("{}{}{}", suffix, prefix, FIM_MIDDLE)
1074 }
1075}