1//! Zeta2 prompt planning and generation code shared with cloud.
2pub mod retrieval_prompt;
3
4use anyhow::{Context as _, Result, anyhow};
5use cloud_llm_client::predict_edits_v3::{
6 self, DiffPathFmt, Event, Excerpt, IncludedFile, Line, Point, PromptFormat,
7 ReferencedDeclaration,
8};
9use indoc::indoc;
10use ordered_float::OrderedFloat;
11use rustc_hash::{FxHashMap, FxHashSet};
12use serde::Serialize;
13use std::cmp;
14use std::fmt::Write;
15use std::sync::Arc;
16use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
17use strum::{EnumIter, IntoEnumIterator};
18
/// Prompt size budget, in bytes, that `SyntaxBasedPrompt::populate` falls back to when the
/// request does not set `prompt_max_bytes`.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker spliced into the rendered code at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Marker inserted at the first line of the excerpt for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// Marker inserted at the end line of the excerpt for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
26
27// TODO: use constants for markers?
28const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
29 You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
30
31 The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>. Please respond with edited code for that region.
32
33 Other code is provided for context, and `…` indicates when code has been skipped.
34
35 ## Edit History
36
37"};
38
39const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
40 You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.
41
42 Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).
43
44 The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.
45
46 Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:
47
48 <|current_section|>
49 for i in 0..16 {
50 println!("{i}");
51 }
52
53 ## Edit History
54
55"#};
56
57const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
58 # Instructions
59
60 You are an edit prediction agent in a code editor.
61 Your job is to predict the next edit that the user will make,
62 based on their last few edits and their current cursor location.
63
64 ## Output Format
65
66 You must briefly explain your understanding of the user's goal, in one
67 or two sentences, and then specify their next edit in the form of a
68 unified diff, like this:
69
70 ```
71 --- a/src/myapp/cli.py
72 +++ b/src/myapp/cli.py
73 @@ ... @@
74 import os
75 import time
76 import sys
77 +from constants import LOG_LEVEL_WARNING
78 @@ ... @@
79 config.headless()
80 config.set_interactive(false)
81 -config.set_log_level(LOG_L)
82 +config.set_log_level(LOG_LEVEL_WARNING)
83 config.set_use_color(True)
84 ```
85
86 ## Edit History
87
88"#};
89
90const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
91 You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.
92
93 ## Edit History
94
95 "#};
96
97const UNIFIED_DIFF_REMINDER: &str = indoc! {"
98 ---
99
100 Analyze the edit history and the files, then provide the unified diff for your predicted edits.
101 Do not include the cursor marker in your output.
102 Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
103 Do not include line numbers in the hunk headers, use `@@ ... @@`.
104 Removed lines begin with `-`.
105 Added lines begin with `+`.
106 Context lines begin with an extra space.
107 Context and removed lines are used to match the target edit location, so make sure to include enough of them
108 to uniquely identify it amongst all excerpts of code provided.
109"};
110
111const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
112 ---
113
114 Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
115 Do not include the cursor marker in your output.
116 If you're editing multiple files, be sure to reflect filename in the hunk's header.
117 "};
118
119const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
120 # Instructions
121
122 You are an edit prediction agent in a code editor.
123
124 Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
125 Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
126 Always continue along the user's current trajectory, rather than changing course.
127
128 ## Output Format
129
130 You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
131 along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
132
133 <edits path="my-project/src/myapp/cli.py">
134 <old_text>
135 OLD TEXT 1 HERE
136 </old_text>
137 <new_text>
138 NEW TEXT 1 HERE
139 </new_text>
140
141 <old_text>
142 OLD TEXT 1 HERE
143 </old_text>
144 <new_text>
145 NEW TEXT 1 HERE
146 </new_text>
147 </edits>
148
149 - Specify the file to edit using the `path` attribute.
150 - Use `<old_text>` and `<new_text>` tags to replace content
151 - `<old_text>` must exactly match existing file content, including indentation
152 - `<old_text>` cannot be empty
153 - Do not escape quotes, newlines, or other characters within tags
154 - Always close all tags properly
155 - Don't include the <|user_cursor|> marker in your output.
156
157 ## Edit History
158
159"#};
160
161const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
162 ---
163
164 Remember that the edits in the edit history have already been applied.
165"#};
166
167pub fn build_prompt(
168 request: &predict_edits_v3::PredictEditsRequest,
169) -> Result<(String, SectionLabels)> {
170 let mut section_labels = Default::default();
171
172 match request.prompt_format {
173 PromptFormat::MinimalQwen => {
174 let prompt = MinimalQwenPrompt {
175 events: request.events.clone(),
176 cursor_point: request.cursor_point,
177 cursor_path: request.excerpt_path.clone(),
178 included_files: request.included_files.clone(),
179 };
180 return Ok((prompt.render(), section_labels));
181 }
182 _ => (),
183 };
184
185 let mut insertions = match request.prompt_format {
186 PromptFormat::MarkedExcerpt => vec![
187 (
188 Point {
189 line: request.excerpt_line_range.start,
190 column: 0,
191 },
192 EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
193 ),
194 (request.cursor_point, CURSOR_MARKER),
195 (
196 Point {
197 line: request.excerpt_line_range.end,
198 column: 0,
199 },
200 EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
201 ),
202 ],
203 PromptFormat::LabeledSections
204 | PromptFormat::NumLinesUniDiff
205 | PromptFormat::Minimal
206 | PromptFormat::OldTextNewText => {
207 vec![(request.cursor_point, CURSOR_MARKER)]
208 }
209 PromptFormat::OnlySnippets => vec![],
210 PromptFormat::MinimalQwen => unreachable!(),
211 };
212
213 let mut prompt = match request.prompt_format {
214 PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
215 PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
216 PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
217 PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
218 PromptFormat::OnlySnippets => String::new(),
219 PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
220 PromptFormat::MinimalQwen => unreachable!(),
221 };
222
223 if request.events.is_empty() {
224 prompt.push_str("(No edit history)\n\n");
225 } else {
226 let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
227 "The following are the latest edits made by the user, from earlier to later.\n\n"
228 } else {
229 "Here are the latest edits made by the user, from earlier to later.\n\n"
230 };
231 prompt.push_str(edit_preamble);
232 push_events(&mut prompt, &request.events);
233 }
234
235 let excerpts_preamble = match request.prompt_format {
236 PromptFormat::Minimal => indoc! {"
237 ## Part of the file under the cursor
238
239 (The cursor marker <|user_cursor|> indicates the current user cursor position.
240 The file is in current state, edits from edit history has been applied.
241 We only show part of the file around the cursor.
242 You can only edit exactly this part of the file.
243 We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
244 "},
245 PromptFormat::NumLinesUniDiff | PromptFormat::OldTextNewText => indoc! {"
246 ## Code Excerpts
247
248 Here is some excerpts of code that you should take into account to predict the next edit.
249
250 The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
251
252 In addition other excerpts are included to better understand what the edit will be, including the declaration
253 or references of symbols around the cursor, or other similar code snippets that may need to be updated
254 following patterns that appear in the edit history.
255
256 Consider each of them carefully in relation to the edit history, and that the user may not have navigated
257 to the next place they want to edit yet.
258
259 Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
260 "},
261 _ => indoc! {"
262 ## Code Excerpts
263
264 The cursor marker <|user_cursor|> indicates the current user cursor position.
265 The file is in current state, edits from edit history have been applied.
266 "},
267 };
268
269 prompt.push_str(excerpts_preamble);
270 prompt.push('\n');
271
272 if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
273 let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
274 section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
275 } else {
276 if request.prompt_format == PromptFormat::LabeledSections {
277 anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
278 }
279
280 let include_line_numbers = matches!(
281 request.prompt_format,
282 PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
283 );
284 for related_file in &request.included_files {
285 if request.prompt_format == PromptFormat::Minimal {
286 write_codeblock_with_filename(
287 &related_file.path,
288 &related_file.excerpts,
289 if related_file.path == request.excerpt_path {
290 &insertions
291 } else {
292 &[]
293 },
294 related_file.max_row,
295 include_line_numbers,
296 &mut prompt,
297 );
298 } else {
299 write_codeblock(
300 &related_file.path,
301 &related_file.excerpts,
302 if related_file.path == request.excerpt_path {
303 &insertions
304 } else {
305 &[]
306 },
307 related_file.max_row,
308 include_line_numbers,
309 &mut prompt,
310 );
311 }
312 }
313 }
314
315 match request.prompt_format {
316 PromptFormat::NumLinesUniDiff => {
317 prompt.push_str(UNIFIED_DIFF_REMINDER);
318 }
319 PromptFormat::OldTextNewText => {
320 prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
321 }
322 PromptFormat::Minimal => {
323 prompt.push_str(MINIMAL_PROMPT_REMINDER);
324 }
325 _ => {}
326 }
327
328 Ok((prompt, section_labels))
329}
330
331pub fn write_codeblock<'a>(
332 path: &Path,
333 excerpts: impl IntoIterator<Item = &'a Excerpt>,
334 sorted_insertions: &[(Point, &str)],
335 file_line_count: Line,
336 include_line_numbers: bool,
337 output: &'a mut String,
338) {
339 writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
340
341 write_excerpts(
342 excerpts,
343 sorted_insertions,
344 file_line_count,
345 include_line_numbers,
346 output,
347 );
348 write!(output, "`````\n\n").unwrap();
349}
350
351fn write_codeblock_with_filename<'a>(
352 path: &Path,
353 excerpts: impl IntoIterator<Item = &'a Excerpt>,
354 sorted_insertions: &[(Point, &str)],
355 file_line_count: Line,
356 include_line_numbers: bool,
357 output: &'a mut String,
358) {
359 writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
360
361 write_excerpts(
362 excerpts,
363 sorted_insertions,
364 file_line_count,
365 include_line_numbers,
366 output,
367 );
368 write!(output, "`````\n\n").unwrap();
369}
370
371pub fn write_excerpts<'a>(
372 excerpts: impl IntoIterator<Item = &'a Excerpt>,
373 sorted_insertions: &[(Point, &str)],
374 file_line_count: Line,
375 include_line_numbers: bool,
376 output: &mut String,
377) {
378 let mut current_row = Line(0);
379 let mut sorted_insertions = sorted_insertions.iter().peekable();
380
381 for excerpt in excerpts {
382 if excerpt.start_line > current_row {
383 writeln!(output, "…").unwrap();
384 }
385 if excerpt.text.is_empty() {
386 return;
387 }
388
389 current_row = excerpt.start_line;
390
391 for mut line in excerpt.text.lines() {
392 if include_line_numbers {
393 write!(output, "{}|", current_row.0 + 1).unwrap();
394 }
395
396 while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
397 match current_row.cmp(&insertion_location.line) {
398 cmp::Ordering::Equal => {
399 let (prefix, suffix) = line.split_at(insertion_location.column as usize);
400 output.push_str(prefix);
401 output.push_str(insertion_marker);
402 line = suffix;
403 sorted_insertions.next();
404 }
405 cmp::Ordering::Less => break,
406 cmp::Ordering::Greater => {
407 sorted_insertions.next();
408 break;
409 }
410 }
411 }
412 output.push_str(line);
413 output.push('\n');
414 current_row.0 += 1;
415 }
416 }
417
418 if current_row < file_line_count {
419 writeln!(output, "…").unwrap();
420 }
421}
422
423pub fn push_events(output: &mut String, events: &[predict_edits_v3::Event]) {
424 if events.is_empty() {
425 return;
426 };
427
428 writeln!(output, "`````diff").unwrap();
429 for event in events {
430 writeln!(output, "{}", event).unwrap();
431 }
432 writeln!(output, "`````\n").unwrap();
433}
434
/// Plan for a prompt built from syntax-derived context: parent signatures and referenced
/// declarations selected by `populate`, rendered together with the request's excerpt by
/// `write`.
pub struct SyntaxBasedPrompt<'a> {
    /// The request being planned for; planned snippets borrow their text from it.
    request: &'a predict_edits_v3::PredictEditsRequest,
    /// Snippets to include in the prompt. These may overlap - overlapping line ranges are
    /// merged when the plan is rendered in `push_file_snippets`.
    snippets: Vec<PlannedSnippet<'a>>,
    /// Bytes charged against the prompt budget so far (starts at the excerpt's length).
    budget_used: usize,
}
442
/// A piece of text selected for inclusion in the prompt.
#[derive(Clone, Debug)]
pub struct PlannedSnippet<'a> {
    /// File the snippet belongs to; snippets are grouped by this path when rendered.
    path: Arc<Path>,
    /// Line range of the snippet within the file.
    range: Range<Line>,
    /// Text of the snippet.
    text: &'a str,
    // TODO: Indicate this in the output
    /// Copied from the source declaration / signature; not used when rendering yet.
    #[allow(dead_code)]
    text_is_truncated: bool,
}
452
/// How much of a referenced declaration is included in the prompt.
#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
pub enum DeclarationStyle {
    /// Only the declaration's `signature_range` slice of its text.
    Signature,
    /// The declaration's full text.
    Declaration,
}
458
/// Describes the sections written by `SyntaxBasedPrompt::write`.
#[derive(Default, Clone, Debug, Serialize)]
pub struct SectionLabels {
    /// Index into `section_ranges` of the section that received the cursor / region
    /// markers. Always `0` for `PromptFormat::OnlySnippets`.
    pub excerpt_index: usize,
    /// Path and line range of every rendered section, in output order.
    pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
}
464
465impl<'a> SyntaxBasedPrompt<'a> {
466 /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
467 ///
468 /// Initializes a priority queue by populating it with each snippet, finding the
469 /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
470 /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
471 /// the cost of upgrade.
472 ///
473 /// TODO: Implement an early halting condition. One option might be to have another priority
474 /// queue where the score is the size, and update it accordingly. Another option might be to
475 /// have some simpler heuristic like bailing after N failed insertions, or based on how much
476 /// budget is left.
477 ///
478 /// TODO: Has the current known sources of imprecision:
479 ///
480 /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
481 /// plan even though the containing struct is already included.
482 ///
483 /// * Does not consider cost of signatures when ranking snippets - this is tricky since
484 /// signatures may be shared by multiple snippets.
485 ///
486 /// * Does not include file paths / other text when considering max_bytes.
487 pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
488 let mut this = Self {
489 request,
490 snippets: Vec::new(),
491 budget_used: request.excerpt.len(),
492 };
493 let mut included_parents = FxHashSet::default();
494 let additional_parents = this.additional_parent_signatures(
495 &request.excerpt_path,
496 request.excerpt_parent,
497 &included_parents,
498 )?;
499 this.add_parents(&mut included_parents, additional_parents);
500
501 let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
502
503 if this.budget_used > max_bytes {
504 return Err(anyhow!(
505 "Excerpt + signatures size of {} already exceeds budget of {}",
506 this.budget_used,
507 max_bytes
508 ));
509 }
510
511 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
512 struct QueueEntry {
513 score_density: OrderedFloat<f32>,
514 declaration_index: usize,
515 style: DeclarationStyle,
516 }
517
518 // Initialize priority queue with the best score for each snippet.
519 let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
520 for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
521 let (style, score_density) = DeclarationStyle::iter()
522 .map(|style| {
523 (
524 style,
525 OrderedFloat(declaration_score_density(&declaration, style)),
526 )
527 })
528 .max_by_key(|(_, score_density)| *score_density)
529 .unwrap();
530 queue.push(QueueEntry {
531 score_density,
532 declaration_index,
533 style,
534 });
535 }
536
537 // Knapsack selection loop
538 while let Some(queue_entry) = queue.pop() {
539 let Some(declaration) = request
540 .referenced_declarations
541 .get(queue_entry.declaration_index)
542 else {
543 return Err(anyhow!(
544 "Invalid declaration index {}",
545 queue_entry.declaration_index
546 ));
547 };
548
549 let mut additional_bytes = declaration_size(declaration, queue_entry.style);
550 if this.budget_used + additional_bytes > max_bytes {
551 continue;
552 }
553
554 let additional_parents = this.additional_parent_signatures(
555 &declaration.path,
556 declaration.parent_index,
557 &mut included_parents,
558 )?;
559 additional_bytes += additional_parents
560 .iter()
561 .map(|(_, snippet)| snippet.text.len())
562 .sum::<usize>();
563 if this.budget_used + additional_bytes > max_bytes {
564 continue;
565 }
566
567 this.budget_used += additional_bytes;
568 this.add_parents(&mut included_parents, additional_parents);
569 let planned_snippet = match queue_entry.style {
570 DeclarationStyle::Signature => {
571 let Some(text) = declaration.text.get(declaration.signature_range.clone())
572 else {
573 return Err(anyhow!(
574 "Invalid declaration signature_range {:?} with text.len() = {}",
575 declaration.signature_range,
576 declaration.text.len()
577 ));
578 };
579 let signature_start_line = declaration.range.start
580 + Line(
581 declaration.text[..declaration.signature_range.start]
582 .lines()
583 .count() as u32,
584 );
585 let signature_end_line = signature_start_line
586 + Line(
587 declaration.text
588 [declaration.signature_range.start..declaration.signature_range.end]
589 .lines()
590 .count() as u32,
591 );
592 let range = signature_start_line..signature_end_line;
593
594 PlannedSnippet {
595 path: declaration.path.clone(),
596 range,
597 text,
598 text_is_truncated: declaration.text_is_truncated,
599 }
600 }
601 DeclarationStyle::Declaration => PlannedSnippet {
602 path: declaration.path.clone(),
603 range: declaration.range.clone(),
604 text: &declaration.text,
605 text_is_truncated: declaration.text_is_truncated,
606 },
607 };
608 this.snippets.push(planned_snippet);
609
610 // When a Signature is consumed, insert an entry for Definition style.
611 if queue_entry.style == DeclarationStyle::Signature {
612 let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
613 let declaration_size =
614 declaration_size(&declaration, DeclarationStyle::Declaration);
615 let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
616 let declaration_score =
617 declaration_score(&declaration, DeclarationStyle::Declaration);
618
619 let score_diff = declaration_score - signature_score;
620 let size_diff = declaration_size.saturating_sub(signature_size);
621 if score_diff > 0.0001 && size_diff > 0 {
622 queue.push(QueueEntry {
623 declaration_index: queue_entry.declaration_index,
624 score_density: OrderedFloat(score_diff / (size_diff as f32)),
625 style: DeclarationStyle::Declaration,
626 });
627 }
628 }
629 }
630
631 anyhow::Ok(this)
632 }
633
634 fn add_parents(
635 &mut self,
636 included_parents: &mut FxHashSet<usize>,
637 snippets: Vec<(usize, PlannedSnippet<'a>)>,
638 ) {
639 for (parent_index, snippet) in snippets {
640 included_parents.insert(parent_index);
641 self.budget_used += snippet.text.len();
642 self.snippets.push(snippet);
643 }
644 }
645
646 fn additional_parent_signatures(
647 &self,
648 path: &Arc<Path>,
649 parent_index: Option<usize>,
650 included_parents: &FxHashSet<usize>,
651 ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
652 let mut results = Vec::new();
653 self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
654 Ok(results)
655 }
656
657 fn additional_parent_signatures_impl(
658 &self,
659 path: &Arc<Path>,
660 parent_index: Option<usize>,
661 included_parents: &FxHashSet<usize>,
662 results: &mut Vec<(usize, PlannedSnippet<'a>)>,
663 ) -> Result<()> {
664 let Some(parent_index) = parent_index else {
665 return Ok(());
666 };
667 if included_parents.contains(&parent_index) {
668 return Ok(());
669 }
670 let Some(parent_signature) = self.request.signatures.get(parent_index) else {
671 return Err(anyhow!("Invalid parent index {}", parent_index));
672 };
673 results.push((
674 parent_index,
675 PlannedSnippet {
676 path: path.clone(),
677 range: parent_signature.range.clone(),
678 text: &parent_signature.text,
679 text_is_truncated: parent_signature.text_is_truncated,
680 },
681 ));
682 self.additional_parent_signatures_impl(
683 path,
684 parent_signature.parent_index,
685 included_parents,
686 results,
687 )
688 }
689
690 /// Renders the planned context. Each file starts with "```FILE_PATH\n` and ends with triple
691 /// backticks, with a newline after each file. Outputs a line with "..." between nonconsecutive
692 /// chunks.
693 pub fn write(
694 &'a self,
695 excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
696 prompt: &mut String,
697 ) -> Result<SectionLabels> {
698 let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
699 FxHashMap::default();
700 for snippet in &self.snippets {
701 file_to_snippets
702 .entry(&snippet.path)
703 .or_default()
704 .push(snippet);
705 }
706
707 // Reorder so that file with cursor comes last
708 let mut file_snippets = Vec::new();
709 let mut excerpt_file_snippets = Vec::new();
710 for (file_path, snippets) in file_to_snippets {
711 if file_path == self.request.excerpt_path.as_ref() {
712 excerpt_file_snippets = snippets;
713 } else {
714 file_snippets.push((file_path, snippets, false));
715 }
716 }
717 let excerpt_snippet = PlannedSnippet {
718 path: self.request.excerpt_path.clone(),
719 range: self.request.excerpt_line_range.clone(),
720 text: &self.request.excerpt,
721 text_is_truncated: false,
722 };
723 excerpt_file_snippets.push(&excerpt_snippet);
724 file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));
725
726 let section_labels =
727 self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;
728
729 Ok(section_labels)
730 }
731
732 fn push_file_snippets(
733 &self,
734 output: &mut String,
735 excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
736 file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
737 ) -> Result<SectionLabels> {
738 let mut section_ranges = Vec::new();
739 let mut excerpt_index = None;
740
741 for (file_path, mut snippets, is_excerpt_file) in file_snippets {
742 snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));
743
744 // TODO: What if the snippets get expanded too large to be editable?
745 let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
746 let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
747 for snippet in snippets {
748 if let Some((_, current_snippet_range)) = current_snippet.as_mut()
749 && snippet.range.start <= current_snippet_range.end
750 {
751 current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
752 continue;
753 }
754 if let Some(current_snippet) = current_snippet.take() {
755 disjoint_snippets.push(current_snippet);
756 }
757 current_snippet = Some((snippet, snippet.range.clone()));
758 }
759 if let Some(current_snippet) = current_snippet.take() {
760 disjoint_snippets.push(current_snippet);
761 }
762
763 writeln!(output, "`````path={}", file_path.display()).ok();
764 let mut skipped_last_snippet = false;
765 for (snippet, range) in disjoint_snippets {
766 let section_index = section_ranges.len();
767
768 match self.request.prompt_format {
769 PromptFormat::MarkedExcerpt
770 | PromptFormat::OnlySnippets
771 | PromptFormat::OldTextNewText
772 | PromptFormat::Minimal
773 | PromptFormat::NumLinesUniDiff => {
774 if range.start.0 > 0 && !skipped_last_snippet {
775 output.push_str("…\n");
776 }
777 }
778 PromptFormat::LabeledSections => {
779 if is_excerpt_file
780 && range.start <= self.request.excerpt_line_range.start
781 && range.end >= self.request.excerpt_line_range.end
782 {
783 writeln!(output, "<|current_section|>").ok();
784 } else {
785 writeln!(output, "<|section_{}|>", section_index).ok();
786 }
787 }
788 PromptFormat::MinimalQwen => unreachable!(),
789 }
790
791 let push_full_snippet = |output: &mut String| {
792 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
793 for (i, line) in snippet.text.lines().enumerate() {
794 writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
795 }
796 } else {
797 output.push_str(&snippet.text);
798 }
799 anyhow::Ok(())
800 };
801
802 if is_excerpt_file {
803 if self.request.prompt_format == PromptFormat::OnlySnippets {
804 if range.start >= self.request.excerpt_line_range.start
805 && range.end <= self.request.excerpt_line_range.end
806 {
807 skipped_last_snippet = true;
808 } else {
809 skipped_last_snippet = false;
810 output.push_str(snippet.text);
811 }
812 } else if !excerpt_file_insertions.is_empty() {
813 let lines = snippet.text.lines().collect::<Vec<_>>();
814 let push_line = |output: &mut String, line_ix: usize| {
815 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
816 write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
817 }
818 anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
819 };
820 let mut last_line_ix = 0;
821 let mut insertion_ix = 0;
822 while insertion_ix < excerpt_file_insertions.len() {
823 let (point, insertion) = &excerpt_file_insertions[insertion_ix];
824 let found = point.line >= range.start && point.line <= range.end;
825 if found {
826 excerpt_index = Some(section_index);
827 let insertion_line_ix = (point.line.0 - range.start.0) as usize;
828 for line_ix in last_line_ix..insertion_line_ix {
829 push_line(output, line_ix)?;
830 }
831 if let Some(next_line) = lines.get(insertion_line_ix) {
832 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
833 write!(
834 output,
835 "{}|",
836 insertion_line_ix as u32 + range.start.0 + 1
837 )?
838 }
839 output.push_str(&next_line[..point.column as usize]);
840 output.push_str(insertion);
841 writeln!(output, "{}", &next_line[point.column as usize..])?;
842 } else {
843 writeln!(output, "{}", insertion)?;
844 }
845 last_line_ix = insertion_line_ix + 1;
846 excerpt_file_insertions.remove(insertion_ix);
847 continue;
848 }
849 insertion_ix += 1;
850 }
851 skipped_last_snippet = false;
852 for line_ix in last_line_ix..lines.len() {
853 push_line(output, line_ix)?;
854 }
855 } else {
856 skipped_last_snippet = false;
857 push_full_snippet(output)?;
858 }
859 } else {
860 skipped_last_snippet = false;
861 push_full_snippet(output)?;
862 }
863
864 section_ranges.push((snippet.path.clone(), range));
865 }
866
867 output.push_str("`````\n\n");
868 }
869
870 Ok(SectionLabels {
871 // TODO: Clean this up
872 excerpt_index: match self.request.prompt_format {
873 PromptFormat::OnlySnippets => 0,
874 _ => excerpt_index.context("bug: no snippet found for excerpt")?,
875 },
876 section_ranges,
877 })
878 }
879}
880
881fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
882 declaration_score(declaration, style) / declaration_size(declaration, style) as f32
883}
884
885fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
886 match style {
887 DeclarationStyle::Signature => declaration.signature_score,
888 DeclarationStyle::Declaration => declaration.declaration_score,
889 }
890}
891
892fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
893 match style {
894 DeclarationStyle::Signature => declaration.signature_range.len(),
895 DeclarationStyle::Declaration => declaration.text.len(),
896 }
897}
898
/// Inputs for the `PromptFormat::MinimalQwen` prompt, copied out of the request by
/// `build_prompt`.
struct MinimalQwenPrompt {
    /// Edit history rendered at the top of the prompt.
    events: Vec<Event>,
    /// Cursor position; rendered as `<|fim_suffix|>` inside the file at `cursor_path`.
    cursor_point: Point,
    /// Path of the file that contains the cursor.
    cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
    /// Files whose excerpts are rendered as context.
    included_files: Vec<IncludedFile>,
}
905
906impl MinimalQwenPrompt {
907 const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
908
909 fn render(&self) -> String {
910 let edit_history = self.fmt_edit_history();
911 let context = self.fmt_context();
912
913 format!(
914 "{instructions}\n\n{edit_history}\n\n{context}",
915 instructions = MinimalQwenPrompt::INSTRUCTIONS,
916 edit_history = edit_history,
917 context = context
918 )
919 }
920
921 fn fmt_edit_history(&self) -> String {
922 if self.events.is_empty() {
923 "(No edit history)\n\n".to_string()
924 } else {
925 let mut events_str = String::new();
926 push_events(&mut events_str, &self.events);
927 format!(
928 "The following are the latest edits made by the user, from earlier to later.\n\n{}",
929 events_str
930 )
931 }
932 }
933
934 fn fmt_context(&self) -> String {
935 let mut context = String::new();
936 let include_line_numbers = true;
937
938 for related_file in &self.included_files {
939 writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
940
941 if related_file.path == self.cursor_path {
942 write!(context, "<|fim_prefix|>").unwrap();
943 write_excerpts(
944 &related_file.excerpts,
945 &[(self.cursor_point, "<|fim_suffix|>")],
946 related_file.max_row,
947 include_line_numbers,
948 &mut context,
949 );
950 writeln!(context, "<|fim_middle|>").unwrap();
951 } else {
952 write_excerpts(
953 &related_file.excerpts,
954 &[],
955 related_file.max_row,
956 include_line_numbers,
957 &mut context,
958 );
959 }
960 }
961 context
962 }
963}