1//! Zeta2 prompt planning and generation code shared with cloud.
2pub mod retrieval_prompt;
3
4use anyhow::{Context as _, Result, anyhow};
5use cloud_llm_client::predict_edits_v3::{
6 self, DiffPathFmt, Excerpt, Line, Point, PromptFormat, ReferencedDeclaration,
7};
8use indoc::indoc;
9use ordered_float::OrderedFloat;
10use rustc_hash::{FxHashMap, FxHashSet};
11use serde::Serialize;
12use std::cmp;
13use std::fmt::Write;
14use std::sync::Arc;
15use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
16use strum::{EnumIter, IntoEnumIterator};
17
/// Byte budget that `SyntaxBasedPrompt::populate` falls back to when the request does not set
/// `prompt_max_bytes`.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker text that `build_prompt` inserts at the request's cursor point.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
25
// TODO: use constants for markers?
/// Prompt preamble that `build_prompt` starts with for `PromptFormat::MarkedExcerpt`.
///
/// The text ends with the `# Edit History:` heading; `build_prompt` appends the edit history
/// directly after it.
const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
 You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.

 The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>. Please respond with edited code for that region.

 Other code is provided for context, and `…` indicates when code has been skipped.

 # Edit History:

"};
37
/// Prompt preamble that `build_prompt` starts with for `PromptFormat::LabeledSections`.
///
/// Describes the `<|section_N|>` / `<|current_section|>` labels and ends with the
/// `# Edit History:` heading, after which `build_prompt` appends the edit history.
const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
 You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.

 Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).

 The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.

 Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:

 <|current_section|>
 for i in 0..16 {
 println!("{i}");
 }

 # Edit History:

"#};
55
/// Prompt preamble that `build_prompt` starts with for `PromptFormat::NumLinesUniDiff`.
///
/// Asks for a short explanation followed by a unified diff, and ends with the `## Edit History`
/// heading, after which `build_prompt` appends the edit history.
const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
 # Instructions

 You are an edit prediction agent in a code editor.
 Your job is to predict the next edit that the user will make,
 based on their last few edits and their current cursor location.

 ## Output Format

 You must briefly explain your understanding of the user's goal, in one
 or two sentences, and then specify their next edit in the form of a
 unified diff, like this:

 ```
 --- a/src/myapp/cli.py
 +++ b/src/myapp/cli.py
 @@ ... @@
 import os
 import time
 import sys
 +from constants import LOG_LEVEL_WARNING
 @@ ... @@
 config.headless()
 config.set_interactive(false)
 -config.set_log_level(LOG_L)
 +config.set_log_level(LOG_LEVEL_WARNING)
 config.set_use_color(True)
 ```

 ## Edit History

"#};
88
/// Prompt preamble that `build_prompt` starts with for `PromptFormat::Minimal`.
///
/// Ends with the `# Edit History:` heading, after which `build_prompt` appends the edit history.
const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
 You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.

 # Edit History:

 "#};
95
/// Closing reminder that `build_prompt` appends after the code excerpts for
/// `PromptFormat::NumLinesUniDiff`, restating the expected unified-diff output rules.
const UNIFIED_DIFF_REMINDER: &str = indoc! {"
 ---

 Analyze the edit history and the files, then provide the unified diff for your predicted edits.
 Do not include the cursor marker in your output.
 Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
 Do not include line numbers in the hunk headers, use `@@ ... @@`.
 Removed lines begin with `-`.
 Added lines begin with `+`.
 Context lines begin with an extra space.
 Context and removed lines are used to match the target edit location, so make sure to include enough of them
 to uniquely identify it amongst all excerpts of code provided.
"};
109
/// Closing reminder that `build_prompt` appends after the code excerpts for
/// `PromptFormat::Minimal`.
const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
 ---

 Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
 Do not include the cursor marker in your output.
 If you're editing multiple files, be sure to reflect filename in the hunk's header.
 "};
117
118const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
119 # Instructions
120
121 You are an edit prediction agent in a code editor.
122 Your job is to predict the next edit that the user will make,
123 based on their last few edits and their current cursor location.
124
125 # Output Format
126
127 You must briefly explain your understanding of the user's goal, in one
128 or two sentences, and then specify their next edit, using the following
129 XML format:
130
131 <edits path="my-project/src/myapp/cli.py">
132 <old_text>
133 OLD TEXT 1 HERE
134 </old_text>
135 <new_text>
136 NEW TEXT 1 HERE
137 </new_text>
138
139 <old_text>
140 OLD TEXT 1 HERE
141 </old_text>
142 <new_text>
143 NEW TEXT 1 HERE
144 </new_text>
145 </edits>
146
147 - Specify the file to edit using the `path` attribute.
148 - Use `<old_text>` and `<new_text>` tags to replace content
149 - `<old_text>` must exactly match existing file content, including indentation
150 - `<old_text>` cannot be empty
151 - Do not escape quotes, newlines, or other characters within tags
152 - Always close all tags properly
153 - Don't include the <|user_cursor|> marker in your output.
154
155 # Edit History:
156
157"#};
158
159const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
160 ---
161
162 Remember that the edits in the edit history have already been deployed.
163 The files are currently as shown in the Code Excerpts section.
164"#};
165
166pub fn build_prompt(
167 request: &predict_edits_v3::PredictEditsRequest,
168) -> Result<(String, SectionLabels)> {
169 let mut insertions = match request.prompt_format {
170 PromptFormat::MarkedExcerpt => vec![
171 (
172 Point {
173 line: request.excerpt_line_range.start,
174 column: 0,
175 },
176 EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
177 ),
178 (request.cursor_point, CURSOR_MARKER),
179 (
180 Point {
181 line: request.excerpt_line_range.end,
182 column: 0,
183 },
184 EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
185 ),
186 ],
187 PromptFormat::LabeledSections
188 | PromptFormat::NumLinesUniDiff
189 | PromptFormat::Minimal
190 | PromptFormat::OldTextNewText => {
191 vec![(request.cursor_point, CURSOR_MARKER)]
192 }
193 PromptFormat::OnlySnippets => vec![],
194 };
195
196 let mut prompt = match request.prompt_format {
197 PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
198 PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
199 PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
200 PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
201 PromptFormat::OnlySnippets => String::new(),
202 PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
203 };
204
205 if request.events.is_empty() {
206 prompt.push_str("(No edit history)\n\n");
207 } else {
208 let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
209 "The following are the latest edits made by the user, from earlier to later.\n\n"
210 } else {
211 "Here are the latest edits made by the user, from earlier to later.\n\n"
212 };
213 prompt.push_str(edit_preamble);
214 push_events(&mut prompt, &request.events);
215 }
216
217 let excerpts_preamble = match request.prompt_format {
218 PromptFormat::Minimal => indoc! {"
219 # Part of the file under the cursor:
220
221 (The cursor marker <|user_cursor|> indicates the current user cursor position.
222 The file is in current state, edits from edit history has been applied.
223 We only show part of the file around the cursor.
224 You can only edit exactly this part of the file.
225 We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
226 "},
227 PromptFormat::NumLinesUniDiff => indoc! {"
228 # Code Excerpts
229
230 The cursor marker <|user_cursor|> indicates the current user cursor position.
231 The file is in current state, edits from edit history have been applied.
232 We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.
233 "},
234 _ => indoc! {"
235 # Code Excerpts
236
237 The cursor marker <|user_cursor|> indicates the current user cursor position.
238 The file is in current state, edits from edit history have been applied.
239 "},
240 };
241
242 prompt.push_str(excerpts_preamble);
243 prompt.push('\n');
244
245 let mut section_labels = Default::default();
246
247 if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
248 let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
249 section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
250 } else {
251 if request.prompt_format == PromptFormat::LabeledSections {
252 anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
253 }
254
255 let include_line_numbers = matches!(
256 request.prompt_format,
257 PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
258 );
259 for related_file in &request.included_files {
260 if request.prompt_format == PromptFormat::Minimal {
261 write_codeblock_with_filename(
262 &related_file.path,
263 &related_file.excerpts,
264 if related_file.path == request.excerpt_path {
265 &insertions
266 } else {
267 &[]
268 },
269 related_file.max_row,
270 include_line_numbers,
271 &mut prompt,
272 );
273 } else {
274 write_codeblock(
275 &related_file.path,
276 &related_file.excerpts,
277 if related_file.path == request.excerpt_path {
278 &insertions
279 } else {
280 &[]
281 },
282 related_file.max_row,
283 include_line_numbers,
284 &mut prompt,
285 );
286 }
287 }
288 }
289
290 match request.prompt_format {
291 PromptFormat::NumLinesUniDiff => {
292 prompt.push_str(UNIFIED_DIFF_REMINDER);
293 }
294 PromptFormat::OldTextNewText => {
295 prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
296 }
297 PromptFormat::Minimal => {
298 prompt.push_str(MINIMAL_PROMPT_REMINDER);
299 }
300 _ => {}
301 }
302
303 Ok((prompt, section_labels))
304}
305
306pub fn write_codeblock<'a>(
307 path: &Path,
308 excerpts: impl IntoIterator<Item = &'a Excerpt>,
309 sorted_insertions: &[(Point, &str)],
310 file_line_count: Line,
311 include_line_numbers: bool,
312 output: &'a mut String,
313) {
314 writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
315
316 write_excerpts(
317 excerpts,
318 sorted_insertions,
319 file_line_count,
320 include_line_numbers,
321 output,
322 );
323 write!(output, "`````\n\n").unwrap();
324}
325
326fn write_codeblock_with_filename<'a>(
327 path: &Path,
328 excerpts: impl IntoIterator<Item = &'a Excerpt>,
329 sorted_insertions: &[(Point, &str)],
330 file_line_count: Line,
331 include_line_numbers: bool,
332 output: &'a mut String,
333) {
334 writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
335
336 write_excerpts(
337 excerpts,
338 sorted_insertions,
339 file_line_count,
340 include_line_numbers,
341 output,
342 );
343 write!(output, "`````\n\n").unwrap();
344}
345
346pub fn write_excerpts<'a>(
347 excerpts: impl IntoIterator<Item = &'a Excerpt>,
348 sorted_insertions: &[(Point, &str)],
349 file_line_count: Line,
350 include_line_numbers: bool,
351 output: &mut String,
352) {
353 let mut current_row = Line(0);
354 let mut sorted_insertions = sorted_insertions.iter().peekable();
355
356 for excerpt in excerpts {
357 if excerpt.start_line > current_row {
358 writeln!(output, "…").unwrap();
359 }
360 if excerpt.text.is_empty() {
361 return;
362 }
363
364 current_row = excerpt.start_line;
365
366 for mut line in excerpt.text.lines() {
367 if include_line_numbers {
368 write!(output, "{}|", current_row.0 + 1).unwrap();
369 }
370
371 while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
372 match current_row.cmp(&insertion_location.line) {
373 cmp::Ordering::Equal => {
374 let (prefix, suffix) = line.split_at(insertion_location.column as usize);
375 output.push_str(prefix);
376 output.push_str(insertion_marker);
377 line = suffix;
378 sorted_insertions.next();
379 }
380 cmp::Ordering::Less => break,
381 cmp::Ordering::Greater => {
382 sorted_insertions.next();
383 break;
384 }
385 }
386 }
387 output.push_str(line);
388 output.push('\n');
389 current_row.0 += 1;
390 }
391 }
392
393 if current_row < file_line_count {
394 writeln!(output, "…").unwrap();
395 }
396}
397
398pub fn push_events(output: &mut String, events: &[predict_edits_v3::Event]) {
399 if events.is_empty() {
400 return;
401 };
402
403 writeln!(output, "`````diff").unwrap();
404 for event in events {
405 writeln!(output, "{}", event).unwrap();
406 }
407 writeln!(output, "`````\n").unwrap();
408}
409
410pub struct SyntaxBasedPrompt<'a> {
411 request: &'a predict_edits_v3::PredictEditsRequest,
412 /// Snippets to include in the prompt. These may overlap - they are merged / deduplicated in
413 /// `to_prompt_string`.
414 snippets: Vec<PlannedSnippet<'a>>,
415 budget_used: usize,
416}
417
418#[derive(Clone, Debug)]
419pub struct PlannedSnippet<'a> {
420 path: Arc<Path>,
421 range: Range<Line>,
422 text: &'a str,
423 // TODO: Indicate this in the output
424 #[allow(dead_code)]
425 text_is_truncated: bool,
426}
427
428#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
429pub enum DeclarationStyle {
430 Signature,
431 Declaration,
432}
433
434#[derive(Default, Clone, Debug, Serialize)]
435pub struct SectionLabels {
436 pub excerpt_index: usize,
437 pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
438}
439
440impl<'a> SyntaxBasedPrompt<'a> {
441 /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
442 ///
443 /// Initializes a priority queue by populating it with each snippet, finding the
444 /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
445 /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
446 /// the cost of upgrade.
447 ///
448 /// TODO: Implement an early halting condition. One option might be to have another priority
449 /// queue where the score is the size, and update it accordingly. Another option might be to
450 /// have some simpler heuristic like bailing after N failed insertions, or based on how much
451 /// budget is left.
452 ///
453 /// TODO: Has the current known sources of imprecision:
454 ///
455 /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
456 /// plan even though the containing struct is already included.
457 ///
458 /// * Does not consider cost of signatures when ranking snippets - this is tricky since
459 /// signatures may be shared by multiple snippets.
460 ///
461 /// * Does not include file paths / other text when considering max_bytes.
462 pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
463 let mut this = Self {
464 request,
465 snippets: Vec::new(),
466 budget_used: request.excerpt.len(),
467 };
468 let mut included_parents = FxHashSet::default();
469 let additional_parents = this.additional_parent_signatures(
470 &request.excerpt_path,
471 request.excerpt_parent,
472 &included_parents,
473 )?;
474 this.add_parents(&mut included_parents, additional_parents);
475
476 let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
477
478 if this.budget_used > max_bytes {
479 return Err(anyhow!(
480 "Excerpt + signatures size of {} already exceeds budget of {}",
481 this.budget_used,
482 max_bytes
483 ));
484 }
485
486 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
487 struct QueueEntry {
488 score_density: OrderedFloat<f32>,
489 declaration_index: usize,
490 style: DeclarationStyle,
491 }
492
493 // Initialize priority queue with the best score for each snippet.
494 let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
495 for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
496 let (style, score_density) = DeclarationStyle::iter()
497 .map(|style| {
498 (
499 style,
500 OrderedFloat(declaration_score_density(&declaration, style)),
501 )
502 })
503 .max_by_key(|(_, score_density)| *score_density)
504 .unwrap();
505 queue.push(QueueEntry {
506 score_density,
507 declaration_index,
508 style,
509 });
510 }
511
512 // Knapsack selection loop
513 while let Some(queue_entry) = queue.pop() {
514 let Some(declaration) = request
515 .referenced_declarations
516 .get(queue_entry.declaration_index)
517 else {
518 return Err(anyhow!(
519 "Invalid declaration index {}",
520 queue_entry.declaration_index
521 ));
522 };
523
524 let mut additional_bytes = declaration_size(declaration, queue_entry.style);
525 if this.budget_used + additional_bytes > max_bytes {
526 continue;
527 }
528
529 let additional_parents = this.additional_parent_signatures(
530 &declaration.path,
531 declaration.parent_index,
532 &mut included_parents,
533 )?;
534 additional_bytes += additional_parents
535 .iter()
536 .map(|(_, snippet)| snippet.text.len())
537 .sum::<usize>();
538 if this.budget_used + additional_bytes > max_bytes {
539 continue;
540 }
541
542 this.budget_used += additional_bytes;
543 this.add_parents(&mut included_parents, additional_parents);
544 let planned_snippet = match queue_entry.style {
545 DeclarationStyle::Signature => {
546 let Some(text) = declaration.text.get(declaration.signature_range.clone())
547 else {
548 return Err(anyhow!(
549 "Invalid declaration signature_range {:?} with text.len() = {}",
550 declaration.signature_range,
551 declaration.text.len()
552 ));
553 };
554 let signature_start_line = declaration.range.start
555 + Line(
556 declaration.text[..declaration.signature_range.start]
557 .lines()
558 .count() as u32,
559 );
560 let signature_end_line = signature_start_line
561 + Line(
562 declaration.text
563 [declaration.signature_range.start..declaration.signature_range.end]
564 .lines()
565 .count() as u32,
566 );
567 let range = signature_start_line..signature_end_line;
568
569 PlannedSnippet {
570 path: declaration.path.clone(),
571 range,
572 text,
573 text_is_truncated: declaration.text_is_truncated,
574 }
575 }
576 DeclarationStyle::Declaration => PlannedSnippet {
577 path: declaration.path.clone(),
578 range: declaration.range.clone(),
579 text: &declaration.text,
580 text_is_truncated: declaration.text_is_truncated,
581 },
582 };
583 this.snippets.push(planned_snippet);
584
585 // When a Signature is consumed, insert an entry for Definition style.
586 if queue_entry.style == DeclarationStyle::Signature {
587 let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
588 let declaration_size =
589 declaration_size(&declaration, DeclarationStyle::Declaration);
590 let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
591 let declaration_score =
592 declaration_score(&declaration, DeclarationStyle::Declaration);
593
594 let score_diff = declaration_score - signature_score;
595 let size_diff = declaration_size.saturating_sub(signature_size);
596 if score_diff > 0.0001 && size_diff > 0 {
597 queue.push(QueueEntry {
598 declaration_index: queue_entry.declaration_index,
599 score_density: OrderedFloat(score_diff / (size_diff as f32)),
600 style: DeclarationStyle::Declaration,
601 });
602 }
603 }
604 }
605
606 anyhow::Ok(this)
607 }
608
609 fn add_parents(
610 &mut self,
611 included_parents: &mut FxHashSet<usize>,
612 snippets: Vec<(usize, PlannedSnippet<'a>)>,
613 ) {
614 for (parent_index, snippet) in snippets {
615 included_parents.insert(parent_index);
616 self.budget_used += snippet.text.len();
617 self.snippets.push(snippet);
618 }
619 }
620
621 fn additional_parent_signatures(
622 &self,
623 path: &Arc<Path>,
624 parent_index: Option<usize>,
625 included_parents: &FxHashSet<usize>,
626 ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
627 let mut results = Vec::new();
628 self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
629 Ok(results)
630 }
631
632 fn additional_parent_signatures_impl(
633 &self,
634 path: &Arc<Path>,
635 parent_index: Option<usize>,
636 included_parents: &FxHashSet<usize>,
637 results: &mut Vec<(usize, PlannedSnippet<'a>)>,
638 ) -> Result<()> {
639 let Some(parent_index) = parent_index else {
640 return Ok(());
641 };
642 if included_parents.contains(&parent_index) {
643 return Ok(());
644 }
645 let Some(parent_signature) = self.request.signatures.get(parent_index) else {
646 return Err(anyhow!("Invalid parent index {}", parent_index));
647 };
648 results.push((
649 parent_index,
650 PlannedSnippet {
651 path: path.clone(),
652 range: parent_signature.range.clone(),
653 text: &parent_signature.text,
654 text_is_truncated: parent_signature.text_is_truncated,
655 },
656 ));
657 self.additional_parent_signatures_impl(
658 path,
659 parent_signature.parent_index,
660 included_parents,
661 results,
662 )
663 }
664
665 /// Renders the planned context. Each file starts with "```FILE_PATH\n` and ends with triple
666 /// backticks, with a newline after each file. Outputs a line with "..." between nonconsecutive
667 /// chunks.
668 pub fn write(
669 &'a self,
670 excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
671 prompt: &mut String,
672 ) -> Result<SectionLabels> {
673 let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
674 FxHashMap::default();
675 for snippet in &self.snippets {
676 file_to_snippets
677 .entry(&snippet.path)
678 .or_default()
679 .push(snippet);
680 }
681
682 // Reorder so that file with cursor comes last
683 let mut file_snippets = Vec::new();
684 let mut excerpt_file_snippets = Vec::new();
685 for (file_path, snippets) in file_to_snippets {
686 if file_path == self.request.excerpt_path.as_ref() {
687 excerpt_file_snippets = snippets;
688 } else {
689 file_snippets.push((file_path, snippets, false));
690 }
691 }
692 let excerpt_snippet = PlannedSnippet {
693 path: self.request.excerpt_path.clone(),
694 range: self.request.excerpt_line_range.clone(),
695 text: &self.request.excerpt,
696 text_is_truncated: false,
697 };
698 excerpt_file_snippets.push(&excerpt_snippet);
699 file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));
700
701 let section_labels =
702 self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;
703
704 Ok(section_labels)
705 }
706
707 fn push_file_snippets(
708 &self,
709 output: &mut String,
710 excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
711 file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
712 ) -> Result<SectionLabels> {
713 let mut section_ranges = Vec::new();
714 let mut excerpt_index = None;
715
716 for (file_path, mut snippets, is_excerpt_file) in file_snippets {
717 snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));
718
719 // TODO: What if the snippets get expanded too large to be editable?
720 let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
721 let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
722 for snippet in snippets {
723 if let Some((_, current_snippet_range)) = current_snippet.as_mut()
724 && snippet.range.start <= current_snippet_range.end
725 {
726 current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
727 continue;
728 }
729 if let Some(current_snippet) = current_snippet.take() {
730 disjoint_snippets.push(current_snippet);
731 }
732 current_snippet = Some((snippet, snippet.range.clone()));
733 }
734 if let Some(current_snippet) = current_snippet.take() {
735 disjoint_snippets.push(current_snippet);
736 }
737
738 writeln!(output, "`````path={}", file_path.display()).ok();
739 let mut skipped_last_snippet = false;
740 for (snippet, range) in disjoint_snippets {
741 let section_index = section_ranges.len();
742
743 match self.request.prompt_format {
744 PromptFormat::MarkedExcerpt
745 | PromptFormat::OnlySnippets
746 | PromptFormat::OldTextNewText
747 | PromptFormat::Minimal
748 | PromptFormat::NumLinesUniDiff => {
749 if range.start.0 > 0 && !skipped_last_snippet {
750 output.push_str("…\n");
751 }
752 }
753 PromptFormat::LabeledSections => {
754 if is_excerpt_file
755 && range.start <= self.request.excerpt_line_range.start
756 && range.end >= self.request.excerpt_line_range.end
757 {
758 writeln!(output, "<|current_section|>").ok();
759 } else {
760 writeln!(output, "<|section_{}|>", section_index).ok();
761 }
762 }
763 }
764
765 let push_full_snippet = |output: &mut String| {
766 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
767 for (i, line) in snippet.text.lines().enumerate() {
768 writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
769 }
770 } else {
771 output.push_str(&snippet.text);
772 }
773 anyhow::Ok(())
774 };
775
776 if is_excerpt_file {
777 if self.request.prompt_format == PromptFormat::OnlySnippets {
778 if range.start >= self.request.excerpt_line_range.start
779 && range.end <= self.request.excerpt_line_range.end
780 {
781 skipped_last_snippet = true;
782 } else {
783 skipped_last_snippet = false;
784 output.push_str(snippet.text);
785 }
786 } else if !excerpt_file_insertions.is_empty() {
787 let lines = snippet.text.lines().collect::<Vec<_>>();
788 let push_line = |output: &mut String, line_ix: usize| {
789 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
790 write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
791 }
792 anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
793 };
794 let mut last_line_ix = 0;
795 let mut insertion_ix = 0;
796 while insertion_ix < excerpt_file_insertions.len() {
797 let (point, insertion) = &excerpt_file_insertions[insertion_ix];
798 let found = point.line >= range.start && point.line <= range.end;
799 if found {
800 excerpt_index = Some(section_index);
801 let insertion_line_ix = (point.line.0 - range.start.0) as usize;
802 for line_ix in last_line_ix..insertion_line_ix {
803 push_line(output, line_ix)?;
804 }
805 if let Some(next_line) = lines.get(insertion_line_ix) {
806 if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
807 write!(
808 output,
809 "{}|",
810 insertion_line_ix as u32 + range.start.0 + 1
811 )?
812 }
813 output.push_str(&next_line[..point.column as usize]);
814 output.push_str(insertion);
815 writeln!(output, "{}", &next_line[point.column as usize..])?;
816 } else {
817 writeln!(output, "{}", insertion)?;
818 }
819 last_line_ix = insertion_line_ix + 1;
820 excerpt_file_insertions.remove(insertion_ix);
821 continue;
822 }
823 insertion_ix += 1;
824 }
825 skipped_last_snippet = false;
826 for line_ix in last_line_ix..lines.len() {
827 push_line(output, line_ix)?;
828 }
829 } else {
830 skipped_last_snippet = false;
831 push_full_snippet(output)?;
832 }
833 } else {
834 skipped_last_snippet = false;
835 push_full_snippet(output)?;
836 }
837
838 section_ranges.push((snippet.path.clone(), range));
839 }
840
841 output.push_str("`````\n\n");
842 }
843
844 Ok(SectionLabels {
845 // TODO: Clean this up
846 excerpt_index: match self.request.prompt_format {
847 PromptFormat::OnlySnippets => 0,
848 _ => excerpt_index.context("bug: no snippet found for excerpt")?,
849 },
850 section_ranges,
851 })
852 }
853}
854
855fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
856 declaration_score(declaration, style) / declaration_size(declaration, style) as f32
857}
858
859fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
860 match style {
861 DeclarationStyle::Signature => declaration.signature_score,
862 DeclarationStyle::Declaration => declaration.declaration_score,
863 }
864}
865
866fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
867 match style {
868 DeclarationStyle::Signature => declaration.signature_range.len(),
869 DeclarationStyle::Declaration => declaration.text.len(),
870 }
871}