cloud_zeta2_prompt.rs

  1//! Zeta2 prompt planning and generation code shared with cloud.
  2pub mod retrieval_prompt;
  3
  4use anyhow::{Context as _, Result, anyhow};
  5use cloud_llm_client::predict_edits_v3::{
  6    self, DiffPathFmt, Excerpt, Line, Point, PromptFormat, ReferencedDeclaration,
  7};
  8use indoc::indoc;
  9use ordered_float::OrderedFloat;
 10use rustc_hash::{FxHashMap, FxHashSet};
 11use serde::Serialize;
 12use std::cmp;
 13use std::fmt::Write;
 14use std::sync::Arc;
 15use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
 16use strum::{EnumIter, IntoEnumIterator};
 17
/// Prompt byte budget applied when a request does not set `prompt_max_bytes`.
  18pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;
  19
/// Marker text inserted into the prompt at the user's cursor position.
  20pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  21/// NOTE: Differs from zed version of constant - includes a newline
  22pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
  23/// NOTE: Differs from zed version of constant - includes a newline
  24pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
 25
  26// TODO: use constants for markers?
/// Instruction preamble that `build_prompt` uses for `PromptFormat::MarkedExcerpt`.
/// It ends with the "## Edit History" heading, under which the edit history is appended.
  27const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
  28    You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
  29
  30    The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>.  Please respond with edited code for that region.
  31
  32    Other code is provided for context, and `…` indicates when code has been skipped.
  33
  34    ## Edit History
  35
  36"};
 37
/// Instruction preamble that `build_prompt` uses for `PromptFormat::LabeledSections`.
/// It ends with the "## Edit History" heading, under which the edit history is appended.
  38const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
  39    You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.
  40
  41    Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).
  42
  43    The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.
  44
  45    Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:
  46
  47    <|current_section|>
  48    for i in 0..16 {
  49        println!("{i}");
  50    }
  51
  52    ## Edit History
  53
  54"#};
 55
/// Instruction preamble that `build_prompt` uses for `PromptFormat::NumLinesUniDiff`.
/// It asks for a unified diff and ends with the "## Edit History" heading.
  56const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
  57    # Instructions
  58
  59    You are an edit prediction agent in a code editor.
  60    Your job is to predict the next edit that the user will make,
  61    based on their last few edits and their current cursor location.
  62
  63    ## Output Format
  64
  65    You must briefly explain your understanding of the user's goal, in one
  66    or two sentences, and then specify their next edit in the form of a
  67    unified diff, like this:
  68
  69    ```
  70    --- a/src/myapp/cli.py
  71    +++ b/src/myapp/cli.py
  72    @@ ... @@
  73     import os
  74     import time
  75     import sys
  76    +from constants import LOG_LEVEL_WARNING
  77    @@ ... @@
  78     config.headless()
  79     config.set_interactive(false)
  80    -config.set_log_level(LOG_L)
  81    +config.set_log_level(LOG_LEVEL_WARNING)
  82     config.set_use_color(True)
  83    ```
  84
  85    ## Edit History
  86
  87"#};
 88
/// Instruction preamble that `build_prompt` uses for `PromptFormat::Minimal`.
/// It ends with the "## Edit History" heading, under which the edit history is appended.
  89const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
  90    You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.
  91
  92    ## Edit History
  93
  94    "#};
 95
/// Closing reminder that `build_prompt` appends after the code excerpts for
/// `PromptFormat::NumLinesUniDiff`.
  96const UNIFIED_DIFF_REMINDER: &str = indoc! {"
  97    ---
  98
  99    Analyze the edit history and the files, then provide the unified diff for your predicted edits.
 100    Do not include the cursor marker in your output.
 101    Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
 102    Do not include line numbers in the hunk headers, use `@@ ... @@`.
 103    Removed lines begin with `-`.
 104    Added lines begin with `+`.
 105    Context lines begin with an extra space.
 106    Context and removed lines are used to match the target edit location, so make sure to include enough of them
 107    to uniquely identify it amongst all excerpts of code provided.
 108"};
109
/// Closing reminder that `build_prompt` appends after the code excerpts for
/// `PromptFormat::Minimal`.
 110const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
 111    ---
 112
 113    Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
 114    Do not include the cursor marker in your output.
 115    If you're editing multiple files, be sure to reflect filename in the hunk's header.
 116    "};
117
118const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
119    # Instructions
120
121    You are an edit prediction agent in a code editor.
122
123    Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
124    Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
125    Always continue along the user's current trajectory, rather than changing course.
126
127    ## Output Format
128
129    You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
130    along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
131
132    <edits path="my-project/src/myapp/cli.py">
133    <old_text>
134    OLD TEXT 1 HERE
135    </old_text>
136    <new_text>
137    NEW TEXT 1 HERE
138    </new_text>
139
140    <old_text>
141    OLD TEXT 1 HERE
142    </old_text>
143    <new_text>
144    NEW TEXT 1 HERE
145    </new_text>
146    </edits>
147
148    - Specify the file to edit using the `path` attribute.
149    - Use `<old_text>` and `<new_text>` tags to replace content
150    - `<old_text>` must exactly match existing file content, including indentation
151    - `<old_text>` cannot be empty
152    - Do not escape quotes, newlines, or other characters within tags
153    - Always close all tags properly
154    - Don't include the <|user_cursor|> marker in your output.
155
156    ## Edit History
157
158"#};
159
160const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
161    ---
162
163    Remember that the edits in the edit history have already been applied.
164"#};
165
166pub fn build_prompt(
167    request: &predict_edits_v3::PredictEditsRequest,
168) -> Result<(String, SectionLabels)> {
169    let mut insertions = match request.prompt_format {
170        PromptFormat::MarkedExcerpt => vec![
171            (
172                Point {
173                    line: request.excerpt_line_range.start,
174                    column: 0,
175                },
176                EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
177            ),
178            (request.cursor_point, CURSOR_MARKER),
179            (
180                Point {
181                    line: request.excerpt_line_range.end,
182                    column: 0,
183                },
184                EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
185            ),
186        ],
187        PromptFormat::LabeledSections
188        | PromptFormat::NumLinesUniDiff
189        | PromptFormat::Minimal
190        | PromptFormat::OldTextNewText => {
191            vec![(request.cursor_point, CURSOR_MARKER)]
192        }
193        PromptFormat::OnlySnippets => vec![],
194    };
195
196    let mut prompt = match request.prompt_format {
197        PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
198        PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
199        PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
200        PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
201        PromptFormat::OnlySnippets => String::new(),
202        PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
203    };
204
205    if request.events.is_empty() {
206        prompt.push_str("(No edit history)\n\n");
207    } else {
208        let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
209            "The following are the latest edits made by the user, from earlier to later.\n\n"
210        } else {
211            "Here are the latest edits made by the user, from earlier to later.\n\n"
212        };
213        prompt.push_str(edit_preamble);
214        push_events(&mut prompt, &request.events);
215    }
216
217    let excerpts_preamble = match request.prompt_format {
218        PromptFormat::Minimal => indoc! {"
219             ## Part of the file under the cursor
220
221             (The cursor marker <|user_cursor|> indicates the current user cursor position.
222             The file is in current state, edits from edit history has been applied.
223             We only show part of the file around the cursor.
224             You can only edit exactly this part of the file.
225             We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
226             "},
227        PromptFormat::NumLinesUniDiff | PromptFormat::OldTextNewText => indoc! {"
228            ## Code Excerpts
229
230            Here is some excerpts of code that you should take into account to predict the next edit.
231
232            The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
233
234            In addition other excerpts are included to better understand what the edit will be, including the declaration
235            or references of symbols around the cursor, or other similar code snippets that may need to be updated
236            following patterns that appear in the edit history.
237
238            Consider each of them carefully in relation to the edit history, and that the user may not have navigated
239            to the next place they want to edit yet.
240
241            Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
242        "},
243        _ => indoc! {"
244            ## Code Excerpts
245
246            The cursor marker <|user_cursor|> indicates the current user cursor position.
247            The file is in current state, edits from edit history have been applied.
248        "},
249    };
250
251    prompt.push_str(excerpts_preamble);
252    prompt.push('\n');
253
254    let mut section_labels = Default::default();
255
256    if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
257        let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
258        section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
259    } else {
260        if request.prompt_format == PromptFormat::LabeledSections {
261            anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
262        }
263
264        let include_line_numbers = matches!(
265            request.prompt_format,
266            PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
267        );
268        for related_file in &request.included_files {
269            if request.prompt_format == PromptFormat::Minimal {
270                write_codeblock_with_filename(
271                    &related_file.path,
272                    &related_file.excerpts,
273                    if related_file.path == request.excerpt_path {
274                        &insertions
275                    } else {
276                        &[]
277                    },
278                    related_file.max_row,
279                    include_line_numbers,
280                    &mut prompt,
281                );
282            } else {
283                write_codeblock(
284                    &related_file.path,
285                    &related_file.excerpts,
286                    if related_file.path == request.excerpt_path {
287                        &insertions
288                    } else {
289                        &[]
290                    },
291                    related_file.max_row,
292                    include_line_numbers,
293                    &mut prompt,
294                );
295            }
296        }
297    }
298
299    match request.prompt_format {
300        PromptFormat::NumLinesUniDiff => {
301            prompt.push_str(UNIFIED_DIFF_REMINDER);
302        }
303        PromptFormat::OldTextNewText => {
304            prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
305        }
306        PromptFormat::Minimal => {
307            prompt.push_str(MINIMAL_PROMPT_REMINDER);
308        }
309        _ => {}
310    }
311
312    Ok((prompt, section_labels))
313}
314
315pub fn write_codeblock<'a>(
316    path: &Path,
317    excerpts: impl IntoIterator<Item = &'a Excerpt>,
318    sorted_insertions: &[(Point, &str)],
319    file_line_count: Line,
320    include_line_numbers: bool,
321    output: &'a mut String,
322) {
323    writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
324
325    write_excerpts(
326        excerpts,
327        sorted_insertions,
328        file_line_count,
329        include_line_numbers,
330        output,
331    );
332    write!(output, "`````\n\n").unwrap();
333}
334
335fn write_codeblock_with_filename<'a>(
336    path: &Path,
337    excerpts: impl IntoIterator<Item = &'a Excerpt>,
338    sorted_insertions: &[(Point, &str)],
339    file_line_count: Line,
340    include_line_numbers: bool,
341    output: &'a mut String,
342) {
343    writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
344
345    write_excerpts(
346        excerpts,
347        sorted_insertions,
348        file_line_count,
349        include_line_numbers,
350        output,
351    );
352    write!(output, "`````\n\n").unwrap();
353}
354
355pub fn write_excerpts<'a>(
356    excerpts: impl IntoIterator<Item = &'a Excerpt>,
357    sorted_insertions: &[(Point, &str)],
358    file_line_count: Line,
359    include_line_numbers: bool,
360    output: &mut String,
361) {
362    let mut current_row = Line(0);
363    let mut sorted_insertions = sorted_insertions.iter().peekable();
364
365    for excerpt in excerpts {
366        if excerpt.start_line > current_row {
367            writeln!(output, "").unwrap();
368        }
369        if excerpt.text.is_empty() {
370            return;
371        }
372
373        current_row = excerpt.start_line;
374
375        for mut line in excerpt.text.lines() {
376            if include_line_numbers {
377                write!(output, "{}|", current_row.0 + 1).unwrap();
378            }
379
380            while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
381                match current_row.cmp(&insertion_location.line) {
382                    cmp::Ordering::Equal => {
383                        let (prefix, suffix) = line.split_at(insertion_location.column as usize);
384                        output.push_str(prefix);
385                        output.push_str(insertion_marker);
386                        line = suffix;
387                        sorted_insertions.next();
388                    }
389                    cmp::Ordering::Less => break,
390                    cmp::Ordering::Greater => {
391                        sorted_insertions.next();
392                        break;
393                    }
394                }
395            }
396            output.push_str(line);
397            output.push('\n');
398            current_row.0 += 1;
399        }
400    }
401
402    if current_row < file_line_count {
403        writeln!(output, "").unwrap();
404    }
405}
406
407pub fn push_events(output: &mut String, events: &[predict_edits_v3::Event]) {
408    if events.is_empty() {
409        return;
410    };
411
412    writeln!(output, "`````diff").unwrap();
413    for event in events {
414        writeln!(output, "{}", event).unwrap();
415    }
416    writeln!(output, "`````\n").unwrap();
417}
418
419pub struct SyntaxBasedPrompt<'a> {
420    request: &'a predict_edits_v3::PredictEditsRequest,
421    /// Snippets to include in the prompt. These may overlap - they are merged / deduplicated in
422    /// `to_prompt_string`.
423    snippets: Vec<PlannedSnippet<'a>>,
424    budget_used: usize,
425}
426
427#[derive(Clone, Debug)]
428pub struct PlannedSnippet<'a> {
429    path: Arc<Path>,
430    range: Range<Line>,
431    text: &'a str,
432    // TODO: Indicate this in the output
433    #[allow(dead_code)]
434    text_is_truncated: bool,
435}
436
437#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
438pub enum DeclarationStyle {
439    Signature,
440    Declaration,
441}
442
443#[derive(Default, Clone, Debug, Serialize)]
444pub struct SectionLabels {
445    pub excerpt_index: usize,
446    pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
447}
448
449impl<'a> SyntaxBasedPrompt<'a> {
450    /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
451    ///
452    /// Initializes a priority queue by populating it with each snippet, finding the
453    /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
454    /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
455    /// the cost of upgrade.
456    ///
457    /// TODO: Implement an early halting condition. One option might be to have another priority
458    /// queue where the score is the size, and update it accordingly. Another option might be to
459    /// have some simpler heuristic like bailing after N failed insertions, or based on how much
460    /// budget is left.
461    ///
462    /// TODO: Has the current known sources of imprecision:
463    ///
464    /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
465    /// plan even though the containing struct is already included.
466    ///
467    /// * Does not consider cost of signatures when ranking snippets - this is tricky since
468    /// signatures may be shared by multiple snippets.
469    ///
470    /// * Does not include file paths / other text when considering max_bytes.
471    pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
472        let mut this = Self {
473            request,
474            snippets: Vec::new(),
475            budget_used: request.excerpt.len(),
476        };
477        let mut included_parents = FxHashSet::default();
478        let additional_parents = this.additional_parent_signatures(
479            &request.excerpt_path,
480            request.excerpt_parent,
481            &included_parents,
482        )?;
483        this.add_parents(&mut included_parents, additional_parents);
484
485        let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
486
487        if this.budget_used > max_bytes {
488            return Err(anyhow!(
489                "Excerpt + signatures size of {} already exceeds budget of {}",
490                this.budget_used,
491                max_bytes
492            ));
493        }
494
495        #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
496        struct QueueEntry {
497            score_density: OrderedFloat<f32>,
498            declaration_index: usize,
499            style: DeclarationStyle,
500        }
501
502        // Initialize priority queue with the best score for each snippet.
503        let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
504        for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
505            let (style, score_density) = DeclarationStyle::iter()
506                .map(|style| {
507                    (
508                        style,
509                        OrderedFloat(declaration_score_density(&declaration, style)),
510                    )
511                })
512                .max_by_key(|(_, score_density)| *score_density)
513                .unwrap();
514            queue.push(QueueEntry {
515                score_density,
516                declaration_index,
517                style,
518            });
519        }
520
521        // Knapsack selection loop
522        while let Some(queue_entry) = queue.pop() {
523            let Some(declaration) = request
524                .referenced_declarations
525                .get(queue_entry.declaration_index)
526            else {
527                return Err(anyhow!(
528                    "Invalid declaration index {}",
529                    queue_entry.declaration_index
530                ));
531            };
532
533            let mut additional_bytes = declaration_size(declaration, queue_entry.style);
534            if this.budget_used + additional_bytes > max_bytes {
535                continue;
536            }
537
538            let additional_parents = this.additional_parent_signatures(
539                &declaration.path,
540                declaration.parent_index,
541                &mut included_parents,
542            )?;
543            additional_bytes += additional_parents
544                .iter()
545                .map(|(_, snippet)| snippet.text.len())
546                .sum::<usize>();
547            if this.budget_used + additional_bytes > max_bytes {
548                continue;
549            }
550
551            this.budget_used += additional_bytes;
552            this.add_parents(&mut included_parents, additional_parents);
553            let planned_snippet = match queue_entry.style {
554                DeclarationStyle::Signature => {
555                    let Some(text) = declaration.text.get(declaration.signature_range.clone())
556                    else {
557                        return Err(anyhow!(
558                            "Invalid declaration signature_range {:?} with text.len() = {}",
559                            declaration.signature_range,
560                            declaration.text.len()
561                        ));
562                    };
563                    let signature_start_line = declaration.range.start
564                        + Line(
565                            declaration.text[..declaration.signature_range.start]
566                                .lines()
567                                .count() as u32,
568                        );
569                    let signature_end_line = signature_start_line
570                        + Line(
571                            declaration.text
572                                [declaration.signature_range.start..declaration.signature_range.end]
573                                .lines()
574                                .count() as u32,
575                        );
576                    let range = signature_start_line..signature_end_line;
577
578                    PlannedSnippet {
579                        path: declaration.path.clone(),
580                        range,
581                        text,
582                        text_is_truncated: declaration.text_is_truncated,
583                    }
584                }
585                DeclarationStyle::Declaration => PlannedSnippet {
586                    path: declaration.path.clone(),
587                    range: declaration.range.clone(),
588                    text: &declaration.text,
589                    text_is_truncated: declaration.text_is_truncated,
590                },
591            };
592            this.snippets.push(planned_snippet);
593
594            // When a Signature is consumed, insert an entry for Definition style.
595            if queue_entry.style == DeclarationStyle::Signature {
596                let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
597                let declaration_size =
598                    declaration_size(&declaration, DeclarationStyle::Declaration);
599                let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
600                let declaration_score =
601                    declaration_score(&declaration, DeclarationStyle::Declaration);
602
603                let score_diff = declaration_score - signature_score;
604                let size_diff = declaration_size.saturating_sub(signature_size);
605                if score_diff > 0.0001 && size_diff > 0 {
606                    queue.push(QueueEntry {
607                        declaration_index: queue_entry.declaration_index,
608                        score_density: OrderedFloat(score_diff / (size_diff as f32)),
609                        style: DeclarationStyle::Declaration,
610                    });
611                }
612            }
613        }
614
615        anyhow::Ok(this)
616    }
617
618    fn add_parents(
619        &mut self,
620        included_parents: &mut FxHashSet<usize>,
621        snippets: Vec<(usize, PlannedSnippet<'a>)>,
622    ) {
623        for (parent_index, snippet) in snippets {
624            included_parents.insert(parent_index);
625            self.budget_used += snippet.text.len();
626            self.snippets.push(snippet);
627        }
628    }
629
630    fn additional_parent_signatures(
631        &self,
632        path: &Arc<Path>,
633        parent_index: Option<usize>,
634        included_parents: &FxHashSet<usize>,
635    ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
636        let mut results = Vec::new();
637        self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
638        Ok(results)
639    }
640
641    fn additional_parent_signatures_impl(
642        &self,
643        path: &Arc<Path>,
644        parent_index: Option<usize>,
645        included_parents: &FxHashSet<usize>,
646        results: &mut Vec<(usize, PlannedSnippet<'a>)>,
647    ) -> Result<()> {
648        let Some(parent_index) = parent_index else {
649            return Ok(());
650        };
651        if included_parents.contains(&parent_index) {
652            return Ok(());
653        }
654        let Some(parent_signature) = self.request.signatures.get(parent_index) else {
655            return Err(anyhow!("Invalid parent index {}", parent_index));
656        };
657        results.push((
658            parent_index,
659            PlannedSnippet {
660                path: path.clone(),
661                range: parent_signature.range.clone(),
662                text: &parent_signature.text,
663                text_is_truncated: parent_signature.text_is_truncated,
664            },
665        ));
666        self.additional_parent_signatures_impl(
667            path,
668            parent_signature.parent_index,
669            included_parents,
670            results,
671        )
672    }
673
674    /// Renders the planned context. Each file starts with "```FILE_PATH\n` and ends with triple
675    /// backticks, with a newline after each file. Outputs a line with "..." between nonconsecutive
676    /// chunks.
677    pub fn write(
678        &'a self,
679        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
680        prompt: &mut String,
681    ) -> Result<SectionLabels> {
682        let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
683            FxHashMap::default();
684        for snippet in &self.snippets {
685            file_to_snippets
686                .entry(&snippet.path)
687                .or_default()
688                .push(snippet);
689        }
690
691        // Reorder so that file with cursor comes last
692        let mut file_snippets = Vec::new();
693        let mut excerpt_file_snippets = Vec::new();
694        for (file_path, snippets) in file_to_snippets {
695            if file_path == self.request.excerpt_path.as_ref() {
696                excerpt_file_snippets = snippets;
697            } else {
698                file_snippets.push((file_path, snippets, false));
699            }
700        }
701        let excerpt_snippet = PlannedSnippet {
702            path: self.request.excerpt_path.clone(),
703            range: self.request.excerpt_line_range.clone(),
704            text: &self.request.excerpt,
705            text_is_truncated: false,
706        };
707        excerpt_file_snippets.push(&excerpt_snippet);
708        file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));
709
710        let section_labels =
711            self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;
712
713        Ok(section_labels)
714    }
715
716    fn push_file_snippets(
717        &self,
718        output: &mut String,
719        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
720        file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
721    ) -> Result<SectionLabels> {
722        let mut section_ranges = Vec::new();
723        let mut excerpt_index = None;
724
725        for (file_path, mut snippets, is_excerpt_file) in file_snippets {
726            snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));
727
728            // TODO: What if the snippets get expanded too large to be editable?
729            let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
730            let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
731            for snippet in snippets {
732                if let Some((_, current_snippet_range)) = current_snippet.as_mut()
733                    && snippet.range.start <= current_snippet_range.end
734                {
735                    current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
736                    continue;
737                }
738                if let Some(current_snippet) = current_snippet.take() {
739                    disjoint_snippets.push(current_snippet);
740                }
741                current_snippet = Some((snippet, snippet.range.clone()));
742            }
743            if let Some(current_snippet) = current_snippet.take() {
744                disjoint_snippets.push(current_snippet);
745            }
746
747            writeln!(output, "`````path={}", file_path.display()).ok();
748            let mut skipped_last_snippet = false;
749            for (snippet, range) in disjoint_snippets {
750                let section_index = section_ranges.len();
751
752                match self.request.prompt_format {
753                    PromptFormat::MarkedExcerpt
754                    | PromptFormat::OnlySnippets
755                    | PromptFormat::OldTextNewText
756                    | PromptFormat::Minimal
757                    | PromptFormat::NumLinesUniDiff => {
758                        if range.start.0 > 0 && !skipped_last_snippet {
759                            output.push_str("\n");
760                        }
761                    }
762                    PromptFormat::LabeledSections => {
763                        if is_excerpt_file
764                            && range.start <= self.request.excerpt_line_range.start
765                            && range.end >= self.request.excerpt_line_range.end
766                        {
767                            writeln!(output, "<|current_section|>").ok();
768                        } else {
769                            writeln!(output, "<|section_{}|>", section_index).ok();
770                        }
771                    }
772                }
773
774                let push_full_snippet = |output: &mut String| {
775                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
776                        for (i, line) in snippet.text.lines().enumerate() {
777                            writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
778                        }
779                    } else {
780                        output.push_str(&snippet.text);
781                    }
782                    anyhow::Ok(())
783                };
784
785                if is_excerpt_file {
786                    if self.request.prompt_format == PromptFormat::OnlySnippets {
787                        if range.start >= self.request.excerpt_line_range.start
788                            && range.end <= self.request.excerpt_line_range.end
789                        {
790                            skipped_last_snippet = true;
791                        } else {
792                            skipped_last_snippet = false;
793                            output.push_str(snippet.text);
794                        }
795                    } else if !excerpt_file_insertions.is_empty() {
796                        let lines = snippet.text.lines().collect::<Vec<_>>();
797                        let push_line = |output: &mut String, line_ix: usize| {
798                            if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
799                                write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
800                            }
801                            anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
802                        };
803                        let mut last_line_ix = 0;
804                        let mut insertion_ix = 0;
805                        while insertion_ix < excerpt_file_insertions.len() {
806                            let (point, insertion) = &excerpt_file_insertions[insertion_ix];
807                            let found = point.line >= range.start && point.line <= range.end;
808                            if found {
809                                excerpt_index = Some(section_index);
810                                let insertion_line_ix = (point.line.0 - range.start.0) as usize;
811                                for line_ix in last_line_ix..insertion_line_ix {
812                                    push_line(output, line_ix)?;
813                                }
814                                if let Some(next_line) = lines.get(insertion_line_ix) {
815                                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
816                                        write!(
817                                            output,
818                                            "{}|",
819                                            insertion_line_ix as u32 + range.start.0 + 1
820                                        )?
821                                    }
822                                    output.push_str(&next_line[..point.column as usize]);
823                                    output.push_str(insertion);
824                                    writeln!(output, "{}", &next_line[point.column as usize..])?;
825                                } else {
826                                    writeln!(output, "{}", insertion)?;
827                                }
828                                last_line_ix = insertion_line_ix + 1;
829                                excerpt_file_insertions.remove(insertion_ix);
830                                continue;
831                            }
832                            insertion_ix += 1;
833                        }
834                        skipped_last_snippet = false;
835                        for line_ix in last_line_ix..lines.len() {
836                            push_line(output, line_ix)?;
837                        }
838                    } else {
839                        skipped_last_snippet = false;
840                        push_full_snippet(output)?;
841                    }
842                } else {
843                    skipped_last_snippet = false;
844                    push_full_snippet(output)?;
845                }
846
847                section_ranges.push((snippet.path.clone(), range));
848            }
849
850            output.push_str("`````\n\n");
851        }
852
853        Ok(SectionLabels {
854            // TODO: Clean this up
855            excerpt_index: match self.request.prompt_format {
856                PromptFormat::OnlySnippets => 0,
857                _ => excerpt_index.context("bug: no snippet found for excerpt")?,
858            },
859            section_ranges,
860        })
861    }
862}
863
864fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
865    declaration_score(declaration, style) / declaration_size(declaration, style) as f32
866}
867
868fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
869    match style {
870        DeclarationStyle::Signature => declaration.signature_score,
871        DeclarationStyle::Declaration => declaration.declaration_score,
872    }
873}
874
875fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
876    match style {
877        DeclarationStyle::Signature => declaration.signature_range.len(),
878        DeclarationStyle::Declaration => declaration.text.len(),
879    }
880}