cloud_zeta2_prompt.rs

  1//! Zeta2 prompt planning and generation code shared with cloud.
  2pub mod retrieval_prompt;
  3
  4use anyhow::{Context as _, Result, anyhow};
  5use cloud_llm_client::predict_edits_v3::{
  6    self, DiffPathFmt, Event, Excerpt, IncludedFile, Line, Point, PromptFormat,
  7    ReferencedDeclaration,
  8};
  9use indoc::indoc;
 10use ordered_float::OrderedFloat;
 11use rustc_hash::{FxHashMap, FxHashSet};
 12use serde::Serialize;
 13use std::cmp;
 14use std::fmt::Write;
 15use std::sync::Arc;
 16use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
 17use strum::{EnumIter, IntoEnumIterator};
 18
/// Byte budget used by `SyntaxBasedPrompt::populate` when the request does not specify
/// `prompt_max_bytes`.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker inserted into the excerpt text at the request's `cursor_point`.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Marker inserted at column 0 of the first line of the editable excerpt for
/// `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// Marker inserted at column 0 of the line at the end of the editable excerpt's line range for
/// `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
 26
// TODO: use constants for markers?
/// Prompt header used by `build_prompt` for `PromptFormat::MarkedExcerpt`.
///
/// Ends with the `## Edit History` heading so that the edit history (or the "no edit history"
/// placeholder) can be appended directly after it.
const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
    You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.

    The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>.  Please respond with edited code for that region.

    Other code is provided for context, and `…` indicates when code has been skipped.

    ## Edit History

"};
 38
/// Prompt header used by `build_prompt` for `PromptFormat::LabeledSections`.
///
/// Describes the `<|section_N|>` / `<|current_section|>` labels and ends with the
/// `## Edit History` heading so the edit history can be appended directly after it.
const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.

    Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).

    The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.

    Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:

    <|current_section|>
    for i in 0..16 {
        println!("{i}");
    }

    ## Edit History

"#};
 56
/// Prompt header used by `build_prompt` for `PromptFormat::NumLinesUniDiff`.
///
/// Asks for a short explanation followed by a unified diff with `@@ ... @@` hunk headers, and
/// ends with the `## Edit History` heading so the edit history can be appended directly.
const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
    # Instructions

    You are an edit prediction agent in a code editor.
    Your job is to predict the next edit that the user will make,
    based on their last few edits and their current cursor location.

    ## Output Format

    You must briefly explain your understanding of the user's goal, in one
    or two sentences, and then specify their next edit in the form of a
    unified diff, like this:

    ```
    --- a/src/myapp/cli.py
    +++ b/src/myapp/cli.py
    @@ ... @@
     import os
     import time
     import sys
    +from constants import LOG_LEVEL_WARNING
    @@ ... @@
     config.headless()
     config.set_interactive(false)
    -config.set_log_level(LOG_L)
    +config.set_log_level(LOG_LEVEL_WARNING)
     config.set_use_color(True)
    ```

    ## Edit History

"#};
 89
/// Prompt header used by `build_prompt` for `PromptFormat::Minimal`.
///
/// Ends with the `## Edit History` heading so the edit history can be appended directly.
const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.

    ## Edit History

    "#};
 96
/// Trailer appended by `build_prompt` after the code excerpts for
/// `PromptFormat::NumLinesUniDiff`; restates the unified-diff output rules.
const UNIFIED_DIFF_REMINDER: &str = indoc! {"
    ---

    Analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
    Do not include line numbers in the hunk headers, use `@@ ... @@`.
    Removed lines begin with `-`.
    Added lines begin with `+`.
    Context lines begin with an extra space.
    Context and removed lines are used to match the target edit location, so make sure to include enough of them
    to uniquely identify it amongst all excerpts of code provided.
"};
110
/// Trailer appended by `build_prompt` after the code excerpts for `PromptFormat::Minimal`.
const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
    ---

    Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    If you're editing multiple files, be sure to reflect filename in the hunk's header.
    "};
118
119const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
120    # Instructions
121
122    You are an edit prediction agent in a code editor.
123
124    Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
125    Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
126    Always continue along the user's current trajectory, rather than changing course.
127
128    ## Output Format
129
130    You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
131    along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
132
133    <edits path="my-project/src/myapp/cli.py">
134    <old_text>
135    OLD TEXT 1 HERE
136    </old_text>
137    <new_text>
138    NEW TEXT 1 HERE
139    </new_text>
140
141    <old_text>
142    OLD TEXT 1 HERE
143    </old_text>
144    <new_text>
145    NEW TEXT 1 HERE
146    </new_text>
147    </edits>
148
149    - Specify the file to edit using the `path` attribute.
150    - Use `<old_text>` and `<new_text>` tags to replace content
151    - `<old_text>` must exactly match existing file content, including indentation
152    - `<old_text>` cannot be empty
153    - Do not escape quotes, newlines, or other characters within tags
154    - Always close all tags properly
155    - Don't include the <|user_cursor|> marker in your output.
156
157    ## Edit History
158
159"#};
160
161const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
162    ---
163
164    Remember that the edits in the edit history have already been applied.
165"#};
166
167pub fn build_prompt(
168    request: &predict_edits_v3::PredictEditsRequest,
169) -> Result<(String, SectionLabels)> {
170    let mut section_labels = Default::default();
171
172    match request.prompt_format {
173        PromptFormat::MinimalQwen => {
174            let prompt = MinimalQwenPrompt {
175                events: request.events.clone(),
176                cursor_point: request.cursor_point,
177                cursor_path: request.excerpt_path.clone(),
178                included_files: request.included_files.clone(),
179            };
180            return Ok((prompt.render(), section_labels));
181        }
182        _ => (),
183    };
184
185    let mut insertions = match request.prompt_format {
186        PromptFormat::MarkedExcerpt => vec![
187            (
188                Point {
189                    line: request.excerpt_line_range.start,
190                    column: 0,
191                },
192                EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
193            ),
194            (request.cursor_point, CURSOR_MARKER),
195            (
196                Point {
197                    line: request.excerpt_line_range.end,
198                    column: 0,
199                },
200                EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
201            ),
202        ],
203        PromptFormat::LabeledSections
204        | PromptFormat::NumLinesUniDiff
205        | PromptFormat::Minimal
206        | PromptFormat::OldTextNewText => {
207            vec![(request.cursor_point, CURSOR_MARKER)]
208        }
209        PromptFormat::OnlySnippets => vec![],
210        PromptFormat::MinimalQwen => unreachable!(),
211    };
212
213    let mut prompt = match request.prompt_format {
214        PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
215        PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
216        PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
217        PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
218        PromptFormat::OnlySnippets => String::new(),
219        PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
220        PromptFormat::MinimalQwen => unreachable!(),
221    };
222
223    if request.events.is_empty() {
224        prompt.push_str("(No edit history)\n\n");
225    } else {
226        let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
227            "The following are the latest edits made by the user, from earlier to later.\n\n"
228        } else {
229            "Here are the latest edits made by the user, from earlier to later.\n\n"
230        };
231        prompt.push_str(edit_preamble);
232        push_events(&mut prompt, &request.events);
233    }
234
235    let excerpts_preamble = match request.prompt_format {
236        PromptFormat::Minimal => indoc! {"
237             ## Part of the file under the cursor
238
239             (The cursor marker <|user_cursor|> indicates the current user cursor position.
240             The file is in current state, edits from edit history has been applied.
241             We only show part of the file around the cursor.
242             You can only edit exactly this part of the file.
243             We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
244             "},
245        PromptFormat::NumLinesUniDiff | PromptFormat::OldTextNewText => indoc! {"
246            ## Code Excerpts
247
248            Here is some excerpts of code that you should take into account to predict the next edit.
249
250            The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
251
252            In addition other excerpts are included to better understand what the edit will be, including the declaration
253            or references of symbols around the cursor, or other similar code snippets that may need to be updated
254            following patterns that appear in the edit history.
255
256            Consider each of them carefully in relation to the edit history, and that the user may not have navigated
257            to the next place they want to edit yet.
258
259            Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
260        "},
261        _ => indoc! {"
262            ## Code Excerpts
263
264            The cursor marker <|user_cursor|> indicates the current user cursor position.
265            The file is in current state, edits from edit history have been applied.
266        "},
267    };
268
269    prompt.push_str(excerpts_preamble);
270    prompt.push('\n');
271
272    if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
273        let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
274        section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
275    } else {
276        if request.prompt_format == PromptFormat::LabeledSections {
277            anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
278        }
279
280        let include_line_numbers = matches!(
281            request.prompt_format,
282            PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
283        );
284        for related_file in &request.included_files {
285            if request.prompt_format == PromptFormat::Minimal {
286                write_codeblock_with_filename(
287                    &related_file.path,
288                    &related_file.excerpts,
289                    if related_file.path == request.excerpt_path {
290                        &insertions
291                    } else {
292                        &[]
293                    },
294                    related_file.max_row,
295                    include_line_numbers,
296                    &mut prompt,
297                );
298            } else {
299                write_codeblock(
300                    &related_file.path,
301                    &related_file.excerpts,
302                    if related_file.path == request.excerpt_path {
303                        &insertions
304                    } else {
305                        &[]
306                    },
307                    related_file.max_row,
308                    include_line_numbers,
309                    &mut prompt,
310                );
311            }
312        }
313    }
314
315    match request.prompt_format {
316        PromptFormat::NumLinesUniDiff => {
317            prompt.push_str(UNIFIED_DIFF_REMINDER);
318        }
319        PromptFormat::OldTextNewText => {
320            prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
321        }
322        PromptFormat::Minimal => {
323            prompt.push_str(MINIMAL_PROMPT_REMINDER);
324        }
325        _ => {}
326    }
327
328    Ok((prompt, section_labels))
329}
330
331pub fn write_codeblock<'a>(
332    path: &Path,
333    excerpts: impl IntoIterator<Item = &'a Excerpt>,
334    sorted_insertions: &[(Point, &str)],
335    file_line_count: Line,
336    include_line_numbers: bool,
337    output: &'a mut String,
338) {
339    writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
340
341    write_excerpts(
342        excerpts,
343        sorted_insertions,
344        file_line_count,
345        include_line_numbers,
346        output,
347    );
348    write!(output, "`````\n\n").unwrap();
349}
350
351fn write_codeblock_with_filename<'a>(
352    path: &Path,
353    excerpts: impl IntoIterator<Item = &'a Excerpt>,
354    sorted_insertions: &[(Point, &str)],
355    file_line_count: Line,
356    include_line_numbers: bool,
357    output: &'a mut String,
358) {
359    writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
360
361    write_excerpts(
362        excerpts,
363        sorted_insertions,
364        file_line_count,
365        include_line_numbers,
366        output,
367    );
368    write!(output, "`````\n\n").unwrap();
369}
370
371pub fn write_excerpts<'a>(
372    excerpts: impl IntoIterator<Item = &'a Excerpt>,
373    sorted_insertions: &[(Point, &str)],
374    file_line_count: Line,
375    include_line_numbers: bool,
376    output: &mut String,
377) {
378    let mut current_row = Line(0);
379    let mut sorted_insertions = sorted_insertions.iter().peekable();
380
381    for excerpt in excerpts {
382        if excerpt.start_line > current_row {
383            writeln!(output, "").unwrap();
384        }
385        if excerpt.text.is_empty() {
386            return;
387        }
388
389        current_row = excerpt.start_line;
390
391        for mut line in excerpt.text.lines() {
392            if include_line_numbers {
393                write!(output, "{}|", current_row.0 + 1).unwrap();
394            }
395
396            while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
397                match current_row.cmp(&insertion_location.line) {
398                    cmp::Ordering::Equal => {
399                        let (prefix, suffix) = line.split_at(insertion_location.column as usize);
400                        output.push_str(prefix);
401                        output.push_str(insertion_marker);
402                        line = suffix;
403                        sorted_insertions.next();
404                    }
405                    cmp::Ordering::Less => break,
406                    cmp::Ordering::Greater => {
407                        sorted_insertions.next();
408                        break;
409                    }
410                }
411            }
412            output.push_str(line);
413            output.push('\n');
414            current_row.0 += 1;
415        }
416    }
417
418    if current_row < file_line_count {
419        writeln!(output, "").unwrap();
420    }
421}
422
423pub fn push_events(output: &mut String, events: &[predict_edits_v3::Event]) {
424    if events.is_empty() {
425        return;
426    };
427
428    writeln!(output, "`````diff").unwrap();
429    for event in events {
430        writeln!(output, "{}", event).unwrap();
431    }
432    writeln!(output, "`````\n").unwrap();
433}
434
435pub struct SyntaxBasedPrompt<'a> {
436    request: &'a predict_edits_v3::PredictEditsRequest,
437    /// Snippets to include in the prompt. These may overlap - they are merged / deduplicated in
438    /// `to_prompt_string`.
439    snippets: Vec<PlannedSnippet<'a>>,
440    budget_used: usize,
441}
442
443#[derive(Clone, Debug)]
444pub struct PlannedSnippet<'a> {
445    path: Arc<Path>,
446    range: Range<Line>,
447    text: &'a str,
448    // TODO: Indicate this in the output
449    #[allow(dead_code)]
450    text_is_truncated: bool,
451}
452
453#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
454pub enum DeclarationStyle {
455    Signature,
456    Declaration,
457}
458
459#[derive(Default, Clone, Debug, Serialize)]
460pub struct SectionLabels {
461    pub excerpt_index: usize,
462    pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
463}
464
465impl<'a> SyntaxBasedPrompt<'a> {
466    /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
467    ///
468    /// Initializes a priority queue by populating it with each snippet, finding the
469    /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
470    /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
471    /// the cost of upgrade.
472    ///
473    /// TODO: Implement an early halting condition. One option might be to have another priority
474    /// queue where the score is the size, and update it accordingly. Another option might be to
475    /// have some simpler heuristic like bailing after N failed insertions, or based on how much
476    /// budget is left.
477    ///
478    /// TODO: Has the current known sources of imprecision:
479    ///
480    /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
481    /// plan even though the containing struct is already included.
482    ///
483    /// * Does not consider cost of signatures when ranking snippets - this is tricky since
484    /// signatures may be shared by multiple snippets.
485    ///
486    /// * Does not include file paths / other text when considering max_bytes.
487    pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
488        let mut this = Self {
489            request,
490            snippets: Vec::new(),
491            budget_used: request.excerpt.len(),
492        };
493        let mut included_parents = FxHashSet::default();
494        let additional_parents = this.additional_parent_signatures(
495            &request.excerpt_path,
496            request.excerpt_parent,
497            &included_parents,
498        )?;
499        this.add_parents(&mut included_parents, additional_parents);
500
501        let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
502
503        if this.budget_used > max_bytes {
504            return Err(anyhow!(
505                "Excerpt + signatures size of {} already exceeds budget of {}",
506                this.budget_used,
507                max_bytes
508            ));
509        }
510
511        #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
512        struct QueueEntry {
513            score_density: OrderedFloat<f32>,
514            declaration_index: usize,
515            style: DeclarationStyle,
516        }
517
518        // Initialize priority queue with the best score for each snippet.
519        let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
520        for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
521            let (style, score_density) = DeclarationStyle::iter()
522                .map(|style| {
523                    (
524                        style,
525                        OrderedFloat(declaration_score_density(&declaration, style)),
526                    )
527                })
528                .max_by_key(|(_, score_density)| *score_density)
529                .unwrap();
530            queue.push(QueueEntry {
531                score_density,
532                declaration_index,
533                style,
534            });
535        }
536
537        // Knapsack selection loop
538        while let Some(queue_entry) = queue.pop() {
539            let Some(declaration) = request
540                .referenced_declarations
541                .get(queue_entry.declaration_index)
542            else {
543                return Err(anyhow!(
544                    "Invalid declaration index {}",
545                    queue_entry.declaration_index
546                ));
547            };
548
549            let mut additional_bytes = declaration_size(declaration, queue_entry.style);
550            if this.budget_used + additional_bytes > max_bytes {
551                continue;
552            }
553
554            let additional_parents = this.additional_parent_signatures(
555                &declaration.path,
556                declaration.parent_index,
557                &mut included_parents,
558            )?;
559            additional_bytes += additional_parents
560                .iter()
561                .map(|(_, snippet)| snippet.text.len())
562                .sum::<usize>();
563            if this.budget_used + additional_bytes > max_bytes {
564                continue;
565            }
566
567            this.budget_used += additional_bytes;
568            this.add_parents(&mut included_parents, additional_parents);
569            let planned_snippet = match queue_entry.style {
570                DeclarationStyle::Signature => {
571                    let Some(text) = declaration.text.get(declaration.signature_range.clone())
572                    else {
573                        return Err(anyhow!(
574                            "Invalid declaration signature_range {:?} with text.len() = {}",
575                            declaration.signature_range,
576                            declaration.text.len()
577                        ));
578                    };
579                    let signature_start_line = declaration.range.start
580                        + Line(
581                            declaration.text[..declaration.signature_range.start]
582                                .lines()
583                                .count() as u32,
584                        );
585                    let signature_end_line = signature_start_line
586                        + Line(
587                            declaration.text
588                                [declaration.signature_range.start..declaration.signature_range.end]
589                                .lines()
590                                .count() as u32,
591                        );
592                    let range = signature_start_line..signature_end_line;
593
594                    PlannedSnippet {
595                        path: declaration.path.clone(),
596                        range,
597                        text,
598                        text_is_truncated: declaration.text_is_truncated,
599                    }
600                }
601                DeclarationStyle::Declaration => PlannedSnippet {
602                    path: declaration.path.clone(),
603                    range: declaration.range.clone(),
604                    text: &declaration.text,
605                    text_is_truncated: declaration.text_is_truncated,
606                },
607            };
608            this.snippets.push(planned_snippet);
609
610            // When a Signature is consumed, insert an entry for Definition style.
611            if queue_entry.style == DeclarationStyle::Signature {
612                let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
613                let declaration_size =
614                    declaration_size(&declaration, DeclarationStyle::Declaration);
615                let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
616                let declaration_score =
617                    declaration_score(&declaration, DeclarationStyle::Declaration);
618
619                let score_diff = declaration_score - signature_score;
620                let size_diff = declaration_size.saturating_sub(signature_size);
621                if score_diff > 0.0001 && size_diff > 0 {
622                    queue.push(QueueEntry {
623                        declaration_index: queue_entry.declaration_index,
624                        score_density: OrderedFloat(score_diff / (size_diff as f32)),
625                        style: DeclarationStyle::Declaration,
626                    });
627                }
628            }
629        }
630
631        anyhow::Ok(this)
632    }
633
634    fn add_parents(
635        &mut self,
636        included_parents: &mut FxHashSet<usize>,
637        snippets: Vec<(usize, PlannedSnippet<'a>)>,
638    ) {
639        for (parent_index, snippet) in snippets {
640            included_parents.insert(parent_index);
641            self.budget_used += snippet.text.len();
642            self.snippets.push(snippet);
643        }
644    }
645
646    fn additional_parent_signatures(
647        &self,
648        path: &Arc<Path>,
649        parent_index: Option<usize>,
650        included_parents: &FxHashSet<usize>,
651    ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
652        let mut results = Vec::new();
653        self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
654        Ok(results)
655    }
656
657    fn additional_parent_signatures_impl(
658        &self,
659        path: &Arc<Path>,
660        parent_index: Option<usize>,
661        included_parents: &FxHashSet<usize>,
662        results: &mut Vec<(usize, PlannedSnippet<'a>)>,
663    ) -> Result<()> {
664        let Some(parent_index) = parent_index else {
665            return Ok(());
666        };
667        if included_parents.contains(&parent_index) {
668            return Ok(());
669        }
670        let Some(parent_signature) = self.request.signatures.get(parent_index) else {
671            return Err(anyhow!("Invalid parent index {}", parent_index));
672        };
673        results.push((
674            parent_index,
675            PlannedSnippet {
676                path: path.clone(),
677                range: parent_signature.range.clone(),
678                text: &parent_signature.text,
679                text_is_truncated: parent_signature.text_is_truncated,
680            },
681        ));
682        self.additional_parent_signatures_impl(
683            path,
684            parent_signature.parent_index,
685            included_parents,
686            results,
687        )
688    }
689
690    /// Renders the planned context. Each file starts with "```FILE_PATH\n` and ends with triple
691    /// backticks, with a newline after each file. Outputs a line with "..." between nonconsecutive
692    /// chunks.
693    pub fn write(
694        &'a self,
695        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
696        prompt: &mut String,
697    ) -> Result<SectionLabels> {
698        let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
699            FxHashMap::default();
700        for snippet in &self.snippets {
701            file_to_snippets
702                .entry(&snippet.path)
703                .or_default()
704                .push(snippet);
705        }
706
707        // Reorder so that file with cursor comes last
708        let mut file_snippets = Vec::new();
709        let mut excerpt_file_snippets = Vec::new();
710        for (file_path, snippets) in file_to_snippets {
711            if file_path == self.request.excerpt_path.as_ref() {
712                excerpt_file_snippets = snippets;
713            } else {
714                file_snippets.push((file_path, snippets, false));
715            }
716        }
717        let excerpt_snippet = PlannedSnippet {
718            path: self.request.excerpt_path.clone(),
719            range: self.request.excerpt_line_range.clone(),
720            text: &self.request.excerpt,
721            text_is_truncated: false,
722        };
723        excerpt_file_snippets.push(&excerpt_snippet);
724        file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));
725
726        let section_labels =
727            self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;
728
729        Ok(section_labels)
730    }
731
732    fn push_file_snippets(
733        &self,
734        output: &mut String,
735        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
736        file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
737    ) -> Result<SectionLabels> {
738        let mut section_ranges = Vec::new();
739        let mut excerpt_index = None;
740
741        for (file_path, mut snippets, is_excerpt_file) in file_snippets {
742            snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));
743
744            // TODO: What if the snippets get expanded too large to be editable?
745            let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
746            let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
747            for snippet in snippets {
748                if let Some((_, current_snippet_range)) = current_snippet.as_mut()
749                    && snippet.range.start <= current_snippet_range.end
750                {
751                    current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
752                    continue;
753                }
754                if let Some(current_snippet) = current_snippet.take() {
755                    disjoint_snippets.push(current_snippet);
756                }
757                current_snippet = Some((snippet, snippet.range.clone()));
758            }
759            if let Some(current_snippet) = current_snippet.take() {
760                disjoint_snippets.push(current_snippet);
761            }
762
763            writeln!(output, "`````path={}", file_path.display()).ok();
764            let mut skipped_last_snippet = false;
765            for (snippet, range) in disjoint_snippets {
766                let section_index = section_ranges.len();
767
768                match self.request.prompt_format {
769                    PromptFormat::MarkedExcerpt
770                    | PromptFormat::OnlySnippets
771                    | PromptFormat::OldTextNewText
772                    | PromptFormat::Minimal
773                    | PromptFormat::NumLinesUniDiff => {
774                        if range.start.0 > 0 && !skipped_last_snippet {
775                            output.push_str("\n");
776                        }
777                    }
778                    PromptFormat::LabeledSections => {
779                        if is_excerpt_file
780                            && range.start <= self.request.excerpt_line_range.start
781                            && range.end >= self.request.excerpt_line_range.end
782                        {
783                            writeln!(output, "<|current_section|>").ok();
784                        } else {
785                            writeln!(output, "<|section_{}|>", section_index).ok();
786                        }
787                    }
788                    PromptFormat::MinimalQwen => unreachable!(),
789                }
790
791                let push_full_snippet = |output: &mut String| {
792                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
793                        for (i, line) in snippet.text.lines().enumerate() {
794                            writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
795                        }
796                    } else {
797                        output.push_str(&snippet.text);
798                    }
799                    anyhow::Ok(())
800                };
801
802                if is_excerpt_file {
803                    if self.request.prompt_format == PromptFormat::OnlySnippets {
804                        if range.start >= self.request.excerpt_line_range.start
805                            && range.end <= self.request.excerpt_line_range.end
806                        {
807                            skipped_last_snippet = true;
808                        } else {
809                            skipped_last_snippet = false;
810                            output.push_str(snippet.text);
811                        }
812                    } else if !excerpt_file_insertions.is_empty() {
813                        let lines = snippet.text.lines().collect::<Vec<_>>();
814                        let push_line = |output: &mut String, line_ix: usize| {
815                            if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
816                                write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
817                            }
818                            anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
819                        };
820                        let mut last_line_ix = 0;
821                        let mut insertion_ix = 0;
822                        while insertion_ix < excerpt_file_insertions.len() {
823                            let (point, insertion) = &excerpt_file_insertions[insertion_ix];
824                            let found = point.line >= range.start && point.line <= range.end;
825                            if found {
826                                excerpt_index = Some(section_index);
827                                let insertion_line_ix = (point.line.0 - range.start.0) as usize;
828                                for line_ix in last_line_ix..insertion_line_ix {
829                                    push_line(output, line_ix)?;
830                                }
831                                if let Some(next_line) = lines.get(insertion_line_ix) {
832                                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
833                                        write!(
834                                            output,
835                                            "{}|",
836                                            insertion_line_ix as u32 + range.start.0 + 1
837                                        )?
838                                    }
839                                    output.push_str(&next_line[..point.column as usize]);
840                                    output.push_str(insertion);
841                                    writeln!(output, "{}", &next_line[point.column as usize..])?;
842                                } else {
843                                    writeln!(output, "{}", insertion)?;
844                                }
845                                last_line_ix = insertion_line_ix + 1;
846                                excerpt_file_insertions.remove(insertion_ix);
847                                continue;
848                            }
849                            insertion_ix += 1;
850                        }
851                        skipped_last_snippet = false;
852                        for line_ix in last_line_ix..lines.len() {
853                            push_line(output, line_ix)?;
854                        }
855                    } else {
856                        skipped_last_snippet = false;
857                        push_full_snippet(output)?;
858                    }
859                } else {
860                    skipped_last_snippet = false;
861                    push_full_snippet(output)?;
862                }
863
864                section_ranges.push((snippet.path.clone(), range));
865            }
866
867            output.push_str("`````\n\n");
868        }
869
870        Ok(SectionLabels {
871            // TODO: Clean this up
872            excerpt_index: match self.request.prompt_format {
873                PromptFormat::OnlySnippets => 0,
874                _ => excerpt_index.context("bug: no snippet found for excerpt")?,
875            },
876            section_ranges,
877        })
878    }
879}
880
881fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
882    declaration_score(declaration, style) / declaration_size(declaration, style) as f32
883}
884
885fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
886    match style {
887        DeclarationStyle::Signature => declaration.signature_score,
888        DeclarationStyle::Declaration => declaration.declaration_score,
889    }
890}
891
892fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
893    match style {
894        DeclarationStyle::Signature => declaration.signature_range.len(),
895        DeclarationStyle::Declaration => declaration.text.len(),
896    }
897}
898
899struct MinimalQwenPrompt {
900    events: Vec<Event>,
901    cursor_point: Point,
902    cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
903    included_files: Vec<IncludedFile>,
904}
905
906impl MinimalQwenPrompt {
907    const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
908
909    fn render(&self) -> String {
910        let edit_history = self.fmt_edit_history();
911        let context = self.fmt_context();
912
913        format!(
914            "{instructions}\n\n{edit_history}\n\n{context}",
915            instructions = MinimalQwenPrompt::INSTRUCTIONS,
916            edit_history = edit_history,
917            context = context
918        )
919    }
920
921    fn fmt_edit_history(&self) -> String {
922        if self.events.is_empty() {
923            "(No edit history)\n\n".to_string()
924        } else {
925            let mut events_str = String::new();
926            push_events(&mut events_str, &self.events);
927            format!(
928                "The following are the latest edits made by the user, from earlier to later.\n\n{}",
929                events_str
930            )
931        }
932    }
933
934    fn fmt_context(&self) -> String {
935        let mut context = String::new();
936        let include_line_numbers = true;
937
938        for related_file in &self.included_files {
939            writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
940
941            if related_file.path == self.cursor_path {
942                write!(context, "<|fim_prefix|>").unwrap();
943                write_excerpts(
944                    &related_file.excerpts,
945                    &[(self.cursor_point, "<|fim_suffix|>")],
946                    related_file.max_row,
947                    include_line_numbers,
948                    &mut context,
949                );
950                writeln!(context, "<|fim_middle|>").unwrap();
951            } else {
952                write_excerpts(
953                    &related_file.excerpts,
954                    &[],
955                    related_file.max_row,
956                    include_line_numbers,
957                    &mut context,
958                );
959            }
960        }
961        context
962    }
963}