cloud_zeta2_prompt.rs

   1//! Zeta2 prompt planning and generation code shared with cloud.
   2pub mod retrieval_prompt;
   3
   4use anyhow::{Context as _, Result, anyhow};
   5use cloud_llm_client::predict_edits_v3::{
   6    self, DiffPathFmt, Event, Excerpt, IncludedFile, Line, Point, PromptFormat,
   7    ReferencedDeclaration,
   8};
   9use indoc::indoc;
  10use ordered_float::OrderedFloat;
  11use rustc_hash::{FxHashMap, FxHashSet};
  12use serde::Serialize;
  13use std::cmp;
  14use std::fmt::Write;
  15use std::sync::Arc;
  16use std::{cmp::Reverse, collections::BinaryHeap, ops::Range, path::Path};
  17use strum::{EnumIter, IntoEnumIterator};
  18
/// Byte budget used when planning syntax-based context if the request does not specify
/// `prompt_max_bytes`.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker inserted into the excerpt text at the request's cursor point.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Marker inserted at the start of the editable region for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// Marker inserted at the end of the editable region for `PromptFormat::MarkedExcerpt`.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
  26
/// Instructions that open the prompt for `PromptFormat::MarkedExcerpt`.
///
/// Ends with the "Edit History" heading; `build_prompt` appends the edit history right after it.
// TODO: use constants for markers?
const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {"
    You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.

    The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>.  Please respond with edited code for that region.

    Other code is provided for context, and `…` indicates when code has been skipped.

    ## Edit History

"};
  38
/// Instructions that open the prompt for `PromptFormat::LabeledSections`.
///
/// Ends with the "Edit History" heading; `build_prompt` appends the edit history right after it.
const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code.

    Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`).

    The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it.

    Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example:

    <|current_section|>
    for i in 0..16 {
        println!("{i}");
    }

    ## Edit History

"#};
  56
/// Instructions that open the prompt for `PromptFormat::NumLinesUniDiff`, asking for a short
/// explanation followed by a unified diff.
///
/// Ends with the "Edit History" heading; `build_prompt` appends the edit history right after it.
const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#"
    # Instructions

    You are an edit prediction agent in a code editor.
    Your job is to predict the next edit that the user will make,
    based on their last few edits and their current cursor location.

    ## Output Format

    You must briefly explain your understanding of the user's goal, in one
    or two sentences, and then specify their next edit in the form of a
    unified diff, like this:

    ```
    --- a/src/myapp/cli.py
    +++ b/src/myapp/cli.py
    @@ ... @@
     import os
     import time
     import sys
    +from constants import LOG_LEVEL_WARNING
    @@ ... @@
     config.headless()
     config.set_interactive(false)
    -config.set_log_level(LOG_L)
    +config.set_log_level(LOG_LEVEL_WARNING)
     config.set_use_color(True)
    ```

    ## Edit History

"#};
  89
/// Instructions that open the prompt for `PromptFormat::Minimal`.
///
/// Ends with the "Edit History" heading; `build_prompt` appends the edit history right after it.
const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.

    ## Edit History

    "#};
  96
/// Closing reminder appended after the code excerpts for `PromptFormat::NumLinesUniDiff`,
/// restating the expected unified diff output format.
const UNIFIED_DIFF_REMINDER: &str = indoc! {"
    ---

    Analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    Your diff should include edited file paths in its file headers (lines beginning with `---` and `+++`).
    Do not include line numbers in the hunk headers, use `@@ ... @@`.
    Removed lines begin with `-`.
    Added lines begin with `+`.
    Context lines begin with an extra space.
    Context and removed lines are used to match the target edit location, so make sure to include enough of them
    to uniquely identify it amongst all excerpts of code provided.
"};
 110
/// Closing reminder appended after the code excerpts for `PromptFormat::Minimal`.
const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
    ---

    Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    If you're editing multiple files, be sure to reflect filename in the hunk's header.
    "};
 118
 119const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
 120    # Instructions
 121
 122    You are an edit prediction agent in a code editor.
 123
 124    Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
 125    Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
 126    Always continue along the user's current trajectory, rather than changing course.
 127
 128    ## Output Format
 129
 130    You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
 131    along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
 132
 133    <edits path="my-project/src/myapp/cli.py">
 134    <old_text>
 135    OLD TEXT 1 HERE
 136    </old_text>
 137    <new_text>
 138    NEW TEXT 1 HERE
 139    </new_text>
 140
 141    <old_text>
 142    OLD TEXT 1 HERE
 143    </old_text>
 144    <new_text>
 145    NEW TEXT 1 HERE
 146    </new_text>
 147    </edits>
 148
 149    - Specify the file to edit using the `path` attribute.
 150    - Use `<old_text>` and `<new_text>` tags to replace content
 151    - `<old_text>` must exactly match existing file content, including indentation
 152    - `<old_text>` cannot be empty
 153    - Do not escape quotes, newlines, or other characters within tags
 154    - Always close all tags properly
 155    - Don't include the <|user_cursor|> marker in your output.
 156
 157    ## Edit History
 158
 159"#};
 160
 161const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
 162    ---
 163
 164    Remember that the edits in the edit history have already been applied.
 165"#};
 166
 167pub fn build_prompt(
 168    request: &predict_edits_v3::PredictEditsRequest,
 169) -> Result<(String, SectionLabels)> {
 170    let mut section_labels = Default::default();
 171
 172    let prompt_data = PromptData {
 173        events: request.events.clone(),
 174        cursor_point: request.cursor_point,
 175        cursor_path: request.excerpt_path.clone(),
 176        included_files: request.included_files.clone(),
 177    };
 178    match request.prompt_format {
 179        PromptFormat::MinimalQwen => {
 180            return Ok((MinimalQwenPrompt.render(&prompt_data), section_labels));
 181        }
 182        PromptFormat::SeedCoder1120 => {
 183            return Ok((SeedCoder1120Prompt.render(&prompt_data), section_labels));
 184        }
 185        _ => (),
 186    };
 187
 188    let mut insertions = match request.prompt_format {
 189        PromptFormat::MarkedExcerpt => vec![
 190            (
 191                Point {
 192                    line: request.excerpt_line_range.start,
 193                    column: 0,
 194                },
 195                EDITABLE_REGION_START_MARKER_WITH_NEWLINE,
 196            ),
 197            (request.cursor_point, CURSOR_MARKER),
 198            (
 199                Point {
 200                    line: request.excerpt_line_range.end,
 201                    column: 0,
 202                },
 203                EDITABLE_REGION_END_MARKER_WITH_NEWLINE,
 204            ),
 205        ],
 206        PromptFormat::LabeledSections
 207        | PromptFormat::NumLinesUniDiff
 208        | PromptFormat::Minimal
 209        | PromptFormat::OldTextNewText => {
 210            vec![(request.cursor_point, CURSOR_MARKER)]
 211        }
 212        PromptFormat::OnlySnippets => vec![],
 213        PromptFormat::MinimalQwen => unreachable!(),
 214        PromptFormat::SeedCoder1120 => unreachable!(),
 215    };
 216
 217    let mut prompt = match request.prompt_format {
 218        PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(),
 219        PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(),
 220        PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(),
 221        PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
 222        PromptFormat::OnlySnippets => String::new(),
 223        PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
 224        PromptFormat::MinimalQwen => unreachable!(),
 225        PromptFormat::SeedCoder1120 => unreachable!(),
 226    };
 227
 228    if request.events.is_empty() {
 229        prompt.push_str("(No edit history)\n\n");
 230    } else {
 231        let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
 232            "The following are the latest edits made by the user, from earlier to later.\n\n"
 233        } else {
 234            "Here are the latest edits made by the user, from earlier to later.\n\n"
 235        };
 236        prompt.push_str(edit_preamble);
 237        push_events(&mut prompt, &request.events);
 238    }
 239
 240    let excerpts_preamble = match request.prompt_format {
 241        PromptFormat::Minimal => indoc! {"
 242             ## Part of the file under the cursor
 243
 244             (The cursor marker <|user_cursor|> indicates the current user cursor position.
 245             The file is in current state, edits from edit history has been applied.
 246             We only show part of the file around the cursor.
 247             You can only edit exactly this part of the file.
 248             We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
 249             "},
 250        PromptFormat::NumLinesUniDiff | PromptFormat::OldTextNewText => indoc! {"
 251            ## Code Excerpts
 252
 253            Here is some excerpts of code that you should take into account to predict the next edit.
 254
 255            The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
 256
 257            In addition other excerpts are included to better understand what the edit will be, including the declaration
 258            or references of symbols around the cursor, or other similar code snippets that may need to be updated
 259            following patterns that appear in the edit history.
 260
 261            Consider each of them carefully in relation to the edit history, and that the user may not have navigated
 262            to the next place they want to edit yet.
 263
 264            Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
 265        "},
 266        _ => indoc! {"
 267            ## Code Excerpts
 268
 269            The cursor marker <|user_cursor|> indicates the current user cursor position.
 270            The file is in current state, edits from edit history have been applied.
 271        "},
 272    };
 273
 274    prompt.push_str(excerpts_preamble);
 275    prompt.push('\n');
 276
 277    if !request.referenced_declarations.is_empty() || !request.signatures.is_empty() {
 278        let syntax_based_prompt = SyntaxBasedPrompt::populate(request)?;
 279        section_labels = syntax_based_prompt.write(&mut insertions, &mut prompt)?;
 280    } else {
 281        if request.prompt_format == PromptFormat::LabeledSections {
 282            anyhow::bail!("PromptFormat::LabeledSections cannot be used with ContextMode::Llm");
 283        }
 284
 285        let include_line_numbers = matches!(
 286            request.prompt_format,
 287            PromptFormat::NumLinesUniDiff | PromptFormat::Minimal
 288        );
 289        for related_file in &request.included_files {
 290            if request.prompt_format == PromptFormat::Minimal {
 291                write_codeblock_with_filename(
 292                    &related_file.path,
 293                    &related_file.excerpts,
 294                    if related_file.path == request.excerpt_path {
 295                        &insertions
 296                    } else {
 297                        &[]
 298                    },
 299                    related_file.max_row,
 300                    include_line_numbers,
 301                    &mut prompt,
 302                );
 303            } else {
 304                write_codeblock(
 305                    &related_file.path,
 306                    &related_file.excerpts,
 307                    if related_file.path == request.excerpt_path {
 308                        &insertions
 309                    } else {
 310                        &[]
 311                    },
 312                    related_file.max_row,
 313                    include_line_numbers,
 314                    &mut prompt,
 315                );
 316            }
 317        }
 318    }
 319
 320    match request.prompt_format {
 321        PromptFormat::NumLinesUniDiff => {
 322            prompt.push_str(UNIFIED_DIFF_REMINDER);
 323        }
 324        PromptFormat::OldTextNewText => {
 325            prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
 326        }
 327        PromptFormat::Minimal => {
 328            prompt.push_str(MINIMAL_PROMPT_REMINDER);
 329        }
 330        _ => {}
 331    }
 332
 333    Ok((prompt, section_labels))
 334}
 335
 336pub fn generation_params(prompt_format: PromptFormat) -> GenerationParams {
 337    match prompt_format {
 338        PromptFormat::SeedCoder1120 => SeedCoder1120Prompt::generation_params(),
 339        _ => GenerationParams::default(),
 340    }
 341}
 342
 343pub fn write_codeblock<'a>(
 344    path: &Path,
 345    excerpts: impl IntoIterator<Item = &'a Excerpt>,
 346    sorted_insertions: &[(Point, &str)],
 347    file_line_count: Line,
 348    include_line_numbers: bool,
 349    output: &'a mut String,
 350) {
 351    writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
 352
 353    write_excerpts(
 354        excerpts,
 355        sorted_insertions,
 356        file_line_count,
 357        include_line_numbers,
 358        output,
 359    );
 360    write!(output, "`````\n\n").unwrap();
 361}
 362
 363fn write_codeblock_with_filename<'a>(
 364    path: &Path,
 365    excerpts: impl IntoIterator<Item = &'a Excerpt>,
 366    sorted_insertions: &[(Point, &str)],
 367    file_line_count: Line,
 368    include_line_numbers: bool,
 369    output: &'a mut String,
 370) {
 371    writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
 372
 373    write_excerpts(
 374        excerpts,
 375        sorted_insertions,
 376        file_line_count,
 377        include_line_numbers,
 378        output,
 379    );
 380    write!(output, "`````\n\n").unwrap();
 381}
 382
 383pub fn write_excerpts<'a>(
 384    excerpts: impl IntoIterator<Item = &'a Excerpt>,
 385    sorted_insertions: &[(Point, &str)],
 386    file_line_count: Line,
 387    include_line_numbers: bool,
 388    output: &mut String,
 389) {
 390    let mut current_row = Line(0);
 391    let mut sorted_insertions = sorted_insertions.iter().peekable();
 392
 393    for excerpt in excerpts {
 394        if excerpt.start_line > current_row {
 395            writeln!(output, "").unwrap();
 396        }
 397        if excerpt.text.is_empty() {
 398            return;
 399        }
 400
 401        current_row = excerpt.start_line;
 402
 403        for mut line in excerpt.text.lines() {
 404            if include_line_numbers {
 405                write!(output, "{}|", current_row.0 + 1).unwrap();
 406            }
 407
 408            while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
 409                match current_row.cmp(&insertion_location.line) {
 410                    cmp::Ordering::Equal => {
 411                        let (prefix, suffix) = line.split_at(insertion_location.column as usize);
 412                        output.push_str(prefix);
 413                        output.push_str(insertion_marker);
 414                        line = suffix;
 415                        sorted_insertions.next();
 416                    }
 417                    cmp::Ordering::Less => break,
 418                    cmp::Ordering::Greater => {
 419                        sorted_insertions.next();
 420                        break;
 421                    }
 422                }
 423            }
 424            output.push_str(line);
 425            output.push('\n');
 426            current_row.0 += 1;
 427        }
 428    }
 429
 430    if current_row < file_line_count {
 431        writeln!(output, "").unwrap();
 432    }
 433}
 434
 435pub fn push_events(output: &mut String, events: &[Arc<predict_edits_v3::Event>]) {
 436    if events.is_empty() {
 437        return;
 438    };
 439
 440    writeln!(output, "`````diff").unwrap();
 441    for event in events {
 442        writeln!(output, "{}", event).unwrap();
 443    }
 444    writeln!(output, "`````\n").unwrap();
 445}
 446
 447pub struct SyntaxBasedPrompt<'a> {
 448    request: &'a predict_edits_v3::PredictEditsRequest,
 449    /// Snippets to include in the prompt. These may overlap - they are merged / deduplicated in
 450    /// `to_prompt_string`.
 451    snippets: Vec<PlannedSnippet<'a>>,
 452    budget_used: usize,
 453}
 454
 455#[derive(Clone, Debug)]
 456pub struct PlannedSnippet<'a> {
 457    path: Arc<Path>,
 458    range: Range<Line>,
 459    text: &'a str,
 460    // TODO: Indicate this in the output
 461    #[allow(dead_code)]
 462    text_is_truncated: bool,
 463}
 464
 465#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
 466pub enum DeclarationStyle {
 467    Signature,
 468    Declaration,
 469}
 470
 471#[derive(Default, Clone, Debug, Serialize)]
 472pub struct SectionLabels {
 473    pub excerpt_index: usize,
 474    pub section_ranges: Vec<(Arc<Path>, Range<Line>)>,
 475}
 476
 477impl<'a> SyntaxBasedPrompt<'a> {
 478    /// Greedy one-pass knapsack algorithm to populate the prompt plan. Does the following:
 479    ///
 480    /// Initializes a priority queue by populating it with each snippet, finding the
 481    /// DeclarationStyle that minimizes `score_density = score / snippet.range(style).len()`. When a
 482    /// "signature" snippet is popped, insert an entry for the "declaration" variant that reflects
 483    /// the cost of upgrade.
 484    ///
 485    /// TODO: Implement an early halting condition. One option might be to have another priority
 486    /// queue where the score is the size, and update it accordingly. Another option might be to
 487    /// have some simpler heuristic like bailing after N failed insertions, or based on how much
 488    /// budget is left.
 489    ///
 490    /// TODO: Has the current known sources of imprecision:
 491    ///
 492    /// * Does not consider snippet overlap when ranking. For example, it might add a field to the
 493    /// plan even though the containing struct is already included.
 494    ///
 495    /// * Does not consider cost of signatures when ranking snippets - this is tricky since
 496    /// signatures may be shared by multiple snippets.
 497    ///
 498    /// * Does not include file paths / other text when considering max_bytes.
 499    pub fn populate(request: &'a predict_edits_v3::PredictEditsRequest) -> Result<Self> {
 500        let mut this = Self {
 501            request,
 502            snippets: Vec::new(),
 503            budget_used: request.excerpt.len(),
 504        };
 505        let mut included_parents = FxHashSet::default();
 506        let additional_parents = this.additional_parent_signatures(
 507            &request.excerpt_path,
 508            request.excerpt_parent,
 509            &included_parents,
 510        )?;
 511        this.add_parents(&mut included_parents, additional_parents);
 512
 513        let max_bytes = request.prompt_max_bytes.unwrap_or(DEFAULT_MAX_PROMPT_BYTES);
 514
 515        if this.budget_used > max_bytes {
 516            return Err(anyhow!(
 517                "Excerpt + signatures size of {} already exceeds budget of {}",
 518                this.budget_used,
 519                max_bytes
 520            ));
 521        }
 522
 523        #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
 524        struct QueueEntry {
 525            score_density: OrderedFloat<f32>,
 526            declaration_index: usize,
 527            style: DeclarationStyle,
 528        }
 529
 530        // Initialize priority queue with the best score for each snippet.
 531        let mut queue: BinaryHeap<QueueEntry> = BinaryHeap::new();
 532        for (declaration_index, declaration) in request.referenced_declarations.iter().enumerate() {
 533            let (style, score_density) = DeclarationStyle::iter()
 534                .map(|style| {
 535                    (
 536                        style,
 537                        OrderedFloat(declaration_score_density(&declaration, style)),
 538                    )
 539                })
 540                .max_by_key(|(_, score_density)| *score_density)
 541                .unwrap();
 542            queue.push(QueueEntry {
 543                score_density,
 544                declaration_index,
 545                style,
 546            });
 547        }
 548
 549        // Knapsack selection loop
 550        while let Some(queue_entry) = queue.pop() {
 551            let Some(declaration) = request
 552                .referenced_declarations
 553                .get(queue_entry.declaration_index)
 554            else {
 555                return Err(anyhow!(
 556                    "Invalid declaration index {}",
 557                    queue_entry.declaration_index
 558                ));
 559            };
 560
 561            let mut additional_bytes = declaration_size(declaration, queue_entry.style);
 562            if this.budget_used + additional_bytes > max_bytes {
 563                continue;
 564            }
 565
 566            let additional_parents = this.additional_parent_signatures(
 567                &declaration.path,
 568                declaration.parent_index,
 569                &mut included_parents,
 570            )?;
 571            additional_bytes += additional_parents
 572                .iter()
 573                .map(|(_, snippet)| snippet.text.len())
 574                .sum::<usize>();
 575            if this.budget_used + additional_bytes > max_bytes {
 576                continue;
 577            }
 578
 579            this.budget_used += additional_bytes;
 580            this.add_parents(&mut included_parents, additional_parents);
 581            let planned_snippet = match queue_entry.style {
 582                DeclarationStyle::Signature => {
 583                    let Some(text) = declaration.text.get(declaration.signature_range.clone())
 584                    else {
 585                        return Err(anyhow!(
 586                            "Invalid declaration signature_range {:?} with text.len() = {}",
 587                            declaration.signature_range,
 588                            declaration.text.len()
 589                        ));
 590                    };
 591                    let signature_start_line = declaration.range.start
 592                        + Line(
 593                            declaration.text[..declaration.signature_range.start]
 594                                .lines()
 595                                .count() as u32,
 596                        );
 597                    let signature_end_line = signature_start_line
 598                        + Line(
 599                            declaration.text
 600                                [declaration.signature_range.start..declaration.signature_range.end]
 601                                .lines()
 602                                .count() as u32,
 603                        );
 604                    let range = signature_start_line..signature_end_line;
 605
 606                    PlannedSnippet {
 607                        path: declaration.path.clone(),
 608                        range,
 609                        text,
 610                        text_is_truncated: declaration.text_is_truncated,
 611                    }
 612                }
 613                DeclarationStyle::Declaration => PlannedSnippet {
 614                    path: declaration.path.clone(),
 615                    range: declaration.range.clone(),
 616                    text: &declaration.text,
 617                    text_is_truncated: declaration.text_is_truncated,
 618                },
 619            };
 620            this.snippets.push(planned_snippet);
 621
 622            // When a Signature is consumed, insert an entry for Definition style.
 623            if queue_entry.style == DeclarationStyle::Signature {
 624                let signature_size = declaration_size(&declaration, DeclarationStyle::Signature);
 625                let declaration_size =
 626                    declaration_size(&declaration, DeclarationStyle::Declaration);
 627                let signature_score = declaration_score(&declaration, DeclarationStyle::Signature);
 628                let declaration_score =
 629                    declaration_score(&declaration, DeclarationStyle::Declaration);
 630
 631                let score_diff = declaration_score - signature_score;
 632                let size_diff = declaration_size.saturating_sub(signature_size);
 633                if score_diff > 0.0001 && size_diff > 0 {
 634                    queue.push(QueueEntry {
 635                        declaration_index: queue_entry.declaration_index,
 636                        score_density: OrderedFloat(score_diff / (size_diff as f32)),
 637                        style: DeclarationStyle::Declaration,
 638                    });
 639                }
 640            }
 641        }
 642
 643        anyhow::Ok(this)
 644    }
 645
 646    fn add_parents(
 647        &mut self,
 648        included_parents: &mut FxHashSet<usize>,
 649        snippets: Vec<(usize, PlannedSnippet<'a>)>,
 650    ) {
 651        for (parent_index, snippet) in snippets {
 652            included_parents.insert(parent_index);
 653            self.budget_used += snippet.text.len();
 654            self.snippets.push(snippet);
 655        }
 656    }
 657
 658    fn additional_parent_signatures(
 659        &self,
 660        path: &Arc<Path>,
 661        parent_index: Option<usize>,
 662        included_parents: &FxHashSet<usize>,
 663    ) -> Result<Vec<(usize, PlannedSnippet<'a>)>> {
 664        let mut results = Vec::new();
 665        self.additional_parent_signatures_impl(path, parent_index, included_parents, &mut results)?;
 666        Ok(results)
 667    }
 668
 669    fn additional_parent_signatures_impl(
 670        &self,
 671        path: &Arc<Path>,
 672        parent_index: Option<usize>,
 673        included_parents: &FxHashSet<usize>,
 674        results: &mut Vec<(usize, PlannedSnippet<'a>)>,
 675    ) -> Result<()> {
 676        let Some(parent_index) = parent_index else {
 677            return Ok(());
 678        };
 679        if included_parents.contains(&parent_index) {
 680            return Ok(());
 681        }
 682        let Some(parent_signature) = self.request.signatures.get(parent_index) else {
 683            return Err(anyhow!("Invalid parent index {}", parent_index));
 684        };
 685        results.push((
 686            parent_index,
 687            PlannedSnippet {
 688                path: path.clone(),
 689                range: parent_signature.range.clone(),
 690                text: &parent_signature.text,
 691                text_is_truncated: parent_signature.text_is_truncated,
 692            },
 693        ));
 694        self.additional_parent_signatures_impl(
 695            path,
 696            parent_signature.parent_index,
 697            included_parents,
 698            results,
 699        )
 700    }
 701
 702    /// Renders the planned context. Each file starts with "```FILE_PATH\n` and ends with triple
 703    /// backticks, with a newline after each file. Outputs a line with "..." between nonconsecutive
 704    /// chunks.
 705    pub fn write(
 706        &'a self,
 707        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
 708        prompt: &mut String,
 709    ) -> Result<SectionLabels> {
 710        let mut file_to_snippets: FxHashMap<&'a std::path::Path, Vec<&PlannedSnippet<'a>>> =
 711            FxHashMap::default();
 712        for snippet in &self.snippets {
 713            file_to_snippets
 714                .entry(&snippet.path)
 715                .or_default()
 716                .push(snippet);
 717        }
 718
 719        // Reorder so that file with cursor comes last
 720        let mut file_snippets = Vec::new();
 721        let mut excerpt_file_snippets = Vec::new();
 722        for (file_path, snippets) in file_to_snippets {
 723            if file_path == self.request.excerpt_path.as_ref() {
 724                excerpt_file_snippets = snippets;
 725            } else {
 726                file_snippets.push((file_path, snippets, false));
 727            }
 728        }
 729        let excerpt_snippet = PlannedSnippet {
 730            path: self.request.excerpt_path.clone(),
 731            range: self.request.excerpt_line_range.clone(),
 732            text: &self.request.excerpt,
 733            text_is_truncated: false,
 734        };
 735        excerpt_file_snippets.push(&excerpt_snippet);
 736        file_snippets.push((&self.request.excerpt_path, excerpt_file_snippets, true));
 737
 738        let section_labels =
 739            self.push_file_snippets(prompt, excerpt_file_insertions, file_snippets)?;
 740
 741        Ok(section_labels)
 742    }
 743
 744    fn push_file_snippets(
 745        &self,
 746        output: &mut String,
 747        excerpt_file_insertions: &mut Vec<(Point, &'static str)>,
 748        file_snippets: Vec<(&'a Path, Vec<&'a PlannedSnippet>, bool)>,
 749    ) -> Result<SectionLabels> {
 750        let mut section_ranges = Vec::new();
 751        let mut excerpt_index = None;
 752
 753        for (file_path, mut snippets, is_excerpt_file) in file_snippets {
 754            snippets.sort_by_key(|s| (s.range.start, Reverse(s.range.end)));
 755
 756            // TODO: What if the snippets get expanded too large to be editable?
 757            let mut current_snippet: Option<(&PlannedSnippet, Range<Line>)> = None;
 758            let mut disjoint_snippets: Vec<(&PlannedSnippet, Range<Line>)> = Vec::new();
 759            for snippet in snippets {
 760                if let Some((_, current_snippet_range)) = current_snippet.as_mut()
 761                    && snippet.range.start <= current_snippet_range.end
 762                {
 763                    current_snippet_range.end = current_snippet_range.end.max(snippet.range.end);
 764                    continue;
 765                }
 766                if let Some(current_snippet) = current_snippet.take() {
 767                    disjoint_snippets.push(current_snippet);
 768                }
 769                current_snippet = Some((snippet, snippet.range.clone()));
 770            }
 771            if let Some(current_snippet) = current_snippet.take() {
 772                disjoint_snippets.push(current_snippet);
 773            }
 774
 775            writeln!(output, "`````path={}", file_path.display()).ok();
 776            let mut skipped_last_snippet = false;
 777            for (snippet, range) in disjoint_snippets {
 778                let section_index = section_ranges.len();
 779
 780                match self.request.prompt_format {
 781                    PromptFormat::MarkedExcerpt
 782                    | PromptFormat::OnlySnippets
 783                    | PromptFormat::OldTextNewText
 784                    | PromptFormat::Minimal
 785                    | PromptFormat::NumLinesUniDiff => {
 786                        if range.start.0 > 0 && !skipped_last_snippet {
 787                            output.push_str("\n");
 788                        }
 789                    }
 790                    PromptFormat::LabeledSections => {
 791                        if is_excerpt_file
 792                            && range.start <= self.request.excerpt_line_range.start
 793                            && range.end >= self.request.excerpt_line_range.end
 794                        {
 795                            writeln!(output, "<|current_section|>").ok();
 796                        } else {
 797                            writeln!(output, "<|section_{}|>", section_index).ok();
 798                        }
 799                    }
 800                    PromptFormat::MinimalQwen => unreachable!(),
 801                    PromptFormat::SeedCoder1120 => unreachable!(),
 802                }
 803
 804                let push_full_snippet = |output: &mut String| {
 805                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
 806                        for (i, line) in snippet.text.lines().enumerate() {
 807                            writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?;
 808                        }
 809                    } else {
 810                        output.push_str(&snippet.text);
 811                    }
 812                    anyhow::Ok(())
 813                };
 814
 815                if is_excerpt_file {
 816                    if self.request.prompt_format == PromptFormat::OnlySnippets {
 817                        if range.start >= self.request.excerpt_line_range.start
 818                            && range.end <= self.request.excerpt_line_range.end
 819                        {
 820                            skipped_last_snippet = true;
 821                        } else {
 822                            skipped_last_snippet = false;
 823                            output.push_str(snippet.text);
 824                        }
 825                    } else if !excerpt_file_insertions.is_empty() {
 826                        let lines = snippet.text.lines().collect::<Vec<_>>();
 827                        let push_line = |output: &mut String, line_ix: usize| {
 828                            if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
 829                                write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?;
 830                            }
 831                            anyhow::Ok(writeln!(output, "{}", lines[line_ix])?)
 832                        };
 833                        let mut last_line_ix = 0;
 834                        let mut insertion_ix = 0;
 835                        while insertion_ix < excerpt_file_insertions.len() {
 836                            let (point, insertion) = &excerpt_file_insertions[insertion_ix];
 837                            let found = point.line >= range.start && point.line <= range.end;
 838                            if found {
 839                                excerpt_index = Some(section_index);
 840                                let insertion_line_ix = (point.line.0 - range.start.0) as usize;
 841                                for line_ix in last_line_ix..insertion_line_ix {
 842                                    push_line(output, line_ix)?;
 843                                }
 844                                if let Some(next_line) = lines.get(insertion_line_ix) {
 845                                    if self.request.prompt_format == PromptFormat::NumLinesUniDiff {
 846                                        write!(
 847                                            output,
 848                                            "{}|",
 849                                            insertion_line_ix as u32 + range.start.0 + 1
 850                                        )?
 851                                    }
 852                                    output.push_str(&next_line[..point.column as usize]);
 853                                    output.push_str(insertion);
 854                                    writeln!(output, "{}", &next_line[point.column as usize..])?;
 855                                } else {
 856                                    writeln!(output, "{}", insertion)?;
 857                                }
 858                                last_line_ix = insertion_line_ix + 1;
 859                                excerpt_file_insertions.remove(insertion_ix);
 860                                continue;
 861                            }
 862                            insertion_ix += 1;
 863                        }
 864                        skipped_last_snippet = false;
 865                        for line_ix in last_line_ix..lines.len() {
 866                            push_line(output, line_ix)?;
 867                        }
 868                    } else {
 869                        skipped_last_snippet = false;
 870                        push_full_snippet(output)?;
 871                    }
 872                } else {
 873                    skipped_last_snippet = false;
 874                    push_full_snippet(output)?;
 875                }
 876
 877                section_ranges.push((snippet.path.clone(), range));
 878            }
 879
 880            output.push_str("`````\n\n");
 881        }
 882
 883        Ok(SectionLabels {
 884            // TODO: Clean this up
 885            excerpt_index: match self.request.prompt_format {
 886                PromptFormat::OnlySnippets => 0,
 887                _ => excerpt_index.context("bug: no snippet found for excerpt")?,
 888            },
 889            section_ranges,
 890        })
 891    }
 892}
 893
 894fn declaration_score_density(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
 895    declaration_score(declaration, style) / declaration_size(declaration, style) as f32
 896}
 897
 898fn declaration_score(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> f32 {
 899    match style {
 900        DeclarationStyle::Signature => declaration.signature_score,
 901        DeclarationStyle::Declaration => declaration.declaration_score,
 902    }
 903}
 904
 905fn declaration_size(declaration: &ReferencedDeclaration, style: DeclarationStyle) -> usize {
 906    match style {
 907        DeclarationStyle::Signature => declaration.signature_range.len(),
 908        DeclarationStyle::Declaration => declaration.text.len(),
 909    }
 910}
 911
/// Input handed to [`PromptFormatter::render`].
struct PromptData {
    /// Edit events; the formatters in this file render them with `push_events`
    /// and fall back to a placeholder text when the list is empty.
    events: Vec<Arc<Event>>,
    /// Cursor position, used as the point where a formatter inserts its
    /// fill-in-the-middle suffix marker.
    cursor_point: Point,
    /// Path of the file containing the cursor; formatters compare it with each
    /// included file's path to decide which file receives the cursor markers.
    cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
    /// Files whose excerpts are written into the prompt context.
    included_files: Vec<IncludedFile>,
}
 918
 919#[derive(Default)]
 920pub struct GenerationParams {
 921    pub temperature: Option<f32>,
 922    pub top_p: Option<f32>,
 923    pub stop: Option<Vec<String>>,
 924}
 925
 926trait PromptFormatter {
 927    fn render(&self, data: &PromptData) -> String;
 928
 929    fn generation_params() -> GenerationParams {
 930        return GenerationParams::default();
 931    }
 932}
 933
 934struct MinimalQwenPrompt;
 935
 936impl PromptFormatter for MinimalQwenPrompt {
 937    fn render(&self, data: &PromptData) -> String {
 938        let edit_history = self.fmt_edit_history(data);
 939        let context = self.fmt_context(data);
 940
 941        format!(
 942            "{instructions}\n\n{edit_history}\n\n{context}",
 943            instructions = MinimalQwenPrompt::INSTRUCTIONS,
 944            edit_history = edit_history,
 945            context = context
 946        )
 947    }
 948}
 949
 950impl MinimalQwenPrompt {
 951    const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
 952
 953    fn fmt_edit_history(&self, data: &PromptData) -> String {
 954        if data.events.is_empty() {
 955            "(No edit history)\n\n".to_string()
 956        } else {
 957            let mut events_str = String::new();
 958            push_events(&mut events_str, &data.events);
 959            format!(
 960                "The following are the latest edits made by the user, from earlier to later.\n\n{}",
 961                events_str
 962            )
 963        }
 964    }
 965
 966    fn fmt_context(&self, data: &PromptData) -> String {
 967        let mut context = String::new();
 968        let include_line_numbers = true;
 969
 970        for related_file in &data.included_files {
 971            writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
 972
 973            if related_file.path == data.cursor_path {
 974                write!(context, "<|fim_prefix|>").unwrap();
 975                write_excerpts(
 976                    &related_file.excerpts,
 977                    &[(data.cursor_point, "<|fim_suffix|>")],
 978                    related_file.max_row,
 979                    include_line_numbers,
 980                    &mut context,
 981                );
 982                writeln!(context, "<|fim_middle|>").unwrap();
 983            } else {
 984                write_excerpts(
 985                    &related_file.excerpts,
 986                    &[],
 987                    related_file.max_row,
 988                    include_line_numbers,
 989                    &mut context,
 990                );
 991            }
 992        }
 993        context
 994    }
 995}
 996
 997struct SeedCoder1120Prompt;
 998
 999impl PromptFormatter for SeedCoder1120Prompt {
1000    fn render(&self, data: &PromptData) -> String {
1001        let edit_history = self.fmt_edit_history(data);
1002        let context = self.fmt_context(data);
1003
1004        format!(
1005            "# Edit History:\n{edit_history}\n\n{context}",
1006            edit_history = edit_history,
1007            context = context
1008        )
1009    }
1010
1011    fn generation_params() -> GenerationParams {
1012        GenerationParams {
1013            temperature: Some(0.2),
1014            top_p: Some(0.9),
1015            stop: Some(vec!["<[end_of_sentence]>".into()]),
1016        }
1017    }
1018}
1019
1020impl SeedCoder1120Prompt {
1021    fn fmt_edit_history(&self, data: &PromptData) -> String {
1022        if data.events.is_empty() {
1023            "(No edit history)\n\n".to_string()
1024        } else {
1025            let mut events_str = String::new();
1026            push_events(&mut events_str, &data.events);
1027            events_str
1028        }
1029    }
1030
1031    fn fmt_context(&self, data: &PromptData) -> String {
1032        let mut context = String::new();
1033        let include_line_numbers = true;
1034
1035        for related_file in &data.included_files {
1036            writeln!(context, "# Path: {}\n", DiffPathFmt(&related_file.path)).unwrap();
1037
1038            if related_file.path == data.cursor_path {
1039                let fim_prompt = self.fmt_fim(&related_file, data.cursor_point);
1040                context.push_str(&fim_prompt);
1041            } else {
1042                write_excerpts(
1043                    &related_file.excerpts,
1044                    &[],
1045                    related_file.max_row,
1046                    include_line_numbers,
1047                    &mut context,
1048                );
1049            }
1050        }
1051        context
1052    }
1053
1054    fn fmt_fim(&self, file: &IncludedFile, cursor_point: Point) -> String {
1055        let mut buf = String::new();
1056        const FIM_SUFFIX: &str = "<[fim-suffix]>";
1057        const FIM_PREFIX: &str = "<[fim-prefix]>";
1058        const FIM_MIDDLE: &str = "<[fim-middle]>";
1059        write!(buf, "{}", FIM_PREFIX).unwrap();
1060        write_excerpts(
1061            &file.excerpts,
1062            &[(cursor_point, FIM_SUFFIX)],
1063            file.max_row,
1064            true,
1065            &mut buf,
1066        );
1067
1068        // Swap prefix and suffix parts
1069        let index = buf.find(FIM_SUFFIX).unwrap();
1070        let prefix = &buf[..index];
1071        let suffix = &buf[index..];
1072
1073        format!("{}{}{}", suffix, prefix, FIM_MIDDLE)
1074    }
1075}