cloud_zeta2_prompt.rs

  1use anyhow::Result;
  2use cloud_llm_client::predict_edits_v3::{
  3    self, DiffPathFmt, Event, Excerpt, Line, Point, PromptFormat, RelatedFile,
  4};
  5use indoc::indoc;
  6use std::cmp;
  7use std::fmt::Write;
  8use std::path::Path;
  9use std::sync::Arc;
 10
/// Default prompt size budget in bytes (10 KiB).
///
/// NOTE(review): nothing in this file reads this constant; presumably it is
/// consumed by callers that size the prompt before calling into this module.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker inserted at the user's cursor position inside the cursor file's
/// excerpt by [`build_prompt`] for the `Minimal` and `OldTextNewText` formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Opening marker of the editable region.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// Closing marker of the editable region.
///
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
 18
/// Prompt header used by [`build_prompt`] for `PromptFormat::Minimal`.
///
/// Ends with the `## Edit History` heading followed by a blank line, so the
/// edit history section can be appended directly after it.
const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.

    ## Edit History

    "#};
 25
/// Closing reminder appended by [`build_prompt`] after the code excerpts for
/// `PromptFormat::Minimal`; asks for a unified diff without the cursor marker.
const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
    ---

    Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    If you're editing multiple files, be sure to reflect filename in the hunk's header.
    "};
 33
 34const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
 35    # Instructions
 36
 37    You are an edit prediction agent in a code editor.
 38
 39    Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
 40    Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
 41    Always continue along the user's current trajectory, rather than changing course.
 42
 43    ## Output Format
 44
 45    You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
 46    along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
 47
 48    <edits path="my-project/src/myapp/cli.py">
 49    <old_text>
 50    OLD TEXT 1 HERE
 51    </old_text>
 52    <new_text>
 53    NEW TEXT 1 HERE
 54    </new_text>
 55
 56    <old_text>
 57    OLD TEXT 1 HERE
 58    </old_text>
 59    <new_text>
 60    NEW TEXT 1 HERE
 61    </new_text>
 62    </edits>
 63
 64    - Specify the file to edit using the `path` attribute.
 65    - Use `<old_text>` and `<new_text>` tags to replace content
 66    - `<old_text>` must exactly match existing file content, including indentation
 67    - `<old_text>` cannot be empty
 68    - Do not escape quotes, newlines, or other characters within tags
 69    - Always close all tags properly
 70    - Don't include the <|user_cursor|> marker in your output.
 71
 72    ## Edit History
 73
 74"#};
 75
 76const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
 77    ---
 78
 79    Remember that the edits in the edit history have already been applied.
 80"#};
 81
 82pub fn build_prompt(request: &predict_edits_v3::PredictEditsRequest) -> Result<String> {
 83    let prompt_data = PromptData {
 84        events: request.events.clone(),
 85        cursor_point: request.cursor_point,
 86        cursor_path: request.excerpt_path.clone(),
 87        included_files: request.related_files.clone(),
 88    };
 89    match request.prompt_format {
 90        PromptFormat::MinimalQwen => {
 91            return Ok(MinimalQwenPrompt.render(&prompt_data));
 92        }
 93        PromptFormat::SeedCoder1120 => {
 94            return Ok(SeedCoder1120Prompt.render(&prompt_data));
 95        }
 96        _ => (),
 97    };
 98
 99    let insertions = match request.prompt_format {
100        PromptFormat::Minimal | PromptFormat::OldTextNewText => {
101            vec![(request.cursor_point, CURSOR_MARKER)]
102        }
103        PromptFormat::OnlySnippets => vec![],
104        PromptFormat::MinimalQwen => unreachable!(),
105        PromptFormat::SeedCoder1120 => unreachable!(),
106    };
107
108    let mut prompt = match request.prompt_format {
109        PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
110        PromptFormat::OnlySnippets => String::new(),
111        PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
112        PromptFormat::MinimalQwen => unreachable!(),
113        PromptFormat::SeedCoder1120 => unreachable!(),
114    };
115
116    if request.events.is_empty() {
117        prompt.push_str("(No edit history)\n\n");
118    } else {
119        let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
120            "The following are the latest edits made by the user, from earlier to later.\n\n"
121        } else {
122            "Here are the latest edits made by the user, from earlier to later.\n\n"
123        };
124        prompt.push_str(edit_preamble);
125        push_events(&mut prompt, &request.events);
126    }
127
128    let excerpts_preamble = match request.prompt_format {
129        PromptFormat::Minimal => indoc! {"
130             ## Part of the file under the cursor
131
132             (The cursor marker <|user_cursor|> indicates the current user cursor position.
133             The file is in current state, edits from edit history has been applied.
134             We only show part of the file around the cursor.
135             You can only edit exactly this part of the file.
136             We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
137             "},
138        PromptFormat::OldTextNewText => indoc! {"
139            ## Code Excerpts
140
141            Here is some excerpts of code that you should take into account to predict the next edit.
142
143            The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
144
145            In addition other excerpts are included to better understand what the edit will be, including the declaration
146            or references of symbols around the cursor, or other similar code snippets that may need to be updated
147            following patterns that appear in the edit history.
148
149            Consider each of them carefully in relation to the edit history, and that the user may not have navigated
150            to the next place they want to edit yet.
151
152            Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
153        "},
154        PromptFormat::OnlySnippets | PromptFormat::MinimalQwen | PromptFormat::SeedCoder1120 => {
155            indoc! {"
156            ## Code Excerpts
157
158            The cursor marker <|user_cursor|> indicates the current user cursor position.
159            The file is in current state, edits from edit history have been applied.
160        "}
161        }
162    };
163
164    prompt.push_str(excerpts_preamble);
165    prompt.push('\n');
166
167    let include_line_numbers = matches!(request.prompt_format, PromptFormat::Minimal);
168    for related_file in &request.related_files {
169        if request.prompt_format == PromptFormat::Minimal {
170            write_codeblock_with_filename(
171                &related_file.path,
172                &related_file.excerpts,
173                if related_file.path == request.excerpt_path {
174                    &insertions
175                } else {
176                    &[]
177                },
178                related_file.max_row,
179                include_line_numbers,
180                &mut prompt,
181            );
182        } else {
183            write_codeblock(
184                &related_file.path,
185                &related_file.excerpts,
186                if related_file.path == request.excerpt_path {
187                    &insertions
188                } else {
189                    &[]
190                },
191                related_file.max_row,
192                include_line_numbers,
193                &mut prompt,
194            );
195        }
196    }
197
198    match request.prompt_format {
199        PromptFormat::OldTextNewText => {
200            prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
201        }
202        PromptFormat::Minimal => {
203            prompt.push_str(MINIMAL_PROMPT_REMINDER);
204        }
205        _ => {}
206    }
207
208    Ok(prompt)
209}
210
211pub fn generation_params(prompt_format: PromptFormat) -> GenerationParams {
212    match prompt_format {
213        PromptFormat::SeedCoder1120 => SeedCoder1120Prompt::generation_params(),
214        _ => GenerationParams::default(),
215    }
216}
217
218pub fn write_codeblock<'a>(
219    path: &Path,
220    excerpts: impl IntoIterator<Item = &'a Excerpt>,
221    sorted_insertions: &[(Point, &str)],
222    file_line_count: Line,
223    include_line_numbers: bool,
224    output: &'a mut String,
225) {
226    writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
227
228    write_excerpts(
229        excerpts,
230        sorted_insertions,
231        file_line_count,
232        include_line_numbers,
233        output,
234    );
235    write!(output, "`````\n\n").unwrap();
236}
237
238fn write_codeblock_with_filename<'a>(
239    path: &Path,
240    excerpts: impl IntoIterator<Item = &'a Excerpt>,
241    sorted_insertions: &[(Point, &str)],
242    file_line_count: Line,
243    include_line_numbers: bool,
244    output: &'a mut String,
245) {
246    writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
247
248    write_excerpts(
249        excerpts,
250        sorted_insertions,
251        file_line_count,
252        include_line_numbers,
253        output,
254    );
255    write!(output, "`````\n\n").unwrap();
256}
257
258pub fn write_excerpts<'a>(
259    excerpts: impl IntoIterator<Item = &'a Excerpt>,
260    sorted_insertions: &[(Point, &str)],
261    file_line_count: Line,
262    include_line_numbers: bool,
263    output: &mut String,
264) {
265    let mut current_row = Line(0);
266    let mut sorted_insertions = sorted_insertions.iter().peekable();
267
268    for excerpt in excerpts {
269        if excerpt.start_line > current_row {
270            writeln!(output, "").unwrap();
271        }
272        if excerpt.text.is_empty() {
273            return;
274        }
275
276        current_row = excerpt.start_line;
277
278        for mut line in excerpt.text.lines() {
279            if include_line_numbers {
280                write!(output, "{}|", current_row.0 + 1).unwrap();
281            }
282
283            while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
284                match current_row.cmp(&insertion_location.line) {
285                    cmp::Ordering::Equal => {
286                        let (prefix, suffix) = line.split_at(insertion_location.column as usize);
287                        output.push_str(prefix);
288                        output.push_str(insertion_marker);
289                        line = suffix;
290                        sorted_insertions.next();
291                    }
292                    cmp::Ordering::Less => break,
293                    cmp::Ordering::Greater => {
294                        sorted_insertions.next();
295                        break;
296                    }
297                }
298            }
299            output.push_str(line);
300            output.push('\n');
301            current_row.0 += 1;
302        }
303    }
304
305    if current_row < file_line_count {
306        writeln!(output, "").unwrap();
307    }
308}
309
310pub fn push_events(output: &mut String, events: &[Arc<predict_edits_v3::Event>]) {
311    if events.is_empty() {
312        return;
313    };
314
315    writeln!(output, "`````diff").unwrap();
316    for event in events {
317        writeln!(output, "{}", event).unwrap();
318    }
319    writeln!(output, "`````\n").unwrap();
320}
321
322struct PromptData {
323    events: Vec<Arc<Event>>,
324    cursor_point: Point,
325    cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
326    included_files: Vec<RelatedFile>,
327}
328
329#[derive(Default)]
330pub struct GenerationParams {
331    pub temperature: Option<f32>,
332    pub top_p: Option<f32>,
333    pub stop: Option<Vec<String>>,
334}
335
336trait PromptFormatter {
337    fn render(&self, data: &PromptData) -> String;
338
339    fn generation_params() -> GenerationParams {
340        return GenerationParams::default();
341    }
342}
343
344struct MinimalQwenPrompt;
345
346impl PromptFormatter for MinimalQwenPrompt {
347    fn render(&self, data: &PromptData) -> String {
348        let edit_history = self.fmt_edit_history(data);
349        let context = self.fmt_context(data);
350
351        format!(
352            "{instructions}\n\n{edit_history}\n\n{context}",
353            instructions = MinimalQwenPrompt::INSTRUCTIONS,
354            edit_history = edit_history,
355            context = context
356        )
357    }
358}
359
360impl MinimalQwenPrompt {
361    const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
362
363    fn fmt_edit_history(&self, data: &PromptData) -> String {
364        if data.events.is_empty() {
365            "(No edit history)\n\n".to_string()
366        } else {
367            let mut events_str = String::new();
368            push_events(&mut events_str, &data.events);
369            format!(
370                "The following are the latest edits made by the user, from earlier to later.\n\n{}",
371                events_str
372            )
373        }
374    }
375
376    fn fmt_context(&self, data: &PromptData) -> String {
377        let mut context = String::new();
378        let include_line_numbers = true;
379
380        for related_file in &data.included_files {
381            writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
382
383            if related_file.path == data.cursor_path {
384                write!(context, "<|fim_prefix|>").unwrap();
385                write_excerpts(
386                    &related_file.excerpts,
387                    &[(data.cursor_point, "<|fim_suffix|>")],
388                    related_file.max_row,
389                    include_line_numbers,
390                    &mut context,
391                );
392                writeln!(context, "<|fim_middle|>").unwrap();
393            } else {
394                write_excerpts(
395                    &related_file.excerpts,
396                    &[],
397                    related_file.max_row,
398                    include_line_numbers,
399                    &mut context,
400                );
401            }
402        }
403        context
404    }
405}
406
407struct SeedCoder1120Prompt;
408
409impl PromptFormatter for SeedCoder1120Prompt {
410    fn render(&self, data: &PromptData) -> String {
411        let edit_history = self.fmt_edit_history(data);
412        let context = self.fmt_context(data);
413
414        format!(
415            "# Edit History:\n{edit_history}\n\n{context}",
416            edit_history = edit_history,
417            context = context
418        )
419    }
420
421    fn generation_params() -> GenerationParams {
422        GenerationParams {
423            temperature: Some(0.2),
424            top_p: Some(0.9),
425            stop: Some(vec!["<[end_of_sentence]>".into()]),
426        }
427    }
428}
429
430impl SeedCoder1120Prompt {
431    fn fmt_edit_history(&self, data: &PromptData) -> String {
432        if data.events.is_empty() {
433            "(No edit history)\n\n".to_string()
434        } else {
435            let mut events_str = String::new();
436            push_events(&mut events_str, &data.events);
437            events_str
438        }
439    }
440
441    fn fmt_context(&self, data: &PromptData) -> String {
442        let mut context = String::new();
443        let include_line_numbers = true;
444
445        for related_file in &data.included_files {
446            writeln!(context, "# Path: {}\n", DiffPathFmt(&related_file.path)).unwrap();
447
448            if related_file.path == data.cursor_path {
449                let fim_prompt = self.fmt_fim(&related_file, data.cursor_point);
450                context.push_str(&fim_prompt);
451            } else {
452                write_excerpts(
453                    &related_file.excerpts,
454                    &[],
455                    related_file.max_row,
456                    include_line_numbers,
457                    &mut context,
458                );
459            }
460        }
461        context
462    }
463
464    fn fmt_fim(&self, file: &RelatedFile, cursor_point: Point) -> String {
465        let mut buf = String::new();
466        const FIM_SUFFIX: &str = "<[fim-suffix]>";
467        const FIM_PREFIX: &str = "<[fim-prefix]>";
468        const FIM_MIDDLE: &str = "<[fim-middle]>";
469        write!(buf, "{}", FIM_PREFIX).unwrap();
470        write_excerpts(
471            &file.excerpts,
472            &[(cursor_point, FIM_SUFFIX)],
473            file.max_row,
474            true,
475            &mut buf,
476        );
477
478        // Swap prefix and suffix parts
479        let index = buf.find(FIM_SUFFIX).unwrap();
480        let prefix = &buf[..index];
481        let suffix = &buf[index..];
482
483        format!("{}{}{}", suffix, prefix, FIM_MIDDLE)
484    }
485}