1use anyhow::Result;
2use cloud_llm_client::predict_edits_v3::{
3 self, DiffPathFmt, Event, Excerpt, Line, Point, PromptFormat, RelatedFile,
4};
5use indoc::indoc;
6use std::cmp;
7use std::fmt::Write;
8use std::path::Path;
9use std::sync::Arc;
10
/// Default prompt size budget in bytes (10 KiB).
///
/// NOTE(review): not referenced anywhere else in this file; presumably enforced by
/// callers — confirm.
pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024;

/// Marker that `build_prompt` inserts at the cursor position for the `Minimal` and
/// `OldTextNewText` prompt formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n";
/// NOTE: Differs from zed version of constant - includes a newline
pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n";
18
/// Opening instructions used by `build_prompt` for `PromptFormat::Minimal`.
///
/// The text ends with the "Edit History" heading so that the rendered edit history can be
/// appended directly after it.
const STUDENT_MODEL_INSTRUCTIONS: &str = indoc! {r#"
    You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.

    ## Edit History

    "#};
25
/// Closing reminder that `build_prompt` appends after the code excerpts for
/// `PromptFormat::Minimal`.
const MINIMAL_PROMPT_REMINDER: &str = indoc! {"
    ---

    Please analyze the edit history and the files, then provide the unified diff for your predicted edits.
    Do not include the cursor marker in your output.
    If you're editing multiple files, be sure to reflect filename in the hunk's header.
    "};
33
34const XML_TAGS_INSTRUCTIONS: &str = indoc! {r#"
35 # Instructions
36
37 You are an edit prediction agent in a code editor.
38
39 Analyze the history of edits made by the user in order to infer what they are currently trying to accomplish.
40 Then complete the remainder of the current change if it is incomplete, or predict the next edit the user intends to make.
41 Always continue along the user's current trajectory, rather than changing course.
42
43 ## Output Format
44
45 You should briefly explain your understanding of the user's overall goal in one sentence, then explain what the next change
46 along the users current trajectory will be in another, and finally specify the next edit using the following XML-like format:
47
48 <edits path="my-project/src/myapp/cli.py">
49 <old_text>
50 OLD TEXT 1 HERE
51 </old_text>
52 <new_text>
53 NEW TEXT 1 HERE
54 </new_text>
55
56 <old_text>
57 OLD TEXT 1 HERE
58 </old_text>
59 <new_text>
60 NEW TEXT 1 HERE
61 </new_text>
62 </edits>
63
64 - Specify the file to edit using the `path` attribute.
65 - Use `<old_text>` and `<new_text>` tags to replace content
66 - `<old_text>` must exactly match existing file content, including indentation
67 - `<old_text>` cannot be empty
68 - Do not escape quotes, newlines, or other characters within tags
69 - Always close all tags properly
70 - Don't include the <|user_cursor|> marker in your output.
71
72 ## Edit History
73
74"#};
75
76const OLD_TEXT_NEW_TEXT_REMINDER: &str = indoc! {r#"
77 ---
78
79 Remember that the edits in the edit history have already been applied.
80"#};
81
82pub fn build_prompt(request: &predict_edits_v3::PredictEditsRequest) -> Result<String> {
83 let prompt_data = PromptData {
84 events: request.events.clone(),
85 cursor_point: request.cursor_point,
86 cursor_path: request.excerpt_path.clone(),
87 included_files: request.related_files.clone(),
88 };
89 match request.prompt_format {
90 PromptFormat::MinimalQwen => {
91 return Ok(MinimalQwenPrompt.render(&prompt_data));
92 }
93 PromptFormat::SeedCoder1120 => {
94 return Ok(SeedCoder1120Prompt.render(&prompt_data));
95 }
96 _ => (),
97 };
98
99 let insertions = match request.prompt_format {
100 PromptFormat::Minimal | PromptFormat::OldTextNewText => {
101 vec![(request.cursor_point, CURSOR_MARKER)]
102 }
103 PromptFormat::OnlySnippets => vec![],
104 PromptFormat::MinimalQwen => unreachable!(),
105 PromptFormat::SeedCoder1120 => unreachable!(),
106 };
107
108 let mut prompt = match request.prompt_format {
109 PromptFormat::OldTextNewText => XML_TAGS_INSTRUCTIONS.to_string(),
110 PromptFormat::OnlySnippets => String::new(),
111 PromptFormat::Minimal => STUDENT_MODEL_INSTRUCTIONS.to_string(),
112 PromptFormat::MinimalQwen => unreachable!(),
113 PromptFormat::SeedCoder1120 => unreachable!(),
114 };
115
116 if request.events.is_empty() {
117 prompt.push_str("(No edit history)\n\n");
118 } else {
119 let edit_preamble = if request.prompt_format == PromptFormat::Minimal {
120 "The following are the latest edits made by the user, from earlier to later.\n\n"
121 } else {
122 "Here are the latest edits made by the user, from earlier to later.\n\n"
123 };
124 prompt.push_str(edit_preamble);
125 push_events(&mut prompt, &request.events);
126 }
127
128 let excerpts_preamble = match request.prompt_format {
129 PromptFormat::Minimal => indoc! {"
130 ## Part of the file under the cursor
131
132 (The cursor marker <|user_cursor|> indicates the current user cursor position.
133 The file is in current state, edits from edit history has been applied.
134 We only show part of the file around the cursor.
135 You can only edit exactly this part of the file.
136 We prepend line numbers (e.g., `123|<actual line>`); they are not part of the file.)
137 "},
138 PromptFormat::OldTextNewText => indoc! {"
139 ## Code Excerpts
140
141 Here is some excerpts of code that you should take into account to predict the next edit.
142
143 The cursor position is marked by `<|user_cursor|>` as it stands after the last edit in the history.
144
145 In addition other excerpts are included to better understand what the edit will be, including the declaration
146 or references of symbols around the cursor, or other similar code snippets that may need to be updated
147 following patterns that appear in the edit history.
148
149 Consider each of them carefully in relation to the edit history, and that the user may not have navigated
150 to the next place they want to edit yet.
151
152 Lines starting with `…` indicate omitted line ranges. These may appear inside multi-line code constructs.
153 "},
154 PromptFormat::OnlySnippets | PromptFormat::MinimalQwen | PromptFormat::SeedCoder1120 => {
155 indoc! {"
156 ## Code Excerpts
157
158 The cursor marker <|user_cursor|> indicates the current user cursor position.
159 The file is in current state, edits from edit history have been applied.
160 "}
161 }
162 };
163
164 prompt.push_str(excerpts_preamble);
165 prompt.push('\n');
166
167 let include_line_numbers = matches!(request.prompt_format, PromptFormat::Minimal);
168 for related_file in &request.related_files {
169 if request.prompt_format == PromptFormat::Minimal {
170 write_codeblock_with_filename(
171 &related_file.path,
172 &related_file.excerpts,
173 if related_file.path == request.excerpt_path {
174 &insertions
175 } else {
176 &[]
177 },
178 related_file.max_row,
179 include_line_numbers,
180 &mut prompt,
181 );
182 } else {
183 write_codeblock(
184 &related_file.path,
185 &related_file.excerpts,
186 if related_file.path == request.excerpt_path {
187 &insertions
188 } else {
189 &[]
190 },
191 related_file.max_row,
192 include_line_numbers,
193 &mut prompt,
194 );
195 }
196 }
197
198 match request.prompt_format {
199 PromptFormat::OldTextNewText => {
200 prompt.push_str(OLD_TEXT_NEW_TEXT_REMINDER);
201 }
202 PromptFormat::Minimal => {
203 prompt.push_str(MINIMAL_PROMPT_REMINDER);
204 }
205 _ => {}
206 }
207
208 Ok(prompt)
209}
210
211pub fn generation_params(prompt_format: PromptFormat) -> GenerationParams {
212 match prompt_format {
213 PromptFormat::SeedCoder1120 => SeedCoder1120Prompt::generation_params(),
214 _ => GenerationParams::default(),
215 }
216}
217
218pub fn write_codeblock<'a>(
219 path: &Path,
220 excerpts: impl IntoIterator<Item = &'a Excerpt>,
221 sorted_insertions: &[(Point, &str)],
222 file_line_count: Line,
223 include_line_numbers: bool,
224 output: &'a mut String,
225) {
226 writeln!(output, "`````{}", DiffPathFmt(path)).unwrap();
227
228 write_excerpts(
229 excerpts,
230 sorted_insertions,
231 file_line_count,
232 include_line_numbers,
233 output,
234 );
235 write!(output, "`````\n\n").unwrap();
236}
237
238fn write_codeblock_with_filename<'a>(
239 path: &Path,
240 excerpts: impl IntoIterator<Item = &'a Excerpt>,
241 sorted_insertions: &[(Point, &str)],
242 file_line_count: Line,
243 include_line_numbers: bool,
244 output: &'a mut String,
245) {
246 writeln!(output, "`````filename={}", DiffPathFmt(path)).unwrap();
247
248 write_excerpts(
249 excerpts,
250 sorted_insertions,
251 file_line_count,
252 include_line_numbers,
253 output,
254 );
255 write!(output, "`````\n\n").unwrap();
256}
257
258pub fn write_excerpts<'a>(
259 excerpts: impl IntoIterator<Item = &'a Excerpt>,
260 sorted_insertions: &[(Point, &str)],
261 file_line_count: Line,
262 include_line_numbers: bool,
263 output: &mut String,
264) {
265 let mut current_row = Line(0);
266 let mut sorted_insertions = sorted_insertions.iter().peekable();
267
268 for excerpt in excerpts {
269 if excerpt.start_line > current_row {
270 writeln!(output, "…").unwrap();
271 }
272 if excerpt.text.is_empty() {
273 return;
274 }
275
276 current_row = excerpt.start_line;
277
278 for mut line in excerpt.text.lines() {
279 if include_line_numbers {
280 write!(output, "{}|", current_row.0 + 1).unwrap();
281 }
282
283 while let Some((insertion_location, insertion_marker)) = sorted_insertions.peek() {
284 match current_row.cmp(&insertion_location.line) {
285 cmp::Ordering::Equal => {
286 let (prefix, suffix) = line.split_at(insertion_location.column as usize);
287 output.push_str(prefix);
288 output.push_str(insertion_marker);
289 line = suffix;
290 sorted_insertions.next();
291 }
292 cmp::Ordering::Less => break,
293 cmp::Ordering::Greater => {
294 sorted_insertions.next();
295 break;
296 }
297 }
298 }
299 output.push_str(line);
300 output.push('\n');
301 current_row.0 += 1;
302 }
303 }
304
305 if current_row < file_line_count {
306 writeln!(output, "…").unwrap();
307 }
308}
309
310pub fn push_events(output: &mut String, events: &[Arc<predict_edits_v3::Event>]) {
311 if events.is_empty() {
312 return;
313 };
314
315 writeln!(output, "`````diff").unwrap();
316 for event in events {
317 writeln!(output, "{}", event).unwrap();
318 }
319 writeln!(output, "`````\n").unwrap();
320}
321
322struct PromptData {
323 events: Vec<Arc<Event>>,
324 cursor_point: Point,
325 cursor_path: Arc<Path>, // TODO: make a common struct with cursor_point
326 included_files: Vec<RelatedFile>,
327}
328
329#[derive(Default)]
330pub struct GenerationParams {
331 pub temperature: Option<f32>,
332 pub top_p: Option<f32>,
333 pub stop: Option<Vec<String>>,
334}
335
336trait PromptFormatter {
337 fn render(&self, data: &PromptData) -> String;
338
339 fn generation_params() -> GenerationParams {
340 return GenerationParams::default();
341 }
342}
343
344struct MinimalQwenPrompt;
345
346impl PromptFormatter for MinimalQwenPrompt {
347 fn render(&self, data: &PromptData) -> String {
348 let edit_history = self.fmt_edit_history(data);
349 let context = self.fmt_context(data);
350
351 format!(
352 "{instructions}\n\n{edit_history}\n\n{context}",
353 instructions = MinimalQwenPrompt::INSTRUCTIONS,
354 edit_history = edit_history,
355 context = context
356 )
357 }
358}
359
360impl MinimalQwenPrompt {
361 const INSTRUCTIONS: &str = "You are a code completion assistant that analyzes edit history to identify and systematically complete incomplete refactorings or patterns across the entire codebase.\n";
362
363 fn fmt_edit_history(&self, data: &PromptData) -> String {
364 if data.events.is_empty() {
365 "(No edit history)\n\n".to_string()
366 } else {
367 let mut events_str = String::new();
368 push_events(&mut events_str, &data.events);
369 format!(
370 "The following are the latest edits made by the user, from earlier to later.\n\n{}",
371 events_str
372 )
373 }
374 }
375
376 fn fmt_context(&self, data: &PromptData) -> String {
377 let mut context = String::new();
378 let include_line_numbers = true;
379
380 for related_file in &data.included_files {
381 writeln!(context, "<|file_sep|>{}", DiffPathFmt(&related_file.path)).unwrap();
382
383 if related_file.path == data.cursor_path {
384 write!(context, "<|fim_prefix|>").unwrap();
385 write_excerpts(
386 &related_file.excerpts,
387 &[(data.cursor_point, "<|fim_suffix|>")],
388 related_file.max_row,
389 include_line_numbers,
390 &mut context,
391 );
392 writeln!(context, "<|fim_middle|>").unwrap();
393 } else {
394 write_excerpts(
395 &related_file.excerpts,
396 &[],
397 related_file.max_row,
398 include_line_numbers,
399 &mut context,
400 );
401 }
402 }
403 context
404 }
405}
406
407struct SeedCoder1120Prompt;
408
409impl PromptFormatter for SeedCoder1120Prompt {
410 fn render(&self, data: &PromptData) -> String {
411 let edit_history = self.fmt_edit_history(data);
412 let context = self.fmt_context(data);
413
414 format!(
415 "# Edit History:\n{edit_history}\n\n{context}",
416 edit_history = edit_history,
417 context = context
418 )
419 }
420
421 fn generation_params() -> GenerationParams {
422 GenerationParams {
423 temperature: Some(0.2),
424 top_p: Some(0.9),
425 stop: Some(vec!["<[end_of_sentence]>".into()]),
426 }
427 }
428}
429
430impl SeedCoder1120Prompt {
431 fn fmt_edit_history(&self, data: &PromptData) -> String {
432 if data.events.is_empty() {
433 "(No edit history)\n\n".to_string()
434 } else {
435 let mut events_str = String::new();
436 push_events(&mut events_str, &data.events);
437 events_str
438 }
439 }
440
441 fn fmt_context(&self, data: &PromptData) -> String {
442 let mut context = String::new();
443 let include_line_numbers = true;
444
445 for related_file in &data.included_files {
446 writeln!(context, "# Path: {}\n", DiffPathFmt(&related_file.path)).unwrap();
447
448 if related_file.path == data.cursor_path {
449 let fim_prompt = self.fmt_fim(&related_file, data.cursor_point);
450 context.push_str(&fim_prompt);
451 } else {
452 write_excerpts(
453 &related_file.excerpts,
454 &[],
455 related_file.max_row,
456 include_line_numbers,
457 &mut context,
458 );
459 }
460 }
461 context
462 }
463
464 fn fmt_fim(&self, file: &RelatedFile, cursor_point: Point) -> String {
465 let mut buf = String::new();
466 const FIM_SUFFIX: &str = "<[fim-suffix]>";
467 const FIM_PREFIX: &str = "<[fim-prefix]>";
468 const FIM_MIDDLE: &str = "<[fim-middle]>";
469 write!(buf, "{}", FIM_PREFIX).unwrap();
470 write_excerpts(
471 &file.excerpts,
472 &[(cursor_point, FIM_SUFFIX)],
473 file.max_row,
474 true,
475 &mut buf,
476 );
477
478 // Swap prefix and suffix parts
479 let index = buf.find(FIM_SUFFIX).unwrap();
480 let prefix = &buf[..index];
481 let suffix = &buf[index..];
482
483 format!("{}{}{}", suffix, prefix, FIM_MIDDLE)
484 }
485}