1use anyhow::{Context as _, Result};
2use serde::{Deserialize, Serialize};
3use std::{borrow::Cow, fmt::Write as _, mem, ops::Range, path::Path, sync::Arc};
4use telemetry_events::EditPredictionRating;
5
/// Text that identifies the marker line inside a cursor-position excerpt.
///
/// `ExampleSpec::cursor_excerpt` searches for this string to locate the marker line, and
/// `ExampleSpec::set_cursor_excerpt` writes it after a `^` or `<` arrow.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
/// Alternative marker placed directly at the cursor location inside the excerpt text.
///
/// `ExampleSpec::cursor_excerpt` checks for this marker first; when present it is removed
/// from the excerpt and its byte offset is returned as the cursor offset.
pub const INLINE_CURSOR_MARKER: &str = "<|user_cursor|>";

/// Maximum cursor file size to capture (64KB).
/// Files larger than this will not have their content captured,
/// falling back to git-based loading.
pub const MAX_CURSOR_FILE_SIZE: usize = 64 * 1024;
13
/// Description of a single example: where the code lives, where the cursor is, what was
/// edited, and which patches are expected or rejected.
///
/// All fields take part in the serde representation. The markdown form produced by
/// `to_markdown` only covers the name, the front matter (`repository_url`, `revision`,
/// `tags`), the reasoning, the diffs, the cursor excerpt and the patches; the remaining
/// fields are left at their empty values by `from_markdown`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExampleSpec {
    /// Title of the example; written as the `#` heading in the markdown form.
    #[serde(default)]
    pub name: String,
    /// Stored in the TOML front matter of the markdown form.
    pub repository_url: String,
    /// Stored in the TOML front matter of the markdown form.
    pub revision: String,
    /// Stored in the TOML front matter of the markdown form; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Free-form text written under the "Reasoning" heading by `to_markdown`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning: Option<String>,
    /// Diff text for the "Uncommitted Diff" section; the section is skipped when empty.
    #[serde(default)]
    pub uncommitted_diff: String,
    /// Path used as the info string of the cursor-position code block.
    pub cursor_path: Arc<Path>,
    /// Excerpt around the cursor, including either a `[CURSOR_POSITION]` marker line or an
    /// inline `<|user_cursor|>` marker (see `cursor_excerpt`).
    pub cursor_position: String,
    /// Diff text for the "Edit History" section.
    pub edit_history: String,
    /// One entry per code block in the "Expected Patch" section.
    pub expected_patches: Vec<String>,
    /// Content of the code block in the "Rejected Patch" section, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rejected_patch: Option<String>,
    /// Not represented in the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub captured_prompt_input: Option<CapturedPromptInput>,
    /// Not represented in the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub telemetry: Option<TelemetrySource>,
    /// Not represented in the markdown form.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub human_feedback: Vec<HumanFeedback>,
    /// Not represented in the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rating: Option<EditPredictionRating>,
}
41
/// A single piece of free-form feedback attached to an [`ExampleSpec`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct HumanFeedback {
    /// The feedback text.
    pub message: String,
}
46
/// Metadata for examples sourced from production telemetry (rejected predictions).
///
/// All fields are stored verbatim; nothing in this file interprets them.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct TelemetrySource {
    pub request_id: String,
    pub device_id: String,
    // NOTE(review): kept as an opaque string here; the timestamp format is not visible
    // in this file — confirm against the producer.
    pub time: String,
    pub rejection_reason: String,
    pub was_shown: bool,
}
56
/// All data needed to run format_prompt without loading the project.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedPromptInput {
    /// Content of the file the cursor is in.
    pub cursor_file_content: String,
    // NOTE(review): presumably a byte offset into `cursor_file_content` — confirm with
    // the code that captures it.
    pub cursor_offset: usize,
    pub cursor_row: u32,
    pub cursor_column: u32,
    /// Optional; omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Captured events; each converts to a `zeta_prompt::Event` via `CapturedEvent::to_event`.
    pub events: Vec<CapturedEvent>,
    /// Captured related files; each converts via `CapturedRelatedFile::to_related_file`.
    pub related_files: Vec<CapturedRelatedFile>,
}
69
/// Serializable mirror of the fields of `zeta_prompt::Event::BufferChange`
/// (see [`CapturedEvent::to_event`]).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedEvent {
    pub path: Arc<Path>,
    pub old_path: Arc<Path>,
    pub diff: String,
    pub predicted: bool,
    pub in_open_source_repo: bool,
}
78
79impl CapturedEvent {
80 pub fn to_event(&self) -> zeta_prompt::Event {
81 zeta_prompt::Event::BufferChange {
82 path: self.path.clone(),
83 old_path: self.old_path.clone(),
84 diff: self.diff.clone(),
85 predicted: self.predicted,
86 in_open_source_repo: self.in_open_source_repo,
87 }
88 }
89}
90
/// Serializable mirror of `zeta_prompt::RelatedFile`
/// (see [`CapturedRelatedFile::to_related_file`]).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedFile {
    pub path: Arc<Path>,
    pub max_row: u32,
    /// Excerpts taken from the file at `path`.
    pub excerpts: Vec<CapturedRelatedExcerpt>,
}
97
98impl CapturedRelatedFile {
99 pub fn to_related_file(&self) -> zeta_prompt::RelatedFile {
100 zeta_prompt::RelatedFile {
101 path: self.path.clone(),
102 max_row: self.max_row,
103 excerpts: self
104 .excerpts
105 .iter()
106 .map(|e| zeta_prompt::RelatedExcerpt {
107 row_range: e.row_range.clone(),
108 text: e.text.clone().into(),
109 })
110 .collect(),
111 }
112 }
113}
114
/// A range of rows from a related file together with its text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedExcerpt {
    // NOTE(review): whether rows are zero-based is not visible in this file — confirm.
    pub row_range: Range<u32>,
    pub text: String,
}
120
// Titles of the `##` sections in the markdown form of an `ExampleSpec`.
// `to_markdown` writes them verbatim; `from_markdown` matches them ASCII-case-insensitively.
// The reasoning heading is only written: `from_markdown` has no matching section for it.
const REASONING_HEADING: &str = "Reasoning";
const UNCOMMITTED_DIFF_HEADING: &str = "Uncommitted Diff";
const EDIT_HISTORY_HEADING: &str = "Edit History";
const CURSOR_POSITION_HEADING: &str = "Cursor Position";
const EXPECTED_PATCH_HEADING: &str = "Expected Patch";
const REJECTED_PATCH_HEADING: &str = "Rejected Patch";
127
/// TOML front matter placed between `+++` lines at the top of the markdown form of an
/// `ExampleSpec`.
#[derive(Serialize, Deserialize)]
struct FrontMatter<'a> {
    repository_url: Cow<'a, str>,
    revision: Cow<'a, str>,
    /// Omitted from the TOML when empty; defaults to empty when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tags: Vec<String>,
}
135
136impl ExampleSpec {
137 /// Generate a sanitized filename for this example.
138 pub fn filename(&self) -> String {
139 self.name
140 .chars()
141 .map(|c| match c {
142 ' ' | ':' | '~' | '^' | '?' | '*' | '[' | '\\' | '@' | '{' | '/' | '<' | '>'
143 | '|' | '"' => '-',
144 c => c,
145 })
146 .collect()
147 }
148
149 /// Format this example spec as markdown.
150 pub fn to_markdown(&self) -> String {
151 use std::fmt::Write as _;
152
153 let front_matter = FrontMatter {
154 repository_url: Cow::Borrowed(&self.repository_url),
155 revision: Cow::Borrowed(&self.revision),
156 tags: self.tags.clone(),
157 };
158 let front_matter_toml =
159 toml::to_string_pretty(&front_matter).unwrap_or_else(|_| String::new());
160
161 let mut markdown = String::new();
162
163 _ = writeln!(markdown, "+++");
164 markdown.push_str(&front_matter_toml);
165 if !markdown.ends_with('\n') {
166 markdown.push('\n');
167 }
168 _ = writeln!(markdown, "+++");
169 markdown.push('\n');
170
171 _ = writeln!(markdown, "# {}", self.name);
172 markdown.push('\n');
173
174 if let Some(reasoning) = &self.reasoning {
175 _ = writeln!(markdown, "## {}", REASONING_HEADING);
176 markdown.push('\n');
177 markdown.push_str(reasoning);
178 if !markdown.ends_with('\n') {
179 markdown.push('\n');
180 }
181 markdown.push('\n');
182 }
183
184 if !self.uncommitted_diff.is_empty() {
185 _ = writeln!(markdown, "## {}", UNCOMMITTED_DIFF_HEADING);
186 _ = writeln!(markdown);
187 _ = writeln!(markdown, "```diff");
188 markdown.push_str(&self.uncommitted_diff);
189 if !markdown.ends_with('\n') {
190 markdown.push('\n');
191 }
192 _ = writeln!(markdown, "```");
193 markdown.push('\n');
194 }
195
196 _ = writeln!(markdown, "## {}", EDIT_HISTORY_HEADING);
197 _ = writeln!(markdown);
198
199 if self.edit_history.is_empty() {
200 _ = writeln!(markdown, "(No edit history)");
201 _ = writeln!(markdown);
202 } else {
203 _ = writeln!(markdown, "```diff");
204 markdown.push_str(&self.edit_history);
205 if !markdown.ends_with('\n') {
206 markdown.push('\n');
207 }
208 _ = writeln!(markdown, "```");
209 markdown.push('\n');
210 }
211
212 _ = writeln!(markdown, "## {}", CURSOR_POSITION_HEADING);
213 _ = writeln!(markdown);
214 _ = writeln!(markdown, "```{}", self.cursor_path.to_string_lossy());
215 markdown.push_str(&self.cursor_position);
216 if !markdown.ends_with('\n') {
217 markdown.push('\n');
218 }
219 _ = writeln!(markdown, "```");
220 markdown.push('\n');
221
222 _ = writeln!(markdown, "## {}", EXPECTED_PATCH_HEADING);
223 markdown.push('\n');
224 for patch in &self.expected_patches {
225 _ = writeln!(markdown, "```diff");
226 markdown.push_str(patch);
227 if !markdown.ends_with('\n') {
228 markdown.push('\n');
229 }
230 _ = writeln!(markdown, "```");
231 markdown.push('\n');
232 }
233
234 if let Some(rejected_patch) = &self.rejected_patch {
235 _ = writeln!(markdown, "## {}", REJECTED_PATCH_HEADING);
236 markdown.push('\n');
237 _ = writeln!(markdown, "```diff");
238 markdown.push_str(rejected_patch);
239 if !markdown.ends_with('\n') {
240 markdown.push('\n');
241 }
242 _ = writeln!(markdown, "```");
243 markdown.push('\n');
244 }
245
246 markdown
247 }
248
249 /// Parse an example spec from markdown.
250 pub fn from_markdown(mut input: &str) -> anyhow::Result<Self> {
251 use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Parser, Tag, TagEnd};
252
253 let mut spec = ExampleSpec {
254 name: String::new(),
255 repository_url: String::new(),
256 revision: String::new(),
257 tags: Vec::new(),
258 reasoning: None,
259 uncommitted_diff: String::new(),
260 cursor_path: Path::new("").into(),
261 cursor_position: String::new(),
262 edit_history: String::new(),
263 expected_patches: Vec::new(),
264 rejected_patch: None,
265 captured_prompt_input: None,
266 telemetry: None,
267 human_feedback: Vec::new(),
268 rating: None,
269 };
270
271 if let Some(rest) = input.strip_prefix("+++\n")
272 && let Some((front_matter, rest)) = rest.split_once("+++\n")
273 {
274 if let Ok(data) = toml::from_str::<FrontMatter<'_>>(front_matter) {
275 spec.repository_url = data.repository_url.into_owned();
276 spec.revision = data.revision.into_owned();
277 spec.tags = data.tags;
278 }
279 input = rest.trim_start();
280 }
281
282 let parser = Parser::new(input);
283 let mut text = String::new();
284 let mut block_info: CowStr = "".into();
285
286 #[derive(PartialEq)]
287 enum Section {
288 Start,
289 UncommittedDiff,
290 EditHistory,
291 CursorPosition,
292 ExpectedPatch,
293 RejectedPatch,
294 Other,
295 }
296
297 let mut current_section = Section::Start;
298
299 for event in parser {
300 match event {
301 Event::Text(line) => {
302 text.push_str(&line);
303 }
304 Event::End(TagEnd::Heading(HeadingLevel::H1)) => {
305 spec.name = mem::take(&mut text);
306 }
307 Event::End(TagEnd::Heading(HeadingLevel::H2)) => {
308 let title = mem::take(&mut text);
309 current_section = if title.eq_ignore_ascii_case(UNCOMMITTED_DIFF_HEADING) {
310 Section::UncommittedDiff
311 } else if title.eq_ignore_ascii_case(EDIT_HISTORY_HEADING) {
312 Section::EditHistory
313 } else if title.eq_ignore_ascii_case(CURSOR_POSITION_HEADING) {
314 Section::CursorPosition
315 } else if title.eq_ignore_ascii_case(EXPECTED_PATCH_HEADING) {
316 Section::ExpectedPatch
317 } else if title.eq_ignore_ascii_case(REJECTED_PATCH_HEADING) {
318 Section::RejectedPatch
319 } else {
320 Section::Other
321 };
322 }
323 Event::End(TagEnd::Heading(HeadingLevel::H3)) => {
324 mem::take(&mut text);
325 }
326 Event::End(TagEnd::Heading(HeadingLevel::H4)) => {
327 mem::take(&mut text);
328 }
329 Event::End(TagEnd::Heading(level)) => {
330 anyhow::bail!("Unexpected heading level: {level}");
331 }
332 Event::Start(Tag::CodeBlock(kind)) => {
333 match kind {
334 CodeBlockKind::Fenced(info) => {
335 block_info = info;
336 }
337 CodeBlockKind::Indented => {
338 anyhow::bail!("Unexpected indented codeblock");
339 }
340 };
341 }
342 Event::Start(_) => {
343 text.clear();
344 block_info = "".into();
345 }
346 Event::End(TagEnd::CodeBlock) => {
347 let block_info = block_info.trim();
348 match current_section {
349 Section::UncommittedDiff => {
350 spec.uncommitted_diff = mem::take(&mut text);
351 }
352 Section::EditHistory => {
353 spec.edit_history.push_str(&mem::take(&mut text));
354 }
355 Section::CursorPosition => {
356 spec.cursor_path = Path::new(block_info).into();
357 spec.cursor_position = mem::take(&mut text);
358 }
359 Section::ExpectedPatch => {
360 spec.expected_patches.push(mem::take(&mut text));
361 }
362 Section::RejectedPatch => {
363 spec.rejected_patch = Some(mem::take(&mut text));
364 }
365 Section::Start | Section::Other => {}
366 }
367 }
368 _ => {}
369 }
370 }
371
372 if spec.cursor_path.as_ref() == Path::new("") || spec.cursor_position.is_empty() {
373 anyhow::bail!("Missing cursor position codeblock");
374 }
375
376 Ok(spec)
377 }
378
379 /// Returns the excerpt of text around the cursor, and the offset of the cursor within that
380 /// excerpt.
381 ///
382 /// The cursor's position is marked with a special comment that appears
383 /// below the cursor line, which contains the string `[CURSOR_POSITION]`,
384 /// preceded by an arrow marking the cursor's column. The arrow can be
385 /// either:
386 /// - `^` - The cursor column is at the position of the `^` character (pointing up to the cursor)
387 /// - `<` - The cursor column is at the first non-whitespace character on that line.
388 pub fn cursor_excerpt(&self) -> Result<(String, usize)> {
389 let input = &self.cursor_position;
390
391 // Check for inline cursor marker first
392 if let Some(inline_offset) = input.find(INLINE_CURSOR_MARKER) {
393 let excerpt = input[..inline_offset].to_string()
394 + &input[inline_offset + INLINE_CURSOR_MARKER.len()..];
395 return Ok((excerpt, inline_offset));
396 }
397
398 let marker_offset = input
399 .find(CURSOR_POSITION_MARKER)
400 .context("missing [CURSOR_POSITION] marker")?;
401 let marker_line_start = input[..marker_offset]
402 .rfind('\n')
403 .map(|pos| pos + 1)
404 .unwrap_or(0);
405 let marker_line_end = input[marker_line_start..]
406 .find('\n')
407 .map(|pos| marker_line_start + pos + 1)
408 .unwrap_or(input.len());
409 let marker_line = &input[marker_line_start..marker_line_end].trim_end_matches('\n');
410
411 let cursor_column = if let Some(cursor_offset) = marker_line.find('^') {
412 cursor_offset
413 } else if let Some(less_than_pos) = marker_line.find('<') {
414 marker_line
415 .find(|c: char| !c.is_whitespace())
416 .unwrap_or(less_than_pos)
417 } else {
418 anyhow::bail!(
419 "cursor position marker line must contain '^' or '<' before [CURSOR_POSITION]"
420 );
421 };
422
423 let mut excerpt = input[..marker_line_start].to_string() + &input[marker_line_end..];
424 excerpt.truncate(excerpt.trim_end_matches('\n').len());
425
426 // The cursor is on the line above the marker line.
427 let cursor_line_end = marker_line_start.saturating_sub(1);
428 let cursor_line_start = excerpt[..cursor_line_end]
429 .rfind('\n')
430 .map(|pos| pos + 1)
431 .unwrap_or(0);
432 let cursor_offset = cursor_line_start + cursor_column;
433
434 Ok((excerpt, cursor_offset))
435 }
436
437 /// Sets the cursor position excerpt from a plain excerpt and cursor byte offset.
438 ///
439 /// The `line_comment_prefix` is used to format the marker line as a comment.
440 /// If the cursor column is less than the comment prefix length, the `<` format is used.
441 /// Otherwise, the `^` format is used.
442 pub fn set_cursor_excerpt(
443 &mut self,
444 excerpt: &str,
445 cursor_offset: usize,
446 line_comment_prefix: &str,
447 ) {
448 // Find which line the cursor is on and its column
449 let cursor_line_start = excerpt[..cursor_offset]
450 .rfind('\n')
451 .map(|pos| pos + 1)
452 .unwrap_or(0);
453 let cursor_line_end = excerpt[cursor_line_start..]
454 .find('\n')
455 .map(|pos| cursor_line_start + pos + 1)
456 .unwrap_or(excerpt.len());
457 let cursor_line = &excerpt[cursor_line_start..cursor_line_end];
458 let cursor_line_indent = &cursor_line[..cursor_line.len() - cursor_line.trim_start().len()];
459 let cursor_column = cursor_offset - cursor_line_start;
460
461 // Build the marker line
462 let mut marker_line = String::new();
463 if cursor_column < line_comment_prefix.len() {
464 for _ in 0..cursor_column {
465 marker_line.push(' ');
466 }
467 marker_line.push_str(line_comment_prefix);
468 write!(marker_line, " <{}", CURSOR_POSITION_MARKER).unwrap();
469 } else {
470 if cursor_column >= cursor_line_indent.len() + line_comment_prefix.len() {
471 marker_line.push_str(cursor_line_indent);
472 }
473 marker_line.push_str(line_comment_prefix);
474 while marker_line.len() < cursor_column {
475 marker_line.push(' ');
476 }
477 write!(marker_line, "^{}", CURSOR_POSITION_MARKER).unwrap();
478 }
479
480 // Build the final cursor_position string
481 let mut result = String::with_capacity(excerpt.len() + marker_line.len() + 2);
482 result.push_str(&excerpt[..cursor_line_end]);
483 if !result.ends_with('\n') {
484 result.push('\n');
485 }
486 result.push_str(&marker_line);
487 if cursor_line_end < excerpt.len() {
488 result.push('\n');
489 result.push_str(&excerpt[cursor_line_end..]);
490 }
491
492 self.cursor_position = result;
493 }
494}
495
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Builds a spec with every field empty except `cursor_path`, which is `test.rs`.
    fn blank_spec() -> ExampleSpec {
        ExampleSpec {
            name: String::new(),
            repository_url: String::new(),
            revision: String::new(),
            tags: Vec::new(),
            reasoning: None,
            uncommitted_diff: String::new(),
            cursor_path: Path::new("test.rs").into(),
            cursor_position: String::new(),
            edit_history: String::new(),
            expected_patches: Vec::new(),
            rejected_patch: None,
            captured_prompt_input: None,
            telemetry: None,
            human_feedback: Vec::new(),
            rating: None,
        }
    }

    /// Writes the marker line for `offset` with a `//` prefix, checks the generated text,
    /// then checks that reading it back yields the original excerpt and offset.
    fn assert_marker_round_trip(excerpt: &str, offset: usize, expected_position: &str) {
        let mut spec = blank_spec();
        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, expected_position.to_string());
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );
    }

    /// Checks the excerpt and offset read from text that uses the inline cursor marker.
    fn assert_inline_marker(cursor_position: &str, expected_excerpt: &str, expected_offset: usize) {
        let mut spec = blank_spec();
        spec.cursor_position = cursor_position.to_string();
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (expected_excerpt.to_string(), expected_offset)
        );
    }

    #[test]
    fn test_cursor_excerpt_with_caret() {
        let excerpt = indoc! {"
            fn main() {
                let x = 42;
                println!(\"{}\", x);
            }"
        };

        // Cursor before `42`
        assert_marker_round_trip(
            excerpt,
            excerpt.find("42").unwrap(),
            indoc! {"
                fn main() {
                    let x = 42;
                    //      ^[CURSOR_POSITION]
                    println!(\"{}\", x);
                }"
            },
        );

        // Cursor after `l` in `let`
        assert_marker_round_trip(
            excerpt,
            excerpt.find("et x").unwrap(),
            indoc! {"
                fn main() {
                    let x = 42;
                //   ^[CURSOR_POSITION]
                    println!(\"{}\", x);
                }"
            },
        );

        // Cursor before `let`
        assert_marker_round_trip(
            excerpt,
            excerpt.find("let").unwrap(),
            indoc! {"
                fn main() {
                    let x = 42;
                //  ^[CURSOR_POSITION]
                    println!(\"{}\", x);
                }"
            },
        );

        // Cursor at beginning of the line with `let`
        assert_marker_round_trip(
            excerpt,
            excerpt.find("    let").unwrap(),
            indoc! {"
                fn main() {
                    let x = 42;
                // <[CURSOR_POSITION]
                    println!(\"{}\", x);
                }"
            },
        );

        // Cursor at end of line, after the semicolon
        assert_marker_round_trip(
            excerpt,
            excerpt.find(';').unwrap() + 1,
            indoc! {"
                fn main() {
                    let x = 42;
                    //         ^[CURSOR_POSITION]
                    println!(\"{}\", x);
                }"
            },
        );

        // Caret at end of file (no trailing newline)
        let excerpt = indoc! {"
            fn main() {
                let x = 42;"
        };
        assert_marker_round_trip(
            excerpt,
            excerpt.find(';').unwrap() + 1,
            indoc! {"
                fn main() {
                    let x = 42;
                    //         ^[CURSOR_POSITION]"
            },
        );
    }

    #[test]
    fn test_cursor_excerpt_with_inline_marker() {
        // Cursor before `42` using inline marker
        let expected_excerpt = indoc! {"
            fn main() {
                let x = 42;
                println!(\"{}\", x);
            }"
        };
        assert_inline_marker(
            indoc! {"
                fn main() {
                    let x = <|user_cursor|>42;
                    println!(\"{}\", x);
                }"
            },
            expected_excerpt,
            expected_excerpt.find("42").unwrap(),
        );

        // Cursor at beginning of line
        let expected_excerpt = indoc! {"
            fn main() {
                let x = 42;
            }"
        };
        assert_inline_marker(
            indoc! {"
                fn main() {
                <|user_cursor|>    let x = 42;
                }"
            },
            expected_excerpt,
            expected_excerpt.find("    let").unwrap(),
        );

        // Cursor at end of file
        let expected_excerpt = "fn main() {}";
        assert_inline_marker(
            "fn main() {}<|user_cursor|>",
            expected_excerpt,
            expected_excerpt.len(),
        );
    }
}