1use anyhow::{Context as _, Result};
2use serde::{Deserialize, Serialize};
3use std::{borrow::Cow, fmt::Write as _, mem, ops::Range, path::Path, sync::Arc};
4use telemetry_events::EditPredictionRating;
5
/// Marker text that identifies the marker line placed below the cursor line in
/// `ExampleSpec::cursor_position`. An arrow (`^` or `<`) on the same line encodes the column.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
/// Marker placed inline at the exact cursor location. `ExampleSpec::cursor_excerpt` looks for
/// this marker first and only falls back to [`CURSOR_POSITION_MARKER`] when it is absent.
pub const INLINE_CURSOR_MARKER: &str = "<|user_cursor|>";

/// Maximum cursor file size to capture (64KB).
/// Files larger than this will not have their content captured,
/// falling back to git-based loading.
pub const MAX_CURSOR_FILE_SIZE: usize = 64 * 1024;
13
/// A single example, serializable with serde and convertible to and from a markdown
/// document (see `ExampleSpec::to_markdown` and `ExampleSpec::from_markdown`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExampleSpec {
    /// Example name. Rendered as the level-1 heading in markdown and sanitized by `filename`.
    #[serde(default)]
    pub name: String,
    /// Repository URL; stored in the TOML front matter of the markdown form.
    pub repository_url: String,
    /// Repository revision; stored in the TOML front matter of the markdown form.
    pub revision: String,
    /// Free-form tags; stored in the front matter and omitted there when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Optional free-form text written under the `Reasoning` heading.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning: Option<String>,
    /// Diff text written under the `Uncommitted Diff` heading; the section is skipped when empty.
    #[serde(default)]
    pub uncommitted_diff: String,
    /// Path of the file containing the cursor. Used as the info string of the
    /// cursor-position code block in markdown.
    pub cursor_path: Arc<Path>,
    /// Excerpt around the cursor with an embedded cursor marker; decoded by `cursor_excerpt`.
    pub cursor_position: String,
    /// Diff text written under the `Edit History` heading.
    pub edit_history: String,
    /// Expected patches; each one becomes its own `diff` code block in markdown.
    pub expected_patches: Vec<String>,
    /// Optional patch written under the `Rejected Patch` heading.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rejected_patch: Option<String>,
    /// Optional captured prompt inputs (see [`CapturedPromptInput`]); not part of the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub captured_prompt_input: Option<CapturedPromptInput>,
    /// Optional telemetry metadata (see [`TelemetrySource`]); not part of the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub telemetry: Option<TelemetrySource>,
    /// Feedback messages attached to this example; not part of the markdown form.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub human_feedback: Vec<HumanFeedback>,
    /// Optional rating; not part of the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rating: Option<EditPredictionRating>,
}
41
/// A feedback entry stored in `ExampleSpec::human_feedback`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct HumanFeedback {
    /// The feedback text.
    pub message: String,
}
46
/// Metadata for examples sourced from production telemetry (rejected predictions).
///
/// NOTE(review): the producer of these values is not visible in this file; the field notes
/// below restate the field names and should be confirmed against the telemetry schema.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct TelemetrySource {
    /// Identifier of the originating request.
    pub request_id: String,
    /// Identifier of the originating device.
    pub device_id: String,
    /// Time of the event, kept as a string (format not specified here).
    pub time: String,
    /// Reason the prediction was rejected, kept as a string.
    pub rejection_reason: String,
    /// Presumably whether the prediction was displayed to the user — TODO confirm.
    pub was_shown: bool,
}
56
/// All data needed to run format_prompt without loading the project.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedPromptInput {
    /// Content of the file containing the cursor.
    pub cursor_file_content: String,
    /// Cursor offset; presumably a byte offset into `cursor_file_content` — TODO confirm.
    pub cursor_offset: usize,
    /// Cursor row (origin, zero- or one-based, is not established in this file).
    pub cursor_row: u32,
    /// Cursor column (units and origin are not established in this file).
    pub cursor_column: u32,
    /// Captured events; each converts to a `zeta_prompt::Event` via `CapturedEvent::to_event`.
    pub events: Vec<CapturedEvent>,
    /// Captured related files; each converts via `CapturedRelatedFile::to_related_file`.
    pub related_files: Vec<CapturedRelatedFile>,
}
67
/// Serializable snapshot of a buffer-change event. The fields mirror those of
/// `zeta_prompt::Event::BufferChange`, which `CapturedEvent::to_event` reconstructs.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedEvent {
    /// Path of the changed file.
    pub path: Arc<Path>,
    /// Previous path of the changed file.
    pub old_path: Arc<Path>,
    /// Diff text describing the change.
    pub diff: String,
    /// Copied verbatim into `BufferChange::predicted`.
    pub predicted: bool,
    /// Copied verbatim into `BufferChange::in_open_source_repo`.
    pub in_open_source_repo: bool,
}
76
77impl CapturedEvent {
78 pub fn to_event(&self) -> zeta_prompt::Event {
79 zeta_prompt::Event::BufferChange {
80 path: self.path.clone(),
81 old_path: self.old_path.clone(),
82 diff: self.diff.clone(),
83 predicted: self.predicted,
84 in_open_source_repo: self.in_open_source_repo,
85 }
86 }
87}
88
/// Serializable snapshot of a related file; converts to `zeta_prompt::RelatedFile` via
/// `CapturedRelatedFile::to_related_file`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedFile {
    /// Path of the related file.
    pub path: Arc<Path>,
    /// Copied verbatim into `RelatedFile::max_row`.
    pub max_row: u32,
    /// Excerpts taken from the file.
    pub excerpts: Vec<CapturedRelatedExcerpt>,
}
95
96impl CapturedRelatedFile {
97 pub fn to_related_file(&self) -> zeta_prompt::RelatedFile {
98 zeta_prompt::RelatedFile {
99 path: self.path.clone(),
100 max_row: self.max_row,
101 excerpts: self
102 .excerpts
103 .iter()
104 .map(|e| zeta_prompt::RelatedExcerpt {
105 row_range: e.row_range.clone(),
106 text: e.text.clone().into(),
107 })
108 .collect(),
109 }
110 }
111}
112
/// Serializable snapshot of one excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedExcerpt {
    /// Range of rows the excerpt covers.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: String,
}
118
// Level-2 heading titles of the markdown form of an `ExampleSpec`. `to_markdown` writes them
// verbatim; `from_markdown` matches them ASCII-case-insensitively, except for the reasoning
// heading, which it does not look for.
const REASONING_HEADING: &str = "Reasoning";
const UNCOMMITTED_DIFF_HEADING: &str = "Uncommitted Diff";
const EDIT_HISTORY_HEADING: &str = "Edit History";
const CURSOR_POSITION_HEADING: &str = "Cursor Position";
const EXPECTED_PATCH_HEADING: &str = "Expected Patch";
const REJECTED_PATCH_HEADING: &str = "Rejected Patch";
125
/// TOML front matter of the markdown form of an `ExampleSpec`, written between two `+++`
/// lines at the top of the document.
#[derive(Serialize, Deserialize)]
struct FrontMatter<'a> {
    /// Mirrors `ExampleSpec::repository_url`.
    repository_url: Cow<'a, str>,
    /// Mirrors `ExampleSpec::revision`.
    revision: Cow<'a, str>,
    /// Mirrors `ExampleSpec::tags`; omitted from the output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tags: Vec<String>,
}
133
134impl ExampleSpec {
135 /// Generate a sanitized filename for this example.
136 pub fn filename(&self) -> String {
137 self.name
138 .chars()
139 .map(|c| match c {
140 ' ' | ':' | '~' | '^' | '?' | '*' | '[' | '\\' | '@' | '{' | '/' | '<' | '>'
141 | '|' | '"' => '-',
142 c => c,
143 })
144 .collect()
145 }
146
147 /// Format this example spec as markdown.
148 pub fn to_markdown(&self) -> String {
149 use std::fmt::Write as _;
150
151 let front_matter = FrontMatter {
152 repository_url: Cow::Borrowed(&self.repository_url),
153 revision: Cow::Borrowed(&self.revision),
154 tags: self.tags.clone(),
155 };
156 let front_matter_toml =
157 toml::to_string_pretty(&front_matter).unwrap_or_else(|_| String::new());
158
159 let mut markdown = String::new();
160
161 _ = writeln!(markdown, "+++");
162 markdown.push_str(&front_matter_toml);
163 if !markdown.ends_with('\n') {
164 markdown.push('\n');
165 }
166 _ = writeln!(markdown, "+++");
167 markdown.push('\n');
168
169 _ = writeln!(markdown, "# {}", self.name);
170 markdown.push('\n');
171
172 if let Some(reasoning) = &self.reasoning {
173 _ = writeln!(markdown, "## {}", REASONING_HEADING);
174 markdown.push('\n');
175 markdown.push_str(reasoning);
176 if !markdown.ends_with('\n') {
177 markdown.push('\n');
178 }
179 markdown.push('\n');
180 }
181
182 if !self.uncommitted_diff.is_empty() {
183 _ = writeln!(markdown, "## {}", UNCOMMITTED_DIFF_HEADING);
184 _ = writeln!(markdown);
185 _ = writeln!(markdown, "```diff");
186 markdown.push_str(&self.uncommitted_diff);
187 if !markdown.ends_with('\n') {
188 markdown.push('\n');
189 }
190 _ = writeln!(markdown, "```");
191 markdown.push('\n');
192 }
193
194 _ = writeln!(markdown, "## {}", EDIT_HISTORY_HEADING);
195 _ = writeln!(markdown);
196
197 if self.edit_history.is_empty() {
198 _ = writeln!(markdown, "(No edit history)");
199 _ = writeln!(markdown);
200 } else {
201 _ = writeln!(markdown, "```diff");
202 markdown.push_str(&self.edit_history);
203 if !markdown.ends_with('\n') {
204 markdown.push('\n');
205 }
206 _ = writeln!(markdown, "```");
207 markdown.push('\n');
208 }
209
210 _ = writeln!(markdown, "## {}", CURSOR_POSITION_HEADING);
211 _ = writeln!(markdown);
212 _ = writeln!(markdown, "```{}", self.cursor_path.to_string_lossy());
213 markdown.push_str(&self.cursor_position);
214 if !markdown.ends_with('\n') {
215 markdown.push('\n');
216 }
217 _ = writeln!(markdown, "```");
218 markdown.push('\n');
219
220 _ = writeln!(markdown, "## {}", EXPECTED_PATCH_HEADING);
221 markdown.push('\n');
222 for patch in &self.expected_patches {
223 _ = writeln!(markdown, "```diff");
224 markdown.push_str(patch);
225 if !markdown.ends_with('\n') {
226 markdown.push('\n');
227 }
228 _ = writeln!(markdown, "```");
229 markdown.push('\n');
230 }
231
232 if let Some(rejected_patch) = &self.rejected_patch {
233 _ = writeln!(markdown, "## {}", REJECTED_PATCH_HEADING);
234 markdown.push('\n');
235 _ = writeln!(markdown, "```diff");
236 markdown.push_str(rejected_patch);
237 if !markdown.ends_with('\n') {
238 markdown.push('\n');
239 }
240 _ = writeln!(markdown, "```");
241 markdown.push('\n');
242 }
243
244 markdown
245 }
246
247 /// Parse an example spec from markdown.
248 pub fn from_markdown(mut input: &str) -> anyhow::Result<Self> {
249 use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Parser, Tag, TagEnd};
250
251 let mut spec = ExampleSpec {
252 name: String::new(),
253 repository_url: String::new(),
254 revision: String::new(),
255 tags: Vec::new(),
256 reasoning: None,
257 uncommitted_diff: String::new(),
258 cursor_path: Path::new("").into(),
259 cursor_position: String::new(),
260 edit_history: String::new(),
261 expected_patches: Vec::new(),
262 rejected_patch: None,
263 captured_prompt_input: None,
264 telemetry: None,
265 human_feedback: Vec::new(),
266 rating: None,
267 };
268
269 if let Some(rest) = input.strip_prefix("+++\n")
270 && let Some((front_matter, rest)) = rest.split_once("+++\n")
271 {
272 if let Ok(data) = toml::from_str::<FrontMatter<'_>>(front_matter) {
273 spec.repository_url = data.repository_url.into_owned();
274 spec.revision = data.revision.into_owned();
275 spec.tags = data.tags;
276 }
277 input = rest.trim_start();
278 }
279
280 let parser = Parser::new(input);
281 let mut text = String::new();
282 let mut block_info: CowStr = "".into();
283
284 #[derive(PartialEq)]
285 enum Section {
286 Start,
287 UncommittedDiff,
288 EditHistory,
289 CursorPosition,
290 ExpectedPatch,
291 RejectedPatch,
292 Other,
293 }
294
295 let mut current_section = Section::Start;
296
297 for event in parser {
298 match event {
299 Event::Text(line) => {
300 text.push_str(&line);
301 }
302 Event::End(TagEnd::Heading(HeadingLevel::H1)) => {
303 spec.name = mem::take(&mut text);
304 }
305 Event::End(TagEnd::Heading(HeadingLevel::H2)) => {
306 let title = mem::take(&mut text);
307 current_section = if title.eq_ignore_ascii_case(UNCOMMITTED_DIFF_HEADING) {
308 Section::UncommittedDiff
309 } else if title.eq_ignore_ascii_case(EDIT_HISTORY_HEADING) {
310 Section::EditHistory
311 } else if title.eq_ignore_ascii_case(CURSOR_POSITION_HEADING) {
312 Section::CursorPosition
313 } else if title.eq_ignore_ascii_case(EXPECTED_PATCH_HEADING) {
314 Section::ExpectedPatch
315 } else if title.eq_ignore_ascii_case(REJECTED_PATCH_HEADING) {
316 Section::RejectedPatch
317 } else {
318 Section::Other
319 };
320 }
321 Event::End(TagEnd::Heading(HeadingLevel::H3)) => {
322 mem::take(&mut text);
323 }
324 Event::End(TagEnd::Heading(HeadingLevel::H4)) => {
325 mem::take(&mut text);
326 }
327 Event::End(TagEnd::Heading(level)) => {
328 anyhow::bail!("Unexpected heading level: {level}");
329 }
330 Event::Start(Tag::CodeBlock(kind)) => {
331 match kind {
332 CodeBlockKind::Fenced(info) => {
333 block_info = info;
334 }
335 CodeBlockKind::Indented => {
336 anyhow::bail!("Unexpected indented codeblock");
337 }
338 };
339 }
340 Event::Start(_) => {
341 text.clear();
342 block_info = "".into();
343 }
344 Event::End(TagEnd::CodeBlock) => {
345 let block_info = block_info.trim();
346 match current_section {
347 Section::UncommittedDiff => {
348 spec.uncommitted_diff = mem::take(&mut text);
349 }
350 Section::EditHistory => {
351 spec.edit_history.push_str(&mem::take(&mut text));
352 }
353 Section::CursorPosition => {
354 spec.cursor_path = Path::new(block_info).into();
355 spec.cursor_position = mem::take(&mut text);
356 }
357 Section::ExpectedPatch => {
358 spec.expected_patches.push(mem::take(&mut text));
359 }
360 Section::RejectedPatch => {
361 spec.rejected_patch = Some(mem::take(&mut text));
362 }
363 Section::Start | Section::Other => {}
364 }
365 }
366 _ => {}
367 }
368 }
369
370 if spec.cursor_path.as_ref() == Path::new("") || spec.cursor_position.is_empty() {
371 anyhow::bail!("Missing cursor position codeblock");
372 }
373
374 Ok(spec)
375 }
376
377 /// Returns the excerpt of text around the cursor, and the offset of the cursor within that
378 /// excerpt.
379 ///
380 /// The cursor's position is marked with a special comment that appears
381 /// below the cursor line, which contains the string `[CURSOR_POSITION]`,
382 /// preceded by an arrow marking the cursor's column. The arrow can be
383 /// either:
384 /// - `^` - The cursor column is at the position of the `^` character (pointing up to the cursor)
385 /// - `<` - The cursor column is at the first non-whitespace character on that line.
386 pub fn cursor_excerpt(&self) -> Result<(String, usize)> {
387 let input = &self.cursor_position;
388
389 // Check for inline cursor marker first
390 if let Some(inline_offset) = input.find(INLINE_CURSOR_MARKER) {
391 let excerpt = input[..inline_offset].to_string()
392 + &input[inline_offset + INLINE_CURSOR_MARKER.len()..];
393 return Ok((excerpt, inline_offset));
394 }
395
396 let marker_offset = input
397 .find(CURSOR_POSITION_MARKER)
398 .context("missing [CURSOR_POSITION] marker")?;
399 let marker_line_start = input[..marker_offset]
400 .rfind('\n')
401 .map(|pos| pos + 1)
402 .unwrap_or(0);
403 let marker_line_end = input[marker_line_start..]
404 .find('\n')
405 .map(|pos| marker_line_start + pos + 1)
406 .unwrap_or(input.len());
407 let marker_line = &input[marker_line_start..marker_line_end].trim_end_matches('\n');
408
409 let cursor_column = if let Some(cursor_offset) = marker_line.find('^') {
410 cursor_offset
411 } else if let Some(less_than_pos) = marker_line.find('<') {
412 marker_line
413 .find(|c: char| !c.is_whitespace())
414 .unwrap_or(less_than_pos)
415 } else {
416 anyhow::bail!(
417 "cursor position marker line must contain '^' or '<' before [CURSOR_POSITION]"
418 );
419 };
420
421 let mut excerpt = input[..marker_line_start].to_string() + &input[marker_line_end..];
422 excerpt.truncate(excerpt.trim_end_matches('\n').len());
423
424 // The cursor is on the line above the marker line.
425 let cursor_line_end = marker_line_start.saturating_sub(1);
426 let cursor_line_start = excerpt[..cursor_line_end]
427 .rfind('\n')
428 .map(|pos| pos + 1)
429 .unwrap_or(0);
430 let cursor_offset = cursor_line_start + cursor_column;
431
432 Ok((excerpt, cursor_offset))
433 }
434
435 /// Sets the cursor position excerpt from a plain excerpt and cursor byte offset.
436 ///
437 /// The `line_comment_prefix` is used to format the marker line as a comment.
438 /// If the cursor column is less than the comment prefix length, the `<` format is used.
439 /// Otherwise, the `^` format is used.
440 pub fn set_cursor_excerpt(
441 &mut self,
442 excerpt: &str,
443 cursor_offset: usize,
444 line_comment_prefix: &str,
445 ) {
446 // Find which line the cursor is on and its column
447 let cursor_line_start = excerpt[..cursor_offset]
448 .rfind('\n')
449 .map(|pos| pos + 1)
450 .unwrap_or(0);
451 let cursor_line_end = excerpt[cursor_line_start..]
452 .find('\n')
453 .map(|pos| cursor_line_start + pos + 1)
454 .unwrap_or(excerpt.len());
455 let cursor_line = &excerpt[cursor_line_start..cursor_line_end];
456 let cursor_line_indent = &cursor_line[..cursor_line.len() - cursor_line.trim_start().len()];
457 let cursor_column = cursor_offset - cursor_line_start;
458
459 // Build the marker line
460 let mut marker_line = String::new();
461 if cursor_column < line_comment_prefix.len() {
462 for _ in 0..cursor_column {
463 marker_line.push(' ');
464 }
465 marker_line.push_str(line_comment_prefix);
466 write!(marker_line, " <{}", CURSOR_POSITION_MARKER).unwrap();
467 } else {
468 if cursor_column >= cursor_line_indent.len() + line_comment_prefix.len() {
469 marker_line.push_str(cursor_line_indent);
470 }
471 marker_line.push_str(line_comment_prefix);
472 while marker_line.len() < cursor_column {
473 marker_line.push(' ');
474 }
475 write!(marker_line, "^{}", CURSOR_POSITION_MARKER).unwrap();
476 }
477
478 // Build the final cursor_position string
479 let mut result = String::with_capacity(excerpt.len() + marker_line.len() + 2);
480 result.push_str(&excerpt[..cursor_line_end]);
481 if !result.ends_with('\n') {
482 result.push('\n');
483 }
484 result.push_str(&marker_line);
485 if cursor_line_end < excerpt.len() {
486 result.push('\n');
487 result.push_str(&excerpt[cursor_line_end..]);
488 }
489
490 self.cursor_position = result;
491 }
492}
493
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Builds a spec whose fields are all empty except for `cursor_path`.
    fn empty_spec() -> ExampleSpec {
        ExampleSpec {
            name: String::new(),
            repository_url: String::new(),
            revision: String::new(),
            tags: Vec::new(),
            reasoning: None,
            uncommitted_diff: String::new(),
            cursor_path: Path::new("test.rs").into(),
            cursor_position: String::new(),
            edit_history: String::new(),
            expected_patches: Vec::new(),
            rejected_patch: None,
            captured_prompt_input: None,
            telemetry: None,
            human_feedback: Vec::new(),
            rating: None,
        }
    }

    /// Round-trips several cursor positions through `set_cursor_excerpt` and
    /// `cursor_excerpt`, checking the generated marker line each time.
    #[test]
    fn test_cursor_excerpt_with_caret() {
        let mut spec = empty_spec();

        // Cursor before `42`
        let excerpt = indoc! {"
            fn main() {
                let x = 42;
                println!(\"{}\", x);
            }"
        };
        let offset = excerpt.find("42").unwrap();
        let position_string = indoc! {"
            fn main() {
                let x = 42;
                //      ^[CURSOR_POSITION]
                println!(\"{}\", x);
            }"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );

        // Cursor after `l` in `let`
        let offset = excerpt.find("et x").unwrap();
        let position_string = indoc! {"
            fn main() {
                let x = 42;
            //   ^[CURSOR_POSITION]
                println!(\"{}\", x);
            }"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );

        // Cursor before `let`
        let offset = excerpt.find("let").unwrap();
        let position_string = indoc! {"
            fn main() {
                let x = 42;
            //  ^[CURSOR_POSITION]
                println!(\"{}\", x);
            }"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );

        // Cursor at beginning of the line with `let`
        let offset = excerpt.find("    let").unwrap();
        let position_string = indoc! {"
            fn main() {
                let x = 42;
            // <[CURSOR_POSITION]
                println!(\"{}\", x);
            }"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );

        // Cursor at end of line, after the semicolon
        let offset = excerpt.find(';').unwrap() + 1;
        let position_string = indoc! {"
            fn main() {
                let x = 42;
                //         ^[CURSOR_POSITION]
                println!(\"{}\", x);
            }"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );

        // Caret at end of file (no trailing newline)
        let excerpt = indoc! {"
            fn main() {
                let x = 42;"
        };
        let offset = excerpt.find(';').unwrap() + 1;
        let position_string = indoc! {"
            fn main() {
                let x = 42;
                //         ^[CURSOR_POSITION]"
        }
        .to_string();

        spec.set_cursor_excerpt(excerpt, offset, "//");
        assert_eq!(spec.cursor_position, position_string);
        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (excerpt.to_string(), offset)
        );
    }

    /// Checks that the inline `<|user_cursor|>` marker is removed from the excerpt and that
    /// its position is reported as the cursor offset.
    #[test]
    fn test_cursor_excerpt_with_inline_marker() {
        let mut spec = empty_spec();

        // Cursor before `42` using inline marker
        spec.cursor_position = indoc! {"
            fn main() {
                let x = <|user_cursor|>42;
                println!(\"{}\", x);
            }"
        }
        .to_string();

        let expected_excerpt = indoc! {"
            fn main() {
                let x = 42;
                println!(\"{}\", x);
            }"
        };
        let expected_offset = expected_excerpt.find("42").unwrap();

        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (expected_excerpt.to_string(), expected_offset)
        );

        // Cursor at beginning of line
        spec.cursor_position = indoc! {"
            fn main() {
            <|user_cursor|>    let x = 42;
            }"
        }
        .to_string();

        let expected_excerpt = indoc! {"
            fn main() {
                let x = 42;
            }"
        };
        let expected_offset = expected_excerpt.find("    let").unwrap();

        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (expected_excerpt.to_string(), expected_offset)
        );

        // Cursor at end of file
        spec.cursor_position = "fn main() {}<|user_cursor|>".to_string();
        let expected_excerpt = "fn main() {}";
        let expected_offset = expected_excerpt.len();

        assert_eq!(
            spec.cursor_excerpt().unwrap(),
            (expected_excerpt.to_string(), expected_offset)
        );
    }
}