1use anyhow::{Context as _, Result};
2use serde::{Deserialize, Serialize};
3use std::{borrow::Cow, fmt::Write as _, mem, ops::Range, path::Path, sync::Arc};
4
/// Marker text that tags the comment line placed directly below the cursor
/// line in a cursor excerpt; `ExampleSpec::cursor_excerpt` locates the cursor
/// by searching for it.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
/// Marker embedded in the excerpt text itself at the cursor's position.
/// `ExampleSpec::cursor_excerpt` checks for this marker before the
/// line-based one and removes it from the returned excerpt.
pub const INLINE_CURSOR_MARKER: &str = "<|user_cursor|>";

/// Maximum cursor file size to capture (64KB).
/// Files larger than this will not have their content captured,
/// falling back to git-based loading.
pub const MAX_CURSOR_FILE_SIZE: usize = 64 * 1024;
12
/// A single example, serializable with serde and convertible to and from a
/// markdown document (see `to_markdown` / `from_markdown`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExampleSpec {
 /// Example title; rendered as the `#` heading and used by `filename`.
 #[serde(default)]
 pub name: String,
 /// Written to / read from the TOML front matter of the markdown form.
 pub repository_url: String,
 /// Written to / read from the TOML front matter of the markdown form.
 pub revision: String,
 /// Free-form tags stored in the front matter; omitted when empty.
 #[serde(default, skip_serializing_if = "Vec::is_empty")]
 pub tags: Vec<String>,
 /// Optional prose rendered under the "Reasoning" heading.
 /// NOTE(review): `from_markdown` does not read this section back.
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub reasoning: Option<String>,
 /// Diff text rendered under "Uncommitted Diff"; the section is skipped when empty.
 #[serde(default)]
 pub uncommitted_diff: String,
 /// Path used as the info string of the cursor-position code fence.
 pub cursor_path: Arc<Path>,
 /// Excerpt text containing a cursor marker; decode it with `cursor_excerpt`.
 pub cursor_position: String,
 /// Diff text rendered under "Edit History".
 pub edit_history: String,
 /// One diff per entry, each rendered as its own fenced block under "Expected Patch".
 pub expected_patches: Vec<String>,
 /// Optional diff rendered under "Rejected Patch".
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub rejected_patch: Option<String>,
 /// Optional captured prompt data; carried by serde only, not part of the markdown form.
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub captured_prompt_input: Option<CapturedPromptInput>,
 /// Optional telemetry metadata; carried by serde only, not part of the markdown form.
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub telemetry: Option<TelemetrySource>,
}
36
/// Metadata for examples sourced from production telemetry (rejected predictions).
///
/// All identifiers and the timestamp are stored as plain strings.
// NOTE(review): the expected format of `time` and the set of valid
// `rejection_reason` values are not visible in this file — confirm with the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct TelemetrySource {
 pub request_id: String,
 pub device_id: String,
 pub time: String,
 pub rejection_reason: String,
 pub was_shown: bool,
}
46
/// All data needed to run format_prompt without loading the project.
// NOTE(review): `cursor_offset` is presumably a byte offset into
// `cursor_file_content`, and `cursor_row` / `cursor_column` the same position
// in row/column form — confirm against the code that fills this struct.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedPromptInput {
 pub cursor_file_content: String,
 pub cursor_offset: usize,
 pub cursor_row: u32,
 pub cursor_column: u32,
 /// Captured events; each converts to a prompt event via `CapturedEvent::to_event`.
 pub events: Vec<CapturedEvent>,
 /// Captured related files; each converts via `CapturedRelatedFile::to_related_file`.
 pub related_files: Vec<CapturedRelatedFile>,
}
57
/// Serializable snapshot of a buffer-change event.
///
/// Mirrors the fields of `zeta_prompt::Event::BufferChange`; use `to_event`
/// to convert.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedEvent {
 pub path: Arc<Path>,
 pub old_path: Arc<Path>,
 pub diff: String,
 pub predicted: bool,
 pub in_open_source_repo: bool,
}
66
67impl CapturedEvent {
68 pub fn to_event(&self) -> zeta_prompt::Event {
69 zeta_prompt::Event::BufferChange {
70 path: self.path.clone(),
71 old_path: self.old_path.clone(),
72 diff: self.diff.clone(),
73 predicted: self.predicted,
74 in_open_source_repo: self.in_open_source_repo,
75 }
76 }
77}
78
/// Serializable snapshot of a related file and the excerpts taken from it.
///
/// Mirrors `zeta_prompt::RelatedFile`; use `to_related_file` to convert.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedFile {
 pub path: Arc<Path>,
 // NOTE(review): presumably the last row index of the file — confirm with the producer.
 pub max_row: u32,
 pub excerpts: Vec<CapturedRelatedExcerpt>,
}
85
86impl CapturedRelatedFile {
87 pub fn to_related_file(&self) -> zeta_prompt::RelatedFile {
88 zeta_prompt::RelatedFile {
89 path: self.path.clone(),
90 max_row: self.max_row,
91 excerpts: self
92 .excerpts
93 .iter()
94 .map(|e| zeta_prompt::RelatedExcerpt {
95 row_range: e.row_range.clone(),
96 text: e.text.clone().into(),
97 })
98 .collect(),
99 }
100 }
101}
102
/// A captured slice of a related file: a row range plus the text it holds.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedExcerpt {
 pub row_range: Range<u32>,
 pub text: String,
}
108
// Titles of the `##` sections in the markdown form of an `ExampleSpec`.
// `to_markdown` writes them verbatim; `from_markdown` compares titles
// ASCII-case-insensitively. `from_markdown` has no branch for
// `REASONING_HEADING`, so that section is treated like any unknown section.
const REASONING_HEADING: &str = "Reasoning";
const UNCOMMITTED_DIFF_HEADING: &str = "Uncommitted Diff";
const EDIT_HISTORY_HEADING: &str = "Edit History";
const CURSOR_POSITION_HEADING: &str = "Cursor Position";
const EXPECTED_PATCH_HEADING: &str = "Expected Patch";
const REJECTED_PATCH_HEADING: &str = "Rejected Patch";
115
/// TOML front matter stored between the `+++` fences at the top of the
/// markdown form of an `ExampleSpec`.
///
/// `Cow` lets `to_markdown` borrow the strings from the spec while
/// `from_markdown` converts the parsed values into owned strings.
#[derive(Serialize, Deserialize)]
struct FrontMatter<'a> {
 repository_url: Cow<'a, str>,
 revision: Cow<'a, str>,
 // Omitted from the serialized front matter when there are no tags.
 #[serde(default, skip_serializing_if = "Vec::is_empty")]
 tags: Vec<String>,
}
123
124impl ExampleSpec {
125 /// Generate a sanitized filename for this example.
126 pub fn filename(&self) -> String {
127 self.name
128 .chars()
129 .map(|c| match c {
130 ' ' | ':' | '~' | '^' | '?' | '*' | '[' | '\\' | '@' | '{' | '/' | '<' | '>'
131 | '|' | '"' => '-',
132 c => c,
133 })
134 .collect()
135 }
136
137 /// Format this example spec as markdown.
138 pub fn to_markdown(&self) -> String {
139 use std::fmt::Write as _;
140
141 let front_matter = FrontMatter {
142 repository_url: Cow::Borrowed(&self.repository_url),
143 revision: Cow::Borrowed(&self.revision),
144 tags: self.tags.clone(),
145 };
146 let front_matter_toml =
147 toml::to_string_pretty(&front_matter).unwrap_or_else(|_| String::new());
148
149 let mut markdown = String::new();
150
151 _ = writeln!(markdown, "+++");
152 markdown.push_str(&front_matter_toml);
153 if !markdown.ends_with('\n') {
154 markdown.push('\n');
155 }
156 _ = writeln!(markdown, "+++");
157 markdown.push('\n');
158
159 _ = writeln!(markdown, "# {}", self.name);
160 markdown.push('\n');
161
162 if let Some(reasoning) = &self.reasoning {
163 _ = writeln!(markdown, "## {}", REASONING_HEADING);
164 markdown.push('\n');
165 markdown.push_str(reasoning);
166 if !markdown.ends_with('\n') {
167 markdown.push('\n');
168 }
169 markdown.push('\n');
170 }
171
172 if !self.uncommitted_diff.is_empty() {
173 _ = writeln!(markdown, "## {}", UNCOMMITTED_DIFF_HEADING);
174 _ = writeln!(markdown);
175 _ = writeln!(markdown, "```diff");
176 markdown.push_str(&self.uncommitted_diff);
177 if !markdown.ends_with('\n') {
178 markdown.push('\n');
179 }
180 _ = writeln!(markdown, "```");
181 markdown.push('\n');
182 }
183
184 _ = writeln!(markdown, "## {}", EDIT_HISTORY_HEADING);
185 _ = writeln!(markdown);
186
187 if self.edit_history.is_empty() {
188 _ = writeln!(markdown, "(No edit history)");
189 _ = writeln!(markdown);
190 } else {
191 _ = writeln!(markdown, "```diff");
192 markdown.push_str(&self.edit_history);
193 if !markdown.ends_with('\n') {
194 markdown.push('\n');
195 }
196 _ = writeln!(markdown, "```");
197 markdown.push('\n');
198 }
199
200 _ = writeln!(markdown, "## {}", CURSOR_POSITION_HEADING);
201 _ = writeln!(markdown);
202 _ = writeln!(markdown, "```{}", self.cursor_path.to_string_lossy());
203 markdown.push_str(&self.cursor_position);
204 if !markdown.ends_with('\n') {
205 markdown.push('\n');
206 }
207 _ = writeln!(markdown, "```");
208 markdown.push('\n');
209
210 _ = writeln!(markdown, "## {}", EXPECTED_PATCH_HEADING);
211 markdown.push('\n');
212 for patch in &self.expected_patches {
213 _ = writeln!(markdown, "```diff");
214 markdown.push_str(patch);
215 if !markdown.ends_with('\n') {
216 markdown.push('\n');
217 }
218 _ = writeln!(markdown, "```");
219 markdown.push('\n');
220 }
221
222 if let Some(rejected_patch) = &self.rejected_patch {
223 _ = writeln!(markdown, "## {}", REJECTED_PATCH_HEADING);
224 markdown.push('\n');
225 _ = writeln!(markdown, "```diff");
226 markdown.push_str(rejected_patch);
227 if !markdown.ends_with('\n') {
228 markdown.push('\n');
229 }
230 _ = writeln!(markdown, "```");
231 markdown.push('\n');
232 }
233
234 markdown
235 }
236
237 /// Parse an example spec from markdown.
238 pub fn from_markdown(mut input: &str) -> anyhow::Result<Self> {
239 use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Parser, Tag, TagEnd};
240
241 let mut spec = ExampleSpec {
242 name: String::new(),
243 repository_url: String::new(),
244 revision: String::new(),
245 tags: Vec::new(),
246 reasoning: None,
247 uncommitted_diff: String::new(),
248 cursor_path: Path::new("").into(),
249 cursor_position: String::new(),
250 edit_history: String::new(),
251 expected_patches: Vec::new(),
252 rejected_patch: None,
253 captured_prompt_input: None,
254 telemetry: None,
255 };
256
257 if let Some(rest) = input.strip_prefix("+++\n")
258 && let Some((front_matter, rest)) = rest.split_once("+++\n")
259 {
260 if let Ok(data) = toml::from_str::<FrontMatter<'_>>(front_matter) {
261 spec.repository_url = data.repository_url.into_owned();
262 spec.revision = data.revision.into_owned();
263 spec.tags = data.tags;
264 }
265 input = rest.trim_start();
266 }
267
268 let parser = Parser::new(input);
269 let mut text = String::new();
270 let mut block_info: CowStr = "".into();
271
272 #[derive(PartialEq)]
273 enum Section {
274 Start,
275 UncommittedDiff,
276 EditHistory,
277 CursorPosition,
278 ExpectedPatch,
279 RejectedPatch,
280 Other,
281 }
282
283 let mut current_section = Section::Start;
284
285 for event in parser {
286 match event {
287 Event::Text(line) => {
288 text.push_str(&line);
289 }
290 Event::End(TagEnd::Heading(HeadingLevel::H1)) => {
291 spec.name = mem::take(&mut text);
292 }
293 Event::End(TagEnd::Heading(HeadingLevel::H2)) => {
294 let title = mem::take(&mut text);
295 current_section = if title.eq_ignore_ascii_case(UNCOMMITTED_DIFF_HEADING) {
296 Section::UncommittedDiff
297 } else if title.eq_ignore_ascii_case(EDIT_HISTORY_HEADING) {
298 Section::EditHistory
299 } else if title.eq_ignore_ascii_case(CURSOR_POSITION_HEADING) {
300 Section::CursorPosition
301 } else if title.eq_ignore_ascii_case(EXPECTED_PATCH_HEADING) {
302 Section::ExpectedPatch
303 } else if title.eq_ignore_ascii_case(REJECTED_PATCH_HEADING) {
304 Section::RejectedPatch
305 } else {
306 Section::Other
307 };
308 }
309 Event::End(TagEnd::Heading(HeadingLevel::H3)) => {
310 mem::take(&mut text);
311 }
312 Event::End(TagEnd::Heading(HeadingLevel::H4)) => {
313 mem::take(&mut text);
314 }
315 Event::End(TagEnd::Heading(level)) => {
316 anyhow::bail!("Unexpected heading level: {level}");
317 }
318 Event::Start(Tag::CodeBlock(kind)) => {
319 match kind {
320 CodeBlockKind::Fenced(info) => {
321 block_info = info;
322 }
323 CodeBlockKind::Indented => {
324 anyhow::bail!("Unexpected indented codeblock");
325 }
326 };
327 }
328 Event::Start(_) => {
329 text.clear();
330 block_info = "".into();
331 }
332 Event::End(TagEnd::CodeBlock) => {
333 let block_info = block_info.trim();
334 match current_section {
335 Section::UncommittedDiff => {
336 spec.uncommitted_diff = mem::take(&mut text);
337 }
338 Section::EditHistory => {
339 spec.edit_history.push_str(&mem::take(&mut text));
340 }
341 Section::CursorPosition => {
342 spec.cursor_path = Path::new(block_info).into();
343 spec.cursor_position = mem::take(&mut text);
344 }
345 Section::ExpectedPatch => {
346 spec.expected_patches.push(mem::take(&mut text));
347 }
348 Section::RejectedPatch => {
349 spec.rejected_patch = Some(mem::take(&mut text));
350 }
351 Section::Start | Section::Other => {}
352 }
353 }
354 _ => {}
355 }
356 }
357
358 if spec.cursor_path.as_ref() == Path::new("") || spec.cursor_position.is_empty() {
359 anyhow::bail!("Missing cursor position codeblock");
360 }
361
362 Ok(spec)
363 }
364
365 /// Returns the excerpt of text around the cursor, and the offset of the cursor within that
366 /// excerpt.
367 ///
368 /// The cursor's position is marked with a special comment that appears
369 /// below the cursor line, which contains the string `[CURSOR_POSITION]`,
370 /// preceded by an arrow marking the cursor's column. The arrow can be
371 /// either:
372 /// - `^` - The cursor column is at the position of the `^` character (pointing up to the cursor)
373 /// - `<` - The cursor column is at the first non-whitespace character on that line.
374 pub fn cursor_excerpt(&self) -> Result<(String, usize)> {
375 let input = &self.cursor_position;
376
377 // Check for inline cursor marker first
378 if let Some(inline_offset) = input.find(INLINE_CURSOR_MARKER) {
379 let excerpt = input[..inline_offset].to_string()
380 + &input[inline_offset + INLINE_CURSOR_MARKER.len()..];
381 return Ok((excerpt, inline_offset));
382 }
383
384 let marker_offset = input
385 .find(CURSOR_POSITION_MARKER)
386 .context("missing [CURSOR_POSITION] marker")?;
387 let marker_line_start = input[..marker_offset]
388 .rfind('\n')
389 .map(|pos| pos + 1)
390 .unwrap_or(0);
391 let marker_line_end = input[marker_line_start..]
392 .find('\n')
393 .map(|pos| marker_line_start + pos + 1)
394 .unwrap_or(input.len());
395 let marker_line = &input[marker_line_start..marker_line_end].trim_end_matches('\n');
396
397 let cursor_column = if let Some(cursor_offset) = marker_line.find('^') {
398 cursor_offset
399 } else if let Some(less_than_pos) = marker_line.find('<') {
400 marker_line
401 .find(|c: char| !c.is_whitespace())
402 .unwrap_or(less_than_pos)
403 } else {
404 anyhow::bail!(
405 "cursor position marker line must contain '^' or '<' before [CURSOR_POSITION]"
406 );
407 };
408
409 let mut excerpt = input[..marker_line_start].to_string() + &input[marker_line_end..];
410 excerpt.truncate(excerpt.trim_end_matches('\n').len());
411
412 // The cursor is on the line above the marker line.
413 let cursor_line_end = marker_line_start.saturating_sub(1);
414 let cursor_line_start = excerpt[..cursor_line_end]
415 .rfind('\n')
416 .map(|pos| pos + 1)
417 .unwrap_or(0);
418 let cursor_offset = cursor_line_start + cursor_column;
419
420 Ok((excerpt, cursor_offset))
421 }
422
423 /// Sets the cursor position excerpt from a plain excerpt and cursor byte offset.
424 ///
425 /// The `line_comment_prefix` is used to format the marker line as a comment.
426 /// If the cursor column is less than the comment prefix length, the `<` format is used.
427 /// Otherwise, the `^` format is used.
428 pub fn set_cursor_excerpt(
429 &mut self,
430 excerpt: &str,
431 cursor_offset: usize,
432 line_comment_prefix: &str,
433 ) {
434 // Find which line the cursor is on and its column
435 let cursor_line_start = excerpt[..cursor_offset]
436 .rfind('\n')
437 .map(|pos| pos + 1)
438 .unwrap_or(0);
439 let cursor_line_end = excerpt[cursor_line_start..]
440 .find('\n')
441 .map(|pos| cursor_line_start + pos + 1)
442 .unwrap_or(excerpt.len());
443 let cursor_line = &excerpt[cursor_line_start..cursor_line_end];
444 let cursor_line_indent = &cursor_line[..cursor_line.len() - cursor_line.trim_start().len()];
445 let cursor_column = cursor_offset - cursor_line_start;
446
447 // Build the marker line
448 let mut marker_line = String::new();
449 if cursor_column < line_comment_prefix.len() {
450 for _ in 0..cursor_column {
451 marker_line.push(' ');
452 }
453 marker_line.push_str(line_comment_prefix);
454 write!(marker_line, " <{}", CURSOR_POSITION_MARKER).unwrap();
455 } else {
456 if cursor_column >= cursor_line_indent.len() + line_comment_prefix.len() {
457 marker_line.push_str(cursor_line_indent);
458 }
459 marker_line.push_str(line_comment_prefix);
460 while marker_line.len() < cursor_column {
461 marker_line.push(' ');
462 }
463 write!(marker_line, "^{}", CURSOR_POSITION_MARKER).unwrap();
464 }
465
466 // Build the final cursor_position string
467 let mut result = String::with_capacity(excerpt.len() + marker_line.len() + 2);
468 result.push_str(&excerpt[..cursor_line_end]);
469 if !result.ends_with('\n') {
470 result.push('\n');
471 }
472 result.push_str(&marker_line);
473 if cursor_line_end < excerpt.len() {
474 result.push('\n');
475 result.push_str(&excerpt[cursor_line_end..]);
476 }
477
478 self.cursor_position = result;
479 }
480}
481
482#[cfg(test)]
483mod tests {
484 use super::*;
485 use indoc::indoc;
486
 // Round-trip test for the marker-line format: every case calls
 // `set_cursor_excerpt`, compares the generated `cursor_position` text with
 // the expected marker layout, then checks that `cursor_excerpt` recovers
 // the original excerpt and byte offset.
 #[test]
 fn test_cursor_excerpt_with_caret() {
 let mut spec = ExampleSpec {
 name: String::new(),
 repository_url: String::new(),
 revision: String::new(),
 tags: Vec::new(),
 reasoning: None,
 uncommitted_diff: String::new(),
 cursor_path: Path::new("test.rs").into(),
 cursor_position: String::new(),
 edit_history: String::new(),
 expected_patches: Vec::new(),
 rejected_patch: None,
 captured_prompt_input: None,
 telemetry: None,
 };

 // Cursor before `42`
 let excerpt = indoc! {"
 fn main() {
 let x = 42;
 println!(\"{}\", x);
 }"
 };
 let offset = excerpt.find("42").unwrap();
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // ^[CURSOR_POSITION]
 println!(\"{}\", x);
 }"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );

 // Cursor after `l` in `let`
 let offset = excerpt.find("et x").unwrap();
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // ^[CURSOR_POSITION]
 println!(\"{}\", x);
 }"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );

 // Cursor before `let`
 let offset = excerpt.find("let").unwrap();
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // ^[CURSOR_POSITION]
 println!(\"{}\", x);
 }"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );

 // Cursor at beginning of the line with `let`
 // (column 0 is smaller than the comment prefix, so the `<` form is expected)
 let offset = excerpt.find(" let").unwrap();
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // <[CURSOR_POSITION]
 println!(\"{}\", x);
 }"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );

 // Cursor at end of line, after the semicolon
 let offset = excerpt.find(';').unwrap() + 1;
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // ^[CURSOR_POSITION]
 println!(\"{}\", x);
 }"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );

 // Caret at end of file (no trailing newline)
 let excerpt = indoc! {"
 fn main() {
 let x = 42;"
 };
 let offset = excerpt.find(';').unwrap() + 1;
 let position_string = indoc! {"
 fn main() {
 let x = 42;
 // ^[CURSOR_POSITION]"
 }
 .to_string();

 spec.set_cursor_excerpt(excerpt, offset, "//");
 assert_eq!(spec.cursor_position, position_string);
 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (excerpt.to_string(), offset)
 );
 }
621
 // Checks the inline `<|user_cursor|>` form: `cursor_excerpt` must return
 // the text with the marker removed and the offset at which the marker stood.
 #[test]
 fn test_cursor_excerpt_with_inline_marker() {
 let mut spec = ExampleSpec {
 name: String::new(),
 repository_url: String::new(),
 revision: String::new(),
 tags: Vec::new(),
 reasoning: None,
 uncommitted_diff: String::new(),
 cursor_path: Path::new("test.rs").into(),
 cursor_position: String::new(),
 edit_history: String::new(),
 expected_patches: Vec::new(),
 rejected_patch: None,
 captured_prompt_input: None,
 telemetry: None,
 };

 // Cursor before `42` using inline marker
 spec.cursor_position = indoc! {"
 fn main() {
 let x = <|user_cursor|>42;
 println!(\"{}\", x);
 }"
 }
 .to_string();

 let expected_excerpt = indoc! {"
 fn main() {
 let x = 42;
 println!(\"{}\", x);
 }"
 };
 let expected_offset = expected_excerpt.find("42").unwrap();

 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (expected_excerpt.to_string(), expected_offset)
 );

 // Cursor at beginning of line
 spec.cursor_position = indoc! {"
 fn main() {
 <|user_cursor|> let x = 42;
 }"
 }
 .to_string();

 let expected_excerpt = indoc! {"
 fn main() {
 let x = 42;
 }"
 };
 let expected_offset = expected_excerpt.find(" let").unwrap();

 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (expected_excerpt.to_string(), expected_offset)
 );

 // Cursor at end of file
 spec.cursor_position = "fn main() {}<|user_cursor|>".to_string();
 let expected_excerpt = "fn main() {}";
 let expected_offset = expected_excerpt.len();

 assert_eq!(
 spec.cursor_excerpt().unwrap(),
 (expected_excerpt.to_string(), expected_offset)
 );
 }
692}