example_spec.rs

  1use anyhow::{Context as _, Result};
  2use serde::{Deserialize, Serialize};
  3use std::{borrow::Cow, fmt::Write as _, mem, ops::Range, path::Path, sync::Arc};
  4
/// Marker text that identifies the marker line of a cursor-position excerpt.
///
/// [`ExampleSpec::cursor_excerpt`] searches for this string; the line that
/// contains it must also carry a `^` or `<` arrow indicating the cursor column.
pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]";
/// Marker that can be placed inline in a cursor-position excerpt, directly at
/// the cursor location. [`ExampleSpec::cursor_excerpt`] checks for it before
/// looking for [`CURSOR_POSITION_MARKER`].
pub const INLINE_CURSOR_MARKER: &str = "<|user_cursor|>";

/// Maximum cursor file size to capture (64KB).
/// Files larger than this will not have their content captured,
/// falling back to git-based loading.
pub const MAX_CURSOR_FILE_SIZE: usize = 64 * 1024;
 12
/// Description of a single example, serializable with serde and convertible
/// to and from a markdown document (see [`ExampleSpec::to_markdown`] and
/// [`ExampleSpec::from_markdown`]).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExampleSpec {
    /// Example name; rendered as the `#` heading of the markdown form and used
    /// as the basis for [`ExampleSpec::filename`].
    #[serde(default)]
    pub name: String,
    /// Stored in the TOML front matter of the markdown form.
    pub repository_url: String,
    /// Stored in the TOML front matter of the markdown form.
    pub revision: String,
    /// Stored in the TOML front matter of the markdown form; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Free-form text written under the "Reasoning" heading when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning: Option<String>,
    /// Diff written as a `diff` code block under "Uncommitted Diff" when non-empty.
    #[serde(default)]
    pub uncommitted_diff: String,
    /// Path used as the info string of the cursor-position code block.
    pub cursor_path: Arc<Path>,
    /// Excerpt of text around the cursor, including a cursor marker
    /// (see [`ExampleSpec::cursor_excerpt`] for the marker formats).
    pub cursor_position: String,
    /// Diff written as a `diff` code block under "Edit History".
    pub edit_history: String,
    /// Patches written as one `diff` code block each under "Expected Patch".
    pub expected_patches: Vec<String>,
    /// Patch written as a `diff` code block under "Rejected Patch" when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rejected_patch: Option<String>,
    /// Captured prompt data; not part of the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub captured_prompt_input: Option<CapturedPromptInput>,
    /// Telemetry metadata; not part of the markdown form.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub telemetry: Option<TelemetrySource>,
}
 36
/// Metadata for examples sourced from production telemetry (rejected predictions).
///
/// All fields are stored verbatim as provided by the telemetry source.
// NOTE(review): the format of `time` and the set of valid `rejection_reason`
// values are not established in this file — confirm with the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct TelemetrySource {
    pub request_id: String,
    pub device_id: String,
    pub time: String,
    pub rejection_reason: String,
    // Presumably whether the rejected prediction was displayed to the user — TODO confirm.
    pub was_shown: bool,
}
 46
/// All data needed to run format_prompt without loading the project.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedPromptInput {
    /// Content of the file containing the cursor.
    pub cursor_file_content: String,
    // NOTE(review): presumably a byte offset into `cursor_file_content` — confirm against the producer.
    pub cursor_offset: usize,
    // NOTE(review): whether row/column are zero-based is not established in this file.
    pub cursor_row: u32,
    pub cursor_column: u32,
    /// Captured events; each converts to a prompt event via [`CapturedEvent::to_event`].
    pub events: Vec<CapturedEvent>,
    /// Captured related files; each converts via [`CapturedRelatedFile::to_related_file`].
    pub related_files: Vec<CapturedRelatedFile>,
}
 57
/// Serializable snapshot of a buffer-change event.
///
/// The fields mirror those of `zeta_prompt::Event::BufferChange`, which
/// [`CapturedEvent::to_event`] constructs from them one-to-one.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedEvent {
    pub path: Arc<Path>,
    pub old_path: Arc<Path>,
    pub diff: String,
    pub predicted: bool,
    pub in_open_source_repo: bool,
}
 66
 67impl CapturedEvent {
 68    pub fn to_event(&self) -> zeta_prompt::Event {
 69        zeta_prompt::Event::BufferChange {
 70            path: self.path.clone(),
 71            old_path: self.old_path.clone(),
 72            diff: self.diff.clone(),
 73            predicted: self.predicted,
 74            in_open_source_repo: self.in_open_source_repo,
 75        }
 76    }
 77}
 78
/// Serializable snapshot of a related file and the excerpts captured from it.
///
/// Converts to `zeta_prompt::RelatedFile` via
/// [`CapturedRelatedFile::to_related_file`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedFile {
    pub path: Arc<Path>,
    // NOTE(review): presumably the last row of the file — confirm against the producer.
    pub max_row: u32,
    pub excerpts: Vec<CapturedRelatedExcerpt>,
}
 85
 86impl CapturedRelatedFile {
 87    pub fn to_related_file(&self) -> zeta_prompt::RelatedFile {
 88        zeta_prompt::RelatedFile {
 89            path: self.path.clone(),
 90            max_row: self.max_row,
 91            excerpts: self
 92                .excerpts
 93                .iter()
 94                .map(|e| zeta_prompt::RelatedExcerpt {
 95                    row_range: e.row_range.clone(),
 96                    text: e.text.clone().into(),
 97                })
 98                .collect(),
 99        }
100    }
101}
102
/// Serializable snapshot of one excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct CapturedRelatedExcerpt {
    /// Range of rows the excerpt covers.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: String,
}
108
// Titles of the `##` sections in the markdown form of an `ExampleSpec`.
// `to_markdown` writes them verbatim; `from_markdown` compares section titles
// against them ignoring ASCII case. `from_markdown` has no dedicated handling
// for the reasoning heading.
const REASONING_HEADING: &str = "Reasoning";
const UNCOMMITTED_DIFF_HEADING: &str = "Uncommitted Diff";
const EDIT_HISTORY_HEADING: &str = "Edit History";
const CURSOR_POSITION_HEADING: &str = "Cursor Position";
const EXPECTED_PATCH_HEADING: &str = "Expected Patch";
const REJECTED_PATCH_HEADING: &str = "Rejected Patch";
115
/// TOML front matter of the markdown form of an `ExampleSpec`, placed between
/// `+++` delimiter lines at the top of the document.
#[derive(Serialize, Deserialize)]
struct FrontMatter<'a> {
    repository_url: Cow<'a, str>,
    revision: Cow<'a, str>,
    /// Omitted from the serialized output when empty; defaults to empty when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    tags: Vec<String>,
}
123
124impl ExampleSpec {
125    /// Generate a sanitized filename for this example.
126    pub fn filename(&self) -> String {
127        self.name
128            .chars()
129            .map(|c| match c {
130                ' ' | ':' | '~' | '^' | '?' | '*' | '[' | '\\' | '@' | '{' | '/' | '<' | '>'
131                | '|' | '"' => '-',
132                c => c,
133            })
134            .collect()
135    }
136
137    /// Format this example spec as markdown.
138    pub fn to_markdown(&self) -> String {
139        use std::fmt::Write as _;
140
141        let front_matter = FrontMatter {
142            repository_url: Cow::Borrowed(&self.repository_url),
143            revision: Cow::Borrowed(&self.revision),
144            tags: self.tags.clone(),
145        };
146        let front_matter_toml =
147            toml::to_string_pretty(&front_matter).unwrap_or_else(|_| String::new());
148
149        let mut markdown = String::new();
150
151        _ = writeln!(markdown, "+++");
152        markdown.push_str(&front_matter_toml);
153        if !markdown.ends_with('\n') {
154            markdown.push('\n');
155        }
156        _ = writeln!(markdown, "+++");
157        markdown.push('\n');
158
159        _ = writeln!(markdown, "# {}", self.name);
160        markdown.push('\n');
161
162        if let Some(reasoning) = &self.reasoning {
163            _ = writeln!(markdown, "## {}", REASONING_HEADING);
164            markdown.push('\n');
165            markdown.push_str(reasoning);
166            if !markdown.ends_with('\n') {
167                markdown.push('\n');
168            }
169            markdown.push('\n');
170        }
171
172        if !self.uncommitted_diff.is_empty() {
173            _ = writeln!(markdown, "## {}", UNCOMMITTED_DIFF_HEADING);
174            _ = writeln!(markdown);
175            _ = writeln!(markdown, "```diff");
176            markdown.push_str(&self.uncommitted_diff);
177            if !markdown.ends_with('\n') {
178                markdown.push('\n');
179            }
180            _ = writeln!(markdown, "```");
181            markdown.push('\n');
182        }
183
184        _ = writeln!(markdown, "## {}", EDIT_HISTORY_HEADING);
185        _ = writeln!(markdown);
186
187        if self.edit_history.is_empty() {
188            _ = writeln!(markdown, "(No edit history)");
189            _ = writeln!(markdown);
190        } else {
191            _ = writeln!(markdown, "```diff");
192            markdown.push_str(&self.edit_history);
193            if !markdown.ends_with('\n') {
194                markdown.push('\n');
195            }
196            _ = writeln!(markdown, "```");
197            markdown.push('\n');
198        }
199
200        _ = writeln!(markdown, "## {}", CURSOR_POSITION_HEADING);
201        _ = writeln!(markdown);
202        _ = writeln!(markdown, "```{}", self.cursor_path.to_string_lossy());
203        markdown.push_str(&self.cursor_position);
204        if !markdown.ends_with('\n') {
205            markdown.push('\n');
206        }
207        _ = writeln!(markdown, "```");
208        markdown.push('\n');
209
210        _ = writeln!(markdown, "## {}", EXPECTED_PATCH_HEADING);
211        markdown.push('\n');
212        for patch in &self.expected_patches {
213            _ = writeln!(markdown, "```diff");
214            markdown.push_str(patch);
215            if !markdown.ends_with('\n') {
216                markdown.push('\n');
217            }
218            _ = writeln!(markdown, "```");
219            markdown.push('\n');
220        }
221
222        if let Some(rejected_patch) = &self.rejected_patch {
223            _ = writeln!(markdown, "## {}", REJECTED_PATCH_HEADING);
224            markdown.push('\n');
225            _ = writeln!(markdown, "```diff");
226            markdown.push_str(rejected_patch);
227            if !markdown.ends_with('\n') {
228                markdown.push('\n');
229            }
230            _ = writeln!(markdown, "```");
231            markdown.push('\n');
232        }
233
234        markdown
235    }
236
237    /// Parse an example spec from markdown.
238    pub fn from_markdown(mut input: &str) -> anyhow::Result<Self> {
239        use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Parser, Tag, TagEnd};
240
241        let mut spec = ExampleSpec {
242            name: String::new(),
243            repository_url: String::new(),
244            revision: String::new(),
245            tags: Vec::new(),
246            reasoning: None,
247            uncommitted_diff: String::new(),
248            cursor_path: Path::new("").into(),
249            cursor_position: String::new(),
250            edit_history: String::new(),
251            expected_patches: Vec::new(),
252            rejected_patch: None,
253            captured_prompt_input: None,
254            telemetry: None,
255        };
256
257        if let Some(rest) = input.strip_prefix("+++\n")
258            && let Some((front_matter, rest)) = rest.split_once("+++\n")
259        {
260            if let Ok(data) = toml::from_str::<FrontMatter<'_>>(front_matter) {
261                spec.repository_url = data.repository_url.into_owned();
262                spec.revision = data.revision.into_owned();
263                spec.tags = data.tags;
264            }
265            input = rest.trim_start();
266        }
267
268        let parser = Parser::new(input);
269        let mut text = String::new();
270        let mut block_info: CowStr = "".into();
271
272        #[derive(PartialEq)]
273        enum Section {
274            Start,
275            UncommittedDiff,
276            EditHistory,
277            CursorPosition,
278            ExpectedPatch,
279            RejectedPatch,
280            Other,
281        }
282
283        let mut current_section = Section::Start;
284
285        for event in parser {
286            match event {
287                Event::Text(line) => {
288                    text.push_str(&line);
289                }
290                Event::End(TagEnd::Heading(HeadingLevel::H1)) => {
291                    spec.name = mem::take(&mut text);
292                }
293                Event::End(TagEnd::Heading(HeadingLevel::H2)) => {
294                    let title = mem::take(&mut text);
295                    current_section = if title.eq_ignore_ascii_case(UNCOMMITTED_DIFF_HEADING) {
296                        Section::UncommittedDiff
297                    } else if title.eq_ignore_ascii_case(EDIT_HISTORY_HEADING) {
298                        Section::EditHistory
299                    } else if title.eq_ignore_ascii_case(CURSOR_POSITION_HEADING) {
300                        Section::CursorPosition
301                    } else if title.eq_ignore_ascii_case(EXPECTED_PATCH_HEADING) {
302                        Section::ExpectedPatch
303                    } else if title.eq_ignore_ascii_case(REJECTED_PATCH_HEADING) {
304                        Section::RejectedPatch
305                    } else {
306                        Section::Other
307                    };
308                }
309                Event::End(TagEnd::Heading(HeadingLevel::H3)) => {
310                    mem::take(&mut text);
311                }
312                Event::End(TagEnd::Heading(HeadingLevel::H4)) => {
313                    mem::take(&mut text);
314                }
315                Event::End(TagEnd::Heading(level)) => {
316                    anyhow::bail!("Unexpected heading level: {level}");
317                }
318                Event::Start(Tag::CodeBlock(kind)) => {
319                    match kind {
320                        CodeBlockKind::Fenced(info) => {
321                            block_info = info;
322                        }
323                        CodeBlockKind::Indented => {
324                            anyhow::bail!("Unexpected indented codeblock");
325                        }
326                    };
327                }
328                Event::Start(_) => {
329                    text.clear();
330                    block_info = "".into();
331                }
332                Event::End(TagEnd::CodeBlock) => {
333                    let block_info = block_info.trim();
334                    match current_section {
335                        Section::UncommittedDiff => {
336                            spec.uncommitted_diff = mem::take(&mut text);
337                        }
338                        Section::EditHistory => {
339                            spec.edit_history.push_str(&mem::take(&mut text));
340                        }
341                        Section::CursorPosition => {
342                            spec.cursor_path = Path::new(block_info).into();
343                            spec.cursor_position = mem::take(&mut text);
344                        }
345                        Section::ExpectedPatch => {
346                            spec.expected_patches.push(mem::take(&mut text));
347                        }
348                        Section::RejectedPatch => {
349                            spec.rejected_patch = Some(mem::take(&mut text));
350                        }
351                        Section::Start | Section::Other => {}
352                    }
353                }
354                _ => {}
355            }
356        }
357
358        if spec.cursor_path.as_ref() == Path::new("") || spec.cursor_position.is_empty() {
359            anyhow::bail!("Missing cursor position codeblock");
360        }
361
362        Ok(spec)
363    }
364
365    /// Returns the excerpt of text around the cursor, and the offset of the cursor within that
366    /// excerpt.
367    ///
368    /// The cursor's position is marked with a special comment that appears
369    /// below the cursor line, which contains the string `[CURSOR_POSITION]`,
370    /// preceded by an arrow marking the cursor's column. The arrow can be
371    /// either:
372    /// - `^` - The cursor column is at the position of the `^` character (pointing up to the cursor)
373    /// - `<` - The cursor column is at the first non-whitespace character on that line.
374    pub fn cursor_excerpt(&self) -> Result<(String, usize)> {
375        let input = &self.cursor_position;
376
377        // Check for inline cursor marker first
378        if let Some(inline_offset) = input.find(INLINE_CURSOR_MARKER) {
379            let excerpt = input[..inline_offset].to_string()
380                + &input[inline_offset + INLINE_CURSOR_MARKER.len()..];
381            return Ok((excerpt, inline_offset));
382        }
383
384        let marker_offset = input
385            .find(CURSOR_POSITION_MARKER)
386            .context("missing [CURSOR_POSITION] marker")?;
387        let marker_line_start = input[..marker_offset]
388            .rfind('\n')
389            .map(|pos| pos + 1)
390            .unwrap_or(0);
391        let marker_line_end = input[marker_line_start..]
392            .find('\n')
393            .map(|pos| marker_line_start + pos + 1)
394            .unwrap_or(input.len());
395        let marker_line = &input[marker_line_start..marker_line_end].trim_end_matches('\n');
396
397        let cursor_column = if let Some(cursor_offset) = marker_line.find('^') {
398            cursor_offset
399        } else if let Some(less_than_pos) = marker_line.find('<') {
400            marker_line
401                .find(|c: char| !c.is_whitespace())
402                .unwrap_or(less_than_pos)
403        } else {
404            anyhow::bail!(
405                "cursor position marker line must contain '^' or '<' before [CURSOR_POSITION]"
406            );
407        };
408
409        let mut excerpt = input[..marker_line_start].to_string() + &input[marker_line_end..];
410        excerpt.truncate(excerpt.trim_end_matches('\n').len());
411
412        // The cursor is on the line above the marker line.
413        let cursor_line_end = marker_line_start.saturating_sub(1);
414        let cursor_line_start = excerpt[..cursor_line_end]
415            .rfind('\n')
416            .map(|pos| pos + 1)
417            .unwrap_or(0);
418        let cursor_offset = cursor_line_start + cursor_column;
419
420        Ok((excerpt, cursor_offset))
421    }
422
423    /// Sets the cursor position excerpt from a plain excerpt and cursor byte offset.
424    ///
425    /// The `line_comment_prefix` is used to format the marker line as a comment.
426    /// If the cursor column is less than the comment prefix length, the `<` format is used.
427    /// Otherwise, the `^` format is used.
428    pub fn set_cursor_excerpt(
429        &mut self,
430        excerpt: &str,
431        cursor_offset: usize,
432        line_comment_prefix: &str,
433    ) {
434        // Find which line the cursor is on and its column
435        let cursor_line_start = excerpt[..cursor_offset]
436            .rfind('\n')
437            .map(|pos| pos + 1)
438            .unwrap_or(0);
439        let cursor_line_end = excerpt[cursor_line_start..]
440            .find('\n')
441            .map(|pos| cursor_line_start + pos + 1)
442            .unwrap_or(excerpt.len());
443        let cursor_line = &excerpt[cursor_line_start..cursor_line_end];
444        let cursor_line_indent = &cursor_line[..cursor_line.len() - cursor_line.trim_start().len()];
445        let cursor_column = cursor_offset - cursor_line_start;
446
447        // Build the marker line
448        let mut marker_line = String::new();
449        if cursor_column < line_comment_prefix.len() {
450            for _ in 0..cursor_column {
451                marker_line.push(' ');
452            }
453            marker_line.push_str(line_comment_prefix);
454            write!(marker_line, " <{}", CURSOR_POSITION_MARKER).unwrap();
455        } else {
456            if cursor_column >= cursor_line_indent.len() + line_comment_prefix.len() {
457                marker_line.push_str(cursor_line_indent);
458            }
459            marker_line.push_str(line_comment_prefix);
460            while marker_line.len() < cursor_column {
461                marker_line.push(' ');
462            }
463            write!(marker_line, "^{}", CURSOR_POSITION_MARKER).unwrap();
464        }
465
466        // Build the final cursor_position string
467        let mut result = String::with_capacity(excerpt.len() + marker_line.len() + 2);
468        result.push_str(&excerpt[..cursor_line_end]);
469        if !result.ends_with('\n') {
470            result.push('\n');
471        }
472        result.push_str(&marker_line);
473        if cursor_line_end < excerpt.len() {
474            result.push('\n');
475            result.push_str(&excerpt[cursor_line_end..]);
476        }
477
478        self.cursor_position = result;
479    }
480}
481
482#[cfg(test)]
483mod tests {
484    use super::*;
485    use indoc::indoc;
486
487    #[test]
488    fn test_cursor_excerpt_with_caret() {
489        let mut spec = ExampleSpec {
490            name: String::new(),
491            repository_url: String::new(),
492            revision: String::new(),
493            tags: Vec::new(),
494            reasoning: None,
495            uncommitted_diff: String::new(),
496            cursor_path: Path::new("test.rs").into(),
497            cursor_position: String::new(),
498            edit_history: String::new(),
499            expected_patches: Vec::new(),
500            rejected_patch: None,
501            captured_prompt_input: None,
502            telemetry: None,
503        };
504
505        // Cursor before `42`
506        let excerpt = indoc! {"
507            fn main() {
508                let x = 42;
509                println!(\"{}\", x);
510            }"
511        };
512        let offset = excerpt.find("42").unwrap();
513        let position_string = indoc! {"
514            fn main() {
515                let x = 42;
516                //      ^[CURSOR_POSITION]
517                println!(\"{}\", x);
518            }"
519        }
520        .to_string();
521
522        spec.set_cursor_excerpt(excerpt, offset, "//");
523        assert_eq!(spec.cursor_position, position_string);
524        assert_eq!(
525            spec.cursor_excerpt().unwrap(),
526            (excerpt.to_string(), offset)
527        );
528
529        // Cursor after `l` in `let`
530        let offset = excerpt.find("et x").unwrap();
531        let position_string = indoc! {"
532            fn main() {
533                let x = 42;
534            //   ^[CURSOR_POSITION]
535                println!(\"{}\", x);
536            }"
537        }
538        .to_string();
539
540        spec.set_cursor_excerpt(excerpt, offset, "//");
541        assert_eq!(spec.cursor_position, position_string);
542        assert_eq!(
543            spec.cursor_excerpt().unwrap(),
544            (excerpt.to_string(), offset)
545        );
546
547        // Cursor before `let`
548        let offset = excerpt.find("let").unwrap();
549        let position_string = indoc! {"
550            fn main() {
551                let x = 42;
552            //  ^[CURSOR_POSITION]
553                println!(\"{}\", x);
554            }"
555        }
556        .to_string();
557
558        spec.set_cursor_excerpt(excerpt, offset, "//");
559        assert_eq!(spec.cursor_position, position_string);
560        assert_eq!(
561            spec.cursor_excerpt().unwrap(),
562            (excerpt.to_string(), offset)
563        );
564
565        // Cursor at beginning of the line with `let`
566        let offset = excerpt.find("    let").unwrap();
567        let position_string = indoc! {"
568            fn main() {
569                let x = 42;
570            // <[CURSOR_POSITION]
571                println!(\"{}\", x);
572            }"
573        }
574        .to_string();
575
576        spec.set_cursor_excerpt(excerpt, offset, "//");
577        assert_eq!(spec.cursor_position, position_string);
578        assert_eq!(
579            spec.cursor_excerpt().unwrap(),
580            (excerpt.to_string(), offset)
581        );
582
583        // Cursor at end of line, after the semicolon
584        let offset = excerpt.find(';').unwrap() + 1;
585        let position_string = indoc! {"
586            fn main() {
587                let x = 42;
588                //         ^[CURSOR_POSITION]
589                println!(\"{}\", x);
590            }"
591        }
592        .to_string();
593
594        spec.set_cursor_excerpt(excerpt, offset, "//");
595        assert_eq!(spec.cursor_position, position_string);
596        assert_eq!(
597            spec.cursor_excerpt().unwrap(),
598            (excerpt.to_string(), offset)
599        );
600
601        // Caret at end of file (no trailing newline)
602        let excerpt = indoc! {"
603            fn main() {
604                let x = 42;"
605        };
606        let offset = excerpt.find(';').unwrap() + 1;
607        let position_string = indoc! {"
608            fn main() {
609                let x = 42;
610                //         ^[CURSOR_POSITION]"
611        }
612        .to_string();
613
614        spec.set_cursor_excerpt(excerpt, offset, "//");
615        assert_eq!(spec.cursor_position, position_string);
616        assert_eq!(
617            spec.cursor_excerpt().unwrap(),
618            (excerpt.to_string(), offset)
619        );
620    }
621
622    #[test]
623    fn test_cursor_excerpt_with_inline_marker() {
624        let mut spec = ExampleSpec {
625            name: String::new(),
626            repository_url: String::new(),
627            revision: String::new(),
628            tags: Vec::new(),
629            reasoning: None,
630            uncommitted_diff: String::new(),
631            cursor_path: Path::new("test.rs").into(),
632            cursor_position: String::new(),
633            edit_history: String::new(),
634            expected_patches: Vec::new(),
635            rejected_patch: None,
636            captured_prompt_input: None,
637            telemetry: None,
638        };
639
640        // Cursor before `42` using inline marker
641        spec.cursor_position = indoc! {"
642            fn main() {
643                let x = <|user_cursor|>42;
644                println!(\"{}\", x);
645            }"
646        }
647        .to_string();
648
649        let expected_excerpt = indoc! {"
650            fn main() {
651                let x = 42;
652                println!(\"{}\", x);
653            }"
654        };
655        let expected_offset = expected_excerpt.find("42").unwrap();
656
657        assert_eq!(
658            spec.cursor_excerpt().unwrap(),
659            (expected_excerpt.to_string(), expected_offset)
660        );
661
662        // Cursor at beginning of line
663        spec.cursor_position = indoc! {"
664            fn main() {
665            <|user_cursor|>    let x = 42;
666            }"
667        }
668        .to_string();
669
670        let expected_excerpt = indoc! {"
671            fn main() {
672                let x = 42;
673            }"
674        };
675        let expected_offset = expected_excerpt.find("    let").unwrap();
676
677        assert_eq!(
678            spec.cursor_excerpt().unwrap(),
679            (expected_excerpt.to_string(), expected_offset)
680        );
681
682        // Cursor at end of file
683        spec.cursor_position = "fn main() {}<|user_cursor|>".to_string();
684        let expected_excerpt = "fn main() {}";
685        let expected_offset = expected_excerpt.len();
686
687        assert_eq!(
688            spec.cursor_excerpt().unwrap(),
689            (expected_excerpt.to_string(), expected_offset)
690        );
691    }
692}