From 4283c325eba6e0b756c2bfb6185fed7bde7442aa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Dec 2025 14:45:19 -0800 Subject: [PATCH] Represent cursor location in excerpt with a comment line --- crates/edit_prediction/src/capture_example.rs | 33 ++- crates/edit_prediction/src/example_spec.rs | 250 +++++++++++++++++- .../edit_prediction_cli/src/load_project.rs | 14 +- 3 files changed, 272 insertions(+), 25 deletions(-) diff --git a/crates/edit_prediction/src/capture_example.rs b/crates/edit_prediction/src/capture_example.rs index d64a9ea03d056e0b428507104a8d422d309c76db..54bf027f2afc0ed1282c73ecd73884c518b3d66b 100644 --- a/crates/edit_prediction/src/capture_example.rs +++ b/crates/edit_prediction/src/capture_example.rs @@ -9,7 +9,7 @@ use gpui::{App, Entity, Task}; use language::{Buffer, ToPoint as _}; use project::Project; use std::{collections::hash_map, fmt::Write as _, path::Path, sync::Arc}; -use text::{BufferSnapshot as TextBufferSnapshot, ToOffset as _}; +use text::BufferSnapshot as TextBufferSnapshot; pub fn capture_example( project: Entity, @@ -43,7 +43,13 @@ pub fn capture_example( Some(cx.spawn(async move |mut cx| { let snapshots_by_path = collect_snapshots(&project, &git_store, &events, &mut cx).await?; - let cursor_excerpt = cx + + let line_comment_prefix = snapshot + .language() + .and_then(|lang| lang.config().line_comments.first()) + .map(|s| s.to_string()) + .unwrap_or_default(); + let (cursor_excerpt, cursor_offset) = cx .background_executor() .spawn(async move { compute_cursor_excerpt(&snapshot, cursor_anchor) }) .await; @@ -60,35 +66,35 @@ pub fn capture_example( } } - Ok(ExampleSpec { + let mut spec = ExampleSpec { name: generate_timestamp_name(), repository_url, revision, uncommitted_diff, cursor_path: cursor_path.as_std_path().into(), - cursor_position: cursor_excerpt, + cursor_position: String::new(), edit_history, expected_patch: String::new(), - }) + }; + spec.set_cursor_excerpt(&cursor_excerpt, cursor_offset, 
&line_comment_prefix); + Ok(spec) })) } fn compute_cursor_excerpt( snapshot: &language::BufferSnapshot, cursor_anchor: language::Anchor, -) -> String { +) -> (String, usize) { + use text::ToOffset as _; + let cursor_point = cursor_anchor.to_point(snapshot); let (_editable_range, context_range) = editable_and_context_ranges_for_cursor_position(cursor_point, snapshot, 100, 50); - let context_start_offset = context_range.start.to_offset(snapshot); let cursor_offset = cursor_anchor.to_offset(snapshot); let cursor_offset_in_excerpt = cursor_offset.saturating_sub(context_start_offset); - let mut excerpt = snapshot.text_for_range(context_range).collect::(); - if cursor_offset_in_excerpt <= excerpt.len() { - excerpt.insert_str(cursor_offset_in_excerpt, zeta_prompt::CURSOR_MARKER); - } - excerpt + let excerpt = snapshot.text_for_range(context_range).collect::(); + (excerpt, cursor_offset_in_excerpt) } async fn collect_snapshots( @@ -310,7 +316,8 @@ mod tests { .to_string(), cursor_path: Path::new("project/src/main.rs").into(), cursor_position: indoc! {" - <|user_cursor|>fn main() { + fn main() { + ^[CURSOR_POSITION] // comment 1 one(); two(); diff --git a/crates/edit_prediction/src/example_spec.rs b/crates/edit_prediction/src/example_spec.rs index 47e8390691ae36bde751b8bba67ade52db0d7315..1caa918d406a02b2b0a2f33e86dd2825d16da3e4 100644 --- a/crates/edit_prediction/src/example_spec.rs +++ b/crates/edit_prediction/src/example_spec.rs @@ -1,5 +1,8 @@ +use anyhow::{Context as _, Result}; use serde::{Deserialize, Serialize}; -use std::{borrow::Cow, mem, path::Path, sync::Arc}; +use std::{borrow::Cow, fmt::Write as _, mem, path::Path, sync::Arc}; + +pub const CURSOR_POSITION_MARKER: &str = "[CURSOR_POSITION]"; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct ExampleSpec { @@ -224,4 +227,249 @@ impl ExampleSpec { Ok(spec) } + + /// Returns the excerpt of text around the cursor, and the offset of the cursor within that + /// excerpt. 
+    ///
+    /// The cursor's position is marked with a special comment that appears
+    /// below the cursor line, which contains the string `[CURSOR_POSITION]`,
+    /// preceded by an arrow marking the cursor's column. The arrow can be
+    /// either:
+    /// - `^` - The cursor column is at the position of the `^` character (pointing up to the cursor)
+    /// - `<` - The cursor column is at the first non-whitespace character on that line.
+    ///
+    /// The marker line is removed from the returned excerpt, and newlines that
+    /// trail the excerpt after the cursor are trimmed. The returned offset is a
+    /// byte offset into the returned excerpt.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the marker is missing, if no arrow precedes it, or
+    /// if the arrow points past the end of the line above the marker or into
+    /// the middle of a multi-byte character.
+    pub fn cursor_excerpt(&self) -> Result<(String, usize)> {
+        let input = &self.cursor_position;
+
+        let marker_offset = input
+            .find(CURSOR_POSITION_MARKER)
+            .context("missing [CURSOR_POSITION] marker")?;
+        let marker_line_start = input[..marker_offset]
+            .rfind('\n')
+            .map(|pos| pos + 1)
+            .unwrap_or(0);
+        let marker_line_end = input[marker_line_start..]
+            .find('\n')
+            .map(|pos| marker_line_start + pos + 1)
+            .unwrap_or(input.len());
+
+        // Only look for the arrow in the part of the marker line that precedes
+        // the marker itself, which is what the error message below promises.
+        let arrow_region = &input[marker_line_start..marker_offset];
+        let cursor_column = if let Some(caret_pos) = arrow_region.find('^') {
+            caret_pos
+        } else if let Some(less_than_pos) = arrow_region.find('<') {
+            arrow_region
+                .find(|c: char| !c.is_whitespace())
+                .unwrap_or(less_than_pos)
+        } else {
+            anyhow::bail!(
+                "cursor position marker line must contain '^' or '<' before [CURSOR_POSITION]"
+            );
+        };
+
+        // The cursor is on the line above the marker line. Locate that line in
+        // `input` rather than in the trimmed excerpt: everything before the
+        // marker line is identical in both, and `input` is always long enough
+        // to slice here.
+        let cursor_line_end = marker_line_start.saturating_sub(1);
+        let cursor_line_start = input[..cursor_line_end]
+            .rfind('\n')
+            .map(|pos| pos + 1)
+            .unwrap_or(0);
+        let cursor_offset = cursor_line_start + cursor_column;
+        if cursor_offset > cursor_line_end || !input.is_char_boundary(cursor_offset) {
+            anyhow::bail!("cursor position marker does not point into the line above it");
+        }
+
+        let mut excerpt = input[..marker_line_start].to_string() + &input[marker_line_end..];
+        // Drop trailing newlines, but never cut the excerpt off before the cursor.
+        let trimmed_len = excerpt.trim_end_matches('\n').len();
+        excerpt.truncate(trimmed_len.max(cursor_offset));
+
+        Ok((excerpt, cursor_offset))
+    }
+
+    /// Sets the cursor position excerpt from a plain excerpt and cursor byte offset.
+    ///
+    /// The `line_comment_prefix` is used to format the marker line as a comment.
+    /// If the cursor column is less than the comment prefix length, the `<` format is used.
+    /// Otherwise, the `^` format is used.
+    ///
+    /// `cursor_offset` is a byte offset into `excerpt`. An offset that lies past
+    /// the end of `excerpt`, or inside a multi-byte character, is moved back to
+    /// the nearest preceding character boundary instead of panicking.
+    pub fn set_cursor_excerpt(
+        &mut self,
+        excerpt: &str,
+        cursor_offset: usize,
+        line_comment_prefix: &str,
+    ) {
+        // Slicing `excerpt` at an out-of-range or mid-character offset would
+        // panic, so snap the offset back onto a valid boundary first.
+        let mut cursor_offset = cursor_offset.min(excerpt.len());
+        while !excerpt.is_char_boundary(cursor_offset) {
+            cursor_offset -= 1;
+        }
+
+        // Find which line the cursor is on and its column
+        let cursor_line_start = excerpt[..cursor_offset]
+            .rfind('\n')
+            .map(|pos| pos + 1)
+            .unwrap_or(0);
+        let cursor_line_end = excerpt[cursor_line_start..]
+            .find('\n')
+            .map(|pos| cursor_line_start + pos + 1)
+            .unwrap_or(excerpt.len());
+        let cursor_line = &excerpt[cursor_line_start..cursor_line_end];
+        let cursor_line_indent = &cursor_line[..cursor_line.len() - cursor_line.trim_start().len()];
+        let cursor_column = cursor_offset - cursor_line_start;
+
+        // Build the marker line
+        let mut marker_line = String::new();
+        if cursor_column < line_comment_prefix.len() {
+            // The comment prefix does not fit before the cursor column, so the
+            // comment starts at the cursor column itself and `<` is used.
+            for _ in 0..cursor_column {
+                marker_line.push(' ');
+            }
+            marker_line.push_str(line_comment_prefix);
+            write!(marker_line, " <{}", CURSOR_POSITION_MARKER).unwrap();
+        } else {
+            // Reuse the cursor line's own indentation when the prefix still
+            // fits before the cursor column; otherwise start at column zero.
+            if cursor_column >= cursor_line_indent.len() + line_comment_prefix.len() {
+                marker_line.push_str(cursor_line_indent);
+            }
+            marker_line.push_str(line_comment_prefix);
+            // Pad so that the `^` lands exactly on the cursor's byte column.
+            while marker_line.len() < cursor_column {
+                marker_line.push(' ');
+            }
+            write!(marker_line, "^{}", CURSOR_POSITION_MARKER).unwrap();
+        }
+
+        // Build the final cursor_position string
+        let mut result = String::with_capacity(excerpt.len() + marker_line.len() + 2);
+        result.push_str(&excerpt[..cursor_line_end]);
+        // The cursor line has no newline of its own when it is the last line.
+        if !result.ends_with('\n') {
+            result.push('\n');
+        }
+        result.push_str(&marker_line);
+        if cursor_line_end < excerpt.len() {
+            result.push('\n');
+            result.push_str(&excerpt[cursor_line_end..]);
+        }
+
+        self.cursor_position = result;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use indoc::indoc;
+
+    #[test]
+    fn test_cursor_excerpt_with_caret() {
+        let mut spec = ExampleSpec {
+            name: String::new(),
+            repository_url: String::new(),
revision: String::new(), + uncommitted_diff: String::new(), + cursor_path: Path::new("test.rs").into(), + cursor_position: String::new(), + edit_history: String::new(), + expected_patch: String::new(), + }; + + // Cursor before `42` + let excerpt = indoc! {" + fn main() { + let x = 42; + println!(\"{}\", x); + }" + }; + let offset = excerpt.find("42").unwrap(); + let position_string = indoc! {" + fn main() { + let x = 42; + // ^[CURSOR_POSITION] + println!(\"{}\", x); + }" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + + // Cursor after `l` in `let` + let offset = excerpt.find("et x").unwrap(); + let position_string = indoc! {" + fn main() { + let x = 42; + // ^[CURSOR_POSITION] + println!(\"{}\", x); + }" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + + // Cursor before `let` + let offset = excerpt.find("let").unwrap(); + let position_string = indoc! {" + fn main() { + let x = 42; + // ^[CURSOR_POSITION] + println!(\"{}\", x); + }" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + + // Cursor at beginning of the line with `let` + let offset = excerpt.find(" let").unwrap(); + let position_string = indoc! 
{" + fn main() { + let x = 42; + // <[CURSOR_POSITION] + println!(\"{}\", x); + }" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + + // Cursor at end of line, after the semicolon + let offset = excerpt.find(';').unwrap() + 1; + let position_string = indoc! {" + fn main() { + let x = 42; + // ^[CURSOR_POSITION] + println!(\"{}\", x); + }" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + + // Caret at end of file (no trailing newline) + let excerpt = indoc! {" + fn main() { + let x = 42;" + }; + let offset = excerpt.find(';').unwrap() + 1; + let position_string = indoc! {" + fn main() { + let x = 42; + // ^[CURSOR_POSITION]" + } + .to_string(); + + spec.set_cursor_excerpt(excerpt, offset, "//"); + assert_eq!(spec.cursor_position, position_string); + assert_eq!( + spec.cursor_excerpt().unwrap(), + (excerpt.to_string(), offset) + ); + } } diff --git a/crates/edit_prediction_cli/src/load_project.rs b/crates/edit_prediction_cli/src/load_project.rs index b2b289b249ad8707ff49e92cb780f93d428844af..270461e0a91e3ac8fb7d58c8b511ad643b6b4380 100644 --- a/crates/edit_prediction_cli/src/load_project.rs +++ b/crates/edit_prediction_cli/src/load_project.rs @@ -22,7 +22,6 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use zeta_prompt::CURSOR_MARKER; pub async fn run_load_project( example: &mut Example, @@ -98,16 +97,9 @@ async fn cursor_position( let cursor_buffer = project .update(cx, |project, cx| project.open_buffer(cursor_path, cx))? 
.await?; - let cursor_offset_within_excerpt = example - .spec - .cursor_position - .find(CURSOR_MARKER) - .context("missing cursor marker")?; - let mut cursor_excerpt = example.spec.cursor_position.clone(); - cursor_excerpt.replace_range( - cursor_offset_within_excerpt..(cursor_offset_within_excerpt + CURSOR_MARKER.len()), - "", - ); + + let (cursor_excerpt, cursor_offset_within_excerpt) = example.spec.cursor_excerpt()?; + let excerpt_offset = cursor_buffer.read_with(cx, |buffer, _cx| { let text = buffer.text();