Detailed changes
@@ -27,11 +27,11 @@ If you are unsure how to fulfill the user's request, gather more information wit
If appropriate, use tool calls to explore the current project, which contains the following root directories:
{{#each worktrees}}
-- `{{root_name}}`
+- `{{abs_path}}`
{{/each}}
- Bias towards not asking the user for help if you can find the answer yourself.
-- When providing paths to tools, the path should always begin with a path that starts with a project root directory listed above.
+- When providing paths to tools, the path should always start with the name of a project root directory listed above.
- Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path!
{{# if (has_tool 'grep') }}
- When looking for symbols in the project, prefer the `grep` tool.
@@ -305,17 +305,19 @@ impl ThreadStore {
project: Entity<Project>,
cx: &mut App,
) -> Task<(WorktreeContext, Option<RulesLoadingError>)> {
- let root_name = worktree.read(cx).root_name().into();
+ let tree = worktree.read(cx);
+ let root_name = tree.root_name().into();
+ let abs_path = tree.abs_path();
+
+ let mut context = WorktreeContext {
+ root_name,
+ abs_path,
+ rules_file: None,
+ };
let rules_task = Self::load_worktree_rules_file(worktree, project, cx);
let Some(rules_task) = rules_task else {
- return Task::ready((
- WorktreeContext {
- root_name,
- rules_file: None,
- },
- None,
- ));
+ return Task::ready((context, None));
};
cx.spawn(async move |_| {
@@ -328,11 +330,8 @@ impl ThreadStore {
}),
),
};
- let worktree_info = WorktreeContext {
- root_name,
- rules_file,
- };
- (worktree_info, rules_file_error)
+ context.rules_file = rules_file;
+ (context, rules_file_error)
})
}
@@ -341,12 +340,12 @@ impl ThreadStore {
project: Entity<Project>,
cx: &mut App,
) -> Option<Task<Result<RulesFileContext>>> {
- let worktree_ref = worktree.read(cx);
- let worktree_id = worktree_ref.id();
+ let worktree = worktree.read(cx);
+ let worktree_id = worktree.id();
let selected_rules_file = RULES_FILE_NAMES
.into_iter()
.filter_map(|name| {
- worktree_ref
+ worktree
.entry_for_path(name)
.filter(|entry| entry.is_file())
.map(|entry| entry.path.clone())
@@ -26,6 +26,7 @@ use std::{
cmp::Reverse,
fmt::{self, Display},
io::Write as _,
+ path::Path,
str::FromStr,
sync::mpsc,
};
@@ -38,10 +39,11 @@ fn eval_extract_handle_command_output() {
//
// Model | Pass rate
// ----------------------------|----------
- // claude-3.7-sonnet | 0.98
- // gemini-2.5-pro-06-05 | 0.77
- // gemini-2.5-flash | 0.11
- // gpt-4.1 | 1.00
+ // claude-3.7-sonnet | 0.99 (2025-06-14)
+ // claude-sonnet-4 | 0.97 (2025-06-14)
+ // gemini-2.5-pro-06-05 | 0.77 (2025-05-22)
+ // gemini-2.5-flash | 0.11 (2025-05-22)
+ // gpt-4.1 | 1.00 (2025-05-22)
let input_file_path = "root/blame.rs";
let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs");
@@ -110,6 +112,13 @@ fn eval_extract_handle_command_output() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_delete_run_git_blame() {
+ // Model | Pass rate
+ // ----------------------------|----------
+ // claude-3.7-sonnet | 1.0 (2025-06-14)
+ // claude-sonnet-4 | 0.96 (2025-06-14)
+ // gemini-2.5-pro-06-05 |
+ // gemini-2.5-flash |
+ // gpt-4.1 |
let input_file_path = "root/blame.rs";
let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs");
let output_file_content = include_str!("evals/fixtures/delete_run_git_blame/after.rs");
@@ -165,13 +174,12 @@ fn eval_delete_run_git_blame() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_translate_doc_comments() {
- // Results for 2025-05-22
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet |
- // gemini-2.5-pro-preview-03-25 | 1.0
+ // claude-3.7-sonnet | 1.0 (2025-06-14)
+ // claude-sonnet-4 | 1.0 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/canvas.rs";
@@ -228,13 +236,12 @@ fn eval_translate_doc_comments() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
- // Results for 2025-05-22
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet | 0.98
- // gemini-2.5-pro-preview-03-25 | 0.99
+ // claude-3.7-sonnet | 0.96 (2025-06-14)
+ // claude-sonnet-4 | 0.11 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 0.99 (2025-05-22)
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/lib.rs";
@@ -354,13 +361,12 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_disable_cursor_blinking() {
- // Results for 2025-05-22
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet |
- // gemini-2.5-pro-preview-03-25 | 1.0
+ // claude-3.7-sonnet | 0.99 (2025-06-14)
+ // claude-sonnet-4 | 0.85 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/editor.rs";
@@ -462,7 +468,7 @@ fn eval_from_pixels_constructor() {
0.95,
// For whatever reason, this eval produces more mismatched tags.
// Increasing for now, let's see if we can bring this down.
- 0.2,
+ 0.25,
EvalInput::from_conversation(
vec![
message(
@@ -648,15 +654,14 @@ fn eval_from_pixels_constructor() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_zode() {
- // Results for 2025-05-22
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet | 1.0
- // gemini-2.5-pro-preview-03-25 | 1.0
- // gemini-2.5-flash-preview-04-17 | 1.0
- // gpt-4.1 | 1.0
+ // claude-3.7-sonnet | 1.0 (2025-06-14)
+ // claude-sonnet-4 | 1.0 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
+ // gemini-2.5-flash-preview-04-17 | 1.0 (2025-05-22)
+ // gpt-4.1 | 1.0 (2025-05-22)
let input_file_path = "root/zode.py";
let input_content = None;
let edit_description = "Create the main Zode CLI script";
@@ -755,13 +760,12 @@ fn eval_zode() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_add_overwrite_test() {
- // Results for 2025-05-22
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet | 0.16
- // gemini-2.5-pro-preview-03-25 | 0.35
+ // claude-3.7-sonnet | 0.65 (2025-06-14)
+ // claude-sonnet-4 | 0.07 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 0.35 (2025-05-22)
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/action_log.rs";
@@ -991,15 +995,14 @@ fn eval_create_empty_file() {
// thoughts into it. This issue is not specific to empty files, but
// it's easier to reproduce with them.
//
- // Results for 2025-05-21:
- //
// Model | Pass rate
// ============================================
//
- // claude-3.7-sonnet | 1.00
- // gemini-2.5-pro-preview-03-25 | 1.00
- // gemini-2.5-flash-preview-04-17 | 1.00
- // gpt-4.1 | 1.00
+ // claude-3.7-sonnet | 1.00 (2025-06-14)
+ // claude-sonnet-4 | 1.00 (2025-06-14)
+ // gemini-2.5-pro-preview-03-25 | 1.00 (2025-05-21)
+ // gemini-2.5-flash-preview-04-17 | 1.00 (2025-05-21)
+ // gpt-4.1 | 1.00 (2025-05-21)
//
//
// TODO: gpt-4.1-mini errored 38 times:
@@ -1556,6 +1559,7 @@ impl EditAgentTest {
.collect::<Vec<_>>();
let worktrees = vec![WorktreeContext {
root_name: "root".to_string(),
+ abs_path: Path::new("/path/to/root").into(),
rules_file: None,
}];
let prompt_builder = PromptBuilder::new(None)?;
@@ -1650,7 +1654,7 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
Ok(err) => match err {
LanguageModelCompletionError::RateLimit(duration) => {
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
- let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..0.5));
+ let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
eprintln!(
"Attempt #{attempt}: Rate limit exceeded. Retry after {duration:?} + jitter of {jitter:?}"
);
@@ -69,13 +69,13 @@ pub struct EditFileToolInput {
/// start each path with one of the project's root directories.
///
/// The following examples assume we have two root directories in the project:
- /// - backend
- /// - frontend
+ /// - /a/b/backend
+ /// - /c/d/frontend
///
/// <example>
/// `backend/src/main.rs`
///
- /// Notice how the file path starts with root-1. Without that, the path
+ /// Notice how the file path starts with `backend`. Without that, the path
/// would be ambiguous and the call would fail!
/// </example>
///
@@ -31,8 +31,8 @@ pub struct ReadFileToolInput {
/// <example>
/// If the project has the following root directories:
///
- /// - directory1
- /// - directory2
+ /// - /a/b/directory1
+ /// - /c/d/directory2
///
/// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
/// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
@@ -74,6 +74,7 @@ pub struct UserRulesContext {
#[derive(Debug, Clone, Serialize)]
pub struct WorktreeContext {
pub root_name: String,
+ pub abs_path: Arc<Path>,
pub rules_file: Option<RulesFileContext>,
}
@@ -455,6 +456,7 @@ mod test {
fn test_assistant_system_prompt_renders() {
let worktrees = vec![WorktreeContext {
root_name: "path".into(),
+ abs_path: Path::new("/path/to/root").into(),
rules_file: Some(RulesFileContext {
path_in_worktree: Path::new(".rules").into(),
text: "".into(),
@@ -484,6 +486,7 @@ mod test {
fn test_assistant_system_prompt_depends_on_enabled_tools() {
let worktrees = vec![WorktreeContext {
root_name: "path".into(),
+ abs_path: Path::new("/path/to/root").into(),
rules_file: None,
}];
let default_user_rules = vec![];