agent: Improve initial file search quality (#29317)

Agus Zubiaga created

This PR significantly improves the quality of the initial file search
that occurs when the model doesn't yet know the full path to a file it
needs to read/edit.

Previously, the assertions in file_search often failed on main as the
model attempted to guess full file paths. On this branch, it reliably
calls `find_path` (previously `path_search`) before reading files.

After getting the model to find paths first, I noticed it would try
using `grep` instead of `path_search`. This motivated renaming
`path_search` to `find_path` (continuing the analogy to unix commands)
and adding system prompt instructions about proper tool selection.

Note: I know the command is just called `find`, but that seemed too
general.

In my eval runs, the `file_search` example improved from 40% ± 10% to
98% ± 2%. The only assertion I'm seeing occasionally fail is "glob
starts with `**` or project". We can probably add some instructions in
that regard.

Release Notes:

- N/A

Change summary

assets/prompts/assistant_system_prompt.hbs                    |  3 
assets/settings/default.json                                  |  4 
crates/assistant_tools/src/assistant_tools.rs                 |  8 
crates/assistant_tools/src/batch_tool.rs                      |  2 
crates/assistant_tools/src/find_path_tool.rs                  | 18 
crates/assistant_tools/src/find_path_tool/description.md      |  2 
crates/assistant_tools/src/grep_tool.rs                       |  2 
crates/assistant_tools/src/list_directory_tool/description.md |  2 
crates/eval/src/examples/file_search.rs                       | 15 
crates/migrator/src/migrations.rs                             |  6 
crates/migrator/src/migrations/m_2025_04_23/settings.rs       | 27 ++
crates/migrator/src/migrator.rs                               | 46 ++++
12 files changed, 107 insertions(+), 28 deletions(-)

Detailed changes

assets/prompts/assistant_system_prompt.hbs 🔗

@@ -31,6 +31,9 @@ If appropriate, use tool calls to explore the current project, which contains th
 - When looking for symbols in the project, prefer the `grep` tool.
 - As you learn about the structure of the project, use that information to scope `grep` searches to targeted subtrees of the project.
 - Bias towards not asking the user for help if you can find the answer yourself.
+{{! TODO: Only mention tools if they are enabled }}
+- The user might specify a partial file path. If you don't know the full path, use `find_path` (not `grep`) before you read the file.
+- Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path!
 
 ## Fixing Diagnostics
 

assets/settings/default.json 🔗

@@ -646,7 +646,7 @@
           "fetch": true,
           "list_directory": false,
           "now": true,
-          "path_search": true,
+          "find_path": true,
           "read_file": true,
           "grep": true,
           "thinking": true,
@@ -670,7 +670,7 @@
           "list_directory": true,
           "move_path": false,
           "now": false,
-          "path_search": true,
+          "find_path": true,
           "read_file": true,
           "grep": true,
           "rename": false,

crates/assistant_tools/src/assistant_tools.rs 🔗

@@ -9,12 +9,12 @@ mod delete_path_tool;
 mod diagnostics_tool;
 mod edit_file_tool;
 mod fetch_tool;
+mod find_path_tool;
 mod grep_tool;
 mod list_directory_tool;
 mod move_path_tool;
 mod now_tool;
 mod open_tool;
-mod path_search_tool;
 mod read_file_tool;
 mod rename_tool;
 mod replace;
@@ -45,11 +45,11 @@ use crate::delete_path_tool::DeletePathTool;
 use crate::diagnostics_tool::DiagnosticsTool;
 use crate::edit_file_tool::EditFileTool;
 use crate::fetch_tool::FetchTool;
+use crate::find_path_tool::FindPathTool;
 use crate::grep_tool::GrepTool;
 use crate::list_directory_tool::ListDirectoryTool;
 use crate::now_tool::NowTool;
 use crate::open_tool::OpenTool;
-use crate::path_search_tool::PathSearchTool;
 use crate::read_file_tool::ReadFileTool;
 use crate::rename_tool::RenameTool;
 use crate::symbol_info_tool::SymbolInfoTool;
@@ -58,7 +58,7 @@ use crate::thinking_tool::ThinkingTool;
 
 pub use create_file_tool::CreateFileToolInput;
 pub use edit_file_tool::EditFileToolInput;
-pub use path_search_tool::PathSearchToolInput;
+pub use find_path_tool::FindPathToolInput;
 pub use read_file_tool::ReadFileToolInput;
 
 pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
@@ -81,7 +81,7 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
     registry.register_tool(OpenTool);
     registry.register_tool(CodeSymbolsTool);
     registry.register_tool(ContentsTool);
-    registry.register_tool(PathSearchTool);
+    registry.register_tool(FindPathTool);
     registry.register_tool(ReadFileTool);
     registry.register_tool(GrepTool);
     registry.register_tool(RenameTool);

crates/assistant_tools/src/path_search_tool.rs → crates/assistant_tools/src/find_path_tool.rs 🔗

@@ -12,7 +12,7 @@ use util::paths::PathMatcher;
 use worktree::Snapshot;
 
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
-pub struct PathSearchToolInput {
+pub struct FindPathToolInput {
     /// The glob to match against every path in the project.
     ///
     /// <example>
@@ -34,11 +34,11 @@ pub struct PathSearchToolInput {
 
 const RESULTS_PER_PAGE: usize = 50;
 
-pub struct PathSearchTool;
+pub struct FindPathTool;
 
-impl Tool for PathSearchTool {
+impl Tool for FindPathTool {
     fn name(&self) -> String {
-        "path_search".into()
+        "find_path".into()
     }
 
     fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
@@ -46,7 +46,7 @@ impl Tool for PathSearchTool {
     }
 
     fn description(&self) -> String {
-        include_str!("./path_search_tool/description.md").into()
+        include_str!("./find_path_tool/description.md").into()
     }
 
     fn icon(&self) -> IconName {
@@ -54,11 +54,11 @@ impl Tool for PathSearchTool {
     }
 
     fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
-        json_schema_for::<PathSearchToolInput>(format)
+        json_schema_for::<FindPathToolInput>(format)
     }
 
     fn ui_text(&self, input: &serde_json::Value) -> String {
-        match serde_json::from_value::<PathSearchToolInput>(input.clone()) {
+        match serde_json::from_value::<FindPathToolInput>(input.clone()) {
             Ok(input) => format!("Find paths matching “`{}`”", input.glob),
             Err(_) => "Search paths".to_string(),
         }
@@ -73,7 +73,7 @@ impl Tool for PathSearchTool {
         _window: Option<AnyWindowHandle>,
         cx: &mut App,
     ) -> ToolResult {
-        let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) {
+        let (offset, glob) = match serde_json::from_value::<FindPathToolInput>(input) {
             Ok(input) => (input.offset, input.glob),
             Err(err) => return Task::ready(Err(anyhow!(err))).into(),
         };
@@ -144,7 +144,7 @@ mod test {
     use util::path;
 
     #[gpui::test]
-    async fn test_path_search_tool(cx: &mut TestAppContext) {
+    async fn test_find_path_tool(cx: &mut TestAppContext) {
         init_test(cx);
 
         let fs = FakeFs::new(cx.executor());

crates/assistant_tools/src/path_search_tool/description.md → crates/assistant_tools/src/find_path_tool/description.md 🔗

@@ -1,4 +1,4 @@
-Fast file pattern matching tool that works with any codebase size
+Fast file path pattern matching tool that works with any codebase size
 
 - Supports glob patterns like "**/*.js" or "src/**/*.ts"
 - Returns matching file paths sorted alphabetically

crates/assistant_tools/src/grep_tool.rs 🔗

@@ -20,6 +20,8 @@ use util::paths::PathMatcher;
 pub struct GrepToolInput {
     /// A regex pattern to search for in the entire project. Note that the regex
     /// will be parsed by the Rust `regex` crate.
+    ///
+    /// Do NOT specify a path here! This will only be matched against the code **content**.
     pub regex: String,
 
     /// A glob pattern for the paths of files to include in the search.

crates/eval/src/examples/file_search.rs 🔗

@@ -1,5 +1,5 @@
 use anyhow::Result;
-use assistant_tools::PathSearchToolInput;
+use assistant_tools::FindPathToolInput;
 use async_trait::async_trait;
 use regex::Regex;
 
@@ -15,7 +15,7 @@ impl Example for FileSearchExample {
             url: "https://github.com/zed-industries/zed.git".to_string(),
             revision: "03ecb88fe30794873f191ddb728f597935b3101c".to_string(),
             language_server: None,
-            max_assertions: Some(4),
+            max_assertions: Some(3),
         }
     }
 
@@ -32,21 +32,18 @@ impl Example for FileSearchExample {
         ));
 
         let response = cx.run_turn().await?;
-        let tool_use = response.expect_tool("path_search", cx)?;
-        let input = tool_use.parse_input::<PathSearchToolInput>()?;
+        let tool_use = response.expect_tool("find_path", cx)?;
+        let input = tool_use.parse_input::<FindPathToolInput>()?;
 
         let glob = input.glob;
-        cx.assert(
-            glob.ends_with(FILENAME),
-            format!("glob ends with `{FILENAME}`"),
-        )?;
+        cx.assert(glob.ends_with(FILENAME), "glob ends with file name")?;
 
         let without_filename = glob.replace(FILENAME, "");
         let matches = Regex::new("(\\*\\*|zed)/(\\*\\*?/)?")
             .unwrap()
             .is_match(&without_filename);
 
-        cx.assert(matches, "glob starts with either `**` or `zed`")?;
+        cx.assert(matches, "glob starts with `**` or project")?;
 
         Ok(())
     }

crates/migrator/src/migrations.rs 🔗

@@ -51,3 +51,9 @@ pub(crate) mod m_2025_04_21 {
 
     pub(crate) use settings::SETTINGS_PATTERNS;
 }
+
+pub(crate) mod m_2025_04_23 {
+    mod settings;
+
+    pub(crate) use settings::SETTINGS_PATTERNS;
+}

crates/migrator/src/migrations/m_2025_04_23/settings.rs 🔗

@@ -0,0 +1,27 @@
+use std::ops::Range;
+use tree_sitter::{Query, QueryMatch};
+
+use crate::MigrationPatterns;
+use crate::patterns::SETTINGS_ASSISTANT_TOOLS_PATTERN;
+
+pub const SETTINGS_PATTERNS: MigrationPatterns =
+    &[(SETTINGS_ASSISTANT_TOOLS_PATTERN, rename_path_search_tool)];
+
+fn rename_path_search_tool(
+    contents: &str,
+    mat: &QueryMatch,
+    query: &Query,
+) -> Option<(Range<usize>, String)> {
+    let tool_name_capture_ix = query.capture_index_for_name("tool_name")?;
+    let tool_name_range = mat
+        .nodes_for_capture_index(tool_name_capture_ix)
+        .next()?
+        .byte_range();
+    let tool_name = contents.get(tool_name_range.clone())?;
+
+    if tool_name == "path_search" {
+        return Some((tool_name_range, "find_path".to_string()));
+    }
+
+    None
+}

crates/migrator/src/migrator.rs 🔗

@@ -132,6 +132,10 @@ pub fn migrate_settings(text: &str) -> Result<Option<String>> {
             migrations::m_2025_04_21::SETTINGS_PATTERNS,
             &SETTINGS_QUERY_2025_04_21,
         ),
+        (
+            migrations::m_2025_04_23::SETTINGS_PATTERNS,
+            &SETTINGS_QUERY_2025_04_23,
+        ),
     ];
     run_migrations(text, migrations)
 }
@@ -214,6 +218,10 @@ define_query!(
     SETTINGS_QUERY_2025_04_21,
     migrations::m_2025_04_21::SETTINGS_PATTERNS
 );
+define_query!(
+    SETTINGS_QUERY_2025_04_23,
+    migrations::m_2025_04_23::SETTINGS_PATTERNS
+);
 
 // custom query
 static EDIT_PREDICTION_SETTINGS_MIGRATION_QUERY: LazyLock<Query> = LazyLock::new(|| {
@@ -639,7 +647,7 @@ mod tests {
                                 "name": "Custom",
                                 "tools": {
                                     "diagnostics": true,
-                                    "path_search": true,
+                                    "find_path": true,
                                     "read_file": true
                                 }
                             }
@@ -650,4 +658,40 @@ mod tests {
             None,
         )
     }
+
+    #[test]
+    fn test_rename_path_search_to_find_path() {
+        assert_migrate_settings(
+            r#"
+                {
+                    "assistant": {
+                        "profiles": {
+                            "default": {
+                                "tools": {
+                                    "path_search": true,
+                                    "read_file": true
+                                }
+                            }
+                        }
+                    }
+                }
+            "#,
+            Some(
+                r#"
+                {
+                    "assistant": {
+                        "profiles": {
+                            "default": {
+                                "tools": {
+                                    "find_path": true,
+                                    "read_file": true
+                                }
+                            }
+                        }
+                    }
+                }
+            "#,
+            ),
+        );
+    }
 }