From b794919842c00662f9376dee0154909b84795f20 Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Tue, 15 Apr 2025 00:54:25 -0400
Subject: [PATCH] Add contents_tool (#28738)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a combination of the "read file" and "list directory contents"
tools as part of a push to reduce our quantity of builtin tools by
combining some of them.

The functionality is all there for this tool, although there's room for
improvement on the visuals side: it currently always shows the same icon
and always says "Read" - so you can't tell at a glance when it's reading
a directory vs an individual file. Changing this will require a change
to the `Tool` trait, which can be in a separate PR. (FYI @danilo-leal!)

Screenshot 2025-04-14 at 11 56 27 PM

Release Notes:

- Added `contents` tool
---
 assets/settings/default.json                  |   2 +
 crates/assistant_tools/src/assistant_tools.rs |   3 +
 crates/assistant_tools/src/contents_tool.rs   | 251 ++++++++++++++++++
 .../src/contents_tool/description.md          |   9 +
 4 files changed, 265 insertions(+)
 create mode 100644 crates/assistant_tools/src/contents_tool.rs
 create mode 100644 crates/assistant_tools/src/contents_tool/description.md

diff --git a/assets/settings/default.json b/assets/settings/default.json
index df1a4a01af3c40ebeb07b93959d5bd4ca5eb2b0b..5c6335099b3dd398492f9e9d2346f851bd3893ed 100644
--- a/assets/settings/default.json
+++ b/assets/settings/default.json
@@ -644,6 +644,7 @@
         // We don't know which of the context server tools are safe for the "Ask" profile, so we don't enable them by default.
         // "enable_all_context_servers": true,
         "tools": {
+          "contents": true,
           "diagnostics": true,
           "fetch": true,
           "list_directory": false,
@@ -662,6 +663,7 @@
           "batch_tool": true,
           "code_actions": true,
           "code_symbols": true,
+          "contents": true,
           "copy_path": false,
           "create_file": true,
           "delete_path": false,
diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs
index 76e8b8670bdd16df2c2ee9b7ad8885a27d171110..3016f5412f9bfe8981d8eeabf445873d76c9198a 100644
--- a/crates/assistant_tools/src/assistant_tools.rs
+++ b/crates/assistant_tools/src/assistant_tools.rs
@@ -1,6 +1,7 @@
 mod batch_tool;
 mod code_action_tool;
 mod code_symbols_tool;
+mod contents_tool;
 mod copy_path_tool;
 mod create_directory_tool;
 mod create_file_tool;
@@ -33,6 +34,7 @@ use move_path_tool::MovePathTool;
 use crate::batch_tool::BatchTool;
 use crate::code_action_tool::CodeActionTool;
 use crate::code_symbols_tool::CodeSymbolsTool;
+use crate::contents_tool::ContentsTool;
 use crate::create_directory_tool::CreateDirectoryTool;
 use crate::create_file_tool::CreateFileTool;
 use crate::delete_path_tool::DeletePathTool;
@@ -69,6 +71,7 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
     registry.register_tool(NowTool);
     registry.register_tool(OpenTool);
     registry.register_tool(CodeSymbolsTool);
+    registry.register_tool(ContentsTool);
     registry.register_tool(PathSearchTool);
     registry.register_tool(ReadFileTool);
     registry.register_tool(RegexSearchTool);
diff --git a/crates/assistant_tools/src/contents_tool.rs b/crates/assistant_tools/src/contents_tool.rs
new file mode 100644
index 0000000000000000000000000000000000000000..be7c4927cb1d18da290ca4108f0f8989d554061b
--- /dev/null
+++ b/crates/assistant_tools/src/contents_tool.rs
@@ -0,0 +1,251 @@
+use std::sync::Arc;
+
+use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use anyhow::{Result, anyhow};
+use assistant_tool::{ActionLog, Tool};
+use gpui::{App, Entity, Task};
+use itertools::Itertools;
+use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
+use project::Project;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::{fmt::Write, path::Path};
+use ui::IconName;
+use util::markdown::MarkdownString;
+
+/// If the model requests to read a file whose size exceeds this, then
+/// the tool will return the file's symbol outline instead of its contents,
+/// and suggest trying again using line ranges from the outline.
+const MAX_FILE_SIZE_TO_READ: usize = 16384;
+
+/// If the model requests to list the entries in a directory with more
+/// entries than this, then the tool will return a subset of the entries
+/// and suggest trying again.
+const MAX_DIR_ENTRIES: usize = 1024;
+
+#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+pub struct ContentsToolInput {
+    /// The relative path of the file or directory to access.
+    ///
+    /// This path should never be absolute, and the first component
+    /// of the path should always be a root directory in a project.
+    ///
+    /// <example>
+    /// If the project has the following root directories:
+    ///
+    /// - directory1
+    /// - directory2
+    ///
+    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
+    /// If you want to list contents in the directory `directory2/subfolder`, you should use the path `directory2/subfolder`.
+    /// </example>
+    pub path: String,
+
+    /// Optional position (1-based index) to start reading on, if you want to read a subset of the contents.
+    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
+    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
+    ///
+    /// Defaults to 1.
+    pub start: Option<u32>,
+
+    /// Optional position (1-based index) to end reading on, if you want to read a subset of the contents.
+    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
+    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
+    ///
+    /// Defaults to reading until the end of the file or directory.
+    pub end: Option<u32>,
+}
+
+/// Built-in assistant tool that returns a file's text or lists a directory's entries,
+/// depending on what the requested project path points at.
+pub struct ContentsTool;
+
+impl Tool for ContentsTool {
+    fn name(&self) -> String {
+        "contents".into()
+    }
+
+    fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
+        false
+    }
+
+    fn description(&self) -> String {
+        include_str!("./contents_tool/description.md").into()
+    }
+
+    fn icon(&self) -> IconName {
+        IconName::FileSearch
+    }
+
+    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
+        json_schema_for::<ContentsToolInput>(format)
+    }
+
+    fn ui_text(&self, input: &serde_json::Value) -> String {
+        match serde_json::from_value::<ContentsToolInput>(input.clone()) {
+            Ok(input) => {
+                let path = MarkdownString::inline_code(&input.path);
+
+                match (input.start, input.end) {
+                    (Some(start), None) => format!("Read {path} (from line {start})"),
+                    (Some(start), Some(end)) => {
+                        format!("Read {path} (lines {start}-{end})")
+                    }
+                    _ => format!("Read {path}"),
+                }
+            }
+            Err(_) => "Read file or directory".to_string(),
+        }
+    }
+
+    fn run(
+        self: Arc<Self>,
+        input: serde_json::Value,
+        _messages: &[LanguageModelRequestMessage],
+        project: Entity<Project>,
+        action_log: Entity<ActionLog>,
+        cx: &mut App,
+    ) -> Task<Result<String>> {
+        let input = match serde_json::from_value::<ContentsToolInput>(input) {
+            Ok(input) => input,
+            Err(err) => return Task::ready(Err(anyhow!(err))),
+        };
+
+        // Sometimes models will return these even though we tell it to give a path and not a glob.
+        // When this happens, just list the root worktree directories.
+        if matches!(input.path.as_str(), "." | "" | "./" | "*") {
+            let output = project
+                .read(cx)
+                .worktrees(cx)
+                .filter_map(|worktree| {
+                    worktree.read(cx).root_entry().and_then(|entry| {
+                        if entry.is_dir() {
+                            entry.path.to_str()
+                        } else {
+                            None
+                        }
+                    })
+                })
+                .collect::<Vec<_>>()
+                .join("\n");
+
+            return Task::ready(Ok(output));
+        }
+
+        let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
+            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path)));
+        };
+
+        let Some(worktree) = project
+            .read(cx)
+            .worktree_for_id(project_path.worktree_id, cx)
+        else {
+            return Task::ready(Err(anyhow!("Worktree not found")));
+        };
+        let worktree = worktree.read(cx);
+
+        let Some(entry) = worktree.entry_for_path(&project_path.path) else {
+            return Task::ready(Err(anyhow!("Path not found: {}", input.path)));
+        };
+
+        // If it's a directory, list its contents
+        if entry.is_dir() {
+            let mut output = String::new();
+            // `start`/`end` are 1-based and inclusive; convert them to 0-based indices.
+            let start_index = input
+                .start
+                .map(|line| (line as usize).saturating_sub(1))
+                .unwrap_or(0);
+            // Without an explicit `end`, show at most MAX_DIR_ENTRIES entries from `start`.
+            let end_index = input
+                .end
+                .map(|line| (line as usize).saturating_sub(1))
+                .unwrap_or_else(|| start_index.saturating_add(MAX_DIR_ENTRIES - 1));
+            let mut skipped = 0;
+
+            for (index, entry) in worktree.child_entries(&project_path.path).enumerate() {
+                if index >= start_index && index <= end_index {
+                    writeln!(
+                        output,
+                        "{}",
+                        Path::new(worktree.root_name()).join(&entry.path).display(),
+                    )
+                    .unwrap();
+                } else {
+                    skipped += 1;
+                }
+            }
+
+            if output.is_empty() {
+                output.push_str(&input.path);
+                // Only claim the directory is empty when nothing was filtered out.
+                output.push_str(if skipped == 0 {
+                    " is empty."
+                } else {
+                    " has no entries in the requested range."
+                });
+            }
+
+            if skipped > 0 {
+                write!(
+                    output,
+                    "\n\nNote: Skipped {skipped} entries. Adjust start and end to see other entries.",
+                ).ok();
+            }
+
+            Task::ready(Ok(output))
+        } else {
+            // It's a file, so read its contents
+            let file_path = input.path.clone();
+            cx.spawn(async move |cx| {
+                let buffer = cx
+                    .update(|cx| {
+                        project.update(cx, |project, cx| project.open_buffer(project_path, cx))
+                    })?
+                    .await?;
+
+                if input.start.is_some() || input.end.is_some() {
+                    let result = buffer.read_with(cx, |buffer, _cx| {
+                        let text = buffer.text();
+                        // Clamp to 1 so a `start` of 0 cannot underflow the subtraction below.
+                        let start = input.start.unwrap_or(1).max(1);
+                        let lines = text.split('\n').skip(start as usize - 1);
+                        if let Some(end) = input.end {
+                            // `end` is inclusive, so lines start..=end is (end - start + 1)
+                            // lines; an `end` before `start` still yields one line.
+                            let count = end.saturating_sub(start).saturating_add(1);
+                            Itertools::intersperse(lines.take(count as usize), "\n").collect()
+                        } else {
+                            Itertools::intersperse(lines, "\n").collect()
+                        }
+                    })?;
+
+                    action_log.update(cx, |log, cx| {
+                        log.buffer_read(buffer, cx);
+                    })?;
+
+                    Ok(result)
+                } else {
+                    // No line ranges specified, so check file size to see if it's too big.
+                    let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
+
+                    if file_size <= MAX_FILE_SIZE_TO_READ {
+                        let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
+
+                        action_log.update(cx, |log, cx| {
+                            log.buffer_read(buffer, cx);
+                        })?;
+
+                        Ok(result)
+                    } else {
+                        // File is too big, so return its outline and a suggestion to
+                        // read again with a line number range specified.
+                        let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
+
+                        Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
+                    }
+                }
+            })
+        }
+    }
+}
diff --git a/crates/assistant_tools/src/contents_tool/description.md b/crates/assistant_tools/src/contents_tool/description.md
new file mode 100644
index 0000000000000000000000000000000000000000..b532f7c53461a082c79bf2eaec72aa74eba56ecf
--- /dev/null
+++ b/crates/assistant_tools/src/contents_tool/description.md
@@ -0,0 +1,9 @@
+Reads the contents of a path on the filesystem.
+
+If the path is a directory, this lists all files and directories within that path.
+If the path is a file, this returns the file's contents.
+
+When reading a file, if the file is too big and no line range is specified, an outline of the file's code symbols is listed instead, which can be used to request specific line ranges in a subsequent call.
+
+Similarly, if a directory has too many entries to show at once, a subset of entries will be shown,
+and subsequent requests can use the `start` and `end` fields to get other subsets.