contents_tool.rs

use std::sync::Arc;

use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, Tool, ToolResult};
use gpui::{AnyWindowHandle, App, Entity, Task};
use itertools::Itertools;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use project::Project;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::{fmt::Write, path::Path};
use ui::IconName;
use util::markdown::MarkdownString;
 15
 16/// If the model requests to read a file whose size exceeds this, then
 17/// the tool will return the file's symbol outline instead of its contents,
 18/// and suggest trying again using line ranges from the outline.
 19const MAX_FILE_SIZE_TO_READ: usize = 16384;
 20
 21/// If the model requests to list the entries in a directory with more
 22/// entries than this, then the tool will return a subset of the entries
 23/// and suggest trying again.
 24const MAX_DIR_ENTRIES: usize = 1024;
 25
 26#[derive(Debug, Serialize, Deserialize, JsonSchema)]
 27pub struct ContentsToolInput {
 28    /// The relative path of the file or directory to access.
 29    ///
 30    /// This path should never be absolute, and the first component
 31    /// of the path should always be a root directory in a project.
 32    ///
 33    /// <example>
 34    /// If the project has the following root directories:
 35    ///
 36    /// - directory1
 37    /// - directory2
 38    ///
 39    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
 40    /// If you want to list contents in the directory `directory2/subfolder`, you should use the path `directory2/subfolder`.
 41    /// </example>
 42    pub path: String,
 43
 44    /// Optional position (1-based index) to start reading on, if you want to read a subset of the contents.
 45    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
 46    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
 47    ///
 48    /// Defaults to 1.
 49    pub start: Option<u32>,
 50
 51    /// Optional position (1-based index) to end reading on, if you want to read a subset of the contents.
 52    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
 53    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
 54    ///
 55    /// Defaults to reading until the end of the file or directory.
 56    pub end: Option<u32>,
 57}
 58
 59pub struct ContentsTool;
 60
 61impl Tool for ContentsTool {
 62    fn name(&self) -> String {
 63        "contents".into()
 64    }
 65
 66    fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
 67        false
 68    }
 69
 70    fn description(&self) -> String {
 71        include_str!("./contents_tool/description.md").into()
 72    }
 73
 74    fn icon(&self) -> IconName {
 75        IconName::FileSearch
 76    }
 77
 78    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
 79        json_schema_for::<ContentsToolInput>(format)
 80    }
 81
 82    fn ui_text(&self, input: &serde_json::Value) -> String {
 83        match serde_json::from_value::<ContentsToolInput>(input.clone()) {
 84            Ok(input) => {
 85                let path = MarkdownString::inline_code(&input.path);
 86
 87                match (input.start, input.end) {
 88                    (Some(start), None) => format!("Read {path} (from line {start})"),
 89                    (Some(start), Some(end)) => {
 90                        format!("Read {path} (lines {start}-{end})")
 91                    }
 92                    _ => format!("Read {path}"),
 93                }
 94            }
 95            Err(_) => "Read file or directory".to_string(),
 96        }
 97    }
 98
 99    fn run(
100        self: Arc<Self>,
101        input: serde_json::Value,
102        _messages: &[LanguageModelRequestMessage],
103        project: Entity<Project>,
104        action_log: Entity<ActionLog>,
105        _window: Option<AnyWindowHandle>,
106        cx: &mut App,
107    ) -> ToolResult {
108        let input = match serde_json::from_value::<ContentsToolInput>(input) {
109            Ok(input) => input,
110            Err(err) => return Task::ready(Err(anyhow!(err))).into(),
111        };
112
113        // Sometimes models will return these even though we tell it to give a path and not a glob.
114        // When this happens, just list the root worktree directories.
115        if matches!(input.path.as_str(), "." | "" | "./" | "*") {
116            let output = project
117                .read(cx)
118                .worktrees(cx)
119                .filter_map(|worktree| {
120                    worktree.read(cx).root_entry().and_then(|entry| {
121                        if entry.is_dir() {
122                            entry.path.to_str()
123                        } else {
124                            None
125                        }
126                    })
127                })
128                .collect::<Vec<_>>()
129                .join("\n");
130
131            return Task::ready(Ok(output)).into();
132        }
133
134        let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
135            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path))).into();
136        };
137
138        let Some(worktree) = project
139            .read(cx)
140            .worktree_for_id(project_path.worktree_id, cx)
141        else {
142            return Task::ready(Err(anyhow!("Worktree not found"))).into();
143        };
144        let worktree = worktree.read(cx);
145
146        let Some(entry) = worktree.entry_for_path(&project_path.path) else {
147            return Task::ready(Err(anyhow!("Path not found: {}", input.path))).into();
148        };
149
150        // If it's a directory, list its contents
151        if entry.is_dir() {
152            let mut output = String::new();
153            let start_index = input
154                .start
155                .map(|line| (line as usize).saturating_sub(1))
156                .unwrap_or(0);
157            let end_index = input
158                .end
159                .map(|line| (line as usize).saturating_sub(1))
160                .unwrap_or(MAX_DIR_ENTRIES);
161            let mut skipped = 0;
162
163            for (index, entry) in worktree.child_entries(&project_path.path).enumerate() {
164                if index >= start_index && index <= end_index {
165                    writeln!(
166                        output,
167                        "{}",
168                        Path::new(worktree.root_name()).join(&entry.path).display(),
169                    )
170                    .unwrap();
171                } else {
172                    skipped += 1;
173                }
174            }
175
176            if output.is_empty() {
177                output.push_str(&input.path);
178                output.push_str(" is empty.");
179            }
180
181            if skipped > 0 {
182                write!(
183                    output,
184                    "\n\nNote: Skipped {skipped} entries. Adjust start and end to see other entries.",
185                ).ok();
186            }
187
188            Task::ready(Ok(output)).into()
189        } else {
190            // It's a file, so read its contents
191            let file_path = input.path.clone();
192            cx.spawn(async move |cx| {
193                let buffer = cx
194                    .update(|cx| {
195                        project.update(cx, |project, cx| project.open_buffer(project_path, cx))
196                    })?
197                    .await?;
198
199                if input.start.is_some() || input.end.is_some() {
200                    let result = buffer.read_with(cx, |buffer, _cx| {
201                        let text = buffer.text();
202                        let start = input.start.unwrap_or(1);
203                        let lines = text.split('\n').skip(start as usize - 1);
204                        if let Some(end) = input.end {
205                            let count = end.saturating_sub(start).max(1); // Ensure at least 1 line
206                            Itertools::intersperse(lines.take(count as usize), "\n").collect()
207                        } else {
208                            Itertools::intersperse(lines, "\n").collect()
209                        }
210                    })?;
211
212                    action_log.update(cx, |log, cx| {
213                        log.track_buffer(buffer, cx);
214                    })?;
215
216                    Ok(result)
217                } else {
218                    // No line ranges specified, so check file size to see if it's too big.
219                    let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
220
221                    if file_size <= MAX_FILE_SIZE_TO_READ {
222                        let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
223
224                        action_log.update(cx, |log, cx| {
225                            log.track_buffer(buffer, cx);
226                        })?;
227
228                        Ok(result)
229                    } else {
230                        // File is too big, so return its outline and a suggestion to
231                        // read again with a line number range specified.
232                        let outline = file_outline(project, file_path, action_log, None, cx).await?;
233
234                        Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
235                    }
236                }
237            }).into()
238        }
239    }
240}