If file is too big, provide the outline and suggest a follow-up tool (#28158)

Richard Feldman created

<img width="622" alt="Screenshot 2025-04-05 at 5 48 14 PM"
src="https://github.com/user-attachments/assets/24b9c7d4-d3e2-4929-bca8-79db5b4e5748"
/>

Release Notes:

- For files above a certain size, the `read_files` tool now reads only
the file's symbol outline, to conserve context window space. It then
suggests that the agent call `read_files` again with the relevant line
ranges it saw in the outline.

Change summary

crates/assistant_tools/src/code_symbols_tool.rs          |  2 
crates/assistant_tools/src/read_file_tool.rs             | 64 ++++++---
crates/assistant_tools/src/read_file_tool/description.md |  5 
3 files changed, 49 insertions(+), 22 deletions(-)

Detailed changes

crates/assistant_tools/src/read_file_tool.rs 🔗

@@ -1,6 +1,6 @@
-use std::path::Path;
 use std::sync::Arc;
 
+use crate::code_symbols_tool::file_outline;
 use crate::schema::json_schema_for;
 use anyhow::{Result, anyhow};
 use assistant_tool::{ActionLog, Tool};
@@ -13,6 +13,11 @@ use serde::{Deserialize, Serialize};
 use ui::IconName;
 use util::markdown::MarkdownString;
 
+/// If the model requests to read a whole file (no line range given) whose
+/// text length in bytes exceeds this, then the tool will return the file's
+/// symbol outline instead of its contents, and suggest trying again using
+/// line ranges from the outline.
+const MAX_FILE_SIZE_TO_READ: usize = 4096;
+
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct ReadFileToolInput {
     /// The relative path of the file to read.
@@ -26,10 +31,10 @@ pub struct ReadFileToolInput {
     /// - directory1
     /// - directory2
     ///
-    /// If you wanna access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
-    /// If you wanna access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
+    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
+    /// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
     /// </example>
-    pub path: Arc<Path>,
+    pub path: String,
 
     /// Optional line number to start reading on (1-based index)
     #[serde(default)]
@@ -66,7 +71,7 @@ impl Tool for ReadFileTool {
     fn ui_text(&self, input: &serde_json::Value) -> String {
         match serde_json::from_value::<ReadFileToolInput>(input.clone()) {
             Ok(input) => {
-                let path = MarkdownString::inline_code(&input.path.display().to_string());
+                let path = MarkdownString::inline_code(&input.path);
                 match (input.start_line, input.end_line) {
                     (Some(start), None) => format!("Read file {path} (from line {start})"),
                     (Some(start), Some(end)) => format!("Read file {path} (lines {start}-{end})"),
@@ -91,12 +96,10 @@ impl Tool for ReadFileTool {
         };
 
         let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
-            return Task::ready(Err(anyhow!(
-                "Path {} not found in project",
-                &input.path.display()
-            )));
+            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path,)));
         };
 
+        let file_path = input.path.clone();
         cx.spawn(async move |cx| {
             let buffer = cx
                 .update(|cx| {
@@ -104,27 +107,46 @@ impl Tool for ReadFileTool {
                 })?
                 .await?;
 
-            let result = buffer.read_with(cx, |buffer, _cx| {
-                let text = buffer.text();
-                if input.start_line.is_some() || input.end_line.is_some() {
+            // Check if specific line ranges are provided
+            if input.start_line.is_some() || input.end_line.is_some() {
+                let result = buffer.read_with(cx, |buffer, _cx| {
+                    let text = buffer.text();
                     let start = input.start_line.unwrap_or(1);
                     let lines = text.split('\n').skip(start - 1);
                     if let Some(end) = input.end_line {
-                        let count = end.saturating_sub(start);
+                        let count = end.saturating_sub(start).max(1); // Ensure at least 1 line
                         Itertools::intersperse(lines.take(count), "\n").collect()
                     } else {
                         Itertools::intersperse(lines, "\n").collect()
                     }
-                } else {
-                    text
-                }
-            })?;
+                })?;
+
+                action_log.update(cx, |log, cx| {
+                    log.buffer_read(buffer, cx);
+                })?;
+
+                Ok(result)
+            } else {
+                // No line ranges specified, so check file size to see if it's too big.
+                let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
 
-            action_log.update(cx, |log, cx| {
-                log.buffer_read(buffer, cx);
-            })?;
+                if file_size <= MAX_FILE_SIZE_TO_READ {
+                    // File is small enough, so return its contents.
+                    let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
 
-            anyhow::Ok(result)
+                    action_log.update(cx, |log, cx| {
+                        log.buffer_read(buffer, cx);
+                    })?;
+
+                    Ok(result)
+                } else {
+                    // File is too big, so return its symbol outline instead of
+                    // its contents, plus a suggestion to read again with line numbers.
+                    let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
+
+                    Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start_line and end_line fields to see the implementations of symbols in the outline."))
+                }
+            }
         })
     }
 }

crates/assistant_tools/src/read_file_tool/description.md 🔗

@@ -1 +1,6 @@
 Reads the content of the given file in the project.
+
+If the file is too big to read all at once, and neither a start line
+nor an end line was specified, then this returns an outline of the
+file's symbols (with line numbers) instead of the file's contents,
+so that it can be called again with line ranges.