Don't send contents of large `@mention`-ed files (#38032)

Richard Feldman created 3 months ago

<img width="598" height="311" alt="Screenshot 2025-09-11 at 9 39 12 PM"
src="https://github.com/user-attachments/assets/b526e648-37cf-4412-83a0-42037b9fc94d"
/>

This is for both ACP and the regular agent. Previously we would always
include the whole file, which can easily blow the context window on huge
files.

Release Notes:

- When `@mention`ing large files, the Agent Panel now sends an outline of
the file instead of the whole thing.

Change summary

crates/agent/src/context.rs                  |  46 +-----
crates/agent2/src/tools/read_file_tool.rs    |  32 +---
crates/agent_ui/src/acp/message_editor.rs    | 139 +++++++++++++++++++--
crates/assistant_tool/src/outline.rs         |  67 ++++++++++
crates/assistant_tools/src/read_file_tool.rs |  30 +--
5 files changed, 224 insertions(+), 90 deletions(-)

Detailed changes

crates/agent/src/context.rs 🔗

@@ -6,7 +6,7 @@ use futures::future;
 use futures::{FutureExt, future::Shared};
 use gpui::{App, AppContext as _, ElementId, Entity, SharedString, Task};
 use icons::IconName;
-use language::{Buffer, ParseStatus};
+use language::Buffer;
 use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
 use project::{Project, ProjectEntryId, ProjectPath, Worktree};
 use prompt_store::{PromptStore, UserPromptId};
@@ -191,45 +191,19 @@ impl FileContextHandle {
         let buffer = self.buffer.clone();
 
         cx.spawn(async move |cx| {
-            // For large files, use outline instead of full content
-            if rope.len() > outline::AUTO_OUTLINE_SIZE {
-                // Wait until the buffer has been fully parsed, so we can read its outline
-                if let Ok(mut parse_status) =
-                    buffer.read_with(cx, |buffer, _| buffer.parse_status())
-                {
-                    while *parse_status.borrow() != ParseStatus::Idle {
-                        parse_status.changed().await.log_err();
-                    }
-
-                    if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) {
-                        let items = snapshot
-                            .outline(None)
-                            .items
-                            .into_iter()
-                            .map(|item| item.to_point(&snapshot));
-
-                        if let Ok(outline_text) =
-                            outline::render_outline(items, None, 0, usize::MAX).await
-                        {
-                            let context = AgentContext::File(FileContext {
-                                handle: self,
-                                full_path,
-                                text: outline_text.into(),
-                                is_outline: true,
-                            });
-                            return Some((context, vec![buffer]));
-                        }
-                    }
-                }
-            }
+            let buffer_content =
+                outline::get_buffer_content_or_outline(buffer.clone(), Some(&full_path), &cx)
+                    .await
+                    .unwrap_or_else(|_| outline::BufferContent {
+                        text: rope.to_string(),
+                        is_outline: false,
+                    });
 
-            // Fallback to full content if we couldn't build an outline
-            // (or didn't need to because the file was small enough)
             let context = AgentContext::File(FileContext {
                 handle: self,
                 full_path,
-                text: rope.to_string().into(),
-                is_outline: false,
+                text: buffer_content.text.into(),
+                is_outline: buffer_content.is_outline,
             });
             Some((context, vec![buffer]))
         })

crates/agent2/src/tools/read_file_tool.rs 🔗

@@ -147,7 +147,7 @@ impl AgentTool for ReadFileTool {
 
         event_stream.update_fields(ToolCallUpdateFields {
             locations: Some(vec![acp::ToolCallLocation {
-                path: abs_path,
+                path: abs_path.clone(),
                 line: input.start_line.map(|line| line.saturating_sub(1)),
             }]),
             ..Default::default()
@@ -225,38 +225,30 @@ impl AgentTool for ReadFileTool {
                 Ok(result.into())
             } else {
                 // No line ranges specified, so check file size to see if it's too big.
-                let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
+                let buffer_content =
+                    outline::get_buffer_content_or_outline(buffer.clone(), Some(&abs_path), cx)
+                        .await?;
 
-                if file_size <= outline::AUTO_OUTLINE_SIZE {
-                    // File is small enough, so return its contents.
-                    let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
-
-                    action_log.update(cx, |log, cx| {
-                        log.buffer_read(buffer.clone(), cx);
-                    })?;
+                action_log.update(cx, |log, cx| {
+                    log.buffer_read(buffer.clone(), cx);
+                })?;
 
-                    Ok(result.into())
-                } else {
-                    // File is too big, so return the outline
-                    // and a suggestion to read again with line numbers.
-                    let outline =
-                        outline::file_outline(project.clone(), file_path, action_log, None, cx)
-                            .await?;
+                if buffer_content.is_outline {
                     Ok(formatdoc! {"
                         This file was too big to read all at once.
 
-                        Here is an outline of its symbols:
-
-                        {outline}
+                        {}
 
                         Using the line numbers in this outline, you can call this tool again
                         while specifying the start_line and end_line fields to see the
                         implementations of symbols in the outline.
 
                         Alternatively, you can fall back to the `grep` tool (if available)
-                        to search the file for specific content."
+                        to search the file for specific content.", buffer_content.text
                     }
                     .into())
+                } else {
+                    Ok(buffer_content.text.into())
                 }
             };

crates/agent_ui/src/acp/message_editor.rs 🔗

@@ -8,6 +8,7 @@ use agent_servers::{AgentServer, AgentServerDelegate};
 use agent2::HistoryStore;
 use anyhow::{Result, anyhow};
 use assistant_slash_commands::codeblock_fence_for_path;
+use assistant_tool::outline;
 use collections::{HashMap, HashSet};
 use editor::{
     Addon, Anchor, AnchorRangeExt, ContextMenuOptions, ContextMenuPlacement, Editor, EditorElement,
@@ -456,11 +457,14 @@ impl MessageEditor {
             .update(cx, |project, cx| project.open_buffer(project_path, cx));
         cx.spawn(async move |_, cx| {
             let buffer = buffer.await?;
-            let mention = buffer.update(cx, |buffer, cx| Mention::Text {
-                content: buffer.text(),
-                tracked_buffers: vec![cx.entity()],
-            })?;
-            anyhow::Ok(mention)
+            let buffer_content =
+                outline::get_buffer_content_or_outline(buffer.clone(), Some(&abs_path), &cx)
+                    .await?;
+
+            Ok(Mention::Text {
+                content: buffer_content.text,
+                tracked_buffers: vec![buffer],
+            })
         })
     }
 
@@ -520,18 +524,17 @@ impl MessageEditor {
                         })
                     });
 
-                    // TODO: report load errors instead of just logging
-                    let rope_task = cx.spawn(async move |cx| {
+                    cx.spawn(async move |cx| {
                         let buffer = open_task.await.log_err()?;
-                        let rope = buffer
-                            .read_with(cx, |buffer, _cx| buffer.as_rope().clone())
-                            .log_err()?;
-                        Some((rope, buffer))
-                    });
+                        let buffer_content = outline::get_buffer_content_or_outline(
+                            buffer.clone(),
+                            Some(&full_path),
+                            &cx,
+                        )
+                        .await
+                        .ok()?;
 
-                    cx.background_spawn(async move {
-                        let (rope, buffer) = rope_task.await?;
-                        Some((rel_path, full_path, rope.to_string(), buffer))
+                        Some((rel_path, full_path, buffer_content.text, buffer))
                     })
                 }))
             })?;
@@ -1580,6 +1583,7 @@ mod tests {
     use agent_client_protocol as acp;
     use agent2::HistoryStore;
     use assistant_context::ContextStore;
+    use assistant_tool::outline;
     use editor::{AnchorRangeExt as _, Editor, EditorMode};
     use fs::FakeFs;
     use futures::StreamExt as _;
@@ -2584,4 +2588,109 @@ mod tests {
             })
             .collect::<Vec<_>>()
     }
+
+    #[gpui::test]
+    async fn test_large_file_mention_uses_outline(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let fs = FakeFs::new(cx.executor());
+
+        // Create a large file that exceeds AUTO_OUTLINE_SIZE
+        const LINE: &str = "fn example_function() { /* some code */ }\n";
+        let large_content = LINE.repeat(2 * (outline::AUTO_OUTLINE_SIZE / LINE.len()));
+        assert!(large_content.len() > outline::AUTO_OUTLINE_SIZE);
+
+        // Create a small file that doesn't exceed AUTO_OUTLINE_SIZE
+        let small_content = "fn small_function() { /* small */ }\n";
+        assert!(small_content.len() < outline::AUTO_OUTLINE_SIZE);
+
+        fs.insert_tree(
+            "/project",
+            json!({
+                "large_file.rs": large_content.clone(),
+                "small_file.rs": small_content,
+            }),
+        )
+        .await;
+
+        let project = Project::test(fs, [Path::new(path!("/project"))], cx).await;
+
+        let (workspace, cx) =
+            cx.add_window_view(|window, cx| Workspace::test_new(project.clone(), window, cx));
+
+        let context_store = cx.new(|cx| ContextStore::fake(project.clone(), cx));
+        let history_store = cx.new(|cx| HistoryStore::new(context_store, cx));
+
+        let message_editor = cx.update(|window, cx| {
+            cx.new(|cx| {
+                let editor = MessageEditor::new(
+                    workspace.downgrade(),
+                    project.clone(),
+                    history_store.clone(),
+                    None,
+                    Default::default(),
+                    Default::default(),
+                    "Test Agent".into(),
+                    "Test",
+                    EditorMode::AutoHeight {
+                        min_lines: 1,
+                        max_lines: None,
+                    },
+                    window,
+                    cx,
+                );
+                // Enable embedded context so files are actually included
+                editor.prompt_capabilities.set(acp::PromptCapabilities {
+                    embedded_context: true,
+                    ..Default::default()
+                });
+                editor
+            })
+        });
+
+        // Test large file mention
+        // Get the absolute path using the project's worktree
+        let large_file_abs_path = project.read_with(cx, |project, cx| {
+            let worktree = project.worktrees(cx).next().unwrap();
+            let worktree_root = worktree.read(cx).abs_path();
+            worktree_root.join("large_file.rs")
+        });
+        let large_file_task = message_editor.update(cx, |editor, cx| {
+            editor.confirm_mention_for_file(large_file_abs_path, cx)
+        });
+
+        let large_file_mention = large_file_task.await.unwrap();
+        match large_file_mention {
+            Mention::Text { content, .. } => {
+                // Should contain outline header for large files
+                assert!(content.contains("File outline for"));
+                assert!(content.contains("file too large to show full content"));
+                // Should not contain the full repeated content
+                assert!(!content.contains(&LINE.repeat(100)));
+            }
+            _ => panic!("Expected Text mention for large file"),
+        }
+
+        // Test small file mention
+        // Get the absolute path using the project's worktree
+        let small_file_abs_path = project.read_with(cx, |project, cx| {
+            let worktree = project.worktrees(cx).next().unwrap();
+            let worktree_root = worktree.read(cx).abs_path();
+            worktree_root.join("small_file.rs")
+        });
+        let small_file_task = message_editor.update(cx, |editor, cx| {
+            editor.confirm_mention_for_file(small_file_abs_path, cx)
+        });
+
+        let small_file_mention = small_file_task.await.unwrap();
+        match small_file_mention {
+            Mention::Text { content, .. } => {
+                // Should contain the actual content
+                assert_eq!(content, small_content);
+                // Should not contain outline header
+                assert!(!content.contains("File outline for"));
+            }
+            _ => panic!("Expected Text mention for small file"),
+        }
+    }
 }

crates/assistant_tool/src/outline.rs 🔗

@@ -1,10 +1,11 @@
 use action_log::ActionLog;
 use anyhow::{Context as _, Result};
 use gpui::{AsyncApp, Entity};
-use language::{OutlineItem, ParseStatus};
+use language::{Buffer, OutlineItem, ParseStatus};
 use project::Project;
 use regex::Regex;
 use std::fmt::Write;
+use std::path::Path;
 use text::Point;
 
 /// For files over this size, instead of reading them (or including them in context),
@@ -128,3 +129,67 @@ fn render_entries(
 
     entries_rendered
 }
+
+/// Result of getting buffer content, which can be either full content or an outline.
+pub struct BufferContent {
+    /// The actual content (either full text or outline)
+    pub text: String,
+    /// Whether this is an outline (true) or full content (false)
+    pub is_outline: bool,
+}
+
+/// Returns either the full content of a buffer or its outline, depending on size.
+/// For files larger than AUTO_OUTLINE_SIZE, returns an outline with a header.
+/// For smaller files, returns the full content.
+pub async fn get_buffer_content_or_outline(
+    buffer: Entity<Buffer>,
+    path: Option<&Path>,
+    cx: &AsyncApp,
+) -> Result<BufferContent> {
+    let file_size = buffer.read_with(cx, |buffer, _| buffer.text().len())?;
+
+    if file_size > AUTO_OUTLINE_SIZE {
+        // For large files, use outline instead of full content
+        // Wait until the buffer has been fully parsed, so we can read its outline
+        let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
+        while *parse_status.borrow() != ParseStatus::Idle {
+            parse_status.changed().await?;
+        }
+
+        let outline_items = buffer.read_with(cx, |buffer, _| {
+            let snapshot = buffer.snapshot();
+            snapshot
+                .outline(None)
+                .items
+                .into_iter()
+                .map(|item| item.to_point(&snapshot))
+                .collect::<Vec<_>>()
+        })?;
+
+        let outline_text = render_outline(outline_items, None, 0, usize::MAX).await?;
+
+        let text = if let Some(path) = path {
+            format!(
+                "# File outline for {} (file too large to show full content)\n\n{}",
+                path.display(),
+                outline_text
+            )
+        } else {
+            format!(
+                "# File outline (file too large to show full content)\n\n{}",
+                outline_text
+            )
+        };
+        Ok(BufferContent {
+            text,
+            is_outline: true,
+        })
+    } else {
+        // File is small enough, return full content
+        let text = buffer.read_with(cx, |buffer, _| buffer.text())?;
+        Ok(BufferContent {
+            text,
+            is_outline: false,
+        })
+    }
+}

crates/assistant_tools/src/read_file_tool.rs 🔗

@@ -261,37 +261,31 @@ impl Tool for ReadFileTool {
                 Ok(result)
             } else {
                 // No line ranges specified, so check file size to see if it's too big.
-                let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
+                let path_buf = std::path::PathBuf::from(&file_path);
+                let buffer_content =
+                    outline::get_buffer_content_or_outline(buffer.clone(), Some(&path_buf), cx)
+                        .await?;
 
-                if file_size <= outline::AUTO_OUTLINE_SIZE {
-                    // File is small enough, so return its contents.
-                    let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
-
-                    action_log.update(cx, |log, cx| {
-                        log.buffer_read(buffer, cx);
-                    })?;
+                action_log.update(cx, |log, cx| {
+                    log.buffer_read(buffer, cx);
+                })?;
 
-                    Ok(result.into())
-                } else {
-                    // File is too big, so return the outline
-                    // and a suggestion to read again with line numbers.
-                    let outline =
-                        outline::file_outline(project, file_path, action_log, None, cx).await?;
+                if buffer_content.is_outline {
                     Ok(formatdoc! {"
                         This file was too big to read all at once.
 
-                        Here is an outline of its symbols:
-
-                        {outline}
+                        {}
 
                         Using the line numbers in this outline, you can call this tool again
                         while specifying the start_line and end_line fields to see the
                         implementations of symbols in the outline.
 
                         Alternatively, you can fall back to the `grep` tool (if available)
-                        to search the file for specific content."
+                        to search the file for specific content.", buffer_content.text
                     }
                     .into())
+                } else {
+                    Ok(buffer_content.text.into())
                 }
             }
         })