Cargo.lock 🔗
@@ -690,6 +690,7 @@ dependencies = [
"pretty_assertions",
"project",
"rand 0.8.5",
+ "regex",
"serde",
"serde_json",
"settings",
Richard Feldman created
## Before

## After

Release Notes:
- Context picker and `@`-mentions now work with very large files.
Cargo.lock | 1
crates/agent/src/context.rs | 159 ++++++++++++++++++
crates/assistant_tool/Cargo.toml | 1
crates/assistant_tool/src/assistant_tool.rs | 1
crates/assistant_tool/src/outline.rs | 132 +++++++++++++++
crates/assistant_tools/src/code_symbols_tool.rs | 124 --------------
crates/assistant_tools/src/contents_tool.rs | 12
crates/assistant_tools/src/read_file_tool.rs | 13
8 files changed, 301 insertions(+), 142 deletions(-)
@@ -690,6 +690,7 @@ dependencies = [
"pretty_assertions",
"project",
"rand 0.8.5",
+ "regex",
"serde",
"serde_json",
"settings",
@@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::{ops::Range, path::Path, sync::Arc};
+use assistant_tool::outline;
use collections::HashSet;
use futures::future;
use futures::{FutureExt, future::Shared};
use gpui::{App, AppContext as _, Entity, SharedString, Task};
-use language::Buffer;
+use language::{Buffer, ParseStatus};
use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
use project::{Project, ProjectEntryId, ProjectPath, Worktree};
use prompt_store::{PromptStore, UserPromptId};
@@ -152,6 +153,7 @@ pub struct FileContext {
pub handle: FileContextHandle,
pub full_path: Arc<Path>,
pub text: SharedString,
+ pub is_outline: bool,
}
impl FileContextHandle {
@@ -177,14 +179,51 @@ impl FileContextHandle {
log::error!("file context missing path");
return Task::ready(None);
};
- let full_path = file.full_path(cx);
+ let full_path: Arc<Path> = file.full_path(cx).into();
let rope = buffer_ref.as_rope().clone();
let buffer = self.buffer.clone();
- cx.background_spawn(async move {
+
+        cx.spawn(async move |cx| {
+            // For large files, use outline instead of full content
+            if rope.len() > outline::AUTO_OUTLINE_SIZE {
+                // Wait until the buffer has been fully parsed, so we can read its outline
+                if let Ok(mut parse_status) =
+                    buffer.read_with(cx, |buffer, _| buffer.parse_status())
+                {
+                    let mut parsed = true;
+                    while *parse_status.borrow() != ParseStatus::Idle {
+                        // A failed wait is logged. Retrying it in this loop could spin
+                        // forever without the status ever becoming `Idle`, so give up
+                        // on the outline and fall back to the full text below.
+                        if parse_status.changed().await.log_err().is_none() {
+                            parsed = false;
+                            break;
+                        }
+                    }
+
+                    if parsed {
+                        if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot())
+                        {
+                            if let Some(outline) = snapshot.outline(None) {
+                                let items = outline
+                                    .items
+                                    .into_iter()
+                                    .map(|item| item.to_point(&snapshot));
+
+                                // Render every item in a single page, without a filter.
+                                if let Ok(outline_text) =
+                                    outline::render_outline(items, None, 0, usize::MAX).await
+                                {
+                                    let context = AgentContext::File(FileContext {
+                                        handle: self,
+                                        full_path,
+                                        text: outline_text.into(),
+                                        is_outline: true,
+                                    });
+                                    return Some((context, vec![buffer]));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fallback to full content if we couldn't build an outline
+            // (or didn't need to because the file was small enough)
             let context = AgentContext::File(FileContext {
                 handle: self,
-                full_path: full_path.into(),
+                full_path,
                 text: rope.to_string().into(),
+                is_outline: false,
             });
             Some((context, vec![buffer]))
         })
@@ -996,3 +1035,115 @@ impl Hash for AgentContextKey {
}
}
}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use gpui::TestAppContext;
+    use project::{FakeFs, Project};
+    use serde_json::json;
+    use settings::SettingsStore;
+    use util::path;
+
+    /// Registers a test settings store as a global, then initializes the
+    /// language and project settings.
+    fn init_test_settings(cx: &mut TestAppContext) {
+        cx.update(|app| {
+            let store = SettingsStore::test(app);
+            app.set_global(store);
+            language::init(app);
+            Project::init_settings(app);
+        });
+    }
+
+    /// Builds a test project rooted at `/test` on a fake file system that
+    /// has been populated with `files`.
+    async fn create_test_project(
+        cx: &mut TestAppContext,
+        files: serde_json::Value,
+    ) -> Entity<Project> {
+        let fake_fs = FakeFs::new(cx.background_executor.clone());
+        fake_fs.insert_tree(path!("/test"), files).await;
+        Project::test(fake_fs, [path!("/test").as_ref()], cx).await
+    }
+
+    #[gpui::test]
+    async fn test_large_file_uses_outline(cx: &mut TestAppContext) {
+        init_test_settings(cx);
+
+        // Repeat a short line until the text is well past AUTO_OUTLINE_SIZE.
+        const LINE: &str = "Line with some text\n";
+        let repetitions = 2 * (outline::AUTO_OUTLINE_SIZE / LINE.len());
+        let large_content = LINE.repeat(repetitions);
+        let original_len = large_content.len();
+        assert!(original_len > outline::AUTO_OUTLINE_SIZE);
+
+        let loaded = file_context_for(large_content, cx).await;
+
+        assert!(loaded.is_outline, "Large file should use outline format");
+        assert!(
+            loaded.text.len() < original_len,
+            "Outline should be smaller than original content"
+        );
+    }
+
+    #[gpui::test]
+    async fn test_small_file_uses_full_content(cx: &mut TestAppContext) {
+        init_test_settings(cx);
+
+        let small_content = "This is a small file.\n";
+        assert!(small_content.len() < outline::AUTO_OUTLINE_SIZE);
+
+        let loaded = file_context_for(small_content.to_string(), cx).await;
+
+        assert!(!loaded.is_outline, "Small files should not get an outline");
+        assert_eq!(loaded.text, small_content);
+    }
+
+    /// Stores `content` as `file.txt` in a fresh test project, opens it as a
+    /// buffer, and returns the file context that `load_context` produces for it.
+    async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext {
+        let project = create_test_project(cx, json!({ "file.txt": content })).await;
+
+        // Resolve the file inside the project and open it as a buffer.
+        let project_path = project
+            .read_with(cx, |project, cx| project.find_project_path("file.txt", cx))
+            .unwrap();
+        let buffer = project
+            .update(cx, |project, cx| project.open_buffer(project_path, cx))
+            .await
+            .unwrap();
+
+        let handle = AgentContextHandle::File(FileContextHandle {
+            buffer: buffer.clone(),
+            context_id: ContextId::zero(),
+        });
+
+        let loaded = cx
+            .update(|cx| load_context(vec![handle], &project, &None, cx))
+            .await;
+
+        loaded
+            .loaded_context
+            .contexts
+            .into_iter()
+            .find_map(|context| match context {
+                AgentContext::File(file_context) => Some(file_context),
+                _ => None,
+            })
+            .expect("Should have found a file context")
+    }
+}
@@ -24,6 +24,7 @@ language.workspace = true
language_model.workspace = true
parking_lot.workspace = true
project.workspace = true
+regex.workspace = true
serde.workspace = true
serde_json.workspace = true
text.workspace = true
@@ -1,4 +1,5 @@
mod action_log;
+pub mod outline;
mod tool_registry;
mod tool_schema;
mod tool_working_set;
@@ -0,0 +1,132 @@
+use crate::ActionLog;
+use anyhow::{Result, anyhow};
+use gpui::{AsyncApp, Entity};
+use language::{OutlineItem, ParseStatus};
+use project::Project;
+use regex::Regex;
+use std::fmt::Write;
+use text::Point;
+
+/// For files over this size (in bytes), instead of reading them (or including them in
+/// context), we automatically provide the file's symbol outline, with line numbers.
+pub const AUTO_OUTLINE_SIZE: usize = 16384;
+
+/// Opens the buffer for `path` in `project` and renders the outline of its
+/// symbols as text.
+///
+/// The buffer is registered with `action_log`, and the outline is read only
+/// after the buffer's parse status has reached `ParseStatus::Idle`. `regex`
+/// is forwarded to [`render_outline`], which renders every item in a single
+/// page (offset `0`, page size `usize::MAX`).
+///
+/// # Errors
+///
+/// Returns an error if `path` cannot be resolved in the project, if opening,
+/// tracking or reading the buffer fails, if waiting for the parse status
+/// fails, or if the buffer snapshot provides no outline.
+pub async fn file_outline(
+    project: Entity<Project>,
+    path: String,
+    action_log: Entity<ActionLog>,
+    regex: Option<Regex>,
+    cx: &mut AsyncApp,
+) -> anyhow::Result<String> {
+    // Resolve the path first, then open the corresponding buffer.
+    let resolved_path = project.read_with(cx, |project, cx| {
+        project
+            .find_project_path(&path, cx)
+            .ok_or_else(|| anyhow!("Path {path} not found in project"))
+    })??;
+    let open_task = project.update(cx, |project, cx| project.open_buffer(resolved_path, cx))?;
+    let buffer = open_task.await?;
+
+    action_log.update(cx, |log, cx| {
+        log.track_buffer(buffer.clone(), cx);
+    })?;
+
+    // The outline can only be read once the buffer has been fully parsed.
+    let mut status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
+    while *status.borrow() != ParseStatus::Idle {
+        status.changed().await?;
+    }
+
+    let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
+    let outline = snapshot
+        .outline(None)
+        .ok_or_else(|| anyhow!("No outline information available for this file."))?;
+
+    let symbols = outline
+        .items
+        .into_iter()
+        .map(|item| item.to_point(&snapshot));
+    render_outline(symbols, regex, 0, usize::MAX).await
+}
+
+/// Renders one page of outline items as text, followed by a pagination footer.
+///
+/// The first `offset` items are skipped. After that, up to `results_per_page`
+/// items are rendered; when `regex` is `Some`, only items whose text matches
+/// it are rendered.
+///
+/// When further matching items remain, the footer suggests the offset for the
+/// next page. Because `offset` counts unfiltered items, that suggestion counts
+/// every item consumed from `items`, including ones the regex rejected, so the
+/// next page resumes right after the last item shown. Without a regex the
+/// footer is the same as before this accounting was introduced.
+///
+/// This function currently always returns `Ok`.
+pub async fn render_outline(
+    items: impl IntoIterator<Item = OutlineItem<Point>>,
+    regex: Option<Regex>,
+    offset: usize,
+    results_per_page: usize,
+) -> Result<String> {
+    let matches = |item: &OutlineItem<Point>| {
+        regex
+            .as_ref()
+            .is_none_or(|regex| regex.is_match(&item.text))
+    };
+
+    let mut items = items.into_iter().skip(offset);
+
+    // Number of items pulled from `items` (after the skip) while filling this
+    // page, whether or not they matched the regex.
+    let mut consumed = 0usize;
+    let entries = items
+        .by_ref()
+        .inspect(|_| consumed += 1)
+        .filter(|item| matches(item))
+        .take(results_per_page)
+        .collect::<Vec<_>>();
+
+    // Only advertise another page if a remaining item would actually be shown.
+    let has_more = items.any(|item| matches(&item));
+
+    let mut output = String::new();
+    let entries_rendered = render_entries(&mut output, entries);
+
+    // Calculate pagination information
+    let page_start = offset + 1;
+    let page_end = offset + entries_rendered;
+
+    // Add pagination information. Writing to a `String` does not fail, so the
+    // formatting result is ignored.
+    if has_more {
+        let next_offset = offset + consumed;
+        writeln!(
+            &mut output,
+            "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {next_offset} to see next page)",
+        )
+    } else {
+        writeln!(
+            &mut output,
+            "\nShowing symbols {page_start}-{page_end} (total symbols: {page_end})",
+        )
+    }
+    .ok();
+
+    Ok(output)
+}
+
+/// Appends one line per outline item to `output` and returns how many lines
+/// were written.
+///
+/// Each line is the item's text, indented by one space per level of depth and
+/// followed by its 1-based line span: `[L<n>]` when the item starts and ends
+/// on the same row, `[L<start>-<end>]` otherwise.
+fn render_entries(
+    output: &mut String,
+    items: impl IntoIterator<Item = OutlineItem<Point>>,
+) -> usize {
+    let mut rendered = 0;
+
+    for entry in items {
+        // Nesting is shown with one leading space per depth level.
+        for _ in 0..entry.depth {
+            output.push(' ');
+        }
+        output.push_str(&entry.text);
+
+        // Rows are converted to 1-based line numbers for display.
+        let first_line = entry.range.start.row + 1;
+        let last_line = entry.range.end.row + 1;
+        let location = if first_line == last_line {
+            format!(" [L{}]", first_line)
+        } else {
+            format!(" [L{}-{}]", first_line, last_line)
+        };
+        output.push_str(&location);
+        output.push('\n');
+
+        rendered += 1;
+    }
+
+    rendered
+}
@@ -4,10 +4,10 @@ use std::sync::Arc;
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
+use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult};
use collections::IndexMap;
use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task};
-use language::{OutlineItem, ParseStatus, Point};
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use project::{Project, Symbol};
use regex::{Regex, RegexBuilder};
@@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool {
};
cx.spawn(async move |cx| match input.path {
- Some(path) => file_outline(project, path, action_log, regex, cx).await,
+ Some(path) => outline::file_outline(project, path, action_log, regex, cx).await,
None => project_symbols(project, regex, input.offset, cx).await,
})
.into()
}
}
-pub async fn file_outline(
- project: Entity<Project>,
- path: String,
- action_log: Entity<ActionLog>,
- regex: Option<Regex>,
- cx: &mut AsyncApp,
-) -> anyhow::Result<String> {
- let buffer = {
- let project_path = project.read_with(cx, |project, cx| {
- project
- .find_project_path(&path, cx)
- .ok_or_else(|| anyhow!("Path {path} not found in project"))
- })??;
-
- project
- .update(cx, |project, cx| project.open_buffer(project_path, cx))?
- .await?
- };
-
- action_log.update(cx, |action_log, cx| {
- action_log.track_buffer(buffer.clone(), cx);
- })?;
-
- // Wait until the buffer has been fully parsed, so that we can read its outline.
- let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
- while *parse_status.borrow() != ParseStatus::Idle {
- parse_status.changed().await?;
- }
-
- let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
- let Some(outline) = snapshot.outline(None) else {
- return Err(anyhow!("No outline information available for this file."));
- };
-
- render_outline(
- outline
- .items
- .into_iter()
- .map(|item| item.to_point(&snapshot)),
- regex,
- 0,
- usize::MAX,
- )
- .await
-}
-
async fn project_symbols(
project: Entity<Project>,
regex: Option<Regex>,
@@ -291,77 +245,3 @@ async fn project_symbols(
output
})
}
-
-async fn render_outline(
- items: impl IntoIterator<Item = OutlineItem<Point>>,
- regex: Option<Regex>,
- offset: usize,
- results_per_page: usize,
-) -> Result<String> {
- let mut items = items.into_iter().skip(offset);
-
- let entries = items
- .by_ref()
- .filter(|item| {
- regex
- .as_ref()
- .is_none_or(|regex| regex.is_match(&item.text))
- })
- .take(results_per_page)
- .collect::<Vec<_>>();
- let has_more = items.next().is_some();
-
- let mut output = String::new();
- let entries_rendered = render_entries(&mut output, entries);
-
- // Calculate pagination information
- let page_start = offset + 1;
- let page_end = offset + entries_rendered;
- let total_symbols = if has_more {
- format!("more than {}", page_end)
- } else {
- page_end.to_string()
- };
-
- // Add pagination information
- if has_more {
- writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
- )
- } else {
- writeln!(
- &mut output,
- "\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
- )
- }
- .ok();
-
- Ok(output)
-}
-
-fn render_entries(
- output: &mut String,
- items: impl IntoIterator<Item = OutlineItem<Point>>,
-) -> usize {
- let mut entries_rendered = 0;
-
- for item in items {
- // Indent based on depth ("" for level 0, " " for level 1, etc.)
- for _ in 0..item.depth {
- output.push(' ');
- }
- output.push_str(&item.text);
-
- // Add position information - convert to 1-based line numbers for display
- let start_line = item.range.start.row + 1;
- let end_line = item.range.end.row + 1;
-
- if start_line == end_line {
- writeln!(output, " [L{}]", start_line).ok();
- } else {
- writeln!(output, " [L{}-{}]", start_line, end_line).ok();
- }
- entries_rendered += 1;
- }
-
- entries_rendered
-}
@@ -1,8 +1,8 @@
use std::sync::Arc;
-use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
-use assistant_tool::{ActionLog, Tool, ToolResult};
+use assistant_tool::{ActionLog, Tool, ToolResult, outline};
use gpui::{AnyWindowHandle, App, Entity, Task};
use itertools::Itertools;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
@@ -14,10 +14,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then
-/// the tool will return the file's symbol outline instead of its contents,
-/// and suggest trying again using line ranges from the outline.
-const MAX_FILE_SIZE_TO_READ: usize = 16384;
-
/// If the model requests to list the entries in a directory with more
/// entries than this, then the tool will return a subset of the entries
/// and suggest trying again.
@@ -218,7 +214,7 @@ impl Tool for ContentsTool {
// No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
- if file_size <= MAX_FILE_SIZE_TO_READ {
+ if file_size <= outline::AUTO_OUTLINE_SIZE {
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
action_log.update(cx, |log, cx| {
@@ -229,7 +225,7 @@ impl Tool for ContentsTool {
} else {
// File is too big, so return its outline and a suggestion to
// read again with a line number range specified.
- let outline = file_outline(project, file_path, action_log, None, cx).await?;
+ let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
}
@@ -1,5 +1,6 @@
-use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
+use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult};
use gpui::{AnyWindowHandle, App, Entity, Task};
@@ -14,10 +15,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then
-/// the tool will return an error along with the model's symbol outline,
-/// and suggest trying again using line ranges from the outline.
-const MAX_FILE_SIZE_TO_READ: usize = 16384;
-
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReadFileToolInput {
/// The relative path of the file to read.
@@ -144,7 +141,7 @@ impl Tool for ReadFileTool {
// No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
- if file_size <= MAX_FILE_SIZE_TO_READ {
+ if file_size <= outline::AUTO_OUTLINE_SIZE {
// File is small enough, so return its contents.
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
@@ -154,9 +151,9 @@ impl Tool for ReadFileTool {
Ok(result)
} else {
- // File is too big, so return an error with the outline
+ // File is too big, so return the outline
// and a suggestion to read again with line numbers.
- let outline = file_outline(project, file_path, action_log, None, cx).await?;
+ let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(formatdoc! {"
This file was too big to read all at once. Here is an outline of its symbols: