Cargo.lock 🔗
@@ -779,6 +779,7 @@ dependencies = [
"collections",
"component",
"derive_more 0.99.19",
+ "diffy",
"editor",
"feature_flags",
"fs",
Oleksiy Syvokon created this change:
Gradually remove details from a patch to keep it within the size limit.
This helps avoid using too much context when the user pastes large
files, generates files, or just makes many changes between agent
notifications.
Release Notes:
- N/A
 Cargo.lock                                               |   1 +
 crates/assistant_tools/Cargo.toml                        |   1 +
 crates/assistant_tools/src/project_notifications_tool.rs | 145 ++++++++++++---
 3 files changed, 145 insertions(+), 2 deletions(-)
@@ -779,6 +779,7 @@ dependencies = [
"collections",
"component",
"derive_more 0.99.19",
+ "diffy",
"editor",
"feature_flags",
"fs",
@@ -63,6 +63,7 @@ which.workspace = true
workspace-hack.workspace = true
workspace.workspace = true
zed_llm_client.workspace = true
+# NOTE(review): direct version pin while every other dependency here uses
+# `.workspace = true`; also breaks alphabetical order — consider a
+# workspace-level `diffy` entry instead. TODO confirm project convention.
+diffy = "0.4.2"

[dev-dependencies]
lsp = { workspace = true, features = ["test-support"] }
@@ -6,7 +6,7 @@ use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchem
use project::Project;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
-use std::sync::Arc;
+use std::{fmt::Write, sync::Arc};
use ui::IconName;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -59,7 +59,9 @@ impl Tool for ProjectNotificationsTool {
// NOTE: Changes to this prompt require a symmetric update in the LLM Worker
const HEADER: &str = include_str!("./project_notifications_tool/prompt_header.txt");
- result(&format!("{HEADER}\n\n```diff\n{user_edits_diff}\n```\n").replace("\r\n", "\n"))
+ const MAX_BYTES: usize = 8000;
+ let diff = fit_patch_to_size(&user_edits_diff, MAX_BYTES);
+ result(&format!("{HEADER}\n\n```diff\n{diff}\n```\n").replace("\r\n", "\n"))
}
}
@@ -67,11 +69,95 @@ fn result(response: &str) -> ToolResult {
Task::ready(Ok(response.to_string().into())).into()
}
+/// Make sure that the patch fits into the size limit (in bytes).
+/// Compress the patch by omitting some parts if needed.
+/// Unified diff format is assumed.
+///
+/// Strategy: return the patch unchanged if it already fits; otherwise drop
+/// hunk bodies from the largest per-file patches first (level 1); if even
+/// that is not enough, fall back to listing changed file paths only (level 2).
+fn fit_patch_to_size(patch: &str, max_size: usize) -> String {
+ if patch.len() <= max_size {
+ return patch.to_string();
+ }
+
+ // Compression level 1: remove context lines in diff bodies, but
+ // leave the counts and positions of inserted/deleted lines
+ let mut current_size = patch.len();
+ let mut file_patches = split_patch(&patch);
+ // Sort ascending by size, then iterate in reverse so the largest
+ // per-file patches are compressed first. NOTE(review): this means the
+ // output file order is descending-by-size, not the original file order.
+ file_patches.sort_by_key(|patch| patch.len());
+ let compressed_patches = file_patches
+ .iter()
+ .rev()
+ .map(|patch| {
+ if current_size > max_size {
+ let compressed = compress_patch(patch).unwrap_or_else(|_| patch.to_string());
+ // NOTE(review): assumes `compressed.len() <= patch.len()`; if a
+ // "compressed" patch were ever larger, this subtraction would
+ // underflow and panic in debug builds — consider saturating_sub.
+ current_size -= patch.len() - compressed.len();
+ compressed
+ } else {
+ patch.to_string()
+ }
+ })
+ .collect::<Vec<_>>();
+
+ if current_size <= max_size {
+ // NOTE(review): the "\n\n" separators added by join() are not counted
+ // in `current_size`, so the result can slightly exceed `max_size`.
+ return compressed_patches.join("\n\n");
+ }
+
+ // Compression level 2: list paths of the changed files only
+ // NOTE(review): the filename list itself is never checked against
+ // `max_size`, and the `unwrap()` below panics on an unparsable piece —
+ // TODO confirm split_patch() output is always valid diffy input.
+ let filenames = file_patches
+ .iter()
+ .map(|patch| {
+ let patch = diffy::Patch::from_str(patch).unwrap();
+ // Strip the conventional "b/" prefix from the modified-file header
+ // to report a plain repository-relative path.
+ let path = patch
+ .modified()
+ .and_then(|path| path.strip_prefix("b/"))
+ .unwrap_or_default();
+ format!("- {path}\n")
+ })
+ .collect::<Vec<_>>();
+
+ filenames.join("")
+}
+
+/// Split a potentially multi-file patch into multiple single-file patches
+///
+/// A new per-file patch starts at every line beginning with "---" (the
+/// old-file header of unified diff). Each returned piece has its trailing
+/// newlines trimmed.
+///
+/// NOTE(review): a *deleted* line whose content begins with "--" also renders
+/// as "---…" in a unified diff and would falsely start a new file here —
+/// TODO confirm inputs cannot contain such lines.
+fn split_patch(patch: &str) -> Vec<String> {
+ let mut result = Vec::new();
+ let mut current_patch = String::new();
+
+ for line in patch.lines() {
+ // Flush the accumulated file patch when the next file header begins.
+ if line.starts_with("---") && !current_patch.is_empty() {
+ result.push(current_patch.trim_end_matches('\n').into());
+ current_patch = String::new();
+ }
+ current_patch.push_str(line);
+ current_patch.push('\n');
+ }
+
+ // Flush the last accumulated file patch, if any.
+ if !current_patch.is_empty() {
+ result.push(current_patch.trim_end_matches('\n').into());
+ }
+
+ result
+}
+
+/// Reduce a single-file unified diff to a skeleton: keep the "---"/"+++"
+/// file headers and each hunk's "@@ … @@" range line, but replace every hunk
+/// body with a "[...skipped...]" marker.
+///
+/// Returns an error if `patch` is not a valid unified diff (propagated from
+/// `diffy::Patch::from_str`); `fmt::Write` errors on String are effectively
+/// impossible but propagated via `?` as well.
+fn compress_patch(patch: &str) -> anyhow::Result<String> {
+ let patch = diffy::Patch::from_str(patch)?;
+ let mut out = String::new();
+
+ // Fall back to the conventional "a"/"b" placeholders when the parsed
+ // patch carries no file names.
+ writeln!(out, "--- {}", patch.original().unwrap_or("a"))?;
+ writeln!(out, "+++ {}", patch.modified().unwrap_or("b"))?;
+
+ for hunk in patch.hunks() {
+ writeln!(out, "@@ -{} +{} @@", hunk.old_range(), hunk.new_range())?;
+ writeln!(out, "[...skipped...]")?;
+ }
+
+ Ok(out)
+}
+
#[cfg(test)]
mod tests {
use super::*;
use assistant_tool::ToolResultContent;
use gpui::{AppContext, TestAppContext};
+ use indoc::indoc;
use language_model::{LanguageModelRequest, fake_provider::FakeLanguageModelProvider};
use project::{FakeFs, Project};
use serde_json::json;
@@ -206,6 +292,61 @@ mod tests {
);
}
+ // Covers both compression levels of fit_patch_to_size: hunk-body
+ // trimming (level 1) and the filenames-only fallback (level 2).
+ #[test]
+ fn test_patch_compression() {
+ // Given a patch that doesn't fit into the size budget
+ let patch = indoc! {"
+ --- a/dir/test.txt
+ +++ b/dir/test.txt
+ @@ -1,3 +1,3 @@
+ line 1
+ -line 2
+ +CHANGED
+ line 3
+ @@ -10,2 +10,2 @@
+ line 10
+ -line 11
+ +line eleven
+
+
+ --- a/dir/another.txt
+ +++ b/dir/another.txt
+ @@ -100,1 +1,1 @@
+ -before
+ +after
+ "};
+
+ // When the size deficit can be compensated by dropping the body,
+ // then the body should be trimmed for larger files first
+ // (note: output is ordered by descending patch size, so test.txt,
+ // the larger file, comes first and is the one compressed)
+ let limit = patch.len() - 10;
+ let compressed = fit_patch_to_size(patch, limit);
+ let expected = indoc! {"
+ --- a/dir/test.txt
+ +++ b/dir/test.txt
+ @@ -1,3 +1,3 @@
+ [...skipped...]
+ @@ -10,2 +10,2 @@
+ [...skipped...]
+
+
+ --- a/dir/another.txt
+ +++ b/dir/another.txt
+ @@ -100,1 +1,1 @@
+ -before
+ +after"};
+ assert_eq!(compressed, expected);
+
+ // When the size deficit is too large, then only file paths
+ // should be returned
+ // (ascending size order here: the filename list reuses the sorted
+ // per-file patches, so the smaller another.txt is listed first)
+ let limit = 10;
+ let compressed = fit_patch_to_size(patch, limit);
+ let expected = indoc! {"
+ - dir/another.txt
+ - dir/test.txt
+ "};
+ assert_eq!(compressed, expected);
+ }
+
fn init_test(cx: &mut TestAppContext) {
cx.update(|cx| {
let settings_store = SettingsStore::test(cx);