assistant2: Background load of context + prep for refresh + efficiency (#22935)

Michael Sloan created

* Now loads context on background threads.

- For file and directory context, buffer ropes can be shared between
threads as they are immutable. This allows for traversal and
accumulation of buffer text on a background thread.

- For url context, the request, parsing, and rendering is now done on a
background thread.

* Prepares for support of buffer reload by individually storing the text
of directory buffers.

* Avoids some string copying / redundant strings.

- When attaching message context, no longer builds a string for each
context type.

- For directory context, does not build a `SharedString` for the full
text, instead has a slice of `SharedString` chunks which are then
directly appended to the message context.

- Building a fenced codeblock for a buffer now computes a precise
capacity in advance.

Release Notes:

- N/A

Change summary

crates/assistant2/src/context.rs                             | 104 +-
crates/assistant2/src/context_picker/fetch_context_picker.rs |  14 
crates/assistant2/src/context_picker/file_context_picker.rs  |   2 
crates/assistant2/src/context_store.rs                       | 192 ++++-
crates/assistant2/src/context_strip.rs                       |  42 +
5 files changed, 244 insertions(+), 110 deletions(-)

Detailed changes

crates/assistant2/src/context.rs 🔗

@@ -2,7 +2,6 @@ use std::path::Path;
 use std::rc::Rc;
 use std::sync::Arc;
 
-use collections::BTreeMap;
 use gpui::{AppContext, Model, SharedString};
 use language::Buffer;
 use language_model::{LanguageModelRequestMessage, MessageContent};
@@ -29,8 +28,8 @@ pub struct ContextSnapshot {
     pub parent: Option<SharedString>,
     pub tooltip: Option<SharedString>,
     pub kind: ContextKind,
-    /// Text to send to the model. This is not refreshed by `snapshot`.
-    pub text: SharedString,
+    /// Concatenating these strings yields text to send to the model. Not refreshed by `snapshot`.
+    pub text: Box<[SharedString]>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -60,27 +59,18 @@ impl Context {
     }
 }
 
-// TODO: Model<Buffer> holds onto the buffer even if the file is deleted and closed. Should remove
-// the context from the message editor in this case.
-
 #[derive(Debug)]
 pub struct FileContext {
     pub id: ContextId,
-    pub buffer: Model<Buffer>,
-    #[allow(unused)]
-    pub version: clock::Global,
-    pub text: SharedString,
+    pub buffer: ContextBuffer,
 }
 
 #[derive(Debug)]
 pub struct DirectoryContext {
     #[allow(unused)]
     pub path: Rc<Path>,
-    // TODO: The choice to make this a BTreeMap was a result of use in a version of
-    // ContextStore::will_include_buffer before I realized that the path logic should be used there
-    // too.
     #[allow(unused)]
-    pub buffers: BTreeMap<BufferId, (Model<Buffer>, clock::Global)>,
+    pub buffers: Vec<ContextBuffer>,
     pub snapshot: ContextSnapshot,
 }
 
@@ -101,6 +91,19 @@ pub struct ThreadContext {
     pub text: SharedString,
 }
 
+// TODO: Model<Buffer> holds onto the buffer even if the file is deleted and closed. Should remove
+// the context from the message editor in this case.
+
+#[derive(Debug)]
+pub struct ContextBuffer {
+    #[allow(unused)]
+    pub id: BufferId,
+    pub buffer: Model<Buffer>,
+    #[allow(unused)]
+    pub version: clock::Global,
+    pub text: SharedString,
+}
+
 impl Context {
     pub fn snapshot(&self, cx: &AppContext) -> Option<ContextSnapshot> {
         match &self {
@@ -114,7 +117,7 @@ impl Context {
 
 impl FileContext {
     pub fn path(&self, cx: &AppContext) -> Option<Arc<Path>> {
-        let buffer = self.buffer.read(cx);
+        let buffer = self.buffer.buffer.read(cx);
         if let Some(file) = buffer.file() {
             Some(file.path().clone())
         } else {
@@ -141,7 +144,7 @@ impl FileContext {
             parent,
             tooltip: Some(full_path),
             kind: ContextKind::File,
-            text: self.text.clone(),
+            text: Box::new([self.buffer.text.clone()]),
         })
     }
 }
@@ -160,7 +163,7 @@ impl FetchedUrlContext {
             parent: None,
             tooltip: None,
             kind: ContextKind::FetchedUrl,
-            text: self.text.clone(),
+            text: Box::new([self.text.clone()]),
         }
     }
 }
@@ -174,7 +177,7 @@ impl ThreadContext {
             parent: None,
             tooltip: None,
             kind: ContextKind::Thread,
-            text: self.text.clone(),
+            text: Box::new([self.text.clone()]),
         }
     }
 }
@@ -183,55 +186,64 @@ pub fn attach_context_to_message(
     message: &mut LanguageModelRequestMessage,
     contexts: impl Iterator<Item = ContextSnapshot>,
 ) {
-    let mut file_context = String::new();
-    let mut directory_context = String::new();
-    let mut fetch_context = String::new();
-    let mut thread_context = String::new();
+    let mut file_context = Vec::new();
+    let mut directory_context = Vec::new();
+    let mut fetch_context = Vec::new();
+    let mut thread_context = Vec::new();
 
     for context in contexts {
         match context.kind {
-            ContextKind::File => {
-                file_context.push_str(&context.text);
-                file_context.push('\n');
-            }
-            ContextKind::Directory => {
-                directory_context.push_str(&context.text);
-                directory_context.push('\n');
-            }
-            ContextKind::FetchedUrl => {
-                fetch_context.push_str(&context.name);
-                fetch_context.push('\n');
-                fetch_context.push_str(&context.text);
-                fetch_context.push('\n');
-            }
-            ContextKind::Thread { .. } => {
-                thread_context.push_str(&context.name);
-                thread_context.push('\n');
-                thread_context.push_str(&context.text);
-                thread_context.push('\n');
-            }
+            ContextKind::File => file_context.push(context),
+            ContextKind::Directory => directory_context.push(context),
+            ContextKind::FetchedUrl => fetch_context.push(context),
+            ContextKind::Thread => thread_context.push(context),
         }
     }
 
     let mut context_text = String::new();
+
     if !file_context.is_empty() {
         context_text.push_str("The following files are available:\n");
-        context_text.push_str(&file_context);
+        for context in file_context {
+            for chunk in context.text {
+                context_text.push_str(&chunk);
+            }
+            context_text.push('\n');
+        }
     }
 
     if !directory_context.is_empty() {
         context_text.push_str("The following directories are available:\n");
-        context_text.push_str(&directory_context);
+        for context in directory_context {
+            for chunk in context.text {
+                context_text.push_str(&chunk);
+            }
+            context_text.push('\n');
+        }
     }
 
     if !fetch_context.is_empty() {
         context_text.push_str("The following fetched results are available\n");
-        context_text.push_str(&fetch_context);
+        for context in fetch_context {
+            context_text.push_str(&context.name);
+            context_text.push('\n');
+            for chunk in context.text {
+                context_text.push_str(&chunk);
+            }
+            context_text.push('\n');
+        }
     }
 
     if !thread_context.is_empty() {
         context_text.push_str("The following previous conversation threads are available\n");
-        context_text.push_str(&thread_context);
+        for context in thread_context {
+            context_text.push_str(&context.name);
+            context_text.push('\n');
+            for chunk in context.text {
+                context_text.push_str(&chunk);
+            }
+            context_text.push('\n');
+        }
     }
 
     if !context_text.is_empty() {

crates/assistant2/src/context_picker/fetch_context_picker.rs 🔗

@@ -81,13 +81,12 @@ impl FetchContextPickerDelegate {
         }
     }
 
-    async fn build_message(http_client: Arc<HttpClientWithUrl>, url: &str) -> Result<String> {
-        let prefixed_url = if !url.starts_with("https://") && !url.starts_with("http://") {
-            Some(format!("https://{url}"))
+    async fn build_message(http_client: Arc<HttpClientWithUrl>, url: String) -> Result<String> {
+        let url = if !url.starts_with("https://") && !url.starts_with("http://") {
+            format!("https://{url}")
         } else {
-            None
+            url
         };
-        let url = prefixed_url.as_deref().unwrap_or(url);
 
         let mut response = http_client.get(&url, AsyncBody::default(), true).await?;
 
@@ -196,7 +195,10 @@ impl PickerDelegate for FetchContextPickerDelegate {
         let url = self.url.clone();
         let confirm_behavior = self.confirm_behavior;
         cx.spawn(|this, mut cx| async move {
-            let text = Self::build_message(http_client, &url).await?;
+            let text = cx
+                .background_executor()
+                .spawn(Self::build_message(http_client, url.clone()))
+                .await?;
 
             this.update(&mut cx, |this, cx| {
                 this.delegate

crates/assistant2/src/context_picker/file_context_picker.rs 🔗

@@ -201,7 +201,7 @@ impl PickerDelegate for FileContextPickerDelegate {
         let Some(task) = self
             .context_store
             .update(cx, |context_store, cx| {
-                context_store.add_file(project_path, cx)
+                context_store.add_file_from_path(project_path, cx)
             })
             .ok()
         else {

crates/assistant2/src/context_store.rs 🔗

@@ -1,18 +1,18 @@
-use std::fmt::Write as _;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
 use anyhow::{anyhow, bail, Result};
 use collections::{BTreeMap, HashMap};
-use gpui::{AppContext, Model, ModelContext, SharedString, Task, WeakView};
+use gpui::{AppContext, AsyncAppContext, Model, ModelContext, SharedString, Task, WeakView};
 use language::Buffer;
 use project::{ProjectPath, Worktree};
+use rope::Rope;
 use text::BufferId;
 use workspace::Workspace;
 
 use crate::context::{
-    Context, ContextId, ContextKind, ContextSnapshot, DirectoryContext, FetchedUrlContext,
-    FileContext, ThreadContext,
+    Context, ContextBuffer, ContextId, ContextKind, ContextSnapshot, DirectoryContext,
+    FetchedUrlContext, FileContext, ThreadContext,
 };
 use crate::thread::{Thread, ThreadId};
 
@@ -61,12 +61,13 @@ impl ContextStore {
         self.fetched_urls.clear();
     }
 
-    pub fn add_file(
+    pub fn add_file_from_path(
         &mut self,
         project_path: ProjectPath,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<()>> {
         let workspace = self.workspace.clone();
+
         let Some(project) = workspace
             .upgrade()
             .map(|workspace| workspace.read(cx).project().clone())
@@ -79,8 +80,8 @@ impl ContextStore {
                 project.open_buffer(project_path.clone(), cx)
             })?;
 
-            let buffer = open_buffer_task.await?;
-            let buffer_id = buffer.update(&mut cx, |buffer, _cx| buffer.remote_id())?;
+            let buffer_model = open_buffer_task.await?;
+            let buffer_id = this.update(&mut cx, |_, cx| buffer_model.read(cx).remote_id())?;
 
             let already_included = this.update(&mut cx, |this, _cx| {
                 match this.will_include_buffer(buffer_id, &project_path.path) {
@@ -97,30 +98,61 @@ impl ContextStore {
                 return anyhow::Ok(());
             }
 
-            this.update(&mut cx, |this, cx| {
-                this.insert_file(buffer, cx);
+            let (buffer_info, text_task) = this.update(&mut cx, |_, cx| {
+                let buffer = buffer_model.read(cx);
+                collect_buffer_info_and_text(
+                    project_path.path.clone(),
+                    buffer_model,
+                    buffer,
+                    &cx.to_async(),
+                )
+            })?;
+
+            let text = text_task.await;
+
+            this.update(&mut cx, |this, _cx| {
+                this.insert_file(make_context_buffer(buffer_info, text));
             })?;
 
             anyhow::Ok(())
         })
     }
 
-    pub fn insert_file(&mut self, buffer_model: Model<Buffer>, cx: &AppContext) {
-        let buffer = buffer_model.read(cx);
-        let Some(file) = buffer.file() else {
-            return;
-        };
+    pub fn add_file_from_buffer(
+        &mut self,
+        buffer_model: Model<Buffer>,
+        cx: &mut ModelContext<Self>,
+    ) -> Task<Result<()>> {
+        cx.spawn(|this, mut cx| async move {
+            let (buffer_info, text_task) = this.update(&mut cx, |_, cx| {
+                let buffer = buffer_model.read(cx);
+                let Some(file) = buffer.file() else {
+                    return Err(anyhow!("Buffer has no path."));
+                };
+                Ok(collect_buffer_info_and_text(
+                    file.path().clone(),
+                    buffer_model,
+                    buffer,
+                    &cx.to_async(),
+                ))
+            })??;
 
-        let mut text = String::new();
-        push_fenced_codeblock(file.path(), buffer.text(), &mut text);
+            let text = text_task.await;
 
+            this.update(&mut cx, |this, _cx| {
+                this.insert_file(make_context_buffer(buffer_info, text))
+            })?;
+
+            anyhow::Ok(())
+        })
+    }
+
+    pub fn insert_file(&mut self, context_buffer: ContextBuffer) {
         let id = self.next_context_id.post_inc();
-        self.files.insert(buffer.remote_id(), id);
+        self.files.insert(context_buffer.id, id);
         self.context.push(Context::File(FileContext {
             id,
-            buffer: buffer_model,
-            version: buffer.version.clone(),
-            text: text.into(),
+            buffer: context_buffer,
         }));
     }
 
@@ -162,7 +194,7 @@ impl ContextStore {
 
             let open_buffer_tasks = project.update(&mut cx, |project, cx| {
                 files
-                    .into_iter()
+                    .iter()
                     .map(|file_path| {
                         project.open_buffer(
                             ProjectPath {
@@ -177,29 +209,36 @@ impl ContextStore {
 
             let buffers = futures::future::join_all(open_buffer_tasks).await;
 
-            this.update(&mut cx, |this, cx| {
-                let mut text = String::new();
-                let mut directory_buffers = BTreeMap::new();
-                for buffer_model in buffers {
+            let mut buffer_infos = Vec::new();
+            let mut text_tasks = Vec::new();
+            this.update(&mut cx, |_, cx| {
+                for (path, buffer_model) in files.into_iter().zip(buffers) {
                     let buffer_model = buffer_model?;
                     let buffer = buffer_model.read(cx);
-                    let path = buffer.file().map_or(&project_path.path, |file| file.path());
-                    push_fenced_codeblock(&path, buffer.text(), &mut text);
-                    directory_buffers
-                        .insert(buffer.remote_id(), (buffer_model, buffer.version.clone()));
+                    let (buffer_info, text_task) =
+                        collect_buffer_info_and_text(path, buffer_model, buffer, &cx.to_async());
+                    buffer_infos.push(buffer_info);
+                    text_tasks.push(text_task);
                 }
+                anyhow::Ok(())
+            })??;
 
-                if directory_buffers.is_empty() {
-                    bail!(
-                        "could not read any text files from {}",
-                        &project_path.path.display()
-                    );
-                }
+            let buffer_texts = futures::future::join_all(text_tasks).await;
+            let directory_buffers = buffer_infos
+                .into_iter()
+                .zip(buffer_texts.iter())
+                .map(|(info, text)| make_context_buffer(info, text.clone()))
+                .collect::<Vec<_>>();
 
-                this.insert_directory(&project_path.path, directory_buffers, text);
+            if directory_buffers.is_empty() {
+                bail!("No text files found in {}", &project_path.path.display());
+            }
 
-                anyhow::Ok(())
-            })??;
+            // TODO: include directory path in text?
+
+            this.update(&mut cx, |this, _| {
+                this.insert_directory(&project_path.path, directory_buffers, buffer_texts.into());
+            })?;
 
             anyhow::Ok(())
         })
@@ -208,8 +247,8 @@ impl ContextStore {
     pub fn insert_directory(
         &mut self,
         path: &Path,
-        buffers: BTreeMap<BufferId, (Model<Buffer>, clock::Global)>,
-        text: impl Into<SharedString>,
+        buffers: Vec<ContextBuffer>,
+        text: Box<[SharedString]>,
     ) {
         let id = self.next_context_id.post_inc();
         self.directories.insert(path.to_path_buf(), id);
@@ -235,7 +274,7 @@ impl ContextStore {
                 parent,
                 tooltip: Some(full_path),
                 kind: ContextKind::Directory,
-                text: text.into(),
+                text,
             },
         }));
     }
@@ -357,25 +396,80 @@ pub enum FileInclusion {
     InDirectory(PathBuf),
 }
 
-pub(crate) fn push_fenced_codeblock(path: &Path, content: String, buffer: &mut String) {
-    buffer.reserve(content.len() + 64);
-
-    write!(buffer, "```").unwrap();
+// ContextBuffer without text.
+struct BufferInfo {
+    buffer_model: Model<Buffer>,
+    id: BufferId,
+    version: clock::Global,
+}
 
-    if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
-        write!(buffer, "{} ", extension).unwrap();
+fn make_context_buffer(info: BufferInfo, text: SharedString) -> ContextBuffer {
+    ContextBuffer {
+        id: info.id,
+        buffer: info.buffer_model,
+        version: info.version,
+        text,
     }
+}
 
-    write!(buffer, "{}", path.display()).unwrap();
+fn collect_buffer_info_and_text(
+    path: Arc<Path>,
+    buffer_model: Model<Buffer>,
+    buffer: &Buffer,
+    cx: &AsyncAppContext,
+) -> (BufferInfo, Task<SharedString>) {
+    let buffer_info = BufferInfo {
+        id: buffer.remote_id(),
+        buffer_model,
+        version: buffer.version(),
+    };
+    // Important to collect version at the same time as content so that staleness logic is correct.
+    let content = buffer.as_rope().clone();
+    let text_task = cx
+        .background_executor()
+        .spawn(async move { to_fenced_codeblock(&path, content) });
+    (buffer_info, text_task)
+}
+
+fn to_fenced_codeblock(path: &Path, content: Rope) -> SharedString {
+    let path_extension = path.extension().and_then(|ext| ext.to_str());
+    let path_string = path.to_string_lossy();
+    let capacity = 3
+        + path_extension.map_or(0, |extension| extension.len() + 1)
+        + path_string.len()
+        + 1
+        + content.len()
+        + 5;
+    let mut buffer = String::with_capacity(capacity);
+
+    buffer.push_str("```");
+
+    if let Some(extension) = path_extension {
+        buffer.push_str(extension);
+        buffer.push(' ');
+    }
+    buffer.push_str(&path_string);
 
     buffer.push('\n');
-    buffer.push_str(&content);
+    for chunk in content.chunks() {
+        buffer.push_str(&chunk);
+    }
 
     if !buffer.ends_with('\n') {
         buffer.push('\n');
     }
 
     buffer.push_str("```\n");
+
+    if buffer.len() > capacity {
+        log::error!(
+            "to_fenced_codeblock calculated capacity {} but length was {}",
+            capacity,
+            buffer.len()
+        );
+    }
+
+    buffer.into()
 }
 
 fn collect_files_in_path(worktree: &Worktree, path: &Path) -> Vec<Arc<Path>> {

crates/assistant2/src/context_strip.rs 🔗

@@ -1,10 +1,11 @@
 use std::rc::Rc;
 
+use anyhow::Result;
 use collections::HashSet;
 use editor::Editor;
 use gpui::{
-    AppContext, DismissEvent, EventEmitter, FocusHandle, Model, Subscription, View, WeakModel,
-    WeakView,
+    DismissEvent, EventEmitter, FocusHandle, Model, ModelContext, Subscription, Task, View,
+    WeakModel, WeakView,
 };
 use itertools::Itertools;
 use language::Buffer;
@@ -230,12 +231,32 @@ impl Render for ContextStrip {
                     suggested.kind(),
                     {
                         let context_store = self.context_store.clone();
-                        Rc::new(cx.listener(move |_this, _event, cx| {
-                            context_store.update(cx, |context_store, cx| {
-                                suggested.accept(context_store, cx);
+                        Rc::new(cx.listener(move |this, _event, cx| {
+                            let task = context_store.update(cx, |context_store, cx| {
+                                suggested.accept(context_store, cx)
                             });
 
-                            cx.notify();
+                            let workspace = this.workspace.clone();
+                            cx.spawn(|this, mut cx| async move {
+                                match task.await {
+                                    Ok(()) => {
+                                        if let Some(this) = this.upgrade() {
+                                            this.update(&mut cx, |_, cx| cx.notify())?;
+                                        }
+                                    }
+                                    Err(err) => {
+                                        let Some(workspace) = workspace.upgrade() else {
+                                            return anyhow::Ok(());
+                                        };
+
+                                        workspace.update(&mut cx, |workspace, cx| {
+                                            workspace.show_error(&err, cx);
+                                        })?;
+                                    }
+                                }
+                                anyhow::Ok(())
+                            })
+                            .detach_and_log_err(cx);
                         }))
                     },
                 ))
@@ -299,11 +320,15 @@ impl SuggestedContext {
         }
     }
 
-    pub fn accept(&self, context_store: &mut ContextStore, cx: &mut AppContext) {
+    pub fn accept(
+        &self,
+        context_store: &mut ContextStore,
+        cx: &mut ModelContext<ContextStore>,
+    ) -> Task<Result<()>> {
         match self {
             Self::File { buffer, name: _ } => {
                 if let Some(buffer) = buffer.upgrade() {
-                    context_store.insert_file(buffer, cx);
+                    return context_store.add_file_from_buffer(buffer, cx);
                 };
             }
             Self::Thread { thread, name: _ } => {
@@ -312,6 +337,7 @@ impl SuggestedContext {
                 };
             }
         }
+        Task::ready(Ok(()))
     }
 
     pub fn kind(&self) -> ContextKind {