assistant2: Add ability to fetch URLs as context (#21988)

Marshall Bowers created

This PR adds the ability to fetch URLs as context in Assistant2.

In the picker, the search area doubles as the input where the user enters
the URL they wish to fetch:

<img width="1159" alt="Screenshot 2024-12-13 at 2 45 41 PM"
src="https://github.com/user-attachments/assets/b3b20648-2c22-4509-b592-d0291d25b202"
/>

<img width="1159" alt="Screenshot 2024-12-13 at 2 45 47 PM"
src="https://github.com/user-attachments/assets/7e6bab2d-2731-467f-9781-130c6e4ea5cf"
/>

Release Notes:

- N/A

Change summary

Cargo.lock                                                   |   2 
crates/assistant2/Cargo.toml                                 |   2 
crates/assistant2/src/context.rs                             |   1 
crates/assistant2/src/context_picker.rs                      |  30 
crates/assistant2/src/context_picker/fetch_context_picker.rs | 218 ++++++
crates/assistant2/src/context_picker/file_context_picker.rs  |   2 
crates/assistant2/src/thread.rs                              |  14 
7 files changed, 261 insertions(+), 8 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -472,6 +472,8 @@ dependencies = [
  "fuzzy",
  "gpui",
  "handlebars 4.5.0",
+ "html_to_markdown",
+ "http_client",
  "indoc",
  "language",
  "language_model",

crates/assistant2/Cargo.toml 🔗

@@ -31,6 +31,8 @@ futures.workspace = true
 fuzzy.workspace = true
 gpui.workspace = true
 handlebars.workspace = true
+html_to_markdown.workspace = true
+http_client.workspace = true
 language.workspace = true
 language_model.workspace = true
 language_model_selector.workspace = true

crates/assistant2/src/context_picker.rs 🔗

@@ -1,3 +1,4 @@
+mod fetch_context_picker;
 mod file_context_picker;
 
 use std::sync::Arc;
@@ -11,6 +12,7 @@ use ui::{prelude::*, ListItem, ListItemSpacing, Tooltip};
 use util::ResultExt;
 use workspace::Workspace;
 
+use crate::context_picker::fetch_context_picker::FetchContextPicker;
 use crate::context_picker::file_context_picker::FileContextPicker;
 use crate::message_editor::MessageEditor;
 
@@ -18,6 +20,7 @@ use crate::message_editor::MessageEditor;
 enum ContextPickerMode {
     Default,
     File(View<FileContextPicker>),
+    Fetch(View<FetchContextPicker>),
 }
 
 pub(super) struct ContextPicker {
@@ -47,7 +50,7 @@ impl ContextPicker {
                     icon: IconName::File,
                 },
                 ContextPickerEntry {
-                    name: "web".into(),
+                    name: "fetch".into(),
                     description: "Fetch content from URL".into(),
                     icon: IconName::Globe,
                 },
@@ -77,16 +80,21 @@ impl FocusableView for ContextPicker {
         match &self.mode {
             ContextPickerMode::Default => self.picker.focus_handle(cx),
             ContextPickerMode::File(file_picker) => file_picker.focus_handle(cx),
+            ContextPickerMode::Fetch(fetch_picker) => fetch_picker.focus_handle(cx),
         }
     }
 }
 
 impl Render for ContextPicker {
     fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
-        v_flex().min_w(px(400.)).map(|parent| match &self.mode {
-            ContextPickerMode::Default => parent.child(self.picker.clone()),
-            ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()),
-        })
+        v_flex()
+            .w(px(400.))
+            .min_w(px(400.))
+            .map(|parent| match &self.mode {
+                ContextPickerMode::Default => parent.child(self.picker.clone()),
+                ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()),
+                ContextPickerMode::Fetch(fetch_picker) => parent.child(fetch_picker.clone()),
+            })
     }
 }
 
@@ -144,6 +152,16 @@ impl PickerDelegate for ContextPickerDelegate {
                                 )
                             }));
                         }
+                        "fetch" => {
+                            this.mode = ContextPickerMode::Fetch(cx.new_view(|cx| {
+                                FetchContextPicker::new(
+                                    self.context_picker.clone(),
+                                    self.workspace.clone(),
+                                    self.message_editor.clone(),
+                                    cx,
+                                )
+                            }));
+                        }
                         _ => {}
                     }
 
@@ -157,7 +175,7 @@ impl PickerDelegate for ContextPickerDelegate {
         self.context_picker
             .update(cx, |this, cx| match this.mode {
                 ContextPickerMode::Default => cx.emit(DismissEvent),
-                ContextPickerMode::File(_) => {}
+                ContextPickerMode::File(_) | ContextPickerMode::Fetch(_) => {}
             })
             .log_err();
     }

crates/assistant2/src/context_picker/fetch_context_picker.rs 🔗

@@ -0,0 +1,218 @@
+use std::cell::RefCell;
+use std::rc::Rc;
+use std::sync::Arc;
+
+use anyhow::{bail, Context as _, Result};
+use futures::AsyncReadExt as _;
+use gpui::{AppContext, DismissEvent, FocusHandle, FocusableView, Task, View, WeakView};
+use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler};
+use http_client::{AsyncBody, HttpClientWithUrl};
+use picker::{Picker, PickerDelegate};
+use ui::{prelude::*, ListItem, ListItemSpacing, ViewContext};
+use workspace::Workspace;
+
+use crate::context::ContextKind;
+use crate::context_picker::ContextPicker;
+use crate::message_editor::MessageEditor;
+
+/// View that hosts the picker in which the user enters a URL to fetch as context.
+pub struct FetchContextPicker {
+    /// Picker driven by a [`FetchContextPickerDelegate`]; rendering and focus are forwarded to it.
+    picker: View<Picker<FetchContextPickerDelegate>>,
+}
+
+impl FetchContextPicker {
+    /// Creates the fetch picker view.
+    ///
+    /// The three weak handles are handed to a new [`FetchContextPickerDelegate`], which is
+    /// wrapped in a uniform-list [`Picker`] owned by this view.
+    pub fn new(
+        context_picker: WeakView<ContextPicker>,
+        workspace: WeakView<Workspace>,
+        message_editor: WeakView<MessageEditor>,
+        cx: &mut ViewContext<Self>,
+    ) -> Self {
+        Self {
+            picker: cx.new_view(|cx| {
+                Picker::uniform_list(
+                    FetchContextPickerDelegate::new(context_picker, workspace, message_editor),
+                    cx,
+                )
+            }),
+        }
+    }
+}
+
+impl FocusableView for FetchContextPicker {
+    /// Returns the focus handle of the inner picker, so focusing this view focuses the picker.
+    fn focus_handle(&self, cx: &AppContext) -> FocusHandle {
+        self.picker.focus_handle(cx)
+    }
+}
+
+impl Render for FetchContextPicker {
+    /// Renders the inner picker directly; this view adds no chrome of its own.
+    fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
+        self.picker.clone()
+    }
+}
+
+/// How a fetched response body is interpreted before it is inserted as context.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+enum ContentType {
+    /// The body is converted from HTML to Markdown.
+    Html,
+    /// The body is used as-is, as UTF-8 text.
+    Plaintext,
+    /// The body is parsed as JSON and pretty-printed inside a fenced `json` code block.
+    Json,
+}
+
+/// Picker delegate that treats the picker's query text as the URL to fetch.
+pub struct FetchContextPickerDelegate {
+    /// Parent context picker; its mode is reset and it is dismissed when this picker is dismissed.
+    context_picker: WeakView<ContextPicker>,
+    /// Workspace whose client provides the HTTP client used for the fetch.
+    workspace: WeakView<Workspace>,
+    /// Message editor that receives the fetched content as context.
+    message_editor: WeakView<MessageEditor>,
+    /// Most recent query text entered by the user.
+    url: String,
+}
+
+impl FetchContextPickerDelegate {
+    /// Creates a delegate with an empty URL; the URL is filled in as the user types.
+    pub fn new(
+        context_picker: WeakView<ContextPicker>,
+        workspace: WeakView<Workspace>,
+        message_editor: WeakView<MessageEditor>,
+    ) -> Self {
+        FetchContextPickerDelegate {
+            context_picker,
+            workspace,
+            message_editor,
+            url: String::new(),
+        }
+    }
+
+    /// Maps a raw `Content-Type` header value onto the [`ContentType`] used to render the body.
+    ///
+    /// Only the media type is considered: parameters such as `; charset=utf-8` are ignored and
+    /// the comparison is ASCII case-insensitive. Unrecognized media types fall back to HTML.
+    fn parse_content_type(header_value: &str) -> ContentType {
+        let media_type = header_value
+            .split(';')
+            .next()
+            .unwrap_or(header_value)
+            .trim();
+
+        if media_type.eq_ignore_ascii_case("text/plain") {
+            ContentType::Plaintext
+        } else if media_type.eq_ignore_ascii_case("application/json") {
+            ContentType::Json
+        } else {
+            ContentType::Html
+        }
+    }
+
+    /// Fetches `url` and converts the response body into text suitable for use as context.
+    ///
+    /// Surrounding whitespace is trimmed from `url`, and `https://` is prepended when it has
+    /// no `http://` or `https://` scheme. HTML is converted to Markdown, plain text is returned
+    /// as-is, and JSON is pretty-printed inside a fenced code block.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the request or body read fails, when the server answers with a
+    /// 4xx or 5xx status, when the `Content-Type` header is missing or not valid text, or when
+    /// the body cannot be decoded for the detected content type.
+    async fn build_message(http_client: Arc<HttpClientWithUrl>, url: &str) -> Result<String> {
+        // The URL comes straight from the picker query, so tolerate pasted whitespace.
+        let url = url.trim();
+        let url = if url.starts_with("https://") || url.starts_with("http://") {
+            url.to_owned()
+        } else {
+            format!("https://{url}")
+        };
+
+        let mut response = http_client.get(&url, AsyncBody::default(), true).await?;
+
+        let mut body = Vec::new();
+        response
+            .body_mut()
+            .read_to_end(&mut body)
+            .await
+            .context("error reading response body")?;
+
+        // Server errors are rejected as well as client errors, so that an error page is never
+        // inserted as if it were the requested content.
+        let status = response.status();
+        if status.is_client_error() || status.is_server_error() {
+            let text = String::from_utf8_lossy(body.as_slice());
+            bail!("status error {}, response: {text:?}", status.as_u16());
+        }
+
+        let Some(content_type) = response.headers().get("content-type") else {
+            bail!("missing Content-Type header");
+        };
+        let content_type = content_type
+            .to_str()
+            .context("invalid Content-Type header")?;
+        let content_type = Self::parse_content_type(content_type);
+
+        match content_type {
+            ContentType::Html => {
+                let mut handlers: Vec<TagHandler> = vec![
+                    Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
+                    Rc::new(RefCell::new(markdown::ParagraphHandler)),
+                    Rc::new(RefCell::new(markdown::HeadingHandler)),
+                    Rc::new(RefCell::new(markdown::ListHandler)),
+                    Rc::new(RefCell::new(markdown::TableHandler::new())),
+                    Rc::new(RefCell::new(markdown::StyledTextHandler)),
+                ];
+                // Wikipedia pages get dedicated handlers in place of the generic code handler.
+                if url.contains("wikipedia.org") {
+                    use html_to_markdown::structure::wikipedia;
+
+                    handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
+                    handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
+                    handlers.push(Rc::new(
+                        RefCell::new(wikipedia::WikipediaCodeHandler::new()),
+                    ));
+                } else {
+                    handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
+                }
+
+                convert_html_to_markdown(&body[..], &mut handlers)
+            }
+            ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
+            ContentType::Json => {
+                let json: serde_json::Value = serde_json::from_slice(&body)?;
+
+                Ok(format!(
+                    "```json\n{}\n```",
+                    serde_json::to_string_pretty(&json)?
+                ))
+            }
+        }
+    }
+}
+
+impl PickerDelegate for FetchContextPickerDelegate {
+    type ListItem = ListItem;
+
+    /// The list always has exactly one row: the URL currently being typed.
+    fn match_count(&self) -> usize {
+        1
+    }
+
+    /// The single row is always the selected one.
+    fn selected_index(&self) -> usize {
+        0
+    }
+
+    /// Selection cannot move, so this is a no-op.
+    fn set_selected_index(&mut self, _ix: usize, _cx: &mut ViewContext<Picker<Self>>) {}
+
+    fn placeholder_text(&self, _cx: &mut ui::WindowContext) -> Arc<str> {
+        "Enter a URL…".into()
+    }
+
+    /// Stores the query as the URL to fetch; no matching work is performed.
+    fn update_matches(&mut self, query: String, _cx: &mut ViewContext<Picker<Self>>) -> Task<()> {
+        self.url = query;
+
+        Task::ready(())
+    }
+
+    /// Fetches the entered URL in the background and inserts the result into the message
+    /// editor as `ContextKind::FetchedUrl` context. Failures are logged rather than surfaced.
+    fn confirm(&mut self, _secondary: bool, cx: &mut ViewContext<Picker<Self>>) {
+        // Nothing to fetch until the user has typed a URL; skip the request that would
+        // otherwise be issued against a bare `https://`.
+        if self.url.trim().is_empty() {
+            return;
+        }
+
+        let Some(workspace) = self.workspace.upgrade() else {
+            return;
+        };
+
+        let http_client = workspace.read(cx).client().http_client().clone();
+        let url = self.url.clone();
+        cx.spawn(|this, mut cx| async move {
+            let text = Self::build_message(http_client, &url).await?;
+
+            // The outer `?` covers the picker having been dropped, the inner one the editor.
+            this.update(&mut cx, |this, cx| {
+                this.delegate
+                    .message_editor
+                    .update(cx, |message_editor, _cx| {
+                        message_editor.insert_context(ContextKind::FetchedUrl, url, text);
+                    })
+            })??;
+
+            anyhow::Ok(())
+        })
+        .detach_and_log_err(cx);
+    }
+
+    /// Resets the parent context picker to its default mode and dismisses it.
+    fn dismissed(&mut self, cx: &mut ViewContext<Picker<Self>>) {
+        self.context_picker
+            .update(cx, |this, cx| {
+                this.reset_mode();
+                cx.emit(DismissEvent);
+            })
+            .ok();
+    }
+
+    /// Renders the single row, which echoes the URL typed so far.
+    fn render_match(
+        &self,
+        ix: usize,
+        selected: bool,
+        _cx: &mut ViewContext<Picker<Self>>,
+    ) -> Option<Self::ListItem> {
+        Some(
+            ListItem::new(ix)
+                .inset(true)
+                .spacing(ListItemSpacing::Sparse)
+                .toggle_state(selected)
+                .child(self.url.clone()),
+        )
+    }
+}

crates/assistant2/src/thread.rs 🔗

@@ -193,12 +193,19 @@ impl Thread {
 
             if let Some(context) = self.context_for_message(message.id) {
                 let mut file_context = String::new();
+                let mut fetch_context = String::new();
 
                 for context in context.iter() {
                     match context.kind {
                         ContextKind::File => {
                             file_context.push_str(&context.text);
-                            file_context.push_str("\n");
+                            file_context.push('\n');
+                        }
+                        ContextKind::FetchedUrl => {
+                            fetch_context.push_str(&context.name);
+                            fetch_context.push('\n');
+                            fetch_context.push_str(&context.text);
+                            fetch_context.push('\n');
                         }
                     }
                 }
@@ -209,6 +216,11 @@ impl Thread {
                     context_text.push_str(&file_context);
                 }
 
+                if !fetch_context.is_empty() {
+                    context_text.push_str("The following fetched results are available\n");
+                    context_text.push_str(&fetch_context);
+                }
+
                 request_message
                     .content
                     .push(MessageContent::Text(context_text))