From c57cc35b030407a6e4e9f9cd990135dcdcf36e9e Mon Sep 17 00:00:00 2001
From: Marshall Bowers
Date: Fri, 13 Dec 2024 15:03:55 -0500
Subject: [PATCH] assistant2: Add ability to fetch URLs as context (#21988)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds the ability to fetch URLs as context in Assistant2.

In the picker we use the search area as an input for the user to enter
the URL they wish to fetch:

Screenshot 2024-12-13 at 2 45 41 PM

Screenshot 2024-12-13 at 2 45 47 PM

Release Notes:

- N/A
---
 Cargo.lock                                    |   2 +
 crates/assistant2/Cargo.toml                  |   2 +
 crates/assistant2/src/context.rs              |   1 +
 crates/assistant2/src/context_picker.rs       |  30 ++-
 .../context_picker/fetch_context_picker.rs    | 249 ++++++++++++++++++
 .../src/context_picker/file_context_picker.rs |   2 +-
 crates/assistant2/src/thread.rs               |  14 +-
 7 files changed, 292 insertions(+), 8 deletions(-)
 create mode 100644 crates/assistant2/src/context_picker/fetch_context_picker.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0ee25ccb5f0a461be62afc753548067b5fde724d..e7e9449bdfd0d6d3affc648f9e320f8a7d7021a0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -472,6 +472,8 @@ dependencies = [
  "fuzzy",
  "gpui",
  "handlebars 4.5.0",
+ "html_to_markdown",
+ "http_client",
  "indoc",
  "language",
  "language_model",
diff --git a/crates/assistant2/Cargo.toml b/crates/assistant2/Cargo.toml
index 3da2c7faee7c6873d6cd570e1722bf8832acd13a..c67674b437891300dad6c806da85271c6da424d7 100644
--- a/crates/assistant2/Cargo.toml
+++ b/crates/assistant2/Cargo.toml
@@ -31,6 +31,8 @@ futures.workspace = true
 fuzzy.workspace = true
 gpui.workspace = true
 handlebars.workspace = true
+html_to_markdown.workspace = true
+http_client.workspace = true
 language.workspace = true
 language_model.workspace = true
 language_model_selector.workspace = true
diff --git a/crates/assistant2/src/context.rs b/crates/assistant2/src/context.rs
index 9d095a10d87e125887d32b6d05dfdfdf18f61852..577d87166ff6861c8024ed904615e417b4f6641b 100644
--- a/crates/assistant2/src/context.rs
+++ b/crates/assistant2/src/context.rs
@@ -23,4 +23,5 @@ pub struct Context {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum ContextKind {
     File,
+    FetchedUrl,
 }
diff --git a/crates/assistant2/src/context_picker.rs b/crates/assistant2/src/context_picker.rs
index 1e4d007e761dd3dd73af86945284b8d17a128483..f78e617a340a5a55cf94e7d2e0284a2021ef8de2 100644
--- a/crates/assistant2/src/context_picker.rs
+++ b/crates/assistant2/src/context_picker.rs
@@ -1,3 +1,4 @@
+mod fetch_context_picker;
 mod file_context_picker;
 
 use std::sync::Arc;
@@ -11,6 +12,7 @@ use ui::{prelude::*, ListItem, ListItemSpacing, Tooltip};
 use util::ResultExt;
 use workspace::Workspace;
 
+use crate::context_picker::fetch_context_picker::FetchContextPicker;
 use crate::context_picker::file_context_picker::FileContextPicker;
 use crate::message_editor::MessageEditor;
 
@@ -18,6 +20,7 @@ use crate::message_editor::MessageEditor;
 enum ContextPickerMode {
     Default,
     File(View<FileContextPicker>),
+    Fetch(View<FetchContextPicker>),
 }
 
 pub(super) struct ContextPicker {
@@ -47,7 +50,7 @@ impl ContextPicker {
                     icon: IconName::File,
                 },
                 ContextPickerEntry {
-                    name: "web".into(),
+                    name: "fetch".into(),
                     description: "Fetch content from URL".into(),
                     icon: IconName::Globe,
                 },
@@ -77,16 +80,21 @@ impl FocusableView for ContextPicker {
         match &self.mode {
             ContextPickerMode::Default => self.picker.focus_handle(cx),
             ContextPickerMode::File(file_picker) => file_picker.focus_handle(cx),
+            ContextPickerMode::Fetch(fetch_picker) => fetch_picker.focus_handle(cx),
         }
     }
 }
 
 impl Render for ContextPicker {
     fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
-        v_flex().min_w(px(400.)).map(|parent| match &self.mode {
-            ContextPickerMode::Default => parent.child(self.picker.clone()),
-            ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()),
-        })
+        v_flex()
+            .w(px(400.))
+            .min_w(px(400.))
+            .map(|parent| match &self.mode {
+                ContextPickerMode::Default => parent.child(self.picker.clone()),
+                ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()),
+                ContextPickerMode::Fetch(fetch_picker) => parent.child(fetch_picker.clone()),
+            })
     }
 }
 
@@ -144,6 +152,16 @@ impl PickerDelegate for ContextPickerDelegate {
                                 )
                             }));
                         }
+                        "fetch" => {
+                            this.mode = ContextPickerMode::Fetch(cx.new_view(|cx| {
+                                FetchContextPicker::new(
+                                    self.context_picker.clone(),
+                                    self.workspace.clone(),
+                                    self.message_editor.clone(),
+                                    cx,
+                                )
+                            }));
+                        }
                         _ => {}
                     }
 
@@ -157,7 +175,7 @@ impl PickerDelegate for ContextPickerDelegate {
         self.context_picker
             .update(cx, |this, cx| match this.mode {
                 ContextPickerMode::Default => cx.emit(DismissEvent),
-                ContextPickerMode::File(_) => {}
+                ContextPickerMode::File(_) | ContextPickerMode::Fetch(_) => {}
             })
             .log_err();
     }
diff --git a/crates/assistant2/src/context_picker/fetch_context_picker.rs b/crates/assistant2/src/context_picker/fetch_context_picker.rs
new file mode 100644
index 0000000000000000000000000000000000000000..9545d546eb061cb544f3caa931ff20682b09fa61
--- /dev/null
+++ b/crates/assistant2/src/context_picker/fetch_context_picker.rs
@@ -0,0 +1,249 @@
+use std::cell::RefCell;
+use std::rc::Rc;
+use std::sync::Arc;
+
+use anyhow::{bail, Context as _, Result};
+use futures::AsyncReadExt as _;
+use gpui::{AppContext, DismissEvent, FocusHandle, FocusableView, Task, View, WeakView};
+use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler};
+use http_client::{AsyncBody, HttpClientWithUrl};
+use picker::{Picker, PickerDelegate};
+use ui::{prelude::*, ListItem, ListItemSpacing, ViewContext};
+use workspace::Workspace;
+
+use crate::context::ContextKind;
+use crate::context_picker::ContextPicker;
+use crate::message_editor::MessageEditor;
+
+/// A picker whose search field is used as a URL input. Confirming fetches the URL
+/// and inserts the result into the message editor as context.
+pub struct FetchContextPicker {
+    picker: View<Picker<FetchContextPickerDelegate>>,
+}
+
+impl FetchContextPicker {
+    pub fn new(
+        context_picker: WeakView<ContextPicker>,
+        workspace: WeakView<Workspace>,
+        message_editor: WeakView<MessageEditor>,
+        cx: &mut ViewContext<Self>,
+    ) -> Self {
+        let delegate = FetchContextPickerDelegate::new(context_picker, workspace, message_editor);
+        let picker = cx.new_view(|cx| Picker::uniform_list(delegate, cx));
+
+        Self { picker }
+    }
+}
+
+impl FocusableView for FetchContextPicker {
+    fn focus_handle(&self, cx: &AppContext) -> FocusHandle {
+        self.picker.focus_handle(cx)
+    }
+}
+
+impl Render for FetchContextPicker {
+    fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
+        self.picker.clone()
+    }
+}
+
+/// The kinds of response body that `build_message` knows how to turn into text.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+enum ContentType {
+    Html,
+    Plaintext,
+    Json,
+}
+
+impl ContentType {
+    /// Classifies the value of a `Content-Type` header.
+    ///
+    /// Only the media type is compared: parameters such as `; charset=utf-8` are
+    /// dropped and letter case is ignored. Unrecognized types are treated as HTML.
+    fn from_header(value: &str) -> Self {
+        let media_type = value.split(';').next().unwrap_or_default().trim();
+        if media_type.eq_ignore_ascii_case("text/plain") {
+            ContentType::Plaintext
+        } else if media_type.eq_ignore_ascii_case("application/json") {
+            ContentType::Json
+        } else {
+            ContentType::Html
+        }
+    }
+}
+
+pub struct FetchContextPickerDelegate {
+    context_picker: WeakView<ContextPicker>,
+    workspace: WeakView<Workspace>,
+    message_editor: WeakView<MessageEditor>,
+    url: String,
+}
+
+impl FetchContextPickerDelegate {
+    pub fn new(
+        context_picker: WeakView<ContextPicker>,
+        workspace: WeakView<Workspace>,
+        message_editor: WeakView<MessageEditor>,
+    ) -> Self {
+        FetchContextPickerDelegate {
+            context_picker,
+            workspace,
+            message_editor,
+            url: String::new(),
+        }
+    }
+
+    /// Fetches `url` and converts the response body into text to use as context.
+    ///
+    /// `https://` is prepended when `url` has neither an `http://` nor an `https://`
+    /// prefix. HTML is converted to Markdown, JSON is pretty-printed inside a fenced
+    /// code block, and plain text is returned as-is.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the request or reading the body fails, if the response has a 4xx or
+    /// 5xx status, if the `Content-Type` header is missing or unreadable, or if the
+    /// body cannot be decoded as the advertised type.
+    async fn build_message(http_client: Arc<HttpClientWithUrl>, url: &str) -> Result<String> {
+        let mut url = url.to_owned();
+        if !url.starts_with("https://") && !url.starts_with("http://") {
+            url = format!("https://{url}");
+        }
+
+        let mut response = http_client.get(&url, AsyncBody::default(), true).await?;
+
+        let mut body = Vec::new();
+        response
+            .body_mut()
+            .read_to_end(&mut body)
+            .await
+            .context("error reading response body")?;
+
+        // A 5xx body is an error page, not the requested content, so reject it
+        // just like a 4xx instead of inserting it as context.
+        let status = response.status();
+        if status.is_client_error() || status.is_server_error() {
+            let text = String::from_utf8_lossy(body.as_slice());
+            bail!("status error {}, response: {text:?}", status.as_u16());
+        }
+
+        let Some(content_type) = response.headers().get("content-type") else {
+            bail!("missing Content-Type header");
+        };
+        let content_type = content_type
+            .to_str()
+            .context("invalid Content-Type header")?;
+        let content_type = ContentType::from_header(content_type);
+
+        match content_type {
+            ContentType::Html => {
+                let mut handlers: Vec<TagHandler> = vec![
+                    Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
+                    Rc::new(RefCell::new(markdown::ParagraphHandler)),
+                    Rc::new(RefCell::new(markdown::HeadingHandler)),
+                    Rc::new(RefCell::new(markdown::ListHandler)),
+                    Rc::new(RefCell::new(markdown::TableHandler::new())),
+                    Rc::new(RefCell::new(markdown::StyledTextHandler)),
+                ];
+                if url.contains("wikipedia.org") {
+                    use html_to_markdown::structure::wikipedia;
+
+                    handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
+                    handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
+                    handlers.push(Rc::new(
+                        RefCell::new(wikipedia::WikipediaCodeHandler::new()),
+                    ));
+                } else {
+                    handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
+                }
+
+                convert_html_to_markdown(&body[..], &mut handlers)
+            }
+            ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
+            ContentType::Json => {
+                let json: serde_json::Value = serde_json::from_slice(&body)?;
+
+                Ok(format!(
+                    "```json\n{}\n```",
+                    serde_json::to_string_pretty(&json)?
+                ))
+            }
+        }
+    }
+}
+
+impl PickerDelegate for FetchContextPickerDelegate {
+    type ListItem = ListItem;
+
+    fn match_count(&self) -> usize {
+        1
+    }
+
+    fn selected_index(&self) -> usize {
+        0
+    }
+
+    fn set_selected_index(&mut self, _ix: usize, _cx: &mut ViewContext<Picker<Self>>) {}
+
+    fn placeholder_text(&self, _cx: &mut ui::WindowContext) -> Arc<str> {
+        "Enter a URL…".into()
+    }
+
+    fn update_matches(&mut self, query: String, _cx: &mut ViewContext<Picker<Self>>) -> Task<()> {
+        self.url = query;
+
+        Task::ready(())
+    }
+
+    fn confirm(&mut self, _secondary: bool, cx: &mut ViewContext<Picker<Self>>) {
+        let Some(workspace) = self.workspace.upgrade() else {
+            return;
+        };
+
+        // Nothing to fetch until a URL has been entered.
+        let url = self.url.trim().to_owned();
+        if url.is_empty() {
+            return;
+        }
+
+        let http_client = workspace.read(cx).client().http_client().clone();
+        cx.spawn(|this, mut cx| async move {
+            let text = Self::build_message(http_client, &url).await?;
+
+            this.update(&mut cx, |this, cx| {
+                this.delegate
+                    .message_editor
+                    .update(cx, |message_editor, _cx| {
+                        message_editor.insert_context(ContextKind::FetchedUrl, url, text);
+                    })
+            })??;
+
+            anyhow::Ok(())
+        })
+        .detach_and_log_err(cx);
+    }
+
+    fn dismissed(&mut self, cx: &mut ViewContext<Picker<Self>>) {
+        self.context_picker
+            .update(cx, |this, cx| {
+                this.reset_mode();
+                cx.emit(DismissEvent);
+            })
+            .ok();
+    }
+
+    fn render_match(
+        &self,
+        ix: usize,
+        selected: bool,
+        _cx: &mut ViewContext<Picker<Self>>,
+    ) -> Option<Self::ListItem> {
+        Some(
+            ListItem::new(ix)
+                .inset(true)
+                .spacing(ListItemSpacing::Sparse)
+                .toggle_state(selected)
+                .child(self.url.clone()),
+        )
+    }
+}
diff --git a/crates/assistant2/src/context_picker/file_context_picker.rs b/crates/assistant2/src/context_picker/file_context_picker.rs
index 13950b267ade03f2f03645847d0c26654f373e3a..08e7e13d544b56181d30007470469c1daa63eec4 100644
--- a/crates/assistant2/src/context_picker/file_context_picker.rs
+++ b/crates/assistant2/src/context_picker/file_context_picker.rs
@@ -245,7 +245,7 @@ impl PickerDelegate for FileContextPickerDelegate {
                 this.reset_mode();
                 cx.emit(DismissEvent);
             })
-            .log_err();
+            .ok();
     }
 
     fn render_match(
diff --git a/crates/assistant2/src/thread.rs b/crates/assistant2/src/thread.rs
index 77c0cd9836e569bb3afe6b7b58922fd4f81c3576..8234a0e8af6a1039479c3825a6ca3549acd0bf11 100644
--- a/crates/assistant2/src/thread.rs
+++ b/crates/assistant2/src/thread.rs
@@ -193,12 +193,19 @@ impl Thread {
 
             if let Some(context) = self.context_for_message(message.id) {
                 let mut file_context = String::new();
+                let mut fetch_context = String::new();
 
                 for context in context.iter() {
                     match context.kind {
                         ContextKind::File => {
                             file_context.push_str(&context.text);
-                            file_context.push_str("\n");
+                            file_context.push('\n');
+                        }
+                        ContextKind::FetchedUrl => {
+                            fetch_context.push_str(&context.name);
+                            fetch_context.push('\n');
+                            fetch_context.push_str(&context.text);
+                            fetch_context.push('\n');
                         }
                     }
                 }
@@ -209,6 +216,11 @@ impl Thread {
                     context_text.push_str(&file_context);
                 }
 
+                if !fetch_context.is_empty() {
+                    context_text.push_str("The following fetched results are available\n");
+                    context_text.push_str(&fetch_context);
+                }
+
                 request_message
                     .content
                     .push(MessageContent::Text(context_text))