From c57cc35b030407a6e4e9f9cd990135dcdcf36e9e Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Fri, 13 Dec 2024 15:03:55 -0500 Subject: [PATCH] assistant2: Add ability to fetch URLs as context (#21988) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds the ability to fetch URLs as context in Assistant2. In the picker we use the search area as an input for the user to enter the URL they wish to fetch: Screenshot 2024-12-13 at 2 45 41 PM Screenshot 2024-12-13 at 2 45 47 PM Release Notes: - N/A --- Cargo.lock | 2 + crates/assistant2/Cargo.toml | 2 + crates/assistant2/src/context.rs | 1 + crates/assistant2/src/context_picker.rs | 30 ++- .../context_picker/fetch_context_picker.rs | 218 ++++++++++++++++++ .../src/context_picker/file_context_picker.rs | 2 +- crates/assistant2/src/thread.rs | 14 +- 7 files changed, 261 insertions(+), 8 deletions(-) create mode 100644 crates/assistant2/src/context_picker/fetch_context_picker.rs diff --git a/Cargo.lock b/Cargo.lock index 0ee25ccb5f..e7e9449bdf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -472,6 +472,8 @@ dependencies = [ "fuzzy", "gpui", "handlebars 4.5.0", + "html_to_markdown", + "http_client", "indoc", "language", "language_model", diff --git a/crates/assistant2/Cargo.toml b/crates/assistant2/Cargo.toml index 3da2c7faee..c67674b437 100644 --- a/crates/assistant2/Cargo.toml +++ b/crates/assistant2/Cargo.toml @@ -31,6 +31,8 @@ futures.workspace = true fuzzy.workspace = true gpui.workspace = true handlebars.workspace = true +html_to_markdown.workspace = true +http_client.workspace = true language.workspace = true language_model.workspace = true language_model_selector.workspace = true diff --git a/crates/assistant2/src/context.rs b/crates/assistant2/src/context.rs index 9d095a10d8..577d87166f 100644 --- a/crates/assistant2/src/context.rs +++ b/crates/assistant2/src/context.rs @@ -23,4 +23,5 @@ pub struct Context { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ContextKind { File, + FetchedUrl, } diff --git a/crates/assistant2/src/context_picker.rs b/crates/assistant2/src/context_picker.rs index 1e4d007e76..f78e617a34 100644 --- a/crates/assistant2/src/context_picker.rs +++ b/crates/assistant2/src/context_picker.rs @@ -1,3 +1,4 @@ +mod fetch_context_picker; mod file_context_picker; use std::sync::Arc; @@ -11,6 +12,7 @@ use ui::{prelude::*, ListItem, ListItemSpacing, Tooltip}; use util::ResultExt; use workspace::Workspace; +use crate::context_picker::fetch_context_picker::FetchContextPicker; use crate::context_picker::file_context_picker::FileContextPicker; use crate::message_editor::MessageEditor; @@ -18,6 +20,7 @@ use crate::message_editor::MessageEditor; enum ContextPickerMode { Default, File(View), + Fetch(View), } pub(super) struct ContextPicker { @@ -47,7 +50,7 @@ impl ContextPicker { icon: IconName::File, }, ContextPickerEntry { - name: "web".into(), + name: "fetch".into(), description: "Fetch content from URL".into(), icon: IconName::Globe, }, @@ -77,16 +80,21 @@ impl FocusableView for ContextPicker { match &self.mode { ContextPickerMode::Default => self.picker.focus_handle(cx), ContextPickerMode::File(file_picker) => file_picker.focus_handle(cx), + ContextPickerMode::Fetch(fetch_picker) => fetch_picker.focus_handle(cx), } } } impl Render for ContextPicker { fn render(&mut self, _cx: &mut ViewContext) -> impl IntoElement { - v_flex().min_w(px(400.)).map(|parent| match &self.mode { - ContextPickerMode::Default => parent.child(self.picker.clone()), - ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()), - }) + v_flex() + .w(px(400.)) + .min_w(px(400.)) + .map(|parent| match &self.mode { + ContextPickerMode::Default => parent.child(self.picker.clone()), + ContextPickerMode::File(file_picker) => parent.child(file_picker.clone()), + ContextPickerMode::Fetch(fetch_picker) => parent.child(fetch_picker.clone()), + }) } } @@ -144,6 +152,16 @@ impl PickerDelegate for ContextPickerDelegate { ) })); } + "fetch" => { + this.mode = ContextPickerMode::Fetch(cx.new_view(|cx| { + FetchContextPicker::new( + self.context_picker.clone(), + self.workspace.clone(), + self.message_editor.clone(), + cx, + ) + })); + } _ => {} } @@ -157,7 +175,7 @@ impl PickerDelegate for ContextPickerDelegate { self.context_picker .update(cx, |this, cx| match this.mode { ContextPickerMode::Default => cx.emit(DismissEvent), - ContextPickerMode::File(_) => {} + ContextPickerMode::File(_) | ContextPickerMode::Fetch(_) => {} }) .log_err(); } diff --git a/crates/assistant2/src/context_picker/fetch_context_picker.rs b/crates/assistant2/src/context_picker/fetch_context_picker.rs new file mode 100644 index 0000000000..9545d546eb --- /dev/null +++ b/crates/assistant2/src/context_picker/fetch_context_picker.rs @@ -0,0 +1,218 @@ +use std::cell::RefCell; +use std::rc::Rc; +use std::sync::Arc; + +use anyhow::{bail, Context as _, Result}; +use futures::AsyncReadExt as _; +use gpui::{AppContext, DismissEvent, FocusHandle, FocusableView, Task, View, WeakView}; +use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler}; +use http_client::{AsyncBody, HttpClientWithUrl}; +use picker::{Picker, PickerDelegate}; +use ui::{prelude::*, ListItem, ListItemSpacing, ViewContext}; +use workspace::Workspace; + +use crate::context::ContextKind; +use crate::context_picker::ContextPicker; +use crate::message_editor::MessageEditor; + +pub struct FetchContextPicker { + picker: View>, +} + +impl FetchContextPicker { + pub fn new( + context_picker: WeakView, + workspace: WeakView, + message_editor: WeakView, + cx: &mut ViewContext, + ) -> Self { + let delegate = FetchContextPickerDelegate::new(context_picker, workspace, message_editor); + let picker = cx.new_view(|cx| Picker::uniform_list(delegate, cx)); + + Self { picker } + } +} + +impl FocusableView for FetchContextPicker { + fn focus_handle(&self, cx: &AppContext) -> FocusHandle { + self.picker.focus_handle(cx) + } +} + +impl Render for FetchContextPicker { + fn render(&mut self, _cx: &mut ViewContext) -> impl IntoElement { + self.picker.clone() + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] +enum ContentType { + Html, + Plaintext, + Json, +} + +pub struct FetchContextPickerDelegate { + context_picker: WeakView, + workspace: WeakView, + message_editor: WeakView, + url: String, +} + +impl FetchContextPickerDelegate { + pub fn new( + context_picker: WeakView, + workspace: WeakView, + message_editor: WeakView, + ) -> Self { + FetchContextPickerDelegate { + context_picker, + workspace, + message_editor, + url: String::new(), + } + } + + async fn build_message(http_client: Arc, url: &str) -> Result { + let mut url = url.to_owned(); + if !url.starts_with("https://") && !url.starts_with("http://") { + url = format!("https://{url}"); + } + + let mut response = http_client.get(&url, AsyncBody::default(), true).await?; + + let mut body = Vec::new(); + response + .body_mut() + .read_to_end(&mut body) + .await + .context("error reading response body")?; + + if response.status().is_client_error() { + let text = String::from_utf8_lossy(body.as_slice()); + bail!( + "status error {}, response: {text:?}", + response.status().as_u16() + ); + } + + let Some(content_type) = response.headers().get("content-type") else { + bail!("missing Content-Type header"); + }; + let content_type = content_type + .to_str() + .context("invalid Content-Type header")?; + let content_type = match content_type { + "text/html" => ContentType::Html, + "text/plain" => ContentType::Plaintext, + "application/json" => ContentType::Json, + _ => ContentType::Html, + }; + + match content_type { + ContentType::Html => { + let mut handlers: Vec = vec![ + Rc::new(RefCell::new(markdown::WebpageChromeRemover)), + Rc::new(RefCell::new(markdown::ParagraphHandler)), + Rc::new(RefCell::new(markdown::HeadingHandler)), + Rc::new(RefCell::new(markdown::ListHandler)), + Rc::new(RefCell::new(markdown::TableHandler::new())), + Rc::new(RefCell::new(markdown::StyledTextHandler)), + ]; + if url.contains("wikipedia.org") { + use html_to_markdown::structure::wikipedia; + + handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover))); + handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler))); + handlers.push(Rc::new( + RefCell::new(wikipedia::WikipediaCodeHandler::new()), + )); + } else { + handlers.push(Rc::new(RefCell::new(markdown::CodeHandler))); + } + + convert_html_to_markdown(&body[..], &mut handlers) + } + ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()), + ContentType::Json => { + let json: serde_json::Value = serde_json::from_slice(&body)?; + + Ok(format!( + "```json\n{}\n```", + serde_json::to_string_pretty(&json)? + )) + } + } + } +} + +impl PickerDelegate for FetchContextPickerDelegate { + type ListItem = ListItem; + + fn match_count(&self) -> usize { + 1 + } + + fn selected_index(&self) -> usize { + 0 + } + + fn set_selected_index(&mut self, _ix: usize, _cx: &mut ViewContext>) {} + + fn placeholder_text(&self, _cx: &mut ui::WindowContext) -> Arc { + "Enter a URL…".into() + } + + fn update_matches(&mut self, query: String, _cx: &mut ViewContext>) -> Task<()> { + self.url = query; + + Task::ready(()) + } + + fn confirm(&mut self, _secondary: bool, cx: &mut ViewContext>) { + let Some(workspace) = self.workspace.upgrade() else { + return; + }; + + let http_client = workspace.read(cx).client().http_client().clone(); + let url = self.url.clone(); + cx.spawn(|this, mut cx| async move { + let text = Self::build_message(http_client, &url).await?; + + this.update(&mut cx, |this, cx| { + this.delegate + .message_editor + .update(cx, |message_editor, _cx| { + message_editor.insert_context(ContextKind::FetchedUrl, url, text); + }) + })??; + + anyhow::Ok(()) + }) + .detach_and_log_err(cx); + } + + fn dismissed(&mut self, cx: &mut ViewContext>) { + self.context_picker + .update(cx, |this, cx| { + this.reset_mode(); + cx.emit(DismissEvent); + }) + .ok(); + } + + fn render_match( + &self, + ix: usize, + selected: bool, + _cx: &mut ViewContext>, + ) -> Option { + Some( + ListItem::new(ix) + .inset(true) + .spacing(ListItemSpacing::Sparse) + .toggle_state(selected) + .child(self.url.clone()), + ) + } +} diff --git a/crates/assistant2/src/context_picker/file_context_picker.rs b/crates/assistant2/src/context_picker/file_context_picker.rs index 13950b267a..08e7e13d54 100644 --- a/crates/assistant2/src/context_picker/file_context_picker.rs +++ b/crates/assistant2/src/context_picker/file_context_picker.rs @@ -245,7 +245,7 @@ impl PickerDelegate for FileContextPickerDelegate { this.reset_mode(); cx.emit(DismissEvent); }) - .log_err(); + .ok(); } fn render_match( diff --git a/crates/assistant2/src/thread.rs b/crates/assistant2/src/thread.rs index 77c0cd9836..8234a0e8af 100644 --- a/crates/assistant2/src/thread.rs +++ b/crates/assistant2/src/thread.rs @@ -193,12 +193,19 @@ impl Thread { if let Some(context) = self.context_for_message(message.id) { let mut file_context = String::new(); + let mut fetch_context = String::new(); for context in context.iter() { match context.kind { ContextKind::File => { file_context.push_str(&context.text); - file_context.push_str("\n"); + file_context.push('\n'); + } + ContextKind::FetchedUrl => { + fetch_context.push_str(&context.name); + fetch_context.push('\n'); + fetch_context.push_str(&context.text); + fetch_context.push('\n'); } } } @@ -209,6 +216,11 @@ impl Thread { context_text.push_str(&file_context); } + if !fetch_context.is_empty() { + context_text.push_str("The following fetched results are available\n"); + context_text.push_str(&fetch_context); + } + request_message .content .push(MessageContent::Text(context_text))