From 54c63063e483969ee937417ac3c4eb3805f5cdf8 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 26 Sep 2023 16:23:48 -0400 Subject: [PATCH 1/4] changed inline assist generate prompt to leverage outline as opposed to entire prior file Co-Authored-by: Antonio --- crates/assistant/src/assistant.rs | 1 + crates/assistant/src/assistant_panel.rs | 125 +++----- crates/assistant/src/prompts.rs | 378 ++++++++++++++++++++++++ 3 files changed, 413 insertions(+), 91 deletions(-) create mode 100644 crates/assistant/src/prompts.rs diff --git a/crates/assistant/src/assistant.rs b/crates/assistant/src/assistant.rs index 258684db47..6c9b14333e 100644 --- a/crates/assistant/src/assistant.rs +++ b/crates/assistant/src/assistant.rs @@ -1,6 +1,7 @@ pub mod assistant_panel; mod assistant_settings; mod codegen; +mod prompts; mod streaming_diff; use ai::completion::Role; diff --git a/crates/assistant/src/assistant_panel.rs b/crates/assistant/src/assistant_panel.rs index 42e5fb7897..55a1dfe0f6 100644 --- a/crates/assistant/src/assistant_panel.rs +++ b/crates/assistant/src/assistant_panel.rs @@ -1,6 +1,7 @@ use crate::{ assistant_settings::{AssistantDockPosition, AssistantSettings, OpenAIModel}, codegen::{self, Codegen, CodegenKind}, + prompts::generate_content_prompt, MessageId, MessageMetadata, MessageStatus, Role, SavedConversation, SavedConversationMetadata, SavedMessage, }; @@ -541,11 +542,31 @@ impl AssistantPanel { self.inline_prompt_history.pop_front(); } - let snapshot = editor.read(cx).buffer().read(cx).snapshot(cx); - let range = pending_assist.codegen.read(cx).range(); - let selected_text = snapshot.text_for_range(range.clone()).collect::(); + let multi_buffer = editor.read(cx).buffer().read(cx); + let multi_buffer_snapshot = multi_buffer.snapshot(cx); + let snapshot = if multi_buffer.all_buffers().len() > 1 { + return; + } else { + multi_buffer + .all_buffers() + .iter() + .next() + .unwrap() + .read(cx) + .snapshot() + }; - let language = snapshot.language_at(range.start); + let range = pending_assist.codegen.read(cx).range(); + let language_range = snapshot.anchor_at( + range.start.to_offset(&multi_buffer_snapshot), + language::Bias::Left, + ) + ..snapshot.anchor_at( + range.end.to_offset(&multi_buffer_snapshot), + language::Bias::Right, + ); + + let language = snapshot.language_at(language_range.start); let language_name = if let Some(language) = language.as_ref() { if Arc::ptr_eq(language, &language::PLAIN_TEXT) { None @@ -557,93 +578,15 @@ impl AssistantPanel { }; let language_name = language_name.as_deref(); - let mut prompt = String::new(); - if let Some(language_name) = language_name { - writeln!(prompt, "You're an expert {language_name} engineer.").unwrap(); - } - match pending_assist.codegen.read(cx).kind() { - CodegenKind::Transform { .. } => { - writeln!( - prompt, - "You're currently working inside an editor on this file:" - ) - .unwrap(); - if let Some(language_name) = language_name { - writeln!(prompt, "```{language_name}").unwrap(); - } else { - writeln!(prompt, "```").unwrap(); - } - for chunk in snapshot.text_for_range(Anchor::min()..Anchor::max()) { - write!(prompt, "{chunk}").unwrap(); - } - writeln!(prompt, "```").unwrap(); - - writeln!( - prompt, - "In particular, the user has selected the following text:" - ) - .unwrap(); - if let Some(language_name) = language_name { - writeln!(prompt, "```{language_name}").unwrap(); - } else { - writeln!(prompt, "```").unwrap(); - } - writeln!(prompt, "{selected_text}").unwrap(); - writeln!(prompt, "```").unwrap(); - writeln!(prompt).unwrap(); - writeln!( - prompt, - "Modify the selected text given the user prompt: {user_prompt}" - ) - .unwrap(); - writeln!( - prompt, - "You MUST reply only with the edited selected text, not the entire file." - ) - .unwrap(); - } - CodegenKind::Generate { .. } => { - writeln!( - prompt, - "You're currently working inside an editor on this file:" - ) - .unwrap(); - if let Some(language_name) = language_name { - writeln!(prompt, "```{language_name}").unwrap(); - } else { - writeln!(prompt, "```").unwrap(); - } - for chunk in snapshot.text_for_range(Anchor::min()..range.start) { - write!(prompt, "{chunk}").unwrap(); - } - write!(prompt, "<|>").unwrap(); - for chunk in snapshot.text_for_range(range.start..Anchor::max()) { - write!(prompt, "{chunk}").unwrap(); - } - writeln!(prompt).unwrap(); - writeln!(prompt, "```").unwrap(); - writeln!( - prompt, - "Assume the cursor is located where the `<|>` marker is." - ) - .unwrap(); - writeln!( - prompt, - "Text can't be replaced, so assume your answer will be inserted at the cursor." - ) - .unwrap(); - writeln!( - prompt, - "Complete the text given the user prompt: {user_prompt}" - ) - .unwrap(); - } - } - if let Some(language_name) = language_name { - writeln!(prompt, "Your answer MUST always be valid {language_name}.").unwrap(); - } - writeln!(prompt, "Always wrap your response in a Markdown codeblock.").unwrap(); - writeln!(prompt, "Never make remarks about the output.").unwrap(); + let codegen_kind = pending_assist.codegen.read(cx).kind().clone(); + let prompt = generate_content_prompt( + user_prompt.to_string(), + language_name, + &snapshot, + language_range, + cx, + codegen_kind, + ); let mut messages = Vec::new(); let mut model = settings::get::(cx) diff --git a/crates/assistant/src/prompts.rs b/crates/assistant/src/prompts.rs new file mode 100644 index 0000000000..272ae9eac6 --- /dev/null +++ b/crates/assistant/src/prompts.rs @@ -0,0 +1,378 @@ +use gpui::AppContext; +use language::{BufferSnapshot, OffsetRangeExt, ToOffset}; +use std::cmp; +use std::ops::Range; +use std::{fmt::Write, iter}; + +use crate::codegen::CodegenKind; + +fn outline_for_prompt( + buffer: &BufferSnapshot, + range: Range, + cx: &AppContext, +) -> Option { + let indent = buffer + .language_indent_size_at(0, cx) + .chars() + .collect::(); + let outline = buffer.outline(None)?; + let range = range.to_offset(buffer); + + let mut text = String::new(); + let mut items = outline.items.into_iter().peekable(); + + let mut intersected = false; + let mut intersection_indent = 0; + let mut extended_range = range.clone(); + + while let Some(item) = items.next() { + let item_range = item.range.to_offset(buffer); + if item_range.end < range.start || item_range.start > range.end { + text.extend(iter::repeat(indent.as_str()).take(item.depth)); + text.push_str(&item.text); + text.push('\n'); + } else { + intersected = true; + let is_terminal = items + .peek() + .map_or(true, |next_item| next_item.depth <= item.depth); + if is_terminal { + if item_range.start <= extended_range.start { + extended_range.start = item_range.start; + intersection_indent = item.depth; + } + extended_range.end = cmp::max(extended_range.end, item_range.end); + } else { + let name_start = item_range.start + item.name_ranges.first().unwrap().start; + let name_end = item_range.start + item.name_ranges.last().unwrap().end; + + if range.start > name_end { + text.extend(iter::repeat(indent.as_str()).take(item.depth)); + text.push_str(&item.text); + text.push('\n'); + } else { + if name_start <= extended_range.start { + extended_range.start = item_range.start; + intersection_indent = item.depth; + } + extended_range.end = cmp::max(extended_range.end, name_end); + } + } + } + + if intersected + && items.peek().map_or(true, |next_item| { + next_item.range.start.to_offset(buffer) > range.end + }) + { + intersected = false; + text.extend(iter::repeat(indent.as_str()).take(intersection_indent)); + text.extend(buffer.text_for_range(extended_range.start..range.start)); + text.push_str("<|"); + text.extend(buffer.text_for_range(range.clone())); + text.push_str("|>"); + text.extend(buffer.text_for_range(range.end..extended_range.end)); + text.push('\n'); + } + } + + Some(text) +} + +pub fn generate_content_prompt( + user_prompt: String, + language_name: Option<&str>, + buffer: &BufferSnapshot, + range: Range, + cx: &AppContext, + kind: CodegenKind, +) -> String { + let mut prompt = String::new(); + + // General Preamble + if let Some(language_name) = language_name { + writeln!(prompt, "You're an expert {language_name} engineer.\n").unwrap(); + } else { + writeln!(prompt, "You're an expert engineer.\n").unwrap(); + } + + let outline = outline_for_prompt(buffer, range.clone(), cx); + if let Some(outline) = outline { + writeln!( + prompt, + "The file you are currently working on has the following outline:" + ) + .unwrap(); + if let Some(language_name) = language_name { + let language_name = language_name.to_lowercase(); + writeln!(prompt, "```{language_name}\n{outline}\n```").unwrap(); + } else { + writeln!(prompt, "```\n{outline}\n```").unwrap(); + } + } + + // Assume for now that we are just generating + if range.clone().start == range.end { + writeln!(prompt, "In particular, the user's cursor is current on the '<||>' span in the above outline, with no text selected.").unwrap(); + } else { + writeln!(prompt, "In particular, the user has selected a section of the text between the '<|' and '|>' spans.").unwrap(); + } + + match kind { + CodegenKind::Generate { position } => { + writeln!( + prompt, + "Assume the cursor is located where the `<|` marker is." + ) + .unwrap(); + writeln!( + prompt, + "Text can't be replaced, so assume your answer will be inserted at the cursor." + ) + .unwrap(); + writeln!( + prompt, + "Generate text based on the users prompt: {user_prompt}" + ) + .unwrap(); + } + CodegenKind::Transform { range } => { + writeln!( + prompt, + "Modify the users code selected text based upon the users prompt: {user_prompt}" + ) + .unwrap(); + writeln!( + prompt, + "You MUST reply with only the adjusted code, not the entire file." + ) + .unwrap(); + } + } + + if let Some(language_name) = language_name { + writeln!(prompt, "Your answer MUST always be valid {language_name}").unwrap(); + } + writeln!(prompt, "Always wrap your response in a Markdown codeblock").unwrap(); + writeln!(prompt, "Never make remarks about the output.").unwrap(); + + prompt +} + +#[cfg(test)] +pub(crate) mod tests { + + use super::*; + use std::sync::Arc; + + use gpui::AppContext; + use indoc::indoc; + use language::{language_settings, tree_sitter_rust, Buffer, Language, LanguageConfig, Point}; + use settings::SettingsStore; + + pub(crate) fn rust_lang() -> Language { + Language::new( + LanguageConfig { + name: "Rust".into(), + path_suffixes: vec!["rs".to_string()], + ..Default::default() + }, + Some(tree_sitter_rust::language()), + ) + .with_indents_query( + r#" + (call_expression) @indent + (field_expression) @indent + (_ "(" ")" @end) @indent + (_ "{" "}" @end) @indent + "#, + ) + .unwrap() + .with_outline_query( + r#" + (struct_item + "struct" @context + name: (_) @name) @item + (enum_item + "enum" @context + name: (_) @name) @item + (enum_variant + name: (_) @name) @item + (field_declaration + name: (_) @name) @item + (impl_item + "impl" @context + trait: (_)? @name + "for"? @context + type: (_) @name) @item + (function_item + "fn" @context + name: (_) @name) @item + (mod_item + "mod" @context + name: (_) @name) @item + "#, + ) + .unwrap() + } + + #[gpui::test] + fn test_outline_for_prompt(cx: &mut AppContext) { + cx.set_global(SettingsStore::test(cx)); + language_settings::init(cx); + let text = indoc! {" + struct X { + a: usize, + b: usize, + } + + impl X { + + fn new() -> Self { + let a = 1; + let b = 2; + Self { a, b } + } + + pub fn a(&self, param: bool) -> usize { + self.a + } + + pub fn b(&self) -> usize { + self.b + } + } + "}; + let buffer = + cx.add_model(|cx| Buffer::new(0, 0, text).with_language(Arc::new(rust_lang()), cx)); + let snapshot = buffer.read(cx).snapshot(); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(1, 4))..snapshot.anchor_before(Point::new(1, 4)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + <||>a: usize + b + impl X + fn new + fn a + fn b + "}) + ); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(8, 12))..snapshot.anchor_before(Point::new(8, 14)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + a + b + impl X + fn new() -> Self { + let <|a |>= 1; + let b = 2; + Self { a, b } + } + fn a + fn b + "}) + ); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(6, 0))..snapshot.anchor_before(Point::new(6, 0)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + a + b + impl X + <||> + fn new + fn a + fn b + "}) + ); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(8, 12))..snapshot.anchor_before(Point::new(13, 9)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + a + b + impl X + fn new() -> Self { + let <|a = 1; + let b = 2; + Self { a, b } + } + + pub f|>n a(&self, param: bool) -> usize { + self.a + } + fn b + "}) + ); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(5, 6))..snapshot.anchor_before(Point::new(12, 0)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + a + b + impl X<| { + + fn new() -> Self { + let a = 1; + let b = 2; + Self { a, b } + } + |> + fn a + fn b + "}) + ); + + let outline = outline_for_prompt( + &snapshot, + snapshot.anchor_before(Point::new(18, 8))..snapshot.anchor_before(Point::new(18, 8)), + cx, + ); + assert_eq!( + outline.as_deref(), + Some(indoc! {" + struct X + a + b + impl X + fn new + fn a + pub fn b(&self) -> usize { + <||>self.b + } + "}) + ); + } +} From e8dd412ac12372f2e1f47ae1499c54595ba7f34d Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 26 Sep 2023 17:10:31 -0400 Subject: [PATCH 2/4] update inline generate prompt to leverage more explicit <|START| and |END|> spans --- crates/assistant/src/assistant_panel.rs | 14 +++++--------- crates/assistant/src/prompts.rs | 16 ++++++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/assistant/src/assistant_panel.rs b/crates/assistant/src/assistant_panel.rs index 55a1dfe0f6..37d0d729fe 100644 --- a/crates/assistant/src/assistant_panel.rs +++ b/crates/assistant/src/assistant_panel.rs @@ -544,16 +544,10 @@ impl AssistantPanel { let multi_buffer = editor.read(cx).buffer().read(cx); let multi_buffer_snapshot = multi_buffer.snapshot(cx); - let snapshot = if multi_buffer.all_buffers().len() > 1 { - return; + let snapshot = if multi_buffer.is_singleton() { + multi_buffer.as_singleton().unwrap().read(cx).snapshot() } else { - multi_buffer - .all_buffers() - .iter() - .next() - .unwrap() - .read(cx) - .snapshot() + return; }; let range = pending_assist.codegen.read(cx).range(); @@ -588,6 +582,8 @@ impl AssistantPanel { codegen_kind, ); + dbg!(&prompt); + let mut messages = Vec::new(); let mut model = settings::get::(cx) .default_open_ai_model diff --git a/crates/assistant/src/prompts.rs b/crates/assistant/src/prompts.rs index 272ae9eac6..00db4c1302 100644 --- a/crates/assistant/src/prompts.rs +++ b/crates/assistant/src/prompts.rs @@ -68,9 +68,13 @@ fn outline_for_prompt( intersected = false; text.extend(iter::repeat(indent.as_str()).take(intersection_indent)); text.extend(buffer.text_for_range(extended_range.start..range.start)); - text.push_str("<|"); + text.push_str("<|START|"); text.extend(buffer.text_for_range(range.clone())); - text.push_str("|>"); + if range.start != range.end { + text.push_str("|END|>"); + } else { + text.push_str(">"); + } text.extend(buffer.text_for_range(range.end..extended_range.end)); text.push('\n'); } @@ -113,16 +117,16 @@ pub fn generate_content_prompt( // Assume for now that we are just generating if range.clone().start == range.end { - writeln!(prompt, "In particular, the user's cursor is current on the '<||>' span in the above outline, with no text selected.").unwrap(); + writeln!(prompt, "In particular, the user's cursor is current on the '<|START|>' span in the above outline, with no text selected.").unwrap(); } else { - writeln!(prompt, "In particular, the user has selected a section of the text between the '<|' and '|>' spans.").unwrap(); + writeln!(prompt, "In particular, the user has selected a section of the text between the '<|START|' and '|END|>' spans.").unwrap(); } match kind { CodegenKind::Generate { position } => { writeln!( prompt, - "Assume the cursor is located where the `<|` marker is." + "Assume the cursor is located where the `<|START|` marker is." ) .unwrap(); writeln!( @@ -144,7 +148,7 @@ pub fn generate_content_prompt( .unwrap(); writeln!( prompt, - "You MUST reply with only the adjusted code, not the entire file." + "You MUST reply with only the adjusted code (within the '<|START|' and '|END|>' spans), not the entire file." ) .unwrap(); } From 90f17d4a28c716f13b5e6325a2bec3786bf9bd3f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 26 Sep 2023 17:11:20 -0400 Subject: [PATCH 3/4] updated codegen match to leverage unused values --- crates/assistant/src/prompts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/assistant/src/prompts.rs b/crates/assistant/src/prompts.rs index 00db4c1302..04cf5d089b 100644 --- a/crates/assistant/src/prompts.rs +++ b/crates/assistant/src/prompts.rs @@ -123,7 +123,7 @@ pub fn generate_content_prompt( } match kind { - CodegenKind::Generate { position } => { + CodegenKind::Generate { position: _ } => { writeln!( prompt, "Assume the cursor is located where the `<|START|` marker is." @@ -140,7 +140,7 @@ pub fn generate_content_prompt( ) .unwrap(); } - CodegenKind::Transform { range } => { + CodegenKind::Transform { range: _ } => { writeln!( prompt, "Modify the users code selected text based upon the users prompt: {user_prompt}" From 650a160f04cabdbb7e21faf047fbe39dd071dd4f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 27 Sep 2023 09:06:53 -0400 Subject: [PATCH 4/4] update test outline for prompt tests for new cursor span --- crates/assistant/src/prompts.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/assistant/src/prompts.rs b/crates/assistant/src/prompts.rs index 04cf5d089b..ee58090d04 100644 --- a/crates/assistant/src/prompts.rs +++ b/crates/assistant/src/prompts.rs @@ -260,7 +260,7 @@ pub(crate) mod tests { outline.as_deref(), Some(indoc! {" struct X - <||>a: usize + <|START|>a: usize b impl X fn new @@ -282,7 +282,7 @@ pub(crate) mod tests { b impl X fn new() -> Self { - let <|a |>= 1; + let <|START|a |END|>= 1; let b = 2; Self { a, b } } @@ -303,7 +303,7 @@ pub(crate) mod tests { a b impl X - <||> + <|START|> fn new fn a fn b @@ -323,12 +323,12 @@ pub(crate) mod tests { b impl X fn new() -> Self { - let <|a = 1; + let <|START|a = 1; let b = 2; Self { a, b } } - pub f|>n a(&self, param: bool) -> usize { + pub f|END|>n a(&self, param: bool) -> usize { self.a } fn b @@ -346,14 +346,14 @@ pub(crate) mod tests { struct X a b - impl X<| { + impl X<|START| { fn new() -> Self { let a = 1; let b = 2; Self { a, b } } - |> + |END|> fn a fn b "}) @@ -374,7 +374,7 @@ pub(crate) mod tests { fn new fn a pub fn b(&self) -> usize { - <||>self.b + <|START|>self.b } "}) );