From c0a3642f77da8d44a4006fc452b76f937d1b10a9 Mon Sep 17 00:00:00 2001 From: Tristan Hume Date: Wed, 26 Jun 2024 14:41:40 -0400 Subject: [PATCH] Improve prompt for Claude models (#13531) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This inline assistant prompt is one I designed that in my experience works much better with Claude 3.5 Sonnet than the default prompt. Mainly because it takes advantage of a weird property of our finetuning which is that when you use XML tags it knows that it's doing a machine-read task and stops trying to elide things for brevity. The default prompt will often remove comments and otherwise add elisions for brevity when doing large rewrites. It also avoids giving the entire file content twice when the rewrite region is large relative to the non-rewritten region. Not necessarily meant to be merged as-is since it may mess up OAI models. This is mainly meant for your reference. But everyone should be using 3.5 Sonnet for coding use cases now anyhow 😛 Release Notes: - N/A --- crates/assistant/src/prompts.rs | 176 +++++++++++++++----------------- 1 file changed, 82 insertions(+), 94 deletions(-) diff --git a/crates/assistant/src/prompts.rs b/crates/assistant/src/prompts.rs index f8847733f7..9f33cf546a 100644 --- a/crates/assistant/src/prompts.rs +++ b/crates/assistant/src/prompts.rs @@ -6,118 +6,106 @@ pub fn generate_content_prompt( language_name: Option<&str>, buffer: BufferSnapshot, range: Range, - project_name: Option, + _project_name: Option, ) -> anyhow::Result { let mut prompt = String::new(); let content_type = match language_name { None | Some("Markdown" | "Plain Text") => { - writeln!(prompt, "You are an expert engineer.")?; - "Text" - } - Some(language_name) => { - writeln!(prompt, "You are an expert {language_name} engineer.")?; writeln!( prompt, - "Your answer MUST always and only be valid {}.", - language_name + "Here's a file of text that I'm going to ask you to make an edit to." 
)?; - "Code" + "text" + } + Some(language_name) => { + writeln!( + prompt, + "Here's a file of {language_name} that I'm going to ask you to make an edit to." + )?; + "code" } }; - if let Some(project_name) = project_name { - writeln!( - prompt, - "You are currently working inside the '{project_name}' project in code editor Zed." - )?; - } - - writeln!( - prompt, - "The user has the following file open in the editor:" - )?; + const MAX_CTX: usize = 50000; + let mut is_truncated = false; if range.is_empty() { - write!(prompt, "```")?; - if let Some(language_name) = language_name { - write!(prompt, "{language_name}")?; - } - - for chunk in buffer.as_rope().chunks_in_range(0..range.start) { - prompt.push_str(chunk); - } - prompt.push_str("<|CURSOR|>"); - for chunk in buffer.as_rope().chunks_in_range(range.start..buffer.len()) { - prompt.push_str(chunk); - } - if !prompt.ends_with('\n') { - prompt.push('\n'); - } - writeln!(prompt, "```")?; - prompt.push('\n'); - - writeln!( - prompt, - "Assume the cursor is located where the `<|CURSOR|>` span is." - ) - .unwrap(); - writeln!( - prompt, - "{content_type} can't be replaced, so assume your answer will be inserted at the cursor.", - ) - .unwrap(); - writeln!( - prompt, - "Generate {content_type} based on the users prompt: {user_prompt}", - ) - .unwrap(); + prompt.push_str("The point you'll need to insert at is marked with .\n\n"); } else { - write!(prompt, "```")?; - for chunk in buffer.as_rope().chunks() { - prompt.push_str(chunk); - } - if !prompt.ends_with('\n') { - prompt.push('\n'); - } - writeln!(prompt, "```")?; - prompt.push('\n'); - - writeln!( - prompt, - "In particular, the following piece of text is selected:" - )?; - write!(prompt, "```")?; - if let Some(language_name) = language_name { - write!(prompt, "{language_name}")?; - } - prompt.push('\n'); + prompt.push_str("The section you'll need to rewrite is marked with tags.\n\n"); + } + // Include file content. 
+ let before_range = 0..range.start; + let truncated_before = if before_range.len() > MAX_CTX { + is_truncated = true; + range.start - MAX_CTX..range.start + } else { + before_range + }; + let mut non_rewrite_len = truncated_before.len(); + for chunk in buffer.text_for_range(truncated_before) { + prompt.push_str(chunk); + } + if !range.is_empty() { + prompt.push_str("\n"); for chunk in buffer.text_for_range(range.clone()) { prompt.push_str(chunk); } - if !prompt.ends_with('\n') { - prompt.push('\n'); - } - writeln!(prompt, "```")?; - prompt.push('\n'); - - writeln!( - prompt, - "Modify the user's selected {content_type} based upon the users prompt: {user_prompt}" - ) - .unwrap(); - writeln!( - prompt, - "You must reply with only the adjusted {content_type}, not the entire file." - ) - .unwrap(); + prompt.push_str("\n"); + } else { + prompt.push_str(""); + } + let after_range = range.end..buffer.len(); + let truncated_after = if after_range.len() > MAX_CTX { + is_truncated = true; + range.end..range.end + MAX_CTX + } else { + after_range + }; + non_rewrite_len += truncated_after.len(); + for chunk in buffer.text_for_range(truncated_after) { + prompt.push_str(chunk); } - writeln!(prompt, "Never make remarks about the output.").unwrap(); - writeln!( - prompt, - "Do not return anything else, except the generated {content_type}." - ) - .unwrap(); + write!(prompt, "\n\n").unwrap(); + + if is_truncated { + writeln!(prompt, "The context around the relevant section has been truncated (possibly in the middle of a line) for brevity.\n")?; + } + + if range.is_empty() { + writeln!( + prompt, + "You can't replace {content_type}, your answer will be inserted in place of the `` tags. 
Don't include the insert_here tags in your output.", + ) + .unwrap(); + writeln!( + prompt, + "Generate {content_type} based on the following prompt:\n\n\n{user_prompt}\n", + ) + .unwrap(); + writeln!(prompt, "Match the indentation in the original file in the inserted {content_type}, don't include any indentation on blank lines.\n").unwrap(); + prompt.push_str("Immediately start with the following format with no remarks:\n\n```\n{{INSERTED_CODE}}\n```"); + } else { + writeln!(prompt, "Edit the section of {content_type} in tags based on the following prompt:'").unwrap(); + writeln!(prompt, "\n\n{user_prompt}\n\n").unwrap(); + let rewrite_len = range.end - range.start; + if rewrite_len < 20000 && rewrite_len * 2 < non_rewrite_len { + writeln!(prompt, "And here's the section to rewrite based on that prompt again for reference:\n\n\n").unwrap(); + for chunk in buffer.text_for_range(range.clone()) { + prompt.push_str(chunk); + } + writeln!(prompt, "\n\n").unwrap(); + } + writeln!(prompt, "Only make changes that are necessary to fulfill the prompt, leave everything else as-is. All surrounding {content_type} will be preserved.\n").unwrap(); + write!( + prompt, + "Start at the indentation level in the original file in the rewritten {content_type}. " + ) + .unwrap(); + prompt.push_str("Don't stop until you've rewritten the entire section, even if you have no more changes to make, always write out the whole section with no unnecessary elisions."); + prompt.push_str("\n\nImmediately start with the following format with no remarks:\n\n```\n{{REWRITTEN_CODE}}\n```"); + } Ok(prompt) }