From 9809ec3d706a19cd409a8a7494fabc06803e0ed7 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 19 Jul 2023 15:47:05 -0400 Subject: [PATCH] update treesitter parsing to accomodate for collapsed nested functions Co-authored-by: maxbrunsfeld --- Cargo.lock | 3 +- Cargo.toml | 2 +- crates/language/src/language.rs | 22 + crates/semantic_index/Cargo.toml | 1 + crates/semantic_index/src/parsing.rs | 269 +++-- crates/semantic_index/src/semantic_index.rs | 8 +- .../src/semantic_index_tests.rs | 987 +++++++++--------- crates/zed/src/languages/rust/config.toml | 1 + crates/zed/src/languages/rust/embedding.scm | 64 +- 9 files changed, 773 insertions(+), 584 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7719eb24c2..8ea6f61da0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6486,6 +6486,7 @@ dependencies = [ "parking_lot 0.11.2", "picker", "postage", + "pretty_assertions", "project", "rand 0.8.5", "rpc", @@ -7991,7 +7992,7 @@ dependencies = [ [[package]] name = "tree-sitter" version = "0.20.10" -source = "git+https://github.com/tree-sitter/tree-sitter?rev=49226023693107fba9a1191136a4f47f38cdca73#49226023693107fba9a1191136a4f47f38cdca73" +source = "git+https://github.com/tree-sitter/tree-sitter?rev=1c65ca24bc9a734ab70115188f465e12eecf224e#1c65ca24bc9a734ab70115188f465e12eecf224e" dependencies = [ "cc", "regex", diff --git a/Cargo.toml b/Cargo.toml index 4b65745348..04f2147431 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,7 +130,7 @@ tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", tree-sitter-lua = "0.0.14" [patch.crates-io] -tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "49226023693107fba9a1191136a4f47f38cdca73" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1c65ca24bc9a734ab70115188f465e12eecf224e" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457 diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 8c6d6e9c09..ec233716d6 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -339,6 +339,8 @@ pub struct LanguageConfig { #[serde(default)] pub line_comment: Option>, #[serde(default)] + pub collapsed_placeholder: String, + #[serde(default)] pub block_comment: Option<(Arc, Arc)>, #[serde(default)] pub overrides: HashMap, @@ -408,6 +410,7 @@ impl Default for LanguageConfig { line_comment: Default::default(), block_comment: Default::default(), overrides: Default::default(), + collapsed_placeholder: Default::default(), } } } @@ -525,6 +528,8 @@ pub struct EmbeddingConfig { pub item_capture_ix: u32, pub name_capture_ix: u32, pub context_capture_ix: Option, + pub collapse_capture_ix: Option, + pub keep_capture_ix: Option, } struct InjectionConfig { @@ -1246,12 +1251,16 @@ impl Language { let mut item_capture_ix = None; let mut name_capture_ix = None; let mut context_capture_ix = None; + let mut collapse_capture_ix = None; + let mut keep_capture_ix = None; get_capture_indices( &query, &mut [ ("item", &mut item_capture_ix), ("name", &mut name_capture_ix), ("context", &mut context_capture_ix), + ("keep", &mut keep_capture_ix), + ("collapse", &mut collapse_capture_ix), ], ); if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) { @@ -1260,6 +1269,8 @@ impl Language { item_capture_ix, name_capture_ix, context_capture_ix, + collapse_capture_ix, + keep_capture_ix, }); } Ok(self) @@ -1544,9 +1555,20 @@ impl Language { pub fn grammar(&self) -> Option<&Arc> { self.grammar.as_ref() } + + pub fn default_scope(self: &Arc) -> LanguageScope { + LanguageScope { + language: self.clone(), + override_id: None, + } + } } impl LanguageScope { + pub fn collapsed_placeholder(&self) -> &str { + self.language.config.collapsed_placeholder.as_ref() + } + pub fn line_comment_prefix(&self) -> Option<&Arc> { Override::as_option( self.config_override().map(|o| &o.line_comment), diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml index 2d21ff6c1c..1b3169bfe4 100644 --- a/crates/semantic_index/Cargo.toml +++ b/crates/semantic_index/Cargo.toml @@ -46,6 +46,7 @@ rpc = { path = "../rpc", features = ["test-support"] } workspace = { path = "../workspace", features = ["test-support"] } settings = { path = "../settings", features = ["test-support"]} +pretty_assertions.workspace = true rand.workspace = true unindent.workspace = true tempdir.workspace = true diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs index 663f0f473b..0d2aeb60fb 100644 --- a/crates/semantic_index/src/parsing.rs +++ b/crates/semantic_index/src/parsing.rs @@ -1,6 +1,6 @@ use anyhow::{anyhow, Ok, Result}; -use language::Language; -use std::{ops::Range, path::Path, sync::Arc}; +use language::{Grammar, Language}; +use std::{cmp, collections::HashSet, ops::Range, path::Path, sync::Arc}; use tree_sitter::{Parser, QueryCursor}; #[derive(Debug, PartialEq, Clone)] @@ -22,6 +22,20 @@ pub struct CodeContextRetriever { pub cursor: QueryCursor, } +// Every match has an item, this represents the fundamental treesitter symbol and anchors the search +// Every match has one or more 'name' captures. These indicate the display range of the item for deduplication. +// If there are preceeding comments, we track this with a context capture +// If there is a piece that should be collapsed in hierarchical queries, we capture it with a collapse capture +// If there is a piece that should be kept inside a collapsed node, we capture it with a keep capture +#[derive(Debug, Clone)] +pub struct CodeContextMatch { + pub start_col: usize, + pub item_range: Range, + pub name_range: Range, + pub context_ranges: Vec>, + pub collapse_ranges: Vec>, +} + impl CodeContextRetriever { pub fn new() -> Self { Self { @@ -49,6 +63,82 @@ impl CodeContextRetriever { }]) } + fn get_matches_in_file( + &mut self, + content: &str, + grammar: &Arc, + ) -> Result> { + let embedding_config = grammar + .embedding_config + .as_ref() + .ok_or_else(|| anyhow!("no embedding queries"))?; + self.parser.set_language(grammar.ts_language).unwrap(); + + let tree = self + .parser + .parse(&content, None) + .ok_or_else(|| anyhow!("parsing failed"))?; + + let mut captures: Vec = Vec::new(); + let mut collapse_ranges: Vec> = Vec::new(); + let mut keep_ranges: Vec> = Vec::new(); + for mat in self.cursor.matches( + &embedding_config.query, + tree.root_node(), + content.as_bytes(), + ) { + let mut start_col = 0; + let mut item_range: Option> = None; + let mut name_range: Option> = None; + let mut context_ranges: Vec> = Vec::new(); + collapse_ranges.clear(); + keep_ranges.clear(); + for capture in mat.captures { + if capture.index == embedding_config.item_capture_ix { + item_range = Some(capture.node.byte_range()); + start_col = capture.node.start_position().column; + } else if capture.index == embedding_config.name_capture_ix { + name_range = Some(capture.node.byte_range()); + } else if Some(capture.index) == embedding_config.context_capture_ix { + context_ranges.push(capture.node.byte_range()); + } else if Some(capture.index) == embedding_config.collapse_capture_ix { + collapse_ranges.push(capture.node.byte_range()); + } else if Some(capture.index) == embedding_config.keep_capture_ix { + keep_ranges.push(capture.node.byte_range()); + } + } + + if item_range.is_some() && name_range.is_some() { + let item_range = item_range.unwrap(); + captures.push(CodeContextMatch { + start_col, + item_range, + name_range: name_range.unwrap(), + context_ranges, + collapse_ranges: subtract_ranges(&collapse_ranges, &keep_ranges), + }); + } + } + Ok(captures) + } + + pub fn parse_file_with_template( + &mut self, + relative_path: &Path, + content: &str, + language: Arc, + ) -> Result> { + let language_name = language.name(); + let mut documents = self.parse_file(relative_path, content, language)?; + for document in &mut documents { + document.content = CODE_CONTEXT_TEMPLATE + .replace("", relative_path.to_string_lossy().as_ref()) + .replace("", language_name.as_ref()) + .replace("item", &document.content); + } + Ok(documents) + } + pub fn parse_file( &mut self, relative_path: &Path, @@ -62,78 +152,131 @@ impl CodeContextRetriever { let grammar = language .grammar() .ok_or_else(|| anyhow!("no grammar for language"))?; - let embedding_config = grammar - .embedding_config - .as_ref() - .ok_or_else(|| anyhow!("no embedding queries"))?; - - self.parser.set_language(grammar.ts_language).unwrap(); - - let tree = self - .parser - .parse(&content, None) - .ok_or_else(|| anyhow!("parsing failed"))?; - - let mut documents = Vec::new(); // Iterate through query matches - let mut name_ranges: Vec> = vec![]; - for mat in self.cursor.matches( - &embedding_config.query, - tree.root_node(), - content.as_bytes(), - ) { - let mut name: Vec<&str> = vec![]; - let mut item: Option<&str> = None; - let mut byte_range: Option> = None; - let mut context_spans: Vec<&str> = vec![]; - for capture in mat.captures { - if capture.index == embedding_config.item_capture_ix { - byte_range = Some(capture.node.byte_range()); - item = content.get(capture.node.byte_range()); - } else if capture.index == embedding_config.name_capture_ix { - let name_range = capture.node.byte_range(); - if name_ranges.contains(&name_range) { - continue; - } - name_ranges.push(name_range.clone()); - if let Some(name_content) = content.get(name_range.clone()) { - name.push(name_content); - } - } + let matches = self.get_matches_in_file(content, grammar)?; - if let Some(context_capture_ix) = embedding_config.context_capture_ix { - if capture.index == context_capture_ix { - if let Some(context) = content.get(capture.node.byte_range()) { - context_spans.push(context); - } - } + let language_scope = language.default_scope(); + let placeholder = language_scope.collapsed_placeholder(); + + let mut documents = Vec::new(); + let mut collapsed_ranges_within = Vec::new(); + let mut parsed_name_ranges = HashSet::new(); + for (i, context_match) in matches.iter().enumerate() { + if parsed_name_ranges.contains(&context_match.name_range) { + continue; + } + + collapsed_ranges_within.clear(); + for remaining_match in &matches[(i + 1)..] { + if context_match + .item_range + .contains(&remaining_match.item_range.start) + && context_match + .item_range + .contains(&remaining_match.item_range.end) + { + collapsed_ranges_within.extend(remaining_match.collapse_ranges.iter().cloned()); + } else { + break; } } - if let Some((item, byte_range)) = item.zip(byte_range) { - if !name.is_empty() { - let item = if context_spans.is_empty() { - item.to_string() - } else { - format!("{}\n{}", context_spans.join("\n"), item) - }; + let mut document_content = String::new(); + for context_range in &context_match.context_ranges { + document_content.push_str(&content[context_range.clone()]); + document_content.push_str("\n"); + } - let document_text = CODE_CONTEXT_TEMPLATE - .replace("", relative_path.to_str().unwrap()) - .replace("", &language.name().to_lowercase()) - .replace("", item.as_str()); - - documents.push(Document { - range: byte_range, - content: document_text, - embedding: Vec::new(), - name: name.join(" ").to_string(), - }); + let mut offset = context_match.item_range.start; + for collapsed_range in &collapsed_ranges_within { + if collapsed_range.start > offset { + add_content_from_range( + &mut document_content, + content, + offset..collapsed_range.start, + context_match.start_col, + ); } + document_content.push_str(placeholder); + offset = collapsed_range.end; + } + + if offset < context_match.item_range.end { + add_content_from_range( + &mut document_content, + content, + offset..context_match.item_range.end, + context_match.start_col, + ); + } + + if let Some(name) = content.get(context_match.name_range.clone()) { + parsed_name_ranges.insert(context_match.name_range.clone()); + documents.push(Document { + name: name.to_string(), + content: document_content, + range: context_match.item_range.clone(), + embedding: vec![], + }) } } return Ok(documents); } } + +pub(crate) fn subtract_ranges( + ranges: &[Range], + ranges_to_subtract: &[Range], +) -> Vec> { + let mut result = Vec::new(); + + let mut ranges_to_subtract = ranges_to_subtract.iter().peekable(); + + for range in ranges { + let mut offset = range.start; + + while offset < range.end { + if let Some(range_to_subtract) = ranges_to_subtract.peek() { + if offset < range_to_subtract.start { + let next_offset = cmp::min(range_to_subtract.start, range.end); + result.push(offset..next_offset); + offset = next_offset; + } else { + let next_offset = cmp::min(range_to_subtract.end, range.end); + offset = next_offset; + } + + if offset >= range_to_subtract.end { + ranges_to_subtract.next(); + } + } else { + result.push(offset..range.end); + offset = range.end; + } + } + } + + result +} + +fn add_content_from_range( + output: &mut String, + content: &str, + range: Range, + start_col: usize, +) { + for mut line in content.get(range.clone()).unwrap_or("").lines() { + for _ in 0..start_col { + if line.starts_with(' ') { + line = &line[1..]; + } else { + break; + } + } + output.push_str(line); + output.push('\n'); + } + output.pop(); +} diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index 44ce45f457..271fd741a6 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -409,7 +409,11 @@ impl SemanticIndex { ) { if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err() { if let Some(documents) = retriever - .parse_file(&pending_file.relative_path, &content, pending_file.language) + .parse_file_with_template( + &pending_file.relative_path, + &content, + pending_file.language, + ) .log_err() { log::trace!( @@ -657,6 +661,8 @@ impl SemanticIndex { }) .await?; + dbg!(&documents); + let mut tasks = Vec::new(); let mut ranges = Vec::new(); let weak_project = project.downgrade(); diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs index 63b28798ad..c54d5079d3 100644 --- a/crates/semantic_index/src/semantic_index_tests.rs +++ b/crates/semantic_index/src/semantic_index_tests.rs @@ -1,7 +1,7 @@ use crate::{ db::dot, embedding::EmbeddingProvider, - parsing::{CodeContextRetriever, Document}, + parsing::{subtract_ranges, CodeContextRetriever, Document}, semantic_index_settings::SemanticIndexSettings, SemanticIndex, }; @@ -9,6 +9,7 @@ use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry, ToOffset}; +use pretty_assertions::assert_eq; use project::{project_settings::ProjectSettings, FakeFs, Fs, Project}; use rand::{rngs::StdRng, Rng}; use serde_json::json; @@ -104,7 +105,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) { assert_eq!(search_results[0].range.start.to_offset(buffer), 0); assert_eq!( buffer.file().unwrap().path().as_ref(), - Path::new("file1.rs") + Path::new("src/file1.rs") ); }); @@ -147,503 +148,548 @@ async fn test_code_context_retrieval_rust() { let text = " /// A doc comment /// that spans multiple lines + #[gpui::test] fn a() { b } impl C for D { } + + impl E { + // This is also a preceding comment + pub fn function_1() -> Option<()> { + todo!(); + } + + // This is a preceding comment + fn function_2() -> Result<()> { + todo!(); + } + } " .unindent(); - let parsed_files = retriever + let documents = retriever .parse_file(Path::new("foo.rs"), &text, language) .unwrap(); - assert_eq!( - parsed_files, + assert_documents_eq( + &documents, &[ - Document { - name: "a".into(), - range: text.find("fn a").unwrap()..(text.find("}").unwrap() + 1), - content: " - The below code snippet is from file 'foo.rs' - - ```rust - /// A doc comment - /// that spans multiple lines - fn a() { - b - } - ```" + ( + " + /// A doc comment + /// that spans multiple lines + #[gpui::test] + fn a() { + b + }" .unindent(), - embedding: vec![], - }, - Document { - name: "C for D".into(), - range: text.find("impl C").unwrap()..(text.rfind("}").unwrap() + 1), - content: " - The below code snippet is from file 'foo.rs' - - ```rust - impl C for D { - } - ```" + text.find("fn a").unwrap(), + ), + ( + " + impl C for D { + }" .unindent(), - embedding: vec![], - } - ] + text.find("impl C").unwrap(), + ), + ( + " + impl E { + // This is also a preceding comment + pub fn function_1() -> Option<()> { /* ... */ } + + // This is a preceding comment + fn function_2() -> Result<()> { /* ... */ } + }" + .unindent(), + text.find("impl E").unwrap(), + ), + ( + " + // This is also a preceding comment + pub fn function_1() -> Option<()> { + todo!(); + }" + .unindent(), + text.find("pub fn function_1").unwrap(), + ), + ( + " + // This is a preceding comment + fn function_2() -> Result<()> { + todo!(); + }" + .unindent(), + text.find("fn function_2").unwrap(), + ), + ], ); } -#[gpui::test] -async fn test_code_context_retrieval_javascript() { - let language = js_lang(); - let mut retriever = CodeContextRetriever::new(); - - let text = " - /* globals importScripts, backend */ - function _authorize() {} - - /** - * Sometimes the frontend build is way faster than backend. - */ - export async function authorizeBank() { - _authorize(pushModal, upgradingAccountId, {}); - } - - export class SettingsPage { - /* This is a test setting */ - constructor(page) { - this.page = page; - } - } - - /* This is a test comment */ - class TestClass {} - - /* Schema for editor_events in Clickhouse. */ - export interface ClickhouseEditorEvent { - installation_id: string - operation: string - } - " - .unindent(); - - let parsed_files = retriever - .parse_file(Path::new("foo.js"), &text, language) - .unwrap(); - - let test_documents = &[ - Document { - name: "function _authorize".into(), - range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1), - content: " - The below code snippet is from file 'foo.js' - - ```javascript - /* globals importScripts, backend */ - function _authorize() {} - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "async function authorizeBank".into(), - range: text.find("export async").unwrap()..223, - content: " - The below code snippet is from file 'foo.js' - - ```javascript - /** - * Sometimes the frontend build is way faster than backend. - */ - export async function authorizeBank() { - _authorize(pushModal, upgradingAccountId, {}); - } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "class SettingsPage".into(), - range: 225..343, - content: " - The below code snippet is from file 'foo.js' - - ```javascript - export class SettingsPage { - /* This is a test setting */ - constructor(page) { - this.page = page; - } - } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "constructor".into(), - range: 290..341, - content: " - The below code snippet is from file 'foo.js' - - ```javascript - /* This is a test setting */ - constructor(page) { - this.page = page; - } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "class TestClass".into(), - range: 374..392, - content: " - The below code snippet is from file 'foo.js' - - ```javascript - /* This is a test comment */ - class TestClass {} - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "interface ClickhouseEditorEvent".into(), - range: 440..532, - content: " - The below code snippet is from file 'foo.js' - - ```javascript - /* Schema for editor_events in Clickhouse. */ - export interface ClickhouseEditorEvent { - installation_id: string - operation: string - } - ```" - .unindent(), - embedding: vec![], - }, - ]; - - for idx in 0..test_documents.len() { - assert_eq!(test_documents[idx], parsed_files[idx]); - } +fn assert_documents_eq( + documents: &[Document], + expected_contents_and_start_offsets: &[(String, usize)], +) { + assert_eq!( + documents + .iter() + .map(|document| (document.content.clone(), document.range.start)) + .collect::>(), + expected_contents_and_start_offsets + ); } -#[gpui::test] -async fn test_code_context_retrieval_elixir() { - let language = elixir_lang(); - let mut retriever = CodeContextRetriever::new(); +// #[gpui::test] +// async fn test_code_context_retrieval_javascript() { +// let language = js_lang(); +// let mut retriever = CodeContextRetriever::new(); - let text = r#" -defmodule File.Stream do - @moduledoc """ - Defines a `File.Stream` struct returned by `File.stream!/3`. +// let text = " +// /* globals importScripts, backend */ +// function _authorize() {} - The following fields are public: +// /** +// * Sometimes the frontend build is way faster than backend. +// */ +// export async function authorizeBank() { +// _authorize(pushModal, upgradingAccountId, {}); +// } - * `path` - the file path - * `modes` - the file modes - * `raw` - a boolean indicating if bin functions should be used - * `line_or_bytes` - if reading should read lines or a given number of bytes - * `node` - the node the file belongs to +// export class SettingsPage { +// /* This is a test setting */ +// constructor(page) { +// this.page = page; +// } +// } - """ +// /* This is a test comment */ +// class TestClass {} - defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil +// /* Schema for editor_events in Clickhouse. */ +// export interface ClickhouseEditorEvent { +// installation_id: string +// operation: string +// } +// " +// .unindent(); - @type t :: %__MODULE__{} +// let parsed_files = retriever +// .parse_file(Path::new("foo.js"), &text, language) +// .unwrap(); - @doc false - def __build__(path, modes, line_or_bytes) do - raw = :lists.keyfind(:encoding, 1, modes) == false +// let test_documents = &[ +// Document { +// name: "function _authorize".into(), +// range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1), +// content: " +// The below code snippet is from file 'foo.js' - modes = - case raw do - true -> - case :lists.keyfind(:read_ahead, 1, modes) do - {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] - {:read_ahead, _} -> [:raw | modes] - false -> [:raw, :read_ahead | modes] - end +// ```javascript +// /* globals importScripts, backend */ +// function _authorize() {} +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "async function authorizeBank".into(), +// range: text.find("export async").unwrap()..223, +// content: " +// The below code snippet is from file 'foo.js' - false -> - modes - end +// ```javascript +// /** +// * Sometimes the frontend build is way faster than backend. +// */ +// export async function authorizeBank() { +// _authorize(pushModal, upgradingAccountId, {}); +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "class SettingsPage".into(), +// range: 225..343, +// content: " +// The below code snippet is from file 'foo.js' - %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} +// ```javascript +// export class SettingsPage { +// /* This is a test setting */ +// constructor(page) { +// this.page = page; +// } +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "constructor".into(), +// range: 290..341, +// content: " +// The below code snippet is from file 'foo.js' - end -"# - .unindent(); +// ```javascript +// /* This is a test setting */ +// constructor(page) { +// this.page = page; +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "class TestClass".into(), +// range: 374..392, +// content: " +// The below code snippet is from file 'foo.js' - let parsed_files = retriever - .parse_file(Path::new("foo.ex"), &text, language) - .unwrap(); +// ```javascript +// /* This is a test comment */ +// class TestClass {} +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "interface ClickhouseEditorEvent".into(), +// range: 440..532, +// content: " +// The below code snippet is from file 'foo.js' - let test_documents = &[ - Document{ - name: "defmodule File.Stream".into(), - range: 0..1132, - content: r#" - The below code snippet is from file 'foo.ex' +// ```javascript +// /* Schema for editor_events in Clickhouse. */ +// export interface ClickhouseEditorEvent { +// installation_id: string +// operation: string +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// ]; - ```elixir - defmodule File.Stream do - @moduledoc """ - Defines a `File.Stream` struct returned by `File.stream!/3`. +// for idx in 0..test_documents.len() { +// assert_eq!(test_documents[idx], parsed_files[idx]); +// } +// } - The following fields are public: +// #[gpui::test] +// async fn test_code_context_retrieval_elixir() { +// let language = elixir_lang(); +// let mut retriever = CodeContextRetriever::new(); - * `path` - the file path - * `modes` - the file modes - * `raw` - a boolean indicating if bin functions should be used - * `line_or_bytes` - if reading should read lines or a given number of bytes - * `node` - the node the file belongs to +// let text = r#" +// defmodule File.Stream do +// @moduledoc """ +// Defines a `File.Stream` struct returned by `File.stream!/3`. - """ +// The following fields are public: - defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil +// * `path` - the file path +// * `modes` - the file modes +// * `raw` - a boolean indicating if bin functions should be used +// * `line_or_bytes` - if reading should read lines or a given number of bytes +// * `node` - the node the file belongs to - @type t :: %__MODULE__{} +// """ - @doc false - def __build__(path, modes, line_or_bytes) do - raw = :lists.keyfind(:encoding, 1, modes) == false +// defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil - modes = - case raw do - true -> - case :lists.keyfind(:read_ahead, 1, modes) do - {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] - {:read_ahead, _} -> [:raw | modes] - false -> [:raw, :read_ahead | modes] - end +// @type t :: %__MODULE__{} - false -> - modes - end +// @doc false +// def __build__(path, modes, line_or_bytes) do +// raw = :lists.keyfind(:encoding, 1, modes) == false - %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} +// modes = +// case raw do +// true -> +// case :lists.keyfind(:read_ahead, 1, modes) do +// {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] +// {:read_ahead, _} -> [:raw | modes] +// false -> [:raw, :read_ahead | modes] +// end - end - ```"#.unindent(), - embedding: vec![], - }, - Document { - name: "def __build__".into(), - range: 574..1132, - content: r#" -The below code snippet is from file 'foo.ex' +// false -> +// modes +// end -```elixir -@doc false -def __build__(path, modes, line_or_bytes) do - raw = :lists.keyfind(:encoding, 1, modes) == false +// %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} - modes = - case raw do - true -> - case :lists.keyfind(:read_ahead, 1, modes) do - {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] - {:read_ahead, _} -> [:raw | modes] - false -> [:raw, :read_ahead | modes] - end +// end +// "# +// .unindent(); - false -> - modes - end +// let parsed_files = retriever +// .parse_file(Path::new("foo.ex"), &text, language) +// .unwrap(); - %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} +// let test_documents = &[ +// Document{ +// name: "defmodule File.Stream".into(), +// range: 0..1132, +// content: r#" +// The below code snippet is from file 'foo.ex' - end -```"# - .unindent(), - embedding: vec![], - }]; +// ```elixir +// defmodule File.Stream do +// @moduledoc """ +// Defines a `File.Stream` struct returned by `File.stream!/3`. - for idx in 0..test_documents.len() { - assert_eq!(test_documents[idx], parsed_files[idx]); - } -} +// The following fields are public: -#[gpui::test] -async fn test_code_context_retrieval_cpp() { - let language = cpp_lang(); - let mut retriever = CodeContextRetriever::new(); +// * `path` - the file path +// * `modes` - the file modes +// * `raw` - a boolean indicating if bin functions should be used +// * `line_or_bytes` - if reading should read lines or a given number of bytes +// * `node` - the node the file belongs to - let text = " - /** - * @brief Main function - * @returns 0 on exit - */ - int main() { return 0; } +// """ - /** - * This is a test comment - */ - class MyClass { // The class - public: // Access specifier - int myNum; // Attribute (int variable) - string myString; // Attribute (string variable) - }; +// defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil - // This is a test comment - enum Color { red, green, blue }; +// @type t :: %__MODULE__{} - /** This is a preceeding block comment - * This is the second line - */ - struct { // Structure declaration - int myNum; // Member (int variable) - string myString; // Member (string variable) - } myStructure; +// @doc false +// def __build__(path, modes, line_or_bytes) do +// raw = :lists.keyfind(:encoding, 1, modes) == false - /** - * @brief Matrix class. - */ - template ::value || std::is_floating_point::value, - bool>::type> - class Matrix2 { - std::vector> _mat; +// modes = +// case raw do +// true -> +// case :lists.keyfind(:read_ahead, 1, modes) do +// {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] +// {:read_ahead, _} -> [:raw | modes] +// false -> [:raw, :read_ahead | modes] +// end - public: - /** - * @brief Constructor - * @tparam Integer ensuring integers are being evaluated and not other - * data types. - * @param size denoting the size of Matrix as size x size - */ - template ::value, - Integer>::type> - explicit Matrix(const Integer size) { - for (size_t i = 0; i < size; ++i) { - _mat.emplace_back(std::vector(size, 0)); - } - } - }" - .unindent(); +// false -> +// modes +// end - let parsed_files = retriever - .parse_file(Path::new("foo.cpp"), &text, language) - .unwrap(); +// %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} - let test_documents = &[ - Document { - name: "int main".into(), - range: 54..78, - content: " - The below code snippet is from file 'foo.cpp' +// end +// ```"#.unindent(), +// embedding: vec![], +// }, +// Document { +// name: "def __build__".into(), +// range: 574..1132, +// content: r#" +// The below code snippet is from file 'foo.ex' - ```cpp - /** - * @brief Main function - * @returns 0 on exit - */ - int main() { return 0; } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "class MyClass".into(), - range: 112..295, - content: " - The below code snippet is from file 'foo.cpp' +// ```elixir +// @doc false +// def __build__(path, modes, line_or_bytes) do +// raw = :lists.keyfind(:encoding, 1, modes) == false - ```cpp - /** - * This is a test comment - */ - class MyClass { // The class - public: // Access specifier - int myNum; // Attribute (int variable) - string myString; // Attribute (string variable) - } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "enum Color".into(), - range: 324..355, - content: " - The below code snippet is from file 'foo.cpp' +// modes = +// case raw do +// true -> +// case :lists.keyfind(:read_ahead, 1, modes) do +// {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] +// {:read_ahead, _} -> [:raw | modes] +// false -> [:raw, :read_ahead | modes] +// end - ```cpp - // This is a test comment - enum Color { red, green, blue } - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "struct myStructure".into(), - range: 428..581, - content: " - The below code snippet is from file 'foo.cpp' +// false -> +// modes +// end - ```cpp - /** This is a preceeding block comment - * This is the second line - */ - struct { // Structure declaration - int myNum; // Member (int variable) - string myString; // Member (string variable) - } myStructure; - ```" - .unindent(), - embedding: vec![], - }, - Document { - name: "class Matrix2".into(), - range: 613..1342, - content: " - The below code snippet is from file 'foo.cpp' +// %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} - ```cpp - /** - * @brief Matrix class. - */ - template ::value || std::is_floating_point::value, - bool>::type> - class Matrix2 { - std::vector> _mat; +// end +// ```"# +// .unindent(), +// embedding: vec![], +// }]; - public: - /** - * @brief Constructor - * @tparam Integer ensuring integers are being evaluated and not other - * data types. - * @param size denoting the size of Matrix as size x size - */ - template ::value, - Integer>::type> - explicit Matrix(const Integer size) { - for (size_t i = 0; i < size; ++i) { - _mat.emplace_back(std::vector(size, 0)); - } - } - } - ```" - .unindent(), - embedding: vec![], - }, - ]; +// for idx in 0..test_documents.len() { +// assert_eq!(test_documents[idx], parsed_files[idx]); +// } +// } - for idx in 0..test_documents.len() { - assert_eq!(test_documents[idx], parsed_files[idx]); - } -} +// #[gpui::test] +// async fn test_code_context_retrieval_cpp() { +// let language = cpp_lang(); +// let mut retriever = CodeContextRetriever::new(); + +// let text = " +// /** +// * @brief Main function +// * @returns 0 on exit +// */ +// int main() { return 0; } + +// /** +// * This is a test comment +// */ +// class MyClass { // The class +// public: // Access specifier +// int myNum; // Attribute (int variable) +// string myString; // Attribute (string variable) +// }; + +// // This is a test comment +// enum Color { red, green, blue }; + +// /** This is a preceding block comment +// * This is the second line +// */ +// struct { // Structure declaration +// int myNum; // Member (int variable) +// string myString; // Member (string variable) +// } myStructure; + +// /** +// * @brief Matrix class. +// */ +// template ::value || std::is_floating_point::value, +// bool>::type> +// class Matrix2 { +// std::vector> _mat; + +// public: +// /** +// * @brief Constructor +// * @tparam Integer ensuring integers are being evaluated and not other +// * data types. +// * @param size denoting the size of Matrix as size x size +// */ +// template ::value, +// Integer>::type> +// explicit Matrix(const Integer size) { +// for (size_t i = 0; i < size; ++i) { +// _mat.emplace_back(std::vector(size, 0)); +// } +// } +// }" +// .unindent(); + +// let parsed_files = retriever +// .parse_file(Path::new("foo.cpp"), &text, language) +// .unwrap(); + +// let test_documents = &[ +// Document { +// name: "int main".into(), +// range: 54..78, +// content: " +// The below code snippet is from file 'foo.cpp' + +// ```cpp +// /** +// * @brief Main function +// * @returns 0 on exit +// */ +// int main() { return 0; } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "class MyClass".into(), +// range: 112..295, +// content: " +// The below code snippet is from file 'foo.cpp' + +// ```cpp +// /** +// * This is a test comment +// */ +// class MyClass { // The class +// public: // Access specifier +// int myNum; // Attribute (int variable) +// string myString; // Attribute (string variable) +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "enum Color".into(), +// range: 324..355, +// content: " +// The below code snippet is from file 'foo.cpp' + +// ```cpp +// // This is a test comment +// enum Color { red, green, blue } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "struct myStructure".into(), +// range: 428..581, +// content: " +// The below code snippet is from file 'foo.cpp' + +// ```cpp +// /** This is a preceding block comment +// * This is the second line +// */ +// struct { // Structure declaration +// int myNum; // Member (int variable) +// string myString; // Member (string variable) +// } myStructure; +// ```" +// .unindent(), +// embedding: vec![], +// }, +// Document { +// name: "class Matrix2".into(), +// range: 613..1342, +// content: " +// The below code snippet is from file 'foo.cpp' + +// ```cpp +// /** +// * @brief Matrix class. +// */ +// template ::value || std::is_floating_point::value, +// bool>::type> +// class Matrix2 { +// std::vector> _mat; + +// public: +// /** +// * @brief Constructor +// * @tparam Integer ensuring integers are being evaluated and not other +// * data types. +// * @param size denoting the size of Matrix as size x size +// */ +// template ::value, +// Integer>::type> +// explicit Matrix(const Integer size) { +// for (size_t i = 0; i < size; ++i) { +// _mat.emplace_back(std::vector(size, 0)); +// } +// } +// } +// ```" +// .unindent(), +// embedding: vec![], +// }, +// ]; + +// for idx in 0..test_documents.len() { +// assert_eq!(test_documents[idx], parsed_files[idx]); +// } +// } #[gpui::test] fn test_dot_product(mut rng: StdRng) { @@ -826,6 +872,7 @@ fn rust_lang() -> Arc { LanguageConfig { name: "Rust".into(), path_suffixes: vec!["rs".into()], + collapsed_placeholder: " /* ... */ ".to_string(), ..Default::default() }, Some(tree_sitter_rust::language()), @@ -833,54 +880,32 @@ fn rust_lang() -> Arc { .with_embedding_query( r#" ( - (line_comment)* @context + [(line_comment) (attribute_item)]* @context . - (enum_item - name: (_) @name) @item - ) + [ + (struct_item + name: (_) @name) - ( - (line_comment)* @context - . - (struct_item - name: (_) @name) @item - ) + (enum_item + name: (_) @name) - ( - (line_comment)* @context - . - (impl_item - trait: (_)? @name - "for"? @name - type: (_) @name) @item - ) + (impl_item + trait: (_)? @name + "for"? @name + type: (_) @name) - ( - (line_comment)* @context - . - (trait_item - name: (_) @name) @item - ) + (trait_item + name: (_) @name) - ( - (line_comment)* @context - . - (function_item - name: (_) @name) @item - ) + (function_item + name: (_) @name + body: (block + "{" @keep + "}" @keep) @collapse) - ( - (line_comment)* @context - . - (macro_definition - name: (_) @name) @item - ) - - ( - (line_comment)* @context - . - (function_signature_item - name: (_) @name) @item + (macro_definition + name: (_) @name) + ] @item ) "#, ) @@ -1023,3 +1048,15 @@ fn elixir_lang() -> Arc { .unwrap(), ) } + +#[gpui::test] +fn test_subtract_ranges() { + // collapsed_ranges: Vec>, keep_ranges: Vec> + + assert_eq!( + subtract_ranges(&[0..5, 10..21], &[0..1, 4..5]), + vec![1..4, 10..21] + ); + + assert_eq!(subtract_ranges(&[0..5], &[1..2]), &[0..1, 2..5]); +} diff --git a/crates/zed/src/languages/rust/config.toml b/crates/zed/src/languages/rust/config.toml index 705287f0a7..8216ba0a74 100644 --- a/crates/zed/src/languages/rust/config.toml +++ b/crates/zed/src/languages/rust/config.toml @@ -10,3 +10,4 @@ brackets = [ { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] }, { start = "/*", end = " */", close = true, newline = false, not_in = ["string", "comment"] }, ] +collapsed_placeholder = " /* ... */ " diff --git a/crates/zed/src/languages/rust/embedding.scm b/crates/zed/src/languages/rust/embedding.scm index 66e4083de5..e4218382a9 100644 --- a/crates/zed/src/languages/rust/embedding.scm +++ b/crates/zed/src/languages/rust/embedding.scm @@ -1,50 +1,28 @@ ( - (line_comment)* @context + [(line_comment) (attribute_item)]* @context . - (enum_item - name: (_) @name) @item -) + [ + (struct_item + name: (_) @name) -( - (line_comment)* @context - . - (struct_item - name: (_) @name) @item -) + (enum_item + name: (_) @name) -( - (line_comment)* @context - . - (impl_item - trait: (_)? @name - "for"? @name - type: (_) @name) @item -) + (impl_item + trait: (_)? @name + "for"? @name + type: (_) @name) -( - (line_comment)* @context - . - (trait_item - name: (_) @name) @item -) + (trait_item + name: (_) @name) -( - (line_comment)* @context - . - (function_item - name: (_) @name) @item -) + (function_item + name: (_) @name + body: (block + "{" @keep + "}" @keep) @collapse) -( - (line_comment)* @context - . - (macro_definition - name: (_) @name) @item -) - -( - (line_comment)* @context - . - (function_signature_item - name: (_) @name) @item -) + (macro_definition + name: (_) @name) + ] @item + )