mirror of
https://github.com/zed-industries/zed.git
synced 2024-12-26 10:40:54 +00:00
update treesitter parsing to accommodate collapsed nested functions
Co-authored-by: maxbrunsfeld <max@zed.dev>
This commit is contained in:
parent
0e071919a0
commit
9809ec3d70
9 changed files with 773 additions and 584 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -6486,6 +6486,7 @@ dependencies = [
|
|||
"parking_lot 0.11.2",
|
||||
"picker",
|
||||
"postage",
|
||||
"pretty_assertions",
|
||||
"project",
|
||||
"rand 0.8.5",
|
||||
"rpc",
|
||||
|
@ -7991,7 +7992,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.20.10"
|
||||
source = "git+https://github.com/tree-sitter/tree-sitter?rev=49226023693107fba9a1191136a4f47f38cdca73#49226023693107fba9a1191136a4f47f38cdca73"
|
||||
source = "git+https://github.com/tree-sitter/tree-sitter?rev=1c65ca24bc9a734ab70115188f465e12eecf224e#1c65ca24bc9a734ab70115188f465e12eecf224e"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
|
|
|
@ -130,7 +130,7 @@ tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml",
|
|||
tree-sitter-lua = "0.0.14"
|
||||
|
||||
[patch.crates-io]
|
||||
tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "49226023693107fba9a1191136a4f47f38cdca73" }
|
||||
tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1c65ca24bc9a734ab70115188f465e12eecf224e" }
|
||||
async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
|
||||
|
||||
# TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457
|
||||
|
|
|
@ -339,6 +339,8 @@ pub struct LanguageConfig {
|
|||
#[serde(default)]
|
||||
pub line_comment: Option<Arc<str>>,
|
||||
#[serde(default)]
|
||||
pub collapsed_placeholder: String,
|
||||
#[serde(default)]
|
||||
pub block_comment: Option<(Arc<str>, Arc<str>)>,
|
||||
#[serde(default)]
|
||||
pub overrides: HashMap<String, LanguageConfigOverride>,
|
||||
|
@ -408,6 +410,7 @@ impl Default for LanguageConfig {
|
|||
line_comment: Default::default(),
|
||||
block_comment: Default::default(),
|
||||
overrides: Default::default(),
|
||||
collapsed_placeholder: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -525,6 +528,8 @@ pub struct EmbeddingConfig {
|
|||
pub item_capture_ix: u32,
|
||||
pub name_capture_ix: u32,
|
||||
pub context_capture_ix: Option<u32>,
|
||||
pub collapse_capture_ix: Option<u32>,
|
||||
pub keep_capture_ix: Option<u32>,
|
||||
}
|
||||
|
||||
struct InjectionConfig {
|
||||
|
@ -1246,12 +1251,16 @@ impl Language {
|
|||
let mut item_capture_ix = None;
|
||||
let mut name_capture_ix = None;
|
||||
let mut context_capture_ix = None;
|
||||
let mut collapse_capture_ix = None;
|
||||
let mut keep_capture_ix = None;
|
||||
get_capture_indices(
|
||||
&query,
|
||||
&mut [
|
||||
("item", &mut item_capture_ix),
|
||||
("name", &mut name_capture_ix),
|
||||
("context", &mut context_capture_ix),
|
||||
("keep", &mut keep_capture_ix),
|
||||
("collapse", &mut collapse_capture_ix),
|
||||
],
|
||||
);
|
||||
if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
|
||||
|
@ -1260,6 +1269,8 @@ impl Language {
|
|||
item_capture_ix,
|
||||
name_capture_ix,
|
||||
context_capture_ix,
|
||||
collapse_capture_ix,
|
||||
keep_capture_ix,
|
||||
});
|
||||
}
|
||||
Ok(self)
|
||||
|
@ -1544,9 +1555,20 @@ impl Language {
|
|||
pub fn grammar(&self) -> Option<&Arc<Grammar>> {
|
||||
self.grammar.as_ref()
|
||||
}
|
||||
|
||||
pub fn default_scope(self: &Arc<Self>) -> LanguageScope {
|
||||
LanguageScope {
|
||||
language: self.clone(),
|
||||
override_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageScope {
|
||||
pub fn collapsed_placeholder(&self) -> &str {
|
||||
self.language.config.collapsed_placeholder.as_ref()
|
||||
}
|
||||
|
||||
pub fn line_comment_prefix(&self) -> Option<&Arc<str>> {
|
||||
Override::as_option(
|
||||
self.config_override().map(|o| &o.line_comment),
|
||||
|
|
|
@ -46,6 +46,7 @@ rpc = { path = "../rpc", features = ["test-support"] }
|
|||
workspace = { path = "../workspace", features = ["test-support"] }
|
||||
settings = { path = "../settings", features = ["test-support"]}
|
||||
|
||||
pretty_assertions.workspace = true
|
||||
rand.workspace = true
|
||||
unindent.workspace = true
|
||||
tempdir.workspace = true
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use anyhow::{anyhow, Ok, Result};
|
||||
use language::Language;
|
||||
use std::{ops::Range, path::Path, sync::Arc};
|
||||
use language::{Grammar, Language};
|
||||
use std::{cmp, collections::HashSet, ops::Range, path::Path, sync::Arc};
|
||||
use tree_sitter::{Parser, QueryCursor};
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
|
@ -22,6 +22,20 @@ pub struct CodeContextRetriever {
|
|||
pub cursor: QueryCursor,
|
||||
}
|
||||
|
||||
// Every match has an item, this represents the fundamental treesitter symbol and anchors the search
|
||||
// Every match has one or more 'name' captures. These indicate the display range of the item for deduplication.
|
||||
// If there are preceding comments, we track this with a context capture
|
||||
// If there is a piece that should be collapsed in hierarchical queries, we capture it with a collapse capture
|
||||
// If there is a piece that should be kept inside a collapsed node, we capture it with a keep capture
|
||||
/// One embedding-query match, reduced to the byte ranges and column needed
/// to assemble a document from the file's text.
#[derive(Debug, Clone)]
|
||||
pub struct CodeContextMatch {
|
||||
/// Column of the item node's start position.
pub start_col: usize,
|
||||
/// Byte range of the node captured as the item.
pub item_range: Range<usize>,
|
||||
/// Byte range of the node captured as the name; if a match captures
/// several names, the last one seen is the one stored.
pub name_range: Range<usize>,
|
||||
/// Byte ranges of every node captured as context, in capture order.
pub context_ranges: Vec<Range<usize>>,
|
||||
/// Byte ranges captured for collapsing, with the ranges captured as
/// "keep" subtracted out.
pub collapse_ranges: Vec<Range<usize>>,
|
||||
}
|
||||
|
||||
impl CodeContextRetriever {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
|
@ -49,6 +63,82 @@ impl CodeContextRetriever {
|
|||
}])
|
||||
}
|
||||
|
||||
fn get_matches_in_file(
|
||||
&mut self,
|
||||
content: &str,
|
||||
grammar: &Arc<Grammar>,
|
||||
) -> Result<Vec<CodeContextMatch>> {
|
||||
let embedding_config = grammar
|
||||
.embedding_config
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("no embedding queries"))?;
|
||||
self.parser.set_language(grammar.ts_language).unwrap();
|
||||
|
||||
let tree = self
|
||||
.parser
|
||||
.parse(&content, None)
|
||||
.ok_or_else(|| anyhow!("parsing failed"))?;
|
||||
|
||||
let mut captures: Vec<CodeContextMatch> = Vec::new();
|
||||
let mut collapse_ranges: Vec<Range<usize>> = Vec::new();
|
||||
let mut keep_ranges: Vec<Range<usize>> = Vec::new();
|
||||
for mat in self.cursor.matches(
|
||||
&embedding_config.query,
|
||||
tree.root_node(),
|
||||
content.as_bytes(),
|
||||
) {
|
||||
let mut start_col = 0;
|
||||
let mut item_range: Option<Range<usize>> = None;
|
||||
let mut name_range: Option<Range<usize>> = None;
|
||||
let mut context_ranges: Vec<Range<usize>> = Vec::new();
|
||||
collapse_ranges.clear();
|
||||
keep_ranges.clear();
|
||||
for capture in mat.captures {
|
||||
if capture.index == embedding_config.item_capture_ix {
|
||||
item_range = Some(capture.node.byte_range());
|
||||
start_col = capture.node.start_position().column;
|
||||
} else if capture.index == embedding_config.name_capture_ix {
|
||||
name_range = Some(capture.node.byte_range());
|
||||
} else if Some(capture.index) == embedding_config.context_capture_ix {
|
||||
context_ranges.push(capture.node.byte_range());
|
||||
} else if Some(capture.index) == embedding_config.collapse_capture_ix {
|
||||
collapse_ranges.push(capture.node.byte_range());
|
||||
} else if Some(capture.index) == embedding_config.keep_capture_ix {
|
||||
keep_ranges.push(capture.node.byte_range());
|
||||
}
|
||||
}
|
||||
|
||||
if item_range.is_some() && name_range.is_some() {
|
||||
let item_range = item_range.unwrap();
|
||||
captures.push(CodeContextMatch {
|
||||
start_col,
|
||||
item_range,
|
||||
name_range: name_range.unwrap(),
|
||||
context_ranges,
|
||||
collapse_ranges: subtract_ranges(&collapse_ranges, &keep_ranges),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(captures)
|
||||
}
|
||||
|
||||
pub fn parse_file_with_template(
|
||||
&mut self,
|
||||
relative_path: &Path,
|
||||
content: &str,
|
||||
language: Arc<Language>,
|
||||
) -> Result<Vec<Document>> {
|
||||
let language_name = language.name();
|
||||
let mut documents = self.parse_file(relative_path, content, language)?;
|
||||
for document in &mut documents {
|
||||
document.content = CODE_CONTEXT_TEMPLATE
|
||||
.replace("<path>", relative_path.to_string_lossy().as_ref())
|
||||
.replace("<language>", language_name.as_ref())
|
||||
.replace("item", &document.content);
|
||||
}
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
pub fn parse_file(
|
||||
&mut self,
|
||||
relative_path: &Path,
|
||||
|
@ -62,78 +152,131 @@ impl CodeContextRetriever {
|
|||
let grammar = language
|
||||
.grammar()
|
||||
.ok_or_else(|| anyhow!("no grammar for language"))?;
|
||||
let embedding_config = grammar
|
||||
.embedding_config
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("no embedding queries"))?;
|
||||
|
||||
self.parser.set_language(grammar.ts_language).unwrap();
|
||||
|
||||
let tree = self
|
||||
.parser
|
||||
.parse(&content, None)
|
||||
.ok_or_else(|| anyhow!("parsing failed"))?;
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
// Iterate through query matches
|
||||
let mut name_ranges: Vec<Range<usize>> = vec![];
|
||||
for mat in self.cursor.matches(
|
||||
&embedding_config.query,
|
||||
tree.root_node(),
|
||||
content.as_bytes(),
|
||||
) {
|
||||
let mut name: Vec<&str> = vec![];
|
||||
let mut item: Option<&str> = None;
|
||||
let mut byte_range: Option<Range<usize>> = None;
|
||||
let mut context_spans: Vec<&str> = vec![];
|
||||
for capture in mat.captures {
|
||||
if capture.index == embedding_config.item_capture_ix {
|
||||
byte_range = Some(capture.node.byte_range());
|
||||
item = content.get(capture.node.byte_range());
|
||||
} else if capture.index == embedding_config.name_capture_ix {
|
||||
let name_range = capture.node.byte_range();
|
||||
if name_ranges.contains(&name_range) {
|
||||
continue;
|
||||
}
|
||||
name_ranges.push(name_range.clone());
|
||||
if let Some(name_content) = content.get(name_range.clone()) {
|
||||
name.push(name_content);
|
||||
}
|
||||
}
|
||||
let matches = self.get_matches_in_file(content, grammar)?;
|
||||
|
||||
if let Some(context_capture_ix) = embedding_config.context_capture_ix {
|
||||
if capture.index == context_capture_ix {
|
||||
if let Some(context) = content.get(capture.node.byte_range()) {
|
||||
context_spans.push(context);
|
||||
}
|
||||
}
|
||||
let language_scope = language.default_scope();
|
||||
let placeholder = language_scope.collapsed_placeholder();
|
||||
|
||||
let mut documents = Vec::new();
|
||||
let mut collapsed_ranges_within = Vec::new();
|
||||
let mut parsed_name_ranges = HashSet::new();
|
||||
for (i, context_match) in matches.iter().enumerate() {
|
||||
if parsed_name_ranges.contains(&context_match.name_range) {
|
||||
continue;
|
||||
}
|
||||
|
||||
collapsed_ranges_within.clear();
|
||||
for remaining_match in &matches[(i + 1)..] {
|
||||
if context_match
|
||||
.item_range
|
||||
.contains(&remaining_match.item_range.start)
|
||||
&& context_match
|
||||
.item_range
|
||||
.contains(&remaining_match.item_range.end)
|
||||
{
|
||||
collapsed_ranges_within.extend(remaining_match.collapse_ranges.iter().cloned());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((item, byte_range)) = item.zip(byte_range) {
|
||||
if !name.is_empty() {
|
||||
let item = if context_spans.is_empty() {
|
||||
item.to_string()
|
||||
} else {
|
||||
format!("{}\n{}", context_spans.join("\n"), item)
|
||||
};
|
||||
let mut document_content = String::new();
|
||||
for context_range in &context_match.context_ranges {
|
||||
document_content.push_str(&content[context_range.clone()]);
|
||||
document_content.push_str("\n");
|
||||
}
|
||||
|
||||
let document_text = CODE_CONTEXT_TEMPLATE
|
||||
.replace("<path>", relative_path.to_str().unwrap())
|
||||
.replace("<language>", &language.name().to_lowercase())
|
||||
.replace("<item>", item.as_str());
|
||||
|
||||
documents.push(Document {
|
||||
range: byte_range,
|
||||
content: document_text,
|
||||
embedding: Vec::new(),
|
||||
name: name.join(" ").to_string(),
|
||||
});
|
||||
let mut offset = context_match.item_range.start;
|
||||
for collapsed_range in &collapsed_ranges_within {
|
||||
if collapsed_range.start > offset {
|
||||
add_content_from_range(
|
||||
&mut document_content,
|
||||
content,
|
||||
offset..collapsed_range.start,
|
||||
context_match.start_col,
|
||||
);
|
||||
}
|
||||
document_content.push_str(placeholder);
|
||||
offset = collapsed_range.end;
|
||||
}
|
||||
|
||||
if offset < context_match.item_range.end {
|
||||
add_content_from_range(
|
||||
&mut document_content,
|
||||
content,
|
||||
offset..context_match.item_range.end,
|
||||
context_match.start_col,
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(name) = content.get(context_match.name_range.clone()) {
|
||||
parsed_name_ranges.insert(context_match.name_range.clone());
|
||||
documents.push(Document {
|
||||
name: name.to_string(),
|
||||
content: document_content,
|
||||
range: context_match.item_range.clone(),
|
||||
embedding: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(documents);
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes every part of `ranges` that is covered by `ranges_to_subtract`
/// and returns the remaining pieces in order.
///
/// Both slices are walked front to back in a single pass, so each is
/// expected to be sorted by start and free of internal overlaps. A range in
/// `ranges` may be split into several output ranges, or vanish entirely.
/// Subtracted ranges that end at or before the current position are
/// discarded, so they can never move the position backwards.
pub(crate) fn subtract_ranges(
    ranges: &[Range<usize>],
    ranges_to_subtract: &[Range<usize>],
) -> Vec<Range<usize>> {
    let mut result = Vec::new();
    let mut ranges_to_subtract = ranges_to_subtract.iter().peekable();

    for range in ranges {
        let mut offset = range.start;

        while offset < range.end {
            // Skip subtracted ranges that lie wholly behind us; consulting
            // them would rewind `offset` and emit text outside `range`.
            while ranges_to_subtract
                .peek()
                .map_or(false, |sub| sub.end <= offset)
            {
                ranges_to_subtract.next();
            }

            match ranges_to_subtract.peek() {
                // We are inside a subtracted range: jump to its end, but
                // never past the end of the range being processed.
                Some(sub) if sub.start <= offset => {
                    offset = cmp::min(sub.end, range.end);
                }
                // The next subtracted range starts ahead: keep everything
                // up to it (or up to the end of this range).
                Some(sub) => {
                    let next_offset = cmp::min(sub.start, range.end);
                    result.push(offset..next_offset);
                    offset = next_offset;
                }
                // Nothing left to subtract: keep the remainder.
                None => {
                    result.push(offset..range.end);
                    offset = range.end;
                }
            }
        }
    }

    result
}
|
||||
|
||||
/// Appends the text of `content[range]` to `output`, one line at a time,
/// removing up to `start_col` leading spaces from each line.
///
/// Lines are joined with `\n` and no newline is written after the last one.
/// If `range` is empty, out of bounds, or not on character boundaries,
/// nothing is appended and `output` is left exactly as it was.
fn add_content_from_range(
    output: &mut String,
    content: &str,
    range: Range<usize>,
    start_col: usize,
) {
    let text = content.get(range).unwrap_or("");
    let mut wrote_line = false;
    for line in text.lines() {
        // Separate lines instead of appending-then-popping a newline, so an
        // input with no lines can never eat a character already in `output`.
        if wrote_line {
            output.push('\n');
        }
        // Spaces are one byte each, so the count is a valid slice index.
        let indent = line
            .bytes()
            .take(start_col)
            .take_while(|&byte| byte == b' ')
            .count();
        output.push_str(&line[indent..]);
        wrote_line = true;
    }
}
|
||||
|
|
|
@ -409,7 +409,11 @@ impl SemanticIndex {
|
|||
) {
|
||||
if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err() {
|
||||
if let Some(documents) = retriever
|
||||
.parse_file(&pending_file.relative_path, &content, pending_file.language)
|
||||
.parse_file_with_template(
|
||||
&pending_file.relative_path,
|
||||
&content,
|
||||
pending_file.language,
|
||||
)
|
||||
.log_err()
|
||||
{
|
||||
log::trace!(
|
||||
|
@ -657,6 +661,8 @@ impl SemanticIndex {
|
|||
})
|
||||
.await?;
|
||||
|
||||
dbg!(&documents);
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
let mut ranges = Vec::new();
|
||||
let weak_project = project.downgrade();
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -10,3 +10,4 @@ brackets = [
|
|||
{ start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
|
||||
{ start = "/*", end = " */", close = true, newline = false, not_in = ["string", "comment"] },
|
||||
]
|
||||
collapsed_placeholder = " /* ... */ "
|
||||
|
|
|
@ -1,50 +1,28 @@
|
|||
(
|
||||
(line_comment)* @context
|
||||
[(line_comment) (attribute_item)]* @context
|
||||
.
|
||||
(enum_item
|
||||
name: (_) @name) @item
|
||||
)
|
||||
[
|
||||
(struct_item
|
||||
name: (_) @name)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(struct_item
|
||||
name: (_) @name) @item
|
||||
)
|
||||
(enum_item
|
||||
name: (_) @name)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(impl_item
|
||||
trait: (_)? @name
|
||||
"for"? @name
|
||||
type: (_) @name) @item
|
||||
)
|
||||
(impl_item
|
||||
trait: (_)? @name
|
||||
"for"? @name
|
||||
type: (_) @name)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(trait_item
|
||||
name: (_) @name) @item
|
||||
)
|
||||
(trait_item
|
||||
name: (_) @name)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(function_item
|
||||
name: (_) @name) @item
|
||||
)
|
||||
(function_item
|
||||
name: (_) @name
|
||||
body: (block
|
||||
"{" @keep
|
||||
"}" @keep) @collapse)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(macro_definition
|
||||
name: (_) @name) @item
|
||||
)
|
||||
|
||||
(
|
||||
(line_comment)* @context
|
||||
.
|
||||
(function_signature_item
|
||||
name: (_) @name) @item
|
||||
)
|
||||
(macro_definition
|
||||
name: (_) @name)
|
||||
] @item
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue