From ced45cbb0a16e0fa45a999d64dc7bc9dbc1e57fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Aug 2022 17:09:13 -0700 Subject: [PATCH] Use SyntaxMap in Buffer --- crates/language/src/buffer.rs | 616 ++++++++----------- crates/language/src/language.rs | 144 ++++- crates/language/src/syntax_map.rs | 591 +++++++++++++----- crates/language/src/tests.rs | 4 +- crates/project/src/project.rs | 1 + crates/zed/src/languages/rust/injections.scm | 6 +- 6 files changed, 830 insertions(+), 532 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index b7a1bd30fc..7b298b7420 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -6,13 +6,15 @@ pub use crate::{ use crate::{ diagnostic_set::{DiagnosticEntry, DiagnosticGroup}, outline::OutlineItem, + syntax_map::{ + SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxSnapshot, ToTreeSitterPoint, + }, CodeLabel, Outline, }; use anyhow::{anyhow, Result}; use clock::ReplicaId; use futures::FutureExt as _; use gpui::{fonts::HighlightStyle, AppContext, Entity, ModelContext, MutableAppContext, Task}; -use lazy_static::lazy_static; use parking_lot::Mutex; use settings::Settings; use similar::{ChangeTag, TextDiff}; @@ -25,7 +27,7 @@ use std::{ future::Future, iter::{self, Iterator, Peekable}, mem, - ops::{Deref, DerefMut, Range}, + ops::{Deref, Range}, path::{Path, PathBuf}, str, sync::Arc, @@ -36,7 +38,6 @@ use sum_tree::TreeMap; use text::operation_queue::OperationQueue; pub use text::{Buffer as TextBuffer, BufferSnapshot as TextBufferSnapshot, Operation as _, *}; use theme::SyntaxTheme; -use tree_sitter::{InputEdit, QueryCursor, Tree}; use util::TryFutureExt as _; #[cfg(any(test, feature = "test-support"))] @@ -44,10 +45,6 @@ pub use {tree_sitter_rust, tree_sitter_typescript}; pub use lsp::DiagnosticSeverity; -lazy_static! { - static ref QUERY_CURSORS: Mutex> = Default::default(); -} - pub struct Buffer { text: TextBuffer, file: Option>, @@ -60,7 +57,7 @@ pub struct Buffer { autoindent_requests: Vec>, pending_autoindent: Option>, sync_parse_timeout: Duration, - syntax_tree: Mutex>, + syntax_map: Mutex, parsing_in_background: bool, parse_count: usize, diagnostics: DiagnosticSet, @@ -75,7 +72,7 @@ pub struct Buffer { pub struct BufferSnapshot { text: text::BufferSnapshot, - tree: Option, + syntax: SyntaxSnapshot, file: Option>, diagnostics: DiagnosticSet, diagnostics_update_count: usize, @@ -221,14 +218,6 @@ pub trait LocalFile: File { ); } -pub(crate) struct QueryCursorHandle(Option); - -#[derive(Clone)] -struct SyntaxTree { - tree: Tree, - version: clock::Global, -} - #[derive(Clone, Debug)] pub enum AutoindentMode { /// Indent each line of inserted text. @@ -268,14 +257,11 @@ struct IndentSuggestion { delta: Ordering, } -pub(crate) struct TextProvider<'a>(pub(crate) &'a Rope); - struct BufferChunkHighlights<'a> { - captures: tree_sitter::QueryCaptures<'a, 'a, TextProvider<'a>>, - next_capture: Option<(tree_sitter::QueryMatch<'a, 'a>, usize)>, + captures: SyntaxMapCaptures<'a>, + next_capture: Option>, stack: Vec<(usize, HighlightId)>, - highlight_map: HighlightMap, - _query_cursor: QueryCursorHandle, + highlight_maps: Vec, } pub struct BufferChunks<'a> { @@ -456,7 +442,7 @@ impl Buffer { was_dirty_before_starting_transaction: None, text: buffer, file, - syntax_tree: Mutex::new(None), + syntax_map: Mutex::new(SyntaxMap::new()), parsing_in_background: false, parse_count: 0, sync_parse_timeout: Duration::from_millis(1), @@ -477,7 +463,7 @@ impl Buffer { pub fn snapshot(&self) -> BufferSnapshot { BufferSnapshot { text: self.text.snapshot(), - tree: self.syntax_tree(), + syntax: self.syntax_map(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -533,11 +519,17 @@ impl Buffer { } pub fn set_language(&mut self, language: Option>, cx: &mut ModelContext) { - *self.syntax_tree.lock() = None; + self.syntax_map.lock().clear(); self.language = language; self.reparse(cx); } + pub fn set_language_registry(&mut self, language_registry: Arc) { + self.syntax_map + .lock() + .set_language_registry(language_registry); + } + pub fn did_save( &mut self, version: clock::Global, @@ -682,13 +674,10 @@ impl Buffer { self.file_update_count } - pub(crate) fn syntax_tree(&self) -> Option { - if let Some(syntax_tree) = self.syntax_tree.lock().as_mut() { - self.interpolate_tree(syntax_tree); - Some(syntax_tree.tree.clone()) - } else { - None - } + pub(crate) fn syntax_map(&self) -> SyntaxSnapshot { + let mut syntax_map = self.syntax_map.lock(); + syntax_map.interpolate(&self.text_snapshot()); + syntax_map.snapshot() } #[cfg(any(test, feature = "test-support"))] @@ -706,35 +695,49 @@ impl Buffer { return false; } - if let Some(grammar) = self.grammar().cloned() { - let old_tree = self.syntax_tree(); - let text = self.as_rope().clone(); + if let Some(language) = self.language.clone() { + let text = self.text_snapshot(); let parsed_version = self.version(); + + let mut syntax_map; + let language_registry; + let syntax_map_version; + { + let mut map = self.syntax_map.lock(); + map.interpolate(&text); + language_registry = map.language_registry(); + syntax_map = map.snapshot(); + syntax_map_version = map.parsed_version(); + } let parse_task = cx.background().spawn({ - let grammar = grammar.clone(); - async move { grammar.parse_text(&text, old_tree) } + let language = language.clone(); + async move { + syntax_map.reparse(&syntax_map_version, &text, language_registry, language); + syntax_map + } }); match cx .background() .block_with_timeout(self.sync_parse_timeout, parse_task) { - Ok(new_tree) => { - self.did_finish_parsing(new_tree, parsed_version, cx); + Ok(new_syntax_map) => { + self.did_finish_parsing(new_syntax_map, parsed_version, cx); return true; } Err(parse_task) => { self.parsing_in_background = true; cx.spawn(move |this, mut cx| async move { - let new_tree = parse_task.await; + let new_syntax_map = parse_task.await; this.update(&mut cx, move |this, cx| { - let grammar_changed = this - .grammar() - .map_or(true, |curr_grammar| !Arc::ptr_eq(&grammar, curr_grammar)); + let grammar_changed = + this.language.as_ref().map_or(true, |current_language| { + !Arc::ptr_eq(&language, current_language) + }); let parse_again = this.version.changed_since(&parsed_version) || grammar_changed; this.parsing_in_background = false; - this.did_finish_parsing(new_tree, parsed_version, cx); + this.did_finish_parsing(new_syntax_map, parsed_version, cx); if parse_again && this.reparse(cx) {} }); @@ -746,30 +749,14 @@ impl Buffer { false } - fn interpolate_tree(&self, tree: &mut SyntaxTree) { - for edit in self.edits_since::<(usize, Point)>(&tree.version) { - let (bytes, lines) = edit.flatten(); - tree.tree.edit(&InputEdit { - start_byte: bytes.new.start, - old_end_byte: bytes.new.start + bytes.old.len(), - new_end_byte: bytes.new.end, - start_position: lines.new.start.to_ts_point(), - old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) - .to_ts_point(), - new_end_position: lines.new.end.to_ts_point(), - }); - } - tree.version = self.version(); - } - fn did_finish_parsing( &mut self, - tree: Tree, + syntax_map: SyntaxSnapshot, version: clock::Global, cx: &mut ModelContext, ) { self.parse_count += 1; - *self.syntax_tree.lock() = Some(SyntaxTree { tree, version }); + self.syntax_map.lock().did_parse(syntax_map, version); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); @@ -808,10 +795,7 @@ impl Buffer { fn compute_autoindents(&self) -> Option>> { let max_rows_between_yields = 100; let snapshot = self.snapshot(); - if snapshot.language.is_none() - || snapshot.tree.is_none() - || self.autoindent_requests.is_empty() - { + if snapshot.syntax.is_empty() || self.autoindent_requests.is_empty() { return None; } @@ -1310,10 +1294,6 @@ impl Buffer { cx.notify(); } - fn grammar(&self) -> Option<&Arc> { - self.language.as_ref().and_then(|l| l.grammar.as_ref()) - } - pub fn apply_ops>( &mut self, ops: I, @@ -1654,32 +1634,30 @@ impl BufferSnapshot { let prev_non_blank_row = self.prev_non_blank_row(row_range.start); // Find the suggested indentation ranges based on the syntax tree. - let indents_query = grammar.indents_query.as_ref()?; - let mut query_cursor = QueryCursorHandle::new(); - let indent_capture_ix = indents_query.capture_index_for_name("indent"); - let end_capture_ix = indents_query.capture_index_for_name("end"); - query_cursor.set_point_range( - Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0).to_ts_point() - ..Point::new(row_range.end, 0).to_ts_point(), - ); + let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0); + let end = Point::new(row_range.end, 0); + let range = (start..end).to_offset(&self.text); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + Some(&grammar.indents_config.as_ref()?.query) + }); let mut indent_ranges = Vec::>::new(); - for mat in query_cursor.matches( - indents_query, - self.tree.as_ref()?.root_node(), - TextProvider(self.as_rope()), - ) { + while let Some(mat) = matches.peek() { let mut start: Option = None; let mut end: Option = None; - for capture in mat.captures { - if Some(capture.index) == indent_capture_ix { - start.get_or_insert(Point::from_ts_point(capture.node.start_position())); - end.get_or_insert(Point::from_ts_point(capture.node.end_position())); - } else if Some(capture.index) == end_capture_ix { - end = Some(Point::from_ts_point(capture.node.start_position())); + + if let Some(config) = &grammar.indents_config { + for capture in mat.captures { + if capture.index == config.indent_capture_ix { + start.get_or_insert(Point::from_ts_point(capture.node.start_position())); + end.get_or_insert(Point::from_ts_point(capture.node.end_position())); + } else if Some(capture.index) == config.end_capture_ix { + end = Some(Point::from_ts_point(capture.node.start_position())); + } } } + matches.advance(); if let Some((start, end)) = start.zip(end) { if start.row == end.row { continue; @@ -1811,10 +1789,18 @@ impl BufferSnapshot { pub fn chunks(&self, range: Range, language_aware: bool) -> BufferChunks { let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut tree = None; + let mut syntax = None; let mut diagnostic_endpoints = Vec::new(); if language_aware { - tree = self.tree.as_ref(); + let captures = self.syntax.captures(range.clone(), &self.text, |grammar| { + grammar.highlights_query.as_ref() + }); + let highlight_maps = captures + .grammars() + .into_iter() + .map(|grammar| grammar.highlight_map()) + .collect(); + syntax = Some((captures, highlight_maps)); for entry in self.diagnostics_in_range::<_, usize>(range.clone(), false) { diagnostic_endpoints.push(DiagnosticEndpoint { offset: entry.range.start, @@ -1833,13 +1819,7 @@ impl BufferSnapshot { .sort_unstable_by_key(|endpoint| (endpoint.offset, !endpoint.is_start)); } - BufferChunks::new( - self.text.as_rope(), - range, - tree, - self.grammar(), - diagnostic_endpoints, - ) + BufferChunks::new(self.text.as_rope(), range, syntax, diagnostic_endpoints) } pub fn for_each_line(&self, range: Range, mut callback: impl FnMut(u32, &str)) { @@ -1865,12 +1845,6 @@ impl BufferSnapshot { self.language.as_ref() } - fn grammar(&self) -> Option<&Arc> { - self.language - .as_ref() - .and_then(|language| language.grammar.as_ref()) - } - pub fn surrounding_word(&self, start: T) -> (Range, Option) { let mut start = start.to_offset(self); let mut end = start; @@ -1901,61 +1875,71 @@ impl BufferSnapshot { } pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { - let tree = self.tree.as_ref()?; let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut cursor = tree.root_node().walk(); + let mut result: Option> = None; + 'outer: for (_, _, node) in self.syntax.layers_for_range(range.clone(), &self.text) { + let mut cursor = node.walk(); - // Descend to the first leaf that touches the start of the range, - // and if the range is non-empty, extends beyond the start. - while cursor.goto_first_child_for_byte(range.start).is_some() { - if !range.is_empty() && cursor.node().end_byte() == range.start { - cursor.goto_next_sibling(); + // Descend to the first leaf that touches the start of the range, + // and if the range is non-empty, extends beyond the start. + while cursor.goto_first_child_for_byte(range.start).is_some() { + if !range.is_empty() && cursor.node().end_byte() == range.start { + cursor.goto_next_sibling(); + } } - } - // Ascend to the smallest ancestor that strictly contains the range. - loop { - let node_range = cursor.node().byte_range(); - if node_range.start <= range.start - && node_range.end >= range.end - && node_range.len() > range.len() - { - break; - } - if !cursor.goto_parent() { - break; - } - } - - let left_node = cursor.node(); - - // For an empty range, try to find another node immediately to the right of the range. - if left_node.end_byte() == range.start { - let mut right_node = None; - while !cursor.goto_next_sibling() { + // Ascend to the smallest ancestor that strictly contains the range. + loop { + let node_range = cursor.node().byte_range(); + if node_range.start <= range.start + && node_range.end >= range.end + && node_range.len() > range.len() + { + break; + } if !cursor.goto_parent() { - break; + continue 'outer; } } - while cursor.node().start_byte() == range.start { - right_node = Some(cursor.node()); - if !cursor.goto_first_child() { - break; + let left_node = cursor.node(); + let mut layer_result = left_node.byte_range(); + + // For an empty range, try to find another node immediately to the right of the range. + if left_node.end_byte() == range.start { + let mut right_node = None; + while !cursor.goto_next_sibling() { + if !cursor.goto_parent() { + break; + } + } + + while cursor.node().start_byte() == range.start { + right_node = Some(cursor.node()); + if !cursor.goto_first_child() { + break; + } + } + + // If there is a candidate node on both sides of the (empty) range, then + // decide between the two by favoring a named node over an anonymous token. + // If both nodes are the same in that regard, favor the right one. + if let Some(right_node) = right_node { + if right_node.is_named() || !left_node.is_named() { + layer_result = right_node.byte_range(); + } } } - // If there is a candidate node on both sides of the (empty) range, then - // decide between the two by favoring a named node over an anonymous token. - // If both nodes are the same in that regard, favor the right one. - if let Some(right_node) = right_node { - if right_node.is_named() || !left_node.is_named() { - return Some(right_node.byte_range()); + if let Some(previous_result) = &result { + if previous_result.len() < layer_result.len() { + continue; } } + result = Some(layer_result); } - Some(left_node.byte_range()) + result } pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option> { @@ -1985,109 +1969,107 @@ impl BufferSnapshot { range: Range, theme: Option<&SyntaxTheme>, ) -> Option>> { - let tree = self.tree.as_ref()?; - let grammar = self - .language - .as_ref() - .and_then(|language| language.grammar.as_ref())?; - - let outline_query = grammar.outline_query.as_ref()?; - let mut cursor = QueryCursorHandle::new(); - cursor.set_byte_range(range.clone()); - let matches = cursor.matches( - outline_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| { + grammar.outline_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|g| g.outline_config.as_ref().unwrap()) + .collect::>(); let mut chunks = self.chunks(0..self.len(), true); - - let item_capture_ix = outline_query.capture_index_for_name("item")?; - let name_capture_ix = outline_query.capture_index_for_name("name")?; - let context_capture_ix = outline_query - .capture_index_for_name("context") - .unwrap_or(u32::MAX); - let mut stack = Vec::>::new(); - let items = matches - .filter_map(|mat| { - let item_node = mat.nodes_for_capture_index(item_capture_ix).next()?; - let item_range = item_node.start_byte()..item_node.end_byte(); - if item_range.end < range.start || item_range.start > range.end { - return None; + let mut items = Vec::new(); + while let Some(mat) = matches.peek() { + let config = &configs[mat.grammar_index]; + let item_node = mat.captures.iter().find_map(|cap| { + if cap.index == config.item_capture_ix { + Some(cap.node) + } else { + None } - let mut text = String::new(); - let mut name_ranges = Vec::new(); - let mut highlight_ranges = Vec::new(); + })?; - for capture in mat.captures { - let node_is_name; - if capture.index == name_capture_ix { - node_is_name = true; - } else if capture.index == context_capture_ix { - node_is_name = false; + let item_range = item_node.byte_range(); + if item_range.end < range.start || item_range.start > range.end { + matches.advance(); + continue; + } + + // TODO - move later, after processing captures + + let mut text = String::new(); + let mut name_ranges = Vec::new(); + let mut highlight_ranges = Vec::new(); + for capture in mat.captures { + let node_is_name; + if capture.index == config.name_capture_ix { + node_is_name = true; + } else if Some(capture.index) == config.context_capture_ix { + node_is_name = false; + } else { + continue; + } + + let range = capture.node.start_byte()..capture.node.end_byte(); + if !text.is_empty() { + text.push(' '); + } + if node_is_name { + let mut start = text.len(); + let end = start + range.len(); + + // When multiple names are captured, then the matcheable text + // includes the whitespace in between the names. + if !name_ranges.is_empty() { + start -= 1; + } + + name_ranges.push(start..end); + } + + let mut offset = range.start; + chunks.seek(offset); + for mut chunk in chunks.by_ref() { + if chunk.text.len() > range.end - offset { + chunk.text = &chunk.text[0..(range.end - offset)]; + offset = range.end; } else { - continue; + offset += chunk.text.len(); } - - let range = capture.node.start_byte()..capture.node.end_byte(); - if !text.is_empty() { - text.push(' '); + let style = chunk + .syntax_highlight_id + .zip(theme) + .and_then(|(highlight, theme)| highlight.style(theme)); + if let Some(style) = style { + let start = text.len(); + let end = start + chunk.text.len(); + highlight_ranges.push((start..end, style)); } - if node_is_name { - let mut start = text.len(); - let end = start + range.len(); - - // When multiple names are captured, then the matcheable text - // includes the whitespace in between the names. - if !name_ranges.is_empty() { - start -= 1; - } - - name_ranges.push(start..end); - } - - let mut offset = range.start; - chunks.seek(offset); - for mut chunk in chunks.by_ref() { - if chunk.text.len() > range.end - offset { - chunk.text = &chunk.text[0..(range.end - offset)]; - offset = range.end; - } else { - offset += chunk.text.len(); - } - let style = chunk - .syntax_highlight_id - .zip(theme) - .and_then(|(highlight, theme)| highlight.style(theme)); - if let Some(style) = style { - let start = text.len(); - let end = start + chunk.text.len(); - highlight_ranges.push((start..end, style)); - } - text.push_str(chunk.text); - if offset >= range.end { - break; - } + text.push_str(chunk.text); + if offset >= range.end { + break; } } + } - while stack.last().map_or(false, |prev_range| { - prev_range.start > item_range.start || prev_range.end < item_range.end - }) { - stack.pop(); - } - stack.push(item_range.clone()); + matches.advance(); + while stack.last().map_or(false, |prev_range| { + prev_range.start > item_range.start || prev_range.end < item_range.end + }) { + stack.pop(); + } + stack.push(item_range.clone()); - Some(OutlineItem { - depth: stack.len() - 1, - range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), - text, - highlight_ranges, - name_ranges, - }) + items.push(OutlineItem { + depth: stack.len() - 1, + range: self.anchor_after(item_range.start)..self.anchor_before(item_range.end), + text, + highlight_ranges, + name_ranges, }) - .collect::>(); + } Some(items) } @@ -2095,28 +2077,48 @@ impl BufferSnapshot { &self, range: Range, ) -> Option<(Range, Range)> { - let (grammar, tree) = self.grammar().zip(self.tree.as_ref())?; - let brackets_query = grammar.brackets_query.as_ref()?; - let open_capture_ix = brackets_query.capture_index_for_name("open")?; - let close_capture_ix = brackets_query.capture_index_for_name("close")?; - // Find bracket pairs that *inclusively* contain the given range. let range = range.start.to_offset(self).saturating_sub(1)..range.end.to_offset(self) + 1; - let mut cursor = QueryCursorHandle::new(); - let matches = cursor.set_byte_range(range).matches( - brackets_query, - tree.root_node(), - TextProvider(self.as_rope()), - ); + let mut matches = self.syntax.matches(range, &self.text, |grammar| { + grammar.brackets_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|grammar| grammar.brackets_config.as_ref().unwrap()) + .collect::>(); // Get the ranges of the innermost pair of brackets. - matches - .filter_map(|mat| { - let open = mat.nodes_for_capture_index(open_capture_ix).next()?; - let close = mat.nodes_for_capture_index(close_capture_ix).next()?; - Some((open.byte_range(), close.byte_range())) - }) - .min_by_key(|(open_range, close_range)| close_range.end - open_range.start) + let mut result: Option<(Range, Range)> = None; + while let Some(mat) = matches.peek() { + let mut open = None; + let mut close = None; + let config = &configs[mat.grammar_index]; + for capture in mat.captures { + if capture.index == config.open_capture_ix { + open = Some(capture.node.byte_range()); + } else if capture.index == config.close_capture_ix { + close = Some(capture.node.byte_range()); + } + } + + matches.advance(); + + if let Some((open, close)) = open.zip(close) { + let len = close.end - open.start; + + if let Some((existing_open, existing_close)) = &result { + let existing_len = existing_close.end - existing_open.start; + if len > existing_len { + continue; + } + } + + result = Some((open, close)); + } + } + + result } #[allow(clippy::type_complexity)] @@ -2228,7 +2230,7 @@ impl Clone for BufferSnapshot { fn clone(&self) -> Self { Self { text: self.text.clone(), - tree: self.tree.clone(), + syntax: self.syntax.clone(), file: self.file.clone(), remote_selections: self.remote_selections.clone(), diagnostics: self.diagnostics.clone(), @@ -2249,56 +2251,23 @@ impl Deref for BufferSnapshot { } } -impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { - type I = ByteChunks<'a>; - - fn text(&mut self, node: tree_sitter::Node) -> Self::I { - ByteChunks(self.0.chunks_in_range(node.byte_range())) - } -} - -pub(crate) struct ByteChunks<'a>(rope::Chunks<'a>); - -impl<'a> Iterator for ByteChunks<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - self.0.next().map(str::as_bytes) - } -} - unsafe impl<'a> Send for BufferChunks<'a> {} impl<'a> BufferChunks<'a> { pub(crate) fn new( text: &'a Rope, range: Range, - tree: Option<&'a Tree>, - grammar: Option<&'a Arc>, + syntax: Option<(SyntaxMapCaptures<'a>, Vec)>, diagnostic_endpoints: Vec, ) -> Self { let mut highlights = None; - if let Some((grammar, tree)) = grammar.zip(tree) { - if let Some(highlights_query) = grammar.highlights_query.as_ref() { - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. - let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - let captures = cursor.set_byte_range(range.clone()).captures( - highlights_query, - tree.root_node(), - TextProvider(text), - ); - highlights = Some(BufferChunkHighlights { - captures, - next_capture: None, - stack: Default::default(), - highlight_map: grammar.highlight_map(), - _query_cursor: query_cursor, - }) - } + if let Some((captures, highlight_maps)) = syntax { + highlights = Some(BufferChunkHighlights { + captures, + next_capture: None, + stack: Default::default(), + highlight_maps, + }) } let diagnostic_endpoints = diagnostic_endpoints.into_iter().peekable(); @@ -2324,14 +2293,13 @@ impl<'a> BufferChunks<'a> { highlights .stack .retain(|(end_offset, _)| *end_offset > offset); - if let Some((mat, capture_ix)) = &highlights.next_capture { - let capture = mat.captures[*capture_ix as usize]; + if let Some(capture) = &highlights.next_capture { if offset >= capture.node.start_byte() { let next_capture_end = capture.node.end_byte(); if offset < next_capture_end { highlights.stack.push(( next_capture_end, - highlights.highlight_map.get(capture.index), + highlights.highlight_maps[capture.grammar_index].get(capture.index), )); } highlights.next_capture.take(); @@ -2407,13 +2375,13 @@ impl<'a> Iterator for BufferChunks<'a> { highlights.next_capture = highlights.captures.next(); } - while let Some((mat, capture_ix)) = highlights.next_capture.as_ref() { - let capture = mat.captures[*capture_ix as usize]; + while let Some(capture) = highlights.next_capture.as_ref() { if self.range.start < capture.node.start_byte() { next_capture_start = capture.node.start_byte(); break; } else { - let highlight_id = highlights.highlight_map.get(capture.index); + let highlight_id = + highlights.highlight_maps[capture.grammar_index].get(capture.index); highlights .stack .push((capture.node.end_byte(), highlight_id)); @@ -2465,52 +2433,6 @@ impl<'a> Iterator for BufferChunks<'a> { } } -impl QueryCursorHandle { - pub(crate) fn new() -> Self { - let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); - cursor.set_match_limit(64); - QueryCursorHandle(Some(cursor)) - } -} - -impl Deref for QueryCursorHandle { - type Target = QueryCursor; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().unwrap() - } -} - -impl DerefMut for QueryCursorHandle { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().unwrap() - } -} - -impl Drop for QueryCursorHandle { - fn drop(&mut self) { - let mut cursor = self.0.take().unwrap(); - cursor.set_byte_range(0..usize::MAX); - cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); - QUERY_CURSORS.lock().push(cursor) - } -} - -pub(crate) trait ToTreeSitterPoint { - fn to_ts_point(self) -> tree_sitter::Point; - fn from_ts_point(point: tree_sitter::Point) -> Self; -} - -impl ToTreeSitterPoint for Point { - fn to_ts_point(self) -> tree_sitter::Point { - tree_sitter::Point::new(self.row as usize, self.column as usize) - } - - fn from_ts_point(point: tree_sitter::Point) -> Self { - Point::new(point.row as u32, point.column as u32) - } -} - impl operation_queue::Operation for Operation { fn lamport_timestamp(&self) -> clock::Lamport { match self { diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 8dcfc8fffd..780f6e75b5 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -30,8 +30,12 @@ use std::{ ops::Range, path::{Path, PathBuf}, str, - sync::Arc, + sync::{ + atomic::{AtomicUsize, Ordering::SeqCst}, + Arc, + }, }; +use syntax_map::SyntaxSnapshot; use theme::{SyntaxTheme, Theme}; use tree_sitter::{self, Query}; use util::ResultExt; @@ -50,6 +54,7 @@ thread_local! { } lazy_static! { + pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default(); pub static ref PLAIN_TEXT: Arc = Arc::new(Language::new( LanguageConfig { name: "Plain Text".into(), @@ -286,15 +291,29 @@ pub struct Language { } pub struct Grammar { + id: usize, pub(crate) ts_language: tree_sitter::Language, pub(crate) highlights_query: Option, - pub(crate) brackets_query: Option, - pub(crate) indents_query: Option, - pub(crate) outline_query: Option, + pub(crate) brackets_config: Option, + pub(crate) indents_config: Option, + pub(crate) outline_config: Option, pub(crate) injection_config: Option, pub(crate) highlight_map: Mutex, } +struct IndentConfig { + query: Query, + indent_capture_ix: u32, + end_capture_ix: Option, +} + +struct OutlineConfig { + query: Query, + item_capture_ix: u32, + name_capture_ix: u32, + context_capture_ix: Option, +} + struct InjectionConfig { query: Query, content_capture_ix: u32, @@ -302,6 +321,12 @@ struct InjectionConfig { languages_by_pattern_ix: Vec>>, } +struct BracketConfig { + query: Query, + open_capture_ix: u32, + close_capture_ix: u32, +} + #[derive(Clone)] pub enum LanguageServerBinaryStatus { CheckingForUpdate, @@ -499,6 +524,13 @@ impl LanguageRegistry { } } +#[cfg(any(test, feature = "test-support"))] +impl Default for LanguageRegistry { + fn default() -> Self { + Self::test() + } +} + async fn get_server_binary_path( adapter: Arc, language: Arc, @@ -576,10 +608,11 @@ impl Language { config, grammar: ts_language.map(|ts_language| { Arc::new(Grammar { + id: NEXT_GRAMMAR_ID.fetch_add(1, SeqCst), highlights_query: None, - brackets_query: None, - indents_query: None, - outline_query: None, + brackets_config: None, + outline_config: None, + indents_config: None, injection_config: None, ts_language, highlight_map: Default::default(), @@ -604,19 +637,70 @@ impl Language { pub fn with_brackets_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.brackets_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut open_capture_ix = None; + let mut close_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("open", &mut open_capture_ix), + ("close", &mut close_capture_ix), + ], + ); + if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) { + grammar.brackets_config = Some(BracketConfig { + query, + open_capture_ix, + close_capture_ix, + }); + } Ok(self) } pub fn with_indents_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.indents_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut indent_capture_ix = None; + let mut end_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("indent", &mut indent_capture_ix), + ("end", &mut end_capture_ix), + ], + ); + if let Some(indent_capture_ix) = indent_capture_ix { + grammar.indents_config = Some(IndentConfig { + query, + indent_capture_ix, + end_capture_ix, + }); + } Ok(self) } pub fn with_outline_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); - grammar.outline_query = Some(Query::new(grammar.ts_language, source)?); + let query = Query::new(grammar.ts_language, source)?; + let mut item_capture_ix = None; + let mut name_capture_ix = None; + let mut context_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("item", &mut item_capture_ix), + ("name", &mut name_capture_ix), + ("context", &mut context_capture_ix), + ], + ); + if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) { + grammar.outline_config = Some(OutlineConfig { + query, + item_capture_ix, + name_capture_ix, + context_capture_ix, + }); + } Ok(self) } @@ -625,13 +709,13 @@ impl Language { let query = Query::new(grammar.ts_language, source)?; let mut language_capture_ix = None; let mut content_capture_ix = None; - for (ix, name) in query.capture_names().iter().enumerate() { - *match name.as_str() { - "language" => &mut language_capture_ix, - "content" => &mut content_capture_ix, - _ => continue, - } = Some(ix as u32); - } + get_capture_indices( + &query, + &mut [ + ("language", &mut language_capture_ix), + ("content", &mut content_capture_ix), + ], + ); let languages_by_pattern_ix = (0..query.pattern_count()) .map(|ix| { query.property_settings(ix).iter().find_map(|setting| { @@ -729,9 +813,16 @@ impl Language { let mut result = Vec::new(); if let Some(grammar) = &self.grammar { let tree = grammar.parse_text(text, None); + let captures = SyntaxSnapshot::single_tree_captures( + range.clone(), + text, + &tree, + grammar, + |grammar| grammar.highlights_query.as_ref(), + ); + let highlight_maps = vec![grammar.highlight_map()]; let mut offset = 0; - for chunk in BufferChunks::new(text, range, Some(&tree), self.grammar.as_ref(), vec![]) - { + for chunk in BufferChunks::new(text, range, Some((captures, highlight_maps)), vec![]) { let end_offset = offset + chunk.text.len(); if let Some(highlight_id) = chunk.syntax_highlight_id { if !highlight_id.is_default() { @@ -771,6 +862,10 @@ impl Language { } impl Grammar { + pub fn id(&self) -> usize { + self.id + } + fn parse_text(&self, text: &Rope, old_tree: Option) -> Tree { PARSER.with(|parser| { let mut parser = parser.borrow_mut(); @@ -870,6 +965,17 @@ impl LspAdapter for Arc { } } +fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option)]) { + for (ix, name) in query.capture_names().iter().enumerate() { + for (capture_name, index) in captures.iter_mut() { + if capture_name == name { + **index = Some(ix as u32); + break; + } + } + } +} + pub fn point_to_lsp(point: PointUtf16) -> lsp::Position { lsp::Position::new(point.row, point.column) } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a578d36a38..ca0c28202c 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1,26 +1,28 @@ -use crate::{ - Grammar, InjectionConfig, Language, LanguageRegistry, QueryCursorHandle, TextProvider, - ToTreeSitterPoint, -}; +use crate::{Grammar, InjectionConfig, Language, LanguageRegistry}; +use lazy_static::lazy_static; +use parking_lot::Mutex; use std::{ borrow::Cow, cell::RefCell, cmp::{Ordering, Reverse}, collections::BinaryHeap, - iter::Peekable, - ops::{DerefMut, Range}, + ops::{Deref, DerefMut, Range}, sync::Arc, }; use sum_tree::{Bias, SeekTarget, SumTree}; -use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; +use text::{rope, Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint}; use tree_sitter::{ - Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatch, QueryMatches, Tree, + Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree, }; thread_local! { static PARSER: RefCell = RefCell::new(Parser::new()); } +lazy_static! { + static ref QUERY_CURSORS: Mutex> = Default::default(); +} + #[derive(Default)] pub struct SyntaxMap { parsed_version: clock::Global, @@ -34,39 +36,51 @@ pub struct SyntaxSnapshot { layers: SumTree, } +#[derive(Default)] pub struct SyntaxMapCaptures<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Default)] pub struct SyntaxMapMatches<'a> { layers: Vec>, + active_layer_count: usize, + grammars: Vec<&'a Grammar>, } +#[derive(Debug)] pub struct SyntaxMapCapture<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub node: Node<'a>, pub index: u32, + pub grammar_index: usize, } +#[derive(Debug)] pub struct SyntaxMapMatch<'a> { - pub grammar: &'a Grammar, pub depth: usize, pub pattern_index: usize, pub captures: &'a [QueryCapture<'a>], + pub grammar_index: usize, } struct SyntaxMapCapturesLayer<'a> { depth: usize, - captures: Peekable>>, - grammar: &'a Grammar, + captures: QueryCaptures<'a, 'a, TextProvider<'a>>, + next_capture: Option>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } struct SyntaxMapMatchesLayer<'a> { depth: usize, - matches: Peekable>>, - grammar: &'a Grammar, + next_pattern_index: usize, + next_captures: Vec>, + has_next: bool, + matches: QueryMatches<'a, 'a, TextProvider<'a>>, + grammar_index: usize, _query_cursor: QueryCursorHandle, } @@ -80,6 +94,7 @@ struct SyntaxLayer { #[derive(Debug, Clone)] struct SyntaxLayerSummary { + min_depth: usize, max_depth: usize, range: Range, last_layer_range: Range, @@ -110,6 +125,12 @@ struct ChangedRegion { #[derive(Default)] struct ChangeRegionSet(Vec); +struct TextProvider<'a>(&'a Rope); + +struct ByteChunks<'a>(rope::Chunks<'a>); + +struct QueryCursorHandle(Option); + impl SyntaxMap { pub fn new() -> Self { Self::default() @@ -123,11 +144,20 @@ impl SyntaxMap { self.snapshot.clone() } + pub fn language_registry(&self) -> Option> { + self.language_registry.clone() + } + + pub fn parsed_version(&self) -> clock::Global { + self.parsed_version.clone() + } + pub fn interpolate(&mut self, text: &BufferSnapshot) { self.snapshot.interpolate(&self.interpolated_version, text); self.interpolated_version = text.version.clone(); } + #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { if !self.interpolated_version.observed_all(&text.version) { self.interpolate(text); @@ -141,9 +171,22 @@ impl SyntaxMap { ); self.parsed_version = text.version.clone(); } + + pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { + self.parsed_version = version; + self.snapshot = snapshot; + } + + pub fn clear(&mut self) { + self.snapshot = SyntaxSnapshot::default(); + } } impl SyntaxSnapshot { + pub fn is_empty(&self) -> bool { + self.layers.is_empty() + } + pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { let edits = text .edits_since::<(usize, Point)>(&from_version) @@ -429,117 +472,52 @@ impl SyntaxSnapshot { self.layers = layers; } + pub fn single_tree_captures<'a>( + range: Range, + text: &'a Rope, + tree: &'a Tree, + grammar: &'a Grammar, + query: fn(&Grammar) -> Option<&Query>, + ) -> SyntaxMapCaptures<'a> { + SyntaxMapCaptures::new( + range.clone(), + text, + [(grammar, 0, tree.root_node())].into_iter(), + query, + ) + } + pub fn captures<'a>( &'a self, range: Range, buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, + query: fn(&Grammar) -> Option<&Query>, ) -> SyntaxMapCaptures { - let mut result = SyntaxMapCaptures { layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { - let query = if let Some(query) = query(grammar) { - query - } else { - continue; - }; - - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. - let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - - cursor.set_byte_range(range.clone()); - let captures = cursor.captures(query, node, TextProvider(buffer.as_rope())); - let mut layer = SyntaxMapCapturesLayer { - depth, - grammar, - captures: captures.peekable(), - _query_cursor: query_cursor, - }; - - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } - result.layers.insert(ix, layer); - } - } - result + SyntaxMapCaptures::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) } pub fn matches<'a>( &'a self, range: Range, buffer: &'a BufferSnapshot, - query: impl Fn(&Grammar) -> Option<&Query>, + query: fn(&Grammar) -> Option<&Query>, ) -> SyntaxMapMatches { - let mut result = SyntaxMapMatches { layers: Vec::new() }; - for (grammar, depth, node) in self.layers_for_range(range.clone(), buffer) { - let query = if let Some(query) = query(grammar) { - query - } else { - continue; - }; - - let mut query_cursor = QueryCursorHandle::new(); - - // TODO - add a Tree-sitter API to remove the need for this. - let cursor = unsafe { - std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) - }; - - cursor.set_byte_range(range.clone()); - let matches = cursor.matches(query, node, TextProvider(buffer.as_rope())); - let mut layer = SyntaxMapMatchesLayer { - depth, - grammar, - matches: matches.peekable(), - _query_cursor: query_cursor, - }; - - if let Some(key) = layer.sort_key() { - let mut ix = 0; - while let Some(next_layer) = result.layers.get_mut(ix) { - if let Some(next_key) = next_layer.sort_key() { - if key > next_key { - ix += 1; - continue; - } - } - break; - } - result.layers.insert(ix, layer); - } - } - result + SyntaxMapMatches::new( + range.clone(), + buffer.as_rope(), + self.layers_for_range(range, buffer).into_iter(), + query, + ) } - pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, Node)> { - self.layers - .iter() - .filter_map(|layer| { - if let Some(grammar) = &layer.language.grammar { - Some(( - grammar.as_ref(), - layer.tree.root_node_with_offset( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - )) - } else { - None - } - }) - .collect() + #[cfg(test)] + pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, usize, Node)> { + self.layers_for_range(0..buffer.len(), buffer) } pub fn layers_for_range<'a, T: ToOffset>( @@ -551,9 +529,13 @@ impl SyntaxSnapshot { let end = buffer.anchor_after(range.end.to_offset(buffer)); let mut cursor = self.layers.filter::<_, ()>(|summary| { - let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); - let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); - !is_before_start && !is_after_end + if summary.max_depth > summary.min_depth { + true + } else { + let is_before_start = summary.range.end.cmp(&start, buffer).is_lt(); + let is_after_end = summary.range.start.cmp(&end, buffer).is_gt(); + !is_before_start && !is_after_end + } }); let mut result = Vec::new(); @@ -576,57 +558,274 @@ impl SyntaxSnapshot { } } -impl<'a> Iterator for SyntaxMapCaptures<'a> { - type Item = SyntaxMapCapture<'a>; +impl<'a> SyntaxMapCaptures<'a> { + fn new( + range: Range, + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self { + layers: Vec::new(), + grammars: Vec::new(), + active_layer_count: 0, + }; + for (grammar, depth, node) in layers { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; - fn next(&mut self) -> Option { - let layer = self.layers.first_mut()?; - let (mat, ix) = layer.captures.next()?; + let mut query_cursor = QueryCursorHandle::new(); - let capture = mat.captures[ix as usize]; - let grammar = layer.grammar; - let depth = layer.depth; + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; - if let Some(key) = layer.sort_key() { - let mut i = 1; - while let Some(later_layer) = self.layers.get_mut(i) { - if let Some(later_key) = later_layer.sort_key() { - if key > later_key { - i += 1; - continue; - } - } - break; + cursor.set_byte_range(range.clone()); + let captures = cursor.captures(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); + let mut layer = SyntaxMapCapturesLayer { + depth, + grammar_index, + next_capture: None, + captures, + _query_cursor: query_cursor, + }; + + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; + result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); } - if i > 1 { - self.layers[0..i].rotate_left(1); - } - } else { - self.layers.remove(0); } + result + } + + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars + } + + pub fn peek(&self) -> Option> { + let layer = self.layers[..self.active_layer_count].first()?; + let capture = layer.next_capture?; Some(SyntaxMapCapture { - grammar, - depth, - node: capture.node, + depth: layer.depth, + grammar_index: layer.grammar_index, index: capture.index, + node: capture.node, }) } + + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers[..self.active_layer_count].first_mut() { + layer + } else { + return false; + }; + + layer.advance(); + if layer.next_capture.is_some() { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; + } + + true + } + + pub fn set_byte_range(&mut self, range: Range) { + for layer in &mut self.layers { + layer.captures.set_byte_range(range.clone()); + if let Some(capture) = &layer.next_capture { + if capture.node.end_byte() > range.start { + continue; + } + } + layer.advance(); + } + self.layers.sort_unstable_by_key(|layer| layer.sort_key()); + self.active_layer_count = self + .layers + .iter() + .position(|layer| layer.next_capture.is_none()) + .unwrap_or(self.layers.len()); + } +} + +impl<'a> SyntaxMapMatches<'a> { + fn new( + range: Range, + text: &'a Rope, + layers: impl Iterator)>, + query: fn(&Grammar) -> Option<&Query>, + ) -> Self { + let mut result = Self::default(); + for (grammar, depth, node) in layers { + let query = if let Some(query) = query(grammar) { + query + } else { + continue; + }; + + let mut query_cursor = QueryCursorHandle::new(); + + // TODO - add a Tree-sitter API to remove the need for this. + let cursor = unsafe { + std::mem::transmute::<_, &'static mut QueryCursor>(query_cursor.deref_mut()) + }; + + cursor.set_byte_range(range.clone()); + let matches = cursor.matches(query, node, TextProvider(text)); + let grammar_index = result + .grammars + .iter() + .position(|g| g.id == grammar.id()) + .unwrap_or_else(|| { + result.grammars.push(grammar); + result.grammars.len() - 1 + }); + let mut layer = SyntaxMapMatchesLayer { + depth, + grammar_index, + matches, + next_pattern_index: 0, + next_captures: Vec::new(), + has_next: false, + _query_cursor: query_cursor, + }; + + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let ix = match result.layers[..result.active_layer_count] + .binary_search_by_key(&key, |layer| layer.sort_key()) + { + Ok(ix) | Err(ix) => ix, + }; + result.layers.insert(ix, layer); + result.active_layer_count += 1; + } else { + result.layers.push(layer); + } + } + result + } + + pub fn grammars(&self) -> &[&'a Grammar] { + &self.grammars + } + + pub fn peek(&self) -> Option { + let layer = self.layers.first()?; + if !layer.has_next { + return None; + } + Some(SyntaxMapMatch { + depth: layer.depth, + grammar_index: layer.grammar_index, + pattern_index: layer.next_pattern_index, + captures: &layer.next_captures, + }) + } + + pub fn advance(&mut self) -> bool { + let layer = if let Some(layer) = self.layers.first_mut() { + layer + } else { + return false; + }; + + layer.advance(); + if layer.has_next { + let key = layer.sort_key(); + let i = 1 + self.layers[1..self.active_layer_count] + .iter() + .position(|later_layer| key < later_layer.sort_key()) + .unwrap_or(self.active_layer_count - 1); + self.layers[0..i].rotate_left(1); + } else { + self.layers[0..self.active_layer_count].rotate_left(1); + self.active_layer_count -= 1; + } + + true + } } impl<'a> SyntaxMapCapturesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let (mat, ix) = self.captures.peek()?; - let range = &mat.captures[*ix].node.byte_range(); - Some((range.start, Reverse(range.end), self.depth)) + fn advance(&mut self) { + self.next_capture = self.captures.next().map(|(mat, ix)| mat.captures[ix]); + } + + fn sort_key(&self) -> (usize, Reverse, usize) { + if let Some(capture) = &self.next_capture { + let range = capture.node.byte_range(); + (range.start, Reverse(range.end), self.depth) + } else { + (usize::MAX, Reverse(0), usize::MAX) + } } } impl<'a> SyntaxMapMatchesLayer<'a> { - fn sort_key(&mut self) -> Option<(usize, Reverse, usize)> { - let mat = self.matches.peek()?; - let range = mat.captures.first()?.node.start_byte()..mat.captures.last()?.node.end_byte(); - Some((range.start, Reverse(range.end), self.depth)) + fn advance(&mut self) { + if let Some(mat) = self.matches.next() { + self.next_captures.clear(); + self.next_captures.extend_from_slice(&mat.captures); + self.next_pattern_index = mat.pattern_index; + self.has_next = true; + } else { + self.has_next = false; + } + } + + fn sort_key(&self) -> (usize, Reverse, usize) { + if self.has_next { + let captures = &self.next_captures; + if let Some((first, last)) = captures.first().zip(captures.last()) { + return ( + first.node.start_byte(), + Reverse(last.node.end_byte()), + self.depth, + ); + } + } + (usize::MAX, Reverse(0), usize::MAX) + } +} + +impl<'a> Iterator for SyntaxMapCaptures<'a> { + type Item = SyntaxMapCapture<'a>; + + fn next(&mut self) -> Option { + let result = self.peek(); + self.advance(); + result } } @@ -864,6 +1063,7 @@ impl Default for SyntaxLayerSummary { fn default() -> Self { Self { max_depth: 0, + min_depth: 0, range: Anchor::MAX..Anchor::MIN, last_layer_range: Anchor::MIN..Anchor::MAX, } @@ -875,7 +1075,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { fn add_summary(&mut self, other: &Self, buffer: &Self::Context) { if other.max_depth > self.max_depth { - *self = other.clone(); + self.max_depth = other.max_depth; + self.range = other.range.clone(); } else { if other.range.start.cmp(&self.range.start, buffer).is_lt() { self.range.start = other.range.start; @@ -883,8 +1084,8 @@ impl sum_tree::Summary for SyntaxLayerSummary { if other.range.end.cmp(&self.range.end, buffer).is_gt() { self.range.end = other.range.end; } - self.last_layer_range = other.last_layer_range.clone(); } + self.last_layer_range = other.last_layer_range.clone(); } } @@ -927,6 +1128,7 @@ impl sum_tree::Item for SyntaxLayer { fn summary(&self) -> Self::Summary { SyntaxLayerSummary { + min_depth: self.depth, max_depth: self.depth, range: self.range.clone(), last_layer_range: self.range.clone(), @@ -944,12 +1146,73 @@ impl std::fmt::Debug for SyntaxLayer { } } +impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> { + type I = ByteChunks<'a>; + + fn text(&mut self, node: tree_sitter::Node) -> Self::I { + ByteChunks(self.0.chunks_in_range(node.byte_range())) + } +} + +impl<'a> Iterator for ByteChunks<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + self.0.next().map(str::as_bytes) + } +} + +impl QueryCursorHandle { + pub(crate) fn new() -> Self { + let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new); + cursor.set_match_limit(64); + QueryCursorHandle(Some(cursor)) + } +} + +impl Deref for QueryCursorHandle { + type Target = QueryCursor; + + fn deref(&self) -> &Self::Target { + self.0.as_ref().unwrap() + } +} + +impl DerefMut for QueryCursorHandle { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.as_mut().unwrap() + } +} + +impl Drop for QueryCursorHandle { + fn drop(&mut self) { + let mut cursor = self.0.take().unwrap(); + cursor.set_byte_range(0..usize::MAX); + cursor.set_point_range(Point::zero().to_ts_point()..Point::MAX.to_ts_point()); + QUERY_CURSORS.lock().push(cursor) + } +} + +pub(crate) trait ToTreeSitterPoint { + fn to_ts_point(self) -> tree_sitter::Point; + fn from_ts_point(point: tree_sitter::Point) -> Self; +} + +impl ToTreeSitterPoint for Point { + fn to_ts_point(self) -> tree_sitter::Point { + tree_sitter::Point::new(self.row as usize, self.column as usize) + } + + fn from_ts_point(point: tree_sitter::Point) -> Self { + Point::new(point.row as u32, point.column as u32) + } +} + #[cfg(test)] mod tests { use super::*; use crate::LanguageConfig; use text::{Buffer, Point}; - use tree_sitter::Query; use unindent::Unindent as _; use util::test::marked_text_ranges; @@ -1298,13 +1561,13 @@ mod tests { mutated_layers.into_iter().zip(reference_layers.into_iter()) { assert_eq!( - edited_layer.1.to_sexp(), - reference_layer.1.to_sexp(), + edited_layer.2.to_sexp(), + reference_layer.2.to_sexp(), "different layer at step {i}" ); assert_eq!( - edited_layer.1.range(), - reference_layer.1.range(), + edited_layer.2.range(), + reference_layer.2.range(), "different layer at step {i}" ); } @@ -1377,16 +1640,16 @@ mod tests { marked_string: &str, ) { let mut actual_ranges = Vec::>::new(); - for capture in syntax_map.captures(0..buffer.len(), buffer, |grammar| { + let captures = syntax_map.captures(0..buffer.len(), buffer, |grammar| { grammar.highlights_query.as_ref() - }) { - let name = &capture - .grammar - .highlights_query - .as_ref() - .unwrap() - .capture_names()[capture.index as usize]; - dbg!(capture.node, capture.index, name); + }); + let queries = captures + .grammars() + .iter() + .map(|grammar| grammar.highlights_query.as_ref().unwrap()) + .collect::>(); + for capture in captures { + let name = &queries[capture.grammar_index].capture_names()[capture.index as usize]; if highlight_query_capture_names.contains(&name.as_str()) { actual_ranges.push(capture.node.byte_range()); } diff --git a/crates/language/src/tests.rs b/crates/language/src/tests.rs index 44c15d1a3b..ad997753cd 100644 --- a/crates/language/src/tests.rs +++ b/crates/language/src/tests.rs @@ -1407,7 +1407,9 @@ fn json_lang() -> Language { fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { buffer.read_with(cx, |buffer, _| { - buffer.syntax_tree().unwrap().root_node().to_sexp() + let syntax_map = buffer.syntax_map(); + let layers = syntax_map.layers(buffer.as_text_snapshot()); + layers[0].2.to_sexp() }) } diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 0f762f822f..531fdcbe15 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -2056,6 +2056,7 @@ impl Project { let full_path = buffer.read(cx).file()?.full_path(cx); let language = self.languages.select_language(&full_path)?; buffer.update(cx, |buffer, cx| { + buffer.set_language_registry(self.languages.clone()); buffer.set_language(Some(language.clone()), cx); }); diff --git a/crates/zed/src/languages/rust/injections.scm b/crates/zed/src/languages/rust/injections.scm index 9d8c03c889..57ebea8539 100644 --- a/crates/zed/src/languages/rust/injections.scm +++ b/crates/zed/src/languages/rust/injections.scm @@ -1,3 +1,7 @@ (macro_invocation - (token_tree) @content) + (token_tree) @content + (#set! "language" "rust")) + +(macro_rule + (token_tree) @content (#set! "language" "rust")) \ No newline at end of file