Store syntax layers even if a language for the injection can't be found

This commit is contained in:
Antonio Scandurra 2023-01-24 12:25:12 +01:00
parent 8dabdd1baa
commit 14c72cac58

View file

@ -89,8 +89,34 @@ struct SyntaxMapMatchesLayer<'a> {
struct SyntaxLayer { struct SyntaxLayer {
depth: usize, depth: usize,
range: Range<Anchor>, range: Range<Anchor>,
tree: tree_sitter::Tree, content: SyntaxLayerContent,
language: Arc<Language>, }
/// What a syntax layer holds: either a parsed tree together with the language
/// that produced it, or only the name of a language that could not be found
/// when the layer was created.
#[derive(Clone)]
enum SyntaxLayerContent {
/// The layer's text was parsed; both the tree and its language are kept.
Parsed {
tree: tree_sitter::Tree,
language: Arc<Language>,
},
/// No language was found for this layer, so there is no tree. Only the
/// requested language name is recorded.
Pending {
language_name: Arc<str>,
},
}
impl SyntaxLayerContent {
/// Returns the id of the layer's language (whatever `language.id()` yields)
/// for a `Parsed` layer, and `None` for a `Pending` layer.
fn language_id(&self) -> Option<usize> {
match self {
SyntaxLayerContent::Parsed { language, .. } => language.id(),
SyntaxLayerContent::Pending { .. } => None,
}
}
/// Borrows the syntax tree of a `Parsed` layer; a `Pending` layer has no
/// tree, so `None` is returned.
fn tree(&self) -> Option<&Tree> {
match self {
SyntaxLayerContent::Parsed { tree, .. } => Some(tree),
SyntaxLayerContent::Pending { .. } => None,
}
}
} }
#[derive(Debug)] #[derive(Debug)]
@ -130,12 +156,26 @@ struct SyntaxLayerPositionBeforeChange {
struct ParseStep { struct ParseStep {
depth: usize, depth: usize,
language: Arc<Language>, language: ParseStepLanguage,
range: Range<Anchor>, range: Range<Anchor>,
included_ranges: Vec<tree_sitter::Range>, included_ranges: Vec<tree_sitter::Range>,
mode: ParseMode, mode: ParseMode,
} }
/// The language attached to a `ParseStep`: either an already-resolved
/// `Language`, or just the name of one that could not be resolved.
enum ParseStepLanguage {
/// The language is available, so the step can be parsed.
Loaded { language: Arc<Language> },
/// Only the language's name is known; the step carries it along so the
/// resulting layer can be stored as pending.
Pending { name: Arc<str> },
}
impl ParseStepLanguage {
fn id(&self) -> Option<usize> {
match self {
ParseStepLanguage::Loaded { language } => language.id(),
ParseStepLanguage::Pending { .. } => None,
}
}
}
enum ParseMode { enum ParseMode {
Single, Single,
Combined { Combined {
@ -276,47 +316,49 @@ impl SyntaxSnapshot {
} }
let mut layer = layer.clone(); let mut layer = layer.clone();
for (edit, edit_range) in &edits[first_edit_ix_for_depth..] { if let SyntaxLayerContent::Parsed { tree, .. } = &mut layer.content {
// Ignore any edits that follow this layer. for (edit, edit_range) in &edits[first_edit_ix_for_depth..] {
if edit_range.start.cmp(&layer.range.end, text).is_ge() { // Ignore any edits that follow this layer.
break; if edit_range.start.cmp(&layer.range.end, text).is_ge() {
break;
}
// Apply any edits that intersect this layer to the layer's syntax tree.
let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() {
tree_sitter::InputEdit {
start_byte: edit.new.start.0 - start_byte,
old_end_byte: edit.new.start.0 - start_byte
+ (edit.old.end.0 - edit.old.start.0),
new_end_byte: edit.new.end.0 - start_byte,
start_position: (edit.new.start.1 - start_point).to_ts_point(),
old_end_position: (edit.new.start.1 - start_point
+ (edit.old.end.1 - edit.old.start.1))
.to_ts_point(),
new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
}
} else {
let node = tree.root_node();
tree_sitter::InputEdit {
start_byte: 0,
old_end_byte: node.end_byte(),
new_end_byte: 0,
start_position: Default::default(),
old_end_position: node.end_position(),
new_end_position: Default::default(),
}
};
tree.edit(&tree_edit);
} }
// Apply any edits that intersect this layer to the layer's syntax tree. debug_assert!(
let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() { tree.root_node().end_byte() <= text.len(),
tree_sitter::InputEdit { "tree's size {}, is larger than text size {}",
start_byte: edit.new.start.0 - start_byte, tree.root_node().end_byte(),
old_end_byte: edit.new.start.0 - start_byte text.len(),
+ (edit.old.end.0 - edit.old.start.0), );
new_end_byte: edit.new.end.0 - start_byte,
start_position: (edit.new.start.1 - start_point).to_ts_point(),
old_end_position: (edit.new.start.1 - start_point
+ (edit.old.end.1 - edit.old.start.1))
.to_ts_point(),
new_end_position: (edit.new.end.1 - start_point).to_ts_point(),
}
} else {
let node = layer.tree.root_node();
tree_sitter::InputEdit {
start_byte: 0,
old_end_byte: node.end_byte(),
new_end_byte: 0,
start_position: Default::default(),
old_end_position: node.end_position(),
new_end_position: Default::default(),
}
};
layer.tree.edit(&tree_edit);
} }
debug_assert!(
layer.tree.root_node().end_byte() <= text.len(),
"tree's size {}, is larger than text size {}",
layer.tree.root_node().end_byte(),
text.len(),
);
layers.push(layer, text); layers.push(layer, text);
cursor.next(text); cursor.next(text);
} }
@ -344,7 +386,9 @@ impl SyntaxSnapshot {
let mut combined_injection_ranges = HashMap::default(); let mut combined_injection_ranges = HashMap::default();
queue.push(ParseStep { queue.push(ParseStep {
depth: 0, depth: 0,
language: root_language.clone(), language: ParseStepLanguage::Loaded {
language: root_language,
},
included_ranges: vec![tree_sitter::Range { included_ranges: vec![tree_sitter::Range {
start_byte: 0, start_byte: 0,
end_byte: text.len(), end_byte: text.len(),
@ -415,12 +459,11 @@ impl SyntaxSnapshot {
let (step_start_byte, step_start_point) = let (step_start_byte, step_start_point) =
step.range.start.summary::<(usize, Point)>(text); step.range.start.summary::<(usize, Point)>(text);
let step_end_byte = step.range.end.to_offset(text); let step_end_byte = step.range.end.to_offset(text);
let Some(grammar) = step.language.grammar.as_deref() else { continue };
let mut old_layer = cursor.item(); let mut old_layer = cursor.item();
if let Some(layer) = old_layer { if let Some(layer) = old_layer {
if layer.range.to_offset(text) == (step_start_byte..step_end_byte) if layer.range.to_offset(text) == (step_start_byte..step_end_byte)
&& layer.language.id() == step.language.id() && layer.content.language_id() == step.language.id()
{ {
cursor.next(&text); cursor.next(&text);
} else { } else {
@ -428,85 +471,99 @@ impl SyntaxSnapshot {
} }
} }
let tree; let content = match step.language {
let changed_ranges; ParseStepLanguage::Loaded { language } => {
let mut included_ranges = step.included_ranges; let Some(grammar) = language.grammar() else { continue };
if let Some(old_layer) = old_layer { let tree;
if let ParseMode::Combined { let changed_ranges;
parent_layer_changed_ranges, let mut included_ranges = step.included_ranges;
.. if let Some(SyntaxLayerContent::Parsed { tree: old_tree, .. }) =
} = step.mode old_layer.map(|layer| &layer.content)
{ {
included_ranges = splice_included_ranges( if let ParseMode::Combined {
old_layer.tree.included_ranges(), parent_layer_changed_ranges,
&parent_layer_changed_ranges, ..
&included_ranges, } = step.mode
); {
} included_ranges = splice_included_ranges(
old_tree.included_ranges(),
&parent_layer_changed_ranges,
&included_ranges,
);
}
tree = parse_text( tree = parse_text(
grammar, grammar,
text.as_rope(), text.as_rope(),
step_start_byte, step_start_byte,
step_start_point, step_start_point,
included_ranges, included_ranges,
Some(old_layer.tree.clone()), Some(old_tree.clone()),
); );
changed_ranges = join_ranges( changed_ranges = join_ranges(
edits.iter().map(|e| e.new.clone()).filter(|range| { edits.iter().map(|e| e.new.clone()).filter(|range| {
range.start <= step_end_byte && range.end >= step_start_byte range.start <= step_end_byte && range.end >= step_start_byte
}), }),
old_layer old_tree.changed_ranges(&tree).map(|r| {
.tree step_start_byte + r.start_byte..step_start_byte + r.end_byte
.changed_ranges(&tree) }),
.map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte), );
); } else {
} else { tree = parse_text(
tree = parse_text( grammar,
grammar, text.as_rope(),
text.as_rope(), step_start_byte,
step_start_byte, step_start_point,
step_start_point, included_ranges,
included_ranges, None,
None, );
); changed_ranges = vec![step_start_byte..step_end_byte];
changed_ranges = vec![step_start_byte..step_end_byte]; }
}
if let (Some((config, registry)), false) = (
grammar.injection_config.as_ref().zip(registry.as_ref()),
changed_ranges.is_empty(),
) {
for range in &changed_ranges {
changed_regions.insert(
ChangedRegion {
depth: step.depth + 1,
range: text.anchor_before(range.start)
..text.anchor_after(range.end),
},
text,
);
}
get_injections(
config,
text,
tree.root_node_with_offset(
step_start_byte,
step_start_point.to_ts_point(),
),
registry,
step.depth + 1,
&changed_ranges,
&mut combined_injection_ranges,
&mut queue,
);
}
SyntaxLayerContent::Parsed { tree, language }
}
ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
language_name: name,
},
};
layers.push( layers.push(
SyntaxLayer { SyntaxLayer {
depth: step.depth, depth: step.depth,
range: step.range, range: step.range,
tree: tree.clone(), content,
language: step.language.clone(),
}, },
&text, &text,
); );
if let (Some((config, registry)), false) = (
grammar.injection_config.as_ref().zip(registry.as_ref()),
changed_ranges.is_empty(),
) {
for range in &changed_ranges {
changed_regions.insert(
ChangedRegion {
depth: step.depth + 1,
range: text.anchor_before(range.start)..text.anchor_after(range.end),
},
text,
);
}
get_injections(
config,
text,
tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()),
registry,
step.depth + 1,
&changed_ranges,
&mut combined_injection_ranges,
&mut queue,
);
}
} }
drop(cursor); drop(cursor);
@ -586,20 +643,23 @@ impl SyntaxSnapshot {
cursor.next(buffer); cursor.next(buffer);
std::iter::from_fn(move || { std::iter::from_fn(move || {
if let Some(layer) = cursor.item() { while let Some(layer) = cursor.item() {
let info = SyntaxLayerInfo { if let SyntaxLayerContent::Parsed { tree, language } = &layer.content {
language: &layer.language, let info = SyntaxLayerInfo {
depth: layer.depth, language,
node: layer.tree.root_node_with_offset( depth: layer.depth,
layer.range.start.to_offset(buffer), node: tree.root_node_with_offset(
layer.range.start.to_point(buffer).to_ts_point(), layer.range.start.to_offset(buffer),
), layer.range.start.to_point(buffer).to_ts_point(),
}; ),
cursor.next(buffer); };
Some(info) cursor.next(buffer);
} else { return Some(info);
None } else {
cursor.next(buffer);
}
} }
None
}) })
} }
} }
@ -968,8 +1028,7 @@ fn get_injections(
changed_ranges: &[Range<usize>], changed_ranges: &[Range<usize>],
combined_injection_ranges: &mut HashMap<Arc<Language>, Vec<tree_sitter::Range>>, combined_injection_ranges: &mut HashMap<Arc<Language>, Vec<tree_sitter::Range>>,
queue: &mut BinaryHeap<ParseStep>, queue: &mut BinaryHeap<ParseStep>,
) -> bool { ) {
let mut result = false;
let mut query_cursor = QueryCursorHandle::new(); let mut query_cursor = QueryCursorHandle::new();
let mut prev_match = None; let mut prev_match = None;
@ -1024,10 +1083,8 @@ fn get_injections(
let language = language_registry let language = language_registry
.language_for_name(&language_name) .language_for_name(&language_name)
.or_else(|| language_registry.language_for_extension(&language_name)); .or_else(|| language_registry.language_for_extension(&language_name));
let range = text.anchor_before(step_range.start)..text.anchor_after(step_range.end);
if let Some(language) = language { if let Some(language) = language {
result = true;
let range =
text.anchor_before(step_range.start)..text.anchor_after(step_range.end);
if combined { if combined {
combined_injection_ranges combined_injection_ranges
.get_mut(&language.clone()) .get_mut(&language.clone())
@ -1036,12 +1093,22 @@ fn get_injections(
} else { } else {
queue.push(ParseStep { queue.push(ParseStep {
depth, depth,
language, language: ParseStepLanguage::Loaded { language },
included_ranges: content_ranges, included_ranges: content_ranges,
range, range,
mode: ParseMode::Single, mode: ParseMode::Single,
}); });
} }
} else {
queue.push(ParseStep {
depth,
language: ParseStepLanguage::Pending {
name: language_name.into(),
},
included_ranges: content_ranges,
range,
mode: ParseMode::Single,
});
} }
} }
} }
@ -1052,7 +1119,7 @@ fn get_injections(
let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte()); let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte());
queue.push(ParseStep { queue.push(ParseStep {
depth, depth,
language, language: ParseStepLanguage::Loaded { language },
range, range,
included_ranges, included_ranges,
mode: ParseMode::Combined { mode: ParseMode::Combined {
@ -1061,8 +1128,6 @@ fn get_injections(
}, },
}) })
} }
result
} }
fn splice_included_ranges( fn splice_included_ranges(
@ -1361,7 +1426,7 @@ impl sum_tree::Item for SyntaxLayer {
max_depth: self.depth, max_depth: self.depth,
range: self.range.clone(), range: self.range.clone(),
last_layer_range: self.range.clone(), last_layer_range: self.range.clone(),
last_layer_language: self.language.id(), last_layer_language: self.content.language_id(),
} }
} }
} }
@ -1371,7 +1436,7 @@ impl std::fmt::Debug for SyntaxLayer {
f.debug_struct("SyntaxLayer") f.debug_struct("SyntaxLayer")
.field("depth", &self.depth) .field("depth", &self.depth)
.field("range", &self.range) .field("range", &self.range)
.field("tree", &self.tree) .field("tree", &self.content.tree())
.finish() .finish()
} }
} }
@ -2216,16 +2281,14 @@ mod tests {
.zip(new_syntax_map.layers.iter()) .zip(new_syntax_map.layers.iter())
{ {
assert_eq!(old_layer.range, new_layer.range); assert_eq!(old_layer.range, new_layer.range);
let Some(old_tree) = old_layer.content.tree() else { continue };
let Some(new_tree) = new_layer.content.tree() else { continue };
let old_start_byte = old_layer.range.start.to_offset(old_buffer); let old_start_byte = old_layer.range.start.to_offset(old_buffer);
let new_start_byte = new_layer.range.start.to_offset(new_buffer); let new_start_byte = new_layer.range.start.to_offset(new_buffer);
let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point(); let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point();
let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point(); let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point();
let old_node = old_layer let old_node = old_tree.root_node_with_offset(old_start_byte, old_start_point);
.tree let new_node = new_tree.root_node_with_offset(new_start_byte, new_start_point);
.root_node_with_offset(old_start_byte, old_start_point);
let new_node = new_layer
.tree
.root_node_with_offset(new_start_byte, new_start_point);
check_node_edits( check_node_edits(
old_layer.depth, old_layer.depth,
&old_layer.range, &old_layer.range,