templater: relax operator precedence rule to reduce possibility of large reparse

After upgrading pest from 2.7.8 to 2.7.9, I noticed CLI tests got significantly
slow (something around 40sec -> 60sec on my laptop.) I suspect this would be
caused by detailed error state tracking introduced in 2.7.9, but it's also true
that our template grammar exercises such code path.

My understanding is that PEG is basically a top down parsing with unlimited
lookahead. Before this change, the default commit_summary template would be
parsed as follows:
 1. parse the outermost separate(..) as "term"
 2. "concat" rule can't continue, so
 3. reparse the whole string as "expression"
Because this pattern is not uncommon, I think it's better to rewrite the
grammar to avoid large retry.

With this patch, our tests runs within ~50sec under debug build. It appears to
save a few milliseconds in release build, but my development environment isn't
quiet enough to say the difference is significant.
This commit is contained in:
Yuya Nishihara 2024-04-04 00:25:14 +09:00
parent b07fb3ea58
commit 32afea198a
2 changed files with 12 additions and 13 deletions

View file

@ -71,14 +71,10 @@ expression = {
~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ term)*
}
// Use 'term' instead of 'expression' to disallow 'x || y ++ z'. It can be
// parsed, but the precedence isn't obvious.
concat = _{
term ~ (whitespace* ~ concat_op ~ whitespace* ~ term)+
template = {
expression ~ (whitespace* ~ concat_op ~ whitespace* ~ expression)*
}
template = { concat | expression }
program = _{ SOI ~ whitespace* ~ template? ~ whitespace* ~ EOI }
function_alias_declaration = {

View file

@ -52,7 +52,6 @@ impl Rule {
Rule::primary => None,
Rule::term => None,
Rule::expression => None,
Rule::concat => None,
Rule::template => None,
Rule::program => None,
Rule::function_alias_declaration => None,
@ -476,7 +475,6 @@ fn parse_template_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode>
let mut nodes: Vec<_> = inner
.filter_map(|pair| match pair.as_rule() {
Rule::concat_op => None,
Rule::term => Some(parse_term_node(pair)),
Rule::expression => Some(parse_expression_node(pair)),
r => panic!("unexpected template item rule {r:?}"),
})
@ -1070,11 +1068,16 @@ mod tests {
parse_normalized("x || (y && (z.h()))").unwrap(),
);
// Top-level expression is allowed, but not in concatenation
assert!(parse_template(r"x && y").is_ok());
assert!(parse_template(r"f(x && y)").is_ok());
assert!(parse_template(r"x && y ++ z").is_err());
assert!(parse_template(r"(x && y) ++ z").is_ok());
// Logical operator bounds more tightly than concatenation. This might
// not be so intuitive, but should be harmless.
assert_eq!(
parse_normalized(r"x && y ++ z").unwrap(),
parse_normalized(r"(x && y) ++ z").unwrap(),
);
assert_eq!(
parse_normalized(r"x ++ y || z").unwrap(),
parse_normalized(r"x ++ (y || z)").unwrap(),
);
// Expression span
assert_eq!(parse_template(" ! x ").unwrap().span.as_str(), "! x");