From 32afea198af6cea69190b35cf50a0315051a4b0c Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Thu, 4 Apr 2024 00:25:14 +0900 Subject: [PATCH] templater: relax operator precedence rule to reduce possibility of large reparse After upgrading pest from 2.7.8 to 2.7.9, I noticed CLI tests got significantly slow (something around 40sec -> 60sec on my laptop.) I suspect this would be caused by detailed error state tracking introduced in 2.7.9, but it's also true that our template grammar exercises such code path. My understanding is that PEG is basically a top down parsing with unlimited lookahead. Before this change, the default commit_summary template would be parsed as follows: 1. parse the outermost separate(..) as "term" 2. "concat" rule can't continue, so 3. reparse the whole string as "expression" Because this pattern is not uncommon, I think it's better to rewrite the grammar to avoid large retry. With this patch, our tests runs within ~50sec under debug build. It appears to save a few milliseconds in release build, but my development environment isn't quiet enough to say the difference is significant. --- cli/src/template.pest | 8 ++------ cli/src/template_parser.rs | 17 ++++++++++------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cli/src/template.pest b/cli/src/template.pest index b40f71a6a..89e24cc17 100644 --- a/cli/src/template.pest +++ b/cli/src/template.pest @@ -71,14 +71,10 @@ expression = { ~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ term)* } -// Use 'term' instead of 'expression' to disallow 'x || y ++ z'. It can be -// parsed, but the precedence isn't obvious. -concat = _{ - term ~ (whitespace* ~ concat_op ~ whitespace* ~ term)+ +template = { + expression ~ (whitespace* ~ concat_op ~ whitespace* ~ expression)* } -template = { concat | expression } - program = _{ SOI ~ whitespace* ~ template? ~ whitespace* ~ EOI } function_alias_declaration = { diff --git a/cli/src/template_parser.rs b/cli/src/template_parser.rs index 8c42717a0..60347b985 100644 --- a/cli/src/template_parser.rs +++ b/cli/src/template_parser.rs @@ -52,7 +52,6 @@ impl Rule { Rule::primary => None, Rule::term => None, Rule::expression => None, - Rule::concat => None, Rule::template => None, Rule::program => None, Rule::function_alias_declaration => None, @@ -476,7 +475,6 @@ fn parse_template_node(pair: Pair) -> TemplateParseResult let mut nodes: Vec<_> = inner .filter_map(|pair| match pair.as_rule() { Rule::concat_op => None, - Rule::term => Some(parse_term_node(pair)), Rule::expression => Some(parse_expression_node(pair)), r => panic!("unexpected template item rule {r:?}"), }) @@ -1070,11 +1068,16 @@ mod tests { parse_normalized("x || (y && (z.h()))").unwrap(), ); - // Top-level expression is allowed, but not in concatenation - assert!(parse_template(r"x && y").is_ok()); - assert!(parse_template(r"f(x && y)").is_ok()); - assert!(parse_template(r"x && y ++ z").is_err()); - assert!(parse_template(r"(x && y) ++ z").is_ok()); + // Logical operator bounds more tightly than concatenation. This might + // not be so intuitive, but should be harmless. + assert_eq!( + parse_normalized(r"x && y ++ z").unwrap(), + parse_normalized(r"(x && y) ++ z").unwrap(), + ); + assert_eq!( + parse_normalized(r"x ++ y || z").unwrap(), + parse_normalized(r"x ++ (y || z)").unwrap(), + ); // Expression span assert_eq!(parse_template(" ! x ").unwrap().span.as_str(), "! x");