From 57b423e3d75f596b10495a2eecfac7074953d948 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 8 Apr 2024 16:52:27 +0900 Subject: [PATCH] fileset: relax identifier rule to accept more path-like strings Since fileset is primarily used in CLI, it's better to avoid inner quoting if possible. For example, ".." would have to be quoted in the original grammar derived from the revset. This patch also adds a stricter version of an identifier rule. If we add a symbol alias, it will follow the "strict_identifier" rule. --- lib/src/fileset.pest | 15 +++++++++++---- lib/src/fileset.rs | 3 +-- lib/src/fileset_parser.rs | 17 +++++++++++++++-- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/src/fileset.pest b/lib/src/fileset.pest index 83e17780c..c06a2ce73 100644 --- a/lib/src/fileset.pest +++ b/lib/src/fileset.pest @@ -14,10 +14,17 @@ whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" } -// TODO: adjust identifier rule for file names -identifier_part = @{ (ASCII_ALPHANUMERIC | "_" | "/")+ } +// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax +// +, -, ., @, _: commonly used in file name including "." and ".." +// /: path separator +// \: path separator (Windows) +// TODO: accept glob characters as identifier? identifier = @{ - identifier_part ~ (("." | "-" | "+") ~ identifier_part)* + (XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+ +} +strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ } +strict_identifier = @{ + strict_identifier_part ~ ("-" ~ strict_identifier_part)* } string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } @@ -42,7 +49,7 @@ function_arguments = { } // TODO: change rhs to string_literal to require quoting? #2101 -string_pattern = { identifier ~ pattern_kind_op ~ (identifier | string_literal) } +string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) } primary = { "(" ~ whitespace* ~ expression ~ whitespace* ~ ")" diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs index 1cbcab2f6..31368ae28 100644 --- a/lib/src/fileset.rs +++ b/lib/src/fileset.rs @@ -396,8 +396,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - // TODO: adjust identifier rule and test the expression parser instead - let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern); + let parse = |text| parse(text, &ctx); // cwd-relative patterns assert_eq!( diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs index 10a072cf0..ec41f4db8 100644 --- a/lib/src/fileset_parser.rs +++ b/lib/src/fileset_parser.rs @@ -40,8 +40,9 @@ impl Rule { match self { Rule::EOI => None, Rule::whitespace => None, - Rule::identifier_part => None, Rule::identifier => None, + Rule::strict_identifier_part => None, + Rule::strict_identifier => None, Rule::string_escape => None, Rule::string_content_char => None, Rule::string_content => None, @@ -237,7 +238,7 @@ fn parse_primary_node(pair: Pair) -> FilesetParseResult { } Rule::string_pattern => { let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap(); - assert_eq!(lhs.as_rule(), Rule::identifier); + assert_eq!(lhs.as_rule(), Rule::strict_identifier); assert_eq!(op.as_rule(), Rule::pattern_kind_op); let kind = lhs.as_str(); let value = match rhs.as_rule() { @@ -399,6 +400,18 @@ mod tests { parse_into_kind("dir/foo-bar_0.baz"), Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz")) ); + assert_eq!( + parse_into_kind("cli-reference@.md.snap"), + Ok(ExpressionKind::Identifier("cli-reference@.md.snap")) + ); + assert_eq!( + parse_into_kind("柔術.jj"), + Ok(ExpressionKind::Identifier("柔術.jj")) + ); + assert_eq!( + parse_into_kind(r#"Windows\Path"#), + Ok(ExpressionKind::Identifier(r#"Windows\Path"#)) + ); } #[test]