fileset: relax identifier rule to accept more path-like strings

Since filesets are primarily used on the command line, it's better to avoid
inner quoting where possible. For example, ".." would have to be quoted under
the original grammar, which was derived from the revset grammar.

This patch also adds a stricter version of the identifier rule. If we later
add symbol aliases, they will follow the "strict_identifier" rule.
This commit is contained in:
Yuya Nishihara 2024-04-08 16:52:27 +09:00
parent 653173abad
commit 57b423e3d7
3 changed files with 27 additions and 8 deletions

View file

@ -14,10 +14,17 @@
whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" }
// TODO: adjust identifier rule for file names
identifier_part = @{ (ASCII_ALPHANUMERIC | "_" | "/")+ }
// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
// +, -, ., @, _: commonly used in file names, including "." and ".."
// /: path separator
// \: path separator (Windows)
// TODO: accept glob characters as identifier?
identifier = @{
identifier_part ~ (("." | "-" | "+") ~ identifier_part)*
(XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+
}
strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ }
strict_identifier = @{
strict_identifier_part ~ ("-" ~ strict_identifier_part)*
}
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
@ -42,7 +49,7 @@ function_arguments = {
}
// TODO: change rhs to string_literal to require quoting? #2101
string_pattern = { identifier ~ pattern_kind_op ~ (identifier | string_literal) }
string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) }
primary = {
"(" ~ whitespace* ~ expression ~ whitespace* ~ ")"

View file

@ -396,8 +396,7 @@ mod tests {
cwd: Path::new("/ws/cur"),
workspace_root: Path::new("/ws"),
};
// TODO: adjust identifier rule and test the expression parser instead
let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern);
let parse = |text| parse(text, &ctx);
// cwd-relative patterns
assert_eq!(

View file

@ -40,8 +40,9 @@ impl Rule {
match self {
Rule::EOI => None,
Rule::whitespace => None,
Rule::identifier_part => None,
Rule::identifier => None,
Rule::strict_identifier_part => None,
Rule::strict_identifier => None,
Rule::string_escape => None,
Rule::string_content_char => None,
Rule::string_content => None,
@ -237,7 +238,7 @@ fn parse_primary_node(pair: Pair<Rule>) -> FilesetParseResult<ExpressionNode> {
}
Rule::string_pattern => {
let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap();
assert_eq!(lhs.as_rule(), Rule::identifier);
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
let kind = lhs.as_str();
let value = match rhs.as_rule() {
@ -399,6 +400,18 @@ mod tests {
parse_into_kind("dir/foo-bar_0.baz"),
Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz"))
);
assert_eq!(
parse_into_kind("cli-reference@.md.snap"),
Ok(ExpressionKind::Identifier("cli-reference@.md.snap"))
);
assert_eq!(
parse_into_kind("柔術.jj"),
Ok(ExpressionKind::Identifier("柔術.jj"))
);
assert_eq!(
parse_into_kind(r#"Windows\Path"#),
Ok(ExpressionKind::Identifier(r#"Windows\Path"#))
);
}
#[test]