jj/lib/src/fileset.pest
Yuya Nishihara 57b423e3d7 fileset: relax identifier rule to accept more path-like strings
Since fileset is primarily used in CLI, it's better to avoid inner quoting if
possible. For example, ".." would have to be quoted in the original grammar
derived from the revset.

This patch also adds a stricter version of an identifier rule. If we add a
symbol alias, it will follow the "strict_identifier" rule.
2024-04-09 20:42:09 +09:00

67 lines
2.3 KiB
Text

// Copyright 2021-2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" }
// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
// +, -, ., @, _: commonly used in file name including "." and ".."
// /: path separator
// \: path separator (Windows)
// TODO: accept glob characters as identifier?
identifier = @{
(XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "/" | "\\")+
}
strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ }
strict_identifier = @{
strict_identifier_part ~ ("-" ~ strict_identifier_part)*
}
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
string_content_char = @{ !("\"" | "\\") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }
pattern_kind_op = { ":" }
negate_op = { "~" }
union_op = { "|" }
intersection_op = { "&" }
difference_op = { "~" }
prefix_ops = _{ negate_op }
infix_ops = _{ union_op | intersection_op | difference_op }
function = { function_name ~ "(" ~ whitespace* ~ function_arguments ~ whitespace* ~ ")" }
function_name = @{ (ASCII_ALPHANUMERIC | "_")+ }
function_arguments = {
expression ~ (whitespace* ~ "," ~ whitespace* ~ expression)* ~ (whitespace* ~ ",")?
| ""
}
// TODO: change rhs to string_literal to require quoting? #2101
string_pattern = { strict_identifier ~ pattern_kind_op ~ (identifier | string_literal) }
primary = {
"(" ~ whitespace* ~ expression ~ whitespace* ~ ")"
| function
| string_pattern
| identifier
| string_literal
}
expression = {
(prefix_ops ~ whitespace*)* ~ primary
~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ primary)*
}
program = _{ SOI ~ whitespace* ~ expression ~ whitespace* ~ EOI }