forked from mirrors/jj
It's inconvenient that we have to quote glob patterns as 'glob:"*.rs"'. Since filesets are usually specified in the shell, it's better to allow unquoted strings where possible. This change also means we'll probably abandon #2101 "make the parsing of string arguments stricter." Note that we can no longer introduce a ? operator or a [] subscript syntax in filesets. Closes #4053
88 lines
2.9 KiB
Text
88 lines
2.9 KiB
Text
// Copyright 2021-2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
// One whitespace character: space, tab, carriage return, line feed, or form
// feed (\x0c). Declared silent (`_`), so it yields no token pair of its own.
// Other rules reference it explicitly as `whitespace*` where blanks may appear.
whitespace = _{
    " "
  | "\t"
  | "\r"
  | "\n"
  | "\x0c"
}
|
|
|
|
// Unquoted file name or glob pattern that can appear as an operand inside an
// expression. Atomic (`@`): matched as one token with no inner pairs.
//
// Accepted characters:
// XID_CONTINUE: https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
// +, -, ., @, _: commonly used in file names, including "." and ".."
// *, ?, [, ]: glob characters (not extended glob)
// /: path separator
// \: path separator (Windows)
identifier = @{
  (XID_CONTINUE | "+" | "-" | "." | "@" | "_" | "*" | "?" | "[" | "]" | "/" | "\\")+
}
|
|
// Restricted identifier: one or more runs of ASCII alphanumerics or "_",
// joined by single "-" characters. A leading, trailing, or doubled "-" is
// therefore not part of the match. Used as the kind prefix (the part before
// ":") in `string_pattern` and `bare_string_pattern`.
strict_identifier_part = @{ (ASCII_ALPHANUMERIC | "_")+ }
strict_identifier = @{
  strict_identifier_part ~ ("-" ~ strict_identifier_part)*
}
|
|
|
|
// String accepted without any quoting. Compared with `identifier`, it also
// allows " " and every code point from U+0080 up to U+10FFFF, but restricts
// the ASCII range to alphanumerics plus the punctuation listed below.
// Referenced by `bare_string_pattern` and the `program_or_bare_string` entry
// point.
//
// TODO: accept more ASCII meta characters such as "#" and ","?
bare_string = @{
  ( ASCII_ALPHANUMERIC
  | " " | "+" | "-" | "." | "@" | "_" | "*" | "?" | "[" | "]" | "/" | "\\"
  | '\u{80}'..'\u{10ffff}' )+
}
|
|
|
|
// Double-quoted string literal.
//
// string_escape: a backslash followed by one of t, r, n, 0, ", or \.
// string_content_char: any single character other than `"` or `\`.
// string_content: a maximal run of such characters.
// string_literal: compound-atomic (`$`), so no implicit whitespace handling
// applies between its parts while the inner string_content / string_escape
// pairs are still produced.
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
string_content_char = @{ !("\"" | "\\") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }
|
|
|
|
// Single-quoted raw string. The content is every character up to the next
// "'" (possibly none); no escape rule is applied, so the content itself can
// never contain "'".
raw_string_content = @{ (!"'" ~ ANY)* }
raw_string_literal = ${ "'" ~ raw_string_content ~ "'" }
|
|
|
|
// Separator between the pattern kind and the pattern text ("kind:pattern").
pattern_kind_op = { ":" }

// Operators on expressions. "~" is spelled the same for prefix negation and
// infix difference; which rule matches is decided by where it appears in
// `expression` (via `prefix_ops` or `infix_ops`).
negate_op = { "~" }
union_op = { "|" }
intersection_op = { "&" }
difference_op = { "~" }
// Silent grouping rules: only the concrete *_op pairs are emitted.
prefix_ops = _{ negate_op }
infix_ops = _{ union_op | intersection_op | difference_op }
|
|
|
|
// Function call: a name, "(", the argument list, and ")". Whitespace is
// accepted just inside the parentheses.
function = { function_name ~ "(" ~ whitespace* ~ function_arguments ~ whitespace* ~ ")" }
// Function names are limited to ASCII alphanumerics and "_".
function_name = @{ (ASCII_ALPHANUMERIC | "_")+ }
// Either comma-separated expressions with an optional trailing comma, or
// nothing at all (the "" alternative, which lets the rule match an empty
// argument list).
function_arguments = {
  expression ~ (whitespace* ~ "," ~ whitespace* ~ expression)* ~ (whitespace* ~ ",")?
  | ""
}
|
|
|
|
// "kind:pattern" as an operand inside an expression. The pattern part may be
// an unquoted `identifier`, a double-quoted string, or a single-quoted raw
// string.
// TODO: change rhs to string_literal to require quoting? #2101
string_pattern = {
  strict_identifier
  ~ pattern_kind_op
  ~ (identifier | string_literal | raw_string_literal)
}
// "kind:pattern" where the pattern part is an unquoted `bare_string`.
bare_string_pattern = { strict_identifier ~ pattern_kind_op ~ bare_string }
|
|
|
|
// Operand of an expression. Alternatives are tried top to bottom (PEG
// ordered choice), so `function` and `string_pattern` are attempted before a
// plain `identifier`.
primary = {
  "(" ~ whitespace* ~ expression ~ whitespace* ~ ")"
  | function
  | string_pattern
  | identifier
  | string_literal
  | raw_string_literal
}
|
|
|
|
// Flat sequence of operands joined by infix operators; every operand may be
// preceded by any number of prefix operators. Whitespace is optional around
// operators. Operator precedence is not encoded in this rule.
// NOTE(review): precedence is presumably resolved by the code consuming the
// parse result; confirm against the parser.
expression = {
  (prefix_ops ~ whitespace*)* ~ primary
  ~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ primary)*
}
|
|
|
|
// Entry point: exactly one expression spanning the whole input, with optional
// surrounding whitespace.
program = _{ SOI ~ whitespace* ~ expression ~ whitespace* ~ EOI }
// Entry point that also accepts unquoted strings. Alternatives are tried in
// order: a full expression first, then "kind:bare string", then a bare string
// on its own. Each alternative must consume the entire input (EOI).
program_or_bare_string = _{
  SOI ~ ( whitespace* ~ expression ~ whitespace* ~ EOI
        | bare_string_pattern ~ EOI
        | bare_string ~ EOI )
}
|