ok/jj
1
0
Fork 0
forked from mirrors/jj
jj/lib/src/fileset.pest
Yuya Nishihara 5649ee4f45 fileset: parse glob characters as identifier
It's inconvenient that we have to quote glob patterns as 'glob:"*.rs"'. Since
filesets are usually specified in a shell, it's better to allow unquoted strings
if possible. This change also means we'll probably abandon #2101 "make the
parsing of string arguments stricter."

Note that we can no longer introduce ? operator or [] subscript syntax in
filesets.

Closes #4053
2024-07-18 13:49:10 +09:00

88 lines
2.9 KiB
Text

// Copyright 2021-2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Characters treated as whitespace between tokens: space, tab, line feed,
// carriage return, and form feed. The rule is silent (`_`), so it never
// produces a token pair of its own.
whitespace = _{
  " " | "\t" | "\n" | "\r" | "\x0c"
}
// Unquoted name or pattern, matched atomically as a single token.
// Accepted characters:
//   XID_CONTINUE  https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax
//   + - . @ _     commonly used in file names, including "." and ".."
//   * ? [ ]       glob characters (not extended glob)
//   /             path separator
//   \             path separator (Windows)
identifier = @{
  ( XID_CONTINUE
  | "_" | "." | "-" | "+" | "@"
  | "*" | "?" | "[" | "]"
  | "/" | "\\" )+
}
// One run of ASCII letters, digits, or underscores.
strict_identifier_part = @{ ("_" | ASCII_ALPHANUMERIC)+ }
// One or more such runs joined by single hyphens. A leading or trailing "-"
// and a doubled "--" are therefore not part of a strict identifier. Used as
// the pattern kind name on the left of ":" in the pattern rules.
strict_identifier = @{ strict_identifier_part ~ ("-" ~ strict_identifier_part)* }
// Unquoted string for the bare-string entry point. Compared with `identifier`
// it additionally accepts a space, restricts ASCII word characters to
// alphanumerics plus the listed punctuation, and accepts every code point
// from U+0080 upward.
// TODO: accept more ASCII meta characters such as "#" and ","?
bare_string = @{
  ( '\u{80}'..'\u{10ffff}'
  | ASCII_ALPHANUMERIC
  | " "
  | "_" | "." | "-" | "+" | "@"
  | "*" | "?" | "[" | "]"
  | "/" | "\\" )+
}
// Double-quoted string: a backslash introduces one of the escapes below; any
// other character except `"` and `\` is literal content.
string_escape = @{
  "\\" ~ ("\\" | "\"" | "0" | "n" | "r" | "t")
}
string_content_char = @{ !("\\" | "\"") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${
  "\"" ~ (string_content | string_escape)* ~ "\""
}

// Single-quoted raw string: everything up to the next `'` is taken verbatim.
// There is no escape syntax, so the content cannot contain a single quote.
raw_string_content = @{ (!"'" ~ ANY)* }
raw_string_literal = ${
  "'" ~ raw_string_content ~ "'"
}
// Separator between a pattern kind name and its value (kind:value).
pattern_kind_op = { ":" }
// Prefix operator. It uses the same "~" token as difference_op; the two are
// distinguished only by position (prefix_ops vs. infix_ops).
negate_op = { "~" }
// Infix operators.
union_op = { "|" }
intersection_op = { "&" }
difference_op = { "~" }
// Silent groupings: the concrete *_op rule is what appears in the parse tree.
prefix_ops = _{ negate_op }
infix_ops = _{ union_op | intersection_op | difference_op }
// Function call: a name immediately followed by "(" (no whitespace between
// them), then an argument list that may be padded with whitespace, then ")".
function = {
  function_name
  ~ "(" ~ whitespace* ~ function_arguments ~ whitespace* ~ ")"
}
function_name = @{ (ASCII_ALPHANUMERIC | "_")+ }
// Zero or more comma-separated expressions; a single trailing comma is
// allowed after the last one. The rule also matches the empty string, so it
// always yields a (possibly childless) function_arguments node.
function_arguments = {
  ( expression
    ~ (whitespace* ~ "," ~ whitespace* ~ expression)*
    ~ (whitespace* ~ ",")? )?
}
// kind:value pattern used inside expressions. No whitespace is accepted
// around ":". The value may be an unquoted identifier or a quoted/raw string.
// TODO: change rhs to string_literal to require quoting? #2101
string_pattern = {
strict_identifier
~ pattern_kind_op
~ (identifier | string_literal | raw_string_literal)
}
// kind:value pattern whose value is a bare_string (which may contain spaces).
// Only referenced from program_or_bare_string.
bare_string_pattern = { strict_identifier ~ pattern_kind_op ~ bare_string }
// Operand of an expression. PEG choice is ordered, so the order below matters:
// `function` and `string_pattern` must be tried before `identifier`, because
// every character allowed in function_name / strict_identifier is also an
// identifier character and `identifier` would otherwise consume the name and
// leave "(" or ":" unparsed.
primary = {
"(" ~ whitespace* ~ expression ~ whitespace* ~ ")"
| function
| string_pattern
| identifier
| string_literal
| raw_string_literal
}
// Flat operator sequence: a primary, optionally preceded by any number of
// prefix operators, followed by zero or more (infix operator, prefixed primary)
// pairs. Whitespace is optional around every operator.
// NOTE(review): the grammar encodes no precedence or associativity; presumably
// the consumer resolves them (e.g. with a Pratt parser) - confirm.
expression = {
(prefix_ops ~ whitespace*)* ~ primary
~ (whitespace* ~ infix_ops ~ whitespace* ~ (prefix_ops ~ whitespace*)* ~ primary)*
}
// Entry point: the whole input must be one expression, optionally surrounded
// by whitespace.
program = _{ SOI ~ whitespace* ~ expression ~ whitespace* ~ EOI }
// Lenient entry point. Alternatives are tried in order, and each must consume
// the entire input (EOI) to be accepted:
//   1. a regular expression, as in `program`;
//   2. a kind:value pattern with a bare-string value;
//   3. a bare string on its own.
// The bare alternatives do not skip surrounding whitespace; a space is itself
// a valid bare_string character.
program_or_bare_string = _{
SOI ~ ( whitespace* ~ expression ~ whitespace* ~ EOI
| bare_string_pattern ~ EOI
| bare_string ~ EOI )
}