From 653173abad0adf5007f5f40a8149405e7157a4f8 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 7 Apr 2024 21:14:17 +0900 Subject: [PATCH] fileset: implement name resolution stage, add all()/none() functions #3239 --- docs/filesets.md | 26 +++++- lib/src/fileset.rs | 191 +++++++++++++++++++++++++++++++++++++- lib/src/fileset_parser.rs | 13 ++- 3 files changed, 222 insertions(+), 8 deletions(-) diff --git a/docs/filesets.md b/docs/filesets.md index 0b6fde9a9..b9bccaf07 100644 --- a/docs/filesets.md +++ b/docs/filesets.md @@ -1,13 +1,9 @@ # Filesets - +consists of file patterns, operators, and functions. ## File patterns @@ -19,3 +15,23 @@ The following patterns are supported: * `root:"path"`: Matches workspace-relative path prefix (file or files under directory recursively.) * `root-file:"path"`: Matches workspace-relative file (or exact) path. + +## Operators + +The following operators are supported. `x` and `y` below can be any fileset +expressions. + +* `x & y`: Matches both `x` and `y`. +* `x | y`: Matches either `x` or `y` (or both). +* `x ~ y`: Matches `x` but not `y`. +* `~x`: Matches everything but `x`. + +You can use parentheses to control evaluation order, such as `(x & y) | z` or +`x & (y | z)`. + +## Functions + +You can also specify patterns by using functions. + +* `all()`: Matches everything. +* `none()`: Matches nothing. diff --git a/lib/src/fileset.rs b/lib/src/fileset.rs index 5c7b29b36..1cbcab2f6 100644 --- a/lib/src/fileset.rs +++ b/lib/src/fileset.rs @@ -14,11 +14,17 @@ //! Functional language for selecting a set of paths. 
+use std::collections::HashMap; use std::path::Path; use std::slice; +use once_cell::sync::Lazy; use thiserror::Error; +use crate::dsl_util::collect_similar; +use crate::fileset_parser::{ + self, BinaryOp, ExpressionKind, ExpressionNode, FunctionCallNode, UnaryOp, +}; pub use crate::fileset_parser::{FilesetParseError, FilesetParseErrorKind, FilesetParseResult}; use crate::matchers::{ DifferenceMatcher, EverythingMatcher, FilesMatcher, IntersectionMatcher, Matcher, @@ -171,6 +177,18 @@ impl FilesetExpression { FilesetExpression::Pattern(FilePattern::PrefixPath(path)) } + /// Expression that matches either `self` or `other` (or both). + pub fn union(self, other: Self) -> Self { + match self { + // Micro optimization for "x | y | z" + FilesetExpression::UnionAll(mut expressions) => { + expressions.push(other); + FilesetExpression::UnionAll(expressions) + } + expr => FilesetExpression::UnionAll(vec![expr, other]), + } + } + /// Expression that matches any of the given `expressions`. pub fn union_all(expressions: Vec<FilesetExpression>) -> Self { match expressions.len() { @@ -283,6 +301,87 @@ impl FilesetParseContext<'_> { } } +type FilesetFunction = + fn(&FilesetParseContext, &FunctionCallNode) -> FilesetParseResult<FilesetExpression>; + +static BUILTIN_FUNCTION_MAP: Lazy<HashMap<&'static str, FilesetFunction>> = Lazy::new(|| { + // Not using maplit::hashmap!{} or custom declarative macro here because + // code completion inside macro is quite restricted.
+ let mut map: HashMap<&'static str, FilesetFunction> = HashMap::new(); + map.insert("none", |_ctx, function| { + fileset_parser::expect_no_arguments(function)?; + Ok(FilesetExpression::none()) + }); + map.insert("all", |_ctx, function| { + fileset_parser::expect_no_arguments(function)?; + Ok(FilesetExpression::all()) + }); + map +}); + +fn resolve_function( + ctx: &FilesetParseContext, + function: &FunctionCallNode, +) -> FilesetParseResult<FilesetExpression> { + if let Some(func) = BUILTIN_FUNCTION_MAP.get(function.name) { + func(ctx, function) + } else { + Err(FilesetParseError::new( + FilesetParseErrorKind::NoSuchFunction { + name: function.name.to_owned(), + candidates: collect_similar(function.name, BUILTIN_FUNCTION_MAP.keys()), + }, + function.name_span, + )) + } +} + +fn resolve_expression( + ctx: &FilesetParseContext, + node: &ExpressionNode, +) -> FilesetParseResult<FilesetExpression> { + let wrap_pattern_error = + |err| FilesetParseError::expression("Invalid file pattern", node.span).with_source(err); + match &node.kind { + ExpressionKind::Identifier(name) => { + let pattern = FilePattern::cwd_prefix_path(ctx, name).map_err(wrap_pattern_error)?; + Ok(FilesetExpression::pattern(pattern)) + } + ExpressionKind::String(name) => { + let pattern = FilePattern::cwd_prefix_path(ctx, name).map_err(wrap_pattern_error)?; + Ok(FilesetExpression::pattern(pattern)) + } + ExpressionKind::StringPattern { kind, value } => { + let pattern = + FilePattern::from_str_kind(ctx, value, kind).map_err(wrap_pattern_error)?; + Ok(FilesetExpression::pattern(pattern)) + } + ExpressionKind::Unary(op, arg_node) => { + let arg = resolve_expression(ctx, arg_node)?; + match op { + UnaryOp::Negate => Ok(FilesetExpression::all().difference(arg)), + } + } + ExpressionKind::Binary(op, lhs_node, rhs_node) => { + let lhs = resolve_expression(ctx, lhs_node)?; + let rhs = resolve_expression(ctx, rhs_node)?; + match op { + BinaryOp::Union => Ok(lhs.union(rhs)), + BinaryOp::Intersection => Ok(lhs.intersection(rhs)),
+ BinaryOp::Difference => Ok(lhs.difference(rhs)), + } + } + ExpressionKind::FunctionCall(function) => resolve_function(ctx, function), + } +} + +/// Parses text into `FilesetExpression`. +pub fn parse(text: &str, ctx: &FilesetParseContext) -> FilesetParseResult<FilesetExpression> { + let node = fileset_parser::parse_program(text)?; + // TODO: add basic tree substitution pass to eliminate redundant expressions + resolve_expression(ctx, &node) +} + #[cfg(test)] mod tests { use super::*; @@ -297,7 +396,7 @@ mod tests { cwd: Path::new("/ws/cur"), workspace_root: Path::new("/ws"), }; - // TODO: implement fileset expression parser and test it instead + // TODO: adjust identifier rule and test the expression parser instead let parse = |input| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern); // cwd-relative patterns @@ -343,6 +442,96 @@ mod tests { ); } + #[test] + fn test_parse_function() { + let ctx = FilesetParseContext { + cwd: Path::new("/ws/cur"), + workspace_root: Path::new("/ws"), + }; + let parse = |text| parse(text, &ctx); + + assert_eq!(parse("all()").unwrap(), FilesetExpression::all()); + assert_eq!(parse("none()").unwrap(), FilesetExpression::none()); + insta::assert_debug_snapshot!(parse("all(x)").unwrap_err().kind(), @r###" + InvalidArguments { + name: "all", + message: "Expected 0 arguments", + } + "###); + insta::assert_debug_snapshot!(parse("ale()").unwrap_err().kind(), @r###" + NoSuchFunction { + name: "ale", + candidates: [ + "all", + ], + } + "###); + } + + #[test] + fn test_parse_compound_expression() { + let ctx = FilesetParseContext { + cwd: Path::new("/ws/cur"), + workspace_root: Path::new("/ws"), + }; + let parse = |text| parse(text, &ctx); + + insta::assert_debug_snapshot!(parse("~x").unwrap(), @r###" + Difference( + All, + Pattern( + PrefixPath( + "cur/x", + ), + ), + ) + "###); + insta::assert_debug_snapshot!(parse("x|y|root:z").unwrap(), @r###" + UnionAll( + [ + Pattern( + PrefixPath( + "cur/x", + ), + ), + Pattern( + PrefixPath( + "cur/y", + ),
+ ), + Pattern( + PrefixPath( + "z", + ), + ), + ], + ) + "###); + insta::assert_debug_snapshot!(parse("x|y&z").unwrap(), @r###" + UnionAll( + [ + Pattern( + PrefixPath( + "cur/x", + ), + ), + Intersection( + Pattern( + PrefixPath( + "cur/y", + ), + ), + Pattern( + PrefixPath( + "cur/z", + ), + ), + ), + ], + ) + "###); + } + #[test] fn test_build_matcher_simple() { insta::assert_debug_snapshot!(FilesetExpression::none().to_matcher(), @"NothingMatcher"); diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs index c08fb02cf..10a072cf0 100644 --- a/lib/src/fileset_parser.rs +++ b/lib/src/fileset_parser.rs @@ -14,8 +14,6 @@ //! Parser for the fileset language. -#![allow(unused)] // TODO - use std::error; use itertools::Itertools as _; @@ -303,6 +301,17 @@ pub fn parse_program(text: &str) -> FilesetParseResult<ExpressionNode> { parse_expression_node(first) } +pub fn expect_no_arguments(function: &FunctionCallNode) -> FilesetParseResult<()> { + if function.args.is_empty() { + Ok(()) + } else { + Err(FilesetParseError::invalid_arguments( + function, + "Expected 0 arguments", + )) + } +} + #[cfg(test)] mod tests { use assert_matches::assert_matches;