From 0d62a336af0278c0f08b8bbf6512754a3a648729 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Sat, 10 Apr 2021 10:18:30 -0700 Subject: [PATCH] revsets: initial support for Mercurial-style revsets This patch adds initial support for a DSL for specifying revisions inspired by Mercurial's "revset" language. The initial support includes prefix operators ":" (parents) and "*:" (ancestors) with naive parsing of the revsets. Mercurial uses postfix operator "^" for parent 1 just like Git does. It uses prefix operator "::" for ancestors and the same operator as postfix operator for descendants. I did it differently because I like the idea of using the same operator as prefix/postfix depending on desired direction, so I wanted to apply that to parents/children as well (and for predecessors/successors). The "*" in the "*:" operator is copied from regular expression syntax. Let's see how it works out. This is an experimental VCS, after all. I've updated the CLI to use the new revset support. The implementation feels a little messy, but you have to start somewhere... --- lib/src/index.rs | 7 +- lib/src/revset.rs | 92 +++++++++++++++++++++++++- lib/tests/test_revset.rs | 138 ++++++++++++++++++++++++++++++++++++++- src/commands.rs | 51 +++++++++++---- 4 files changed, 269 insertions(+), 19 deletions(-) diff --git a/lib/src/index.rs b/lib/src/index.rs index 3f7bf5488..df781746e 100644 --- a/lib/src/index.rs +++ b/lib/src/index.rs @@ -969,7 +969,7 @@ pub struct IndexStats { pub levels: Vec, } -#[derive(Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq)] struct IndexEntryByPosition<'a>(IndexEntry<'a>); impl Ord for IndexEntryByPosition<'_> { @@ -984,7 +984,7 @@ impl PartialOrd for IndexEntryByPosition<'_> { } } -#[derive(Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq)] struct IndexEntryByGeneration<'a>(IndexEntry<'a>); impl Ord for IndexEntryByGeneration<'_> { @@ -1002,12 +1002,13 @@ impl PartialOrd for IndexEntryByGeneration<'_> { } } -#[derive(Eq, PartialEq, Ord, PartialOrd)] +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)] struct RevWalkWorkItem<'a> { entry: IndexEntryByGeneration<'a>, wanted: bool, } +#[derive(Clone)] pub struct RevWalk<'a> { index: CompositeIndex<'a>, items: BinaryHeap>, diff --git a/lib/src/revset.rs b/lib/src/revset.rs index c590a812d..c9c1f644c 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Reverse; + use thiserror::Error; use crate::commit::Commit; -use crate::index::{HexPrefix, PrefixResolution}; +use crate::index::{HexPrefix, IndexEntry, PrefixResolution, RevWalk}; use crate::repo::RepoRef; use crate::store::{CommitId, StoreError}; @@ -75,3 +77,91 @@ pub fn resolve_symbol(repo: RepoRef, symbol: &str) -> Result), + Ancestors(Box), +} + +pub fn parse(revset_str: &str) -> RevsetExpression { + // TODO: Parse using a parser generator (probably pest since we already use that + // for templates) + if let Some(remainder) = revset_str.strip_prefix("*:") { + RevsetExpression::Ancestors(Box::new(parse(remainder))) + } else if let Some(remainder) = revset_str.strip_prefix(":") { + RevsetExpression::Parents(Box::new(parse(remainder))) + } else { + RevsetExpression::Symbol(revset_str.to_owned()) + } +} + +pub trait Revset<'repo> { + fn iter<'revset>(&'revset self) -> Box> + 'revset>; +} + +struct EagerRevset<'repo> { + index_entries: Vec>, +} + +impl<'repo> Revset<'repo> for EagerRevset<'repo> { + fn iter<'revset>(&'revset self) -> Box> + 'revset> { + Box::new(self.index_entries.iter().cloned()) + } +} + +struct RevWalkRevset<'repo> { + walk: RevWalk<'repo>, +} + +impl<'repo> Revset<'repo> for RevWalkRevset<'repo> { + fn iter<'revset>(&'revset self) -> Box> + 'revset> { + Box::new(RevWalkRevsetIterator { + walk: self.walk.clone(), + }) + } +} + +struct RevWalkRevsetIterator<'repo> { + walk: RevWalk<'repo>, +} + +impl<'repo> Iterator for RevWalkRevsetIterator<'repo> { + type Item = IndexEntry<'repo>; + + fn next(&mut self) -> Option { + self.walk.next() + } +} + +pub fn evaluate_expression<'repo>( + repo: RepoRef<'repo>, + expression: &RevsetExpression, +) -> Result + 'repo>, RevsetError> { + match expression { + RevsetExpression::Symbol(symbol) => { + let commit_id = resolve_symbol(repo, &symbol)?.id().clone(); + Ok(Box::new(EagerRevset { + index_entries: vec![repo.index().entry_by_id(&commit_id).unwrap()], + })) + } + RevsetExpression::Parents(base_expression) => { + // TODO: Make this lazy + let base_set = evaluate_expression(repo, base_expression.as_ref())?; + let mut parent_entries: Vec<_> = + base_set.iter().flat_map(|entry| entry.parents()).collect(); + parent_entries.sort_by_key(|b| Reverse(b.position())); + parent_entries.dedup_by_key(|entry| entry.position()); + Ok(Box::new(EagerRevset { + index_entries: parent_entries, + })) + } + RevsetExpression::Ancestors(base_expression) => { + let base_set = evaluate_expression(repo, base_expression.as_ref())?; + let base_ids: Vec<_> = base_set.iter().map(|entry| entry.commit_id()).collect(); + let walk = repo.index().walk_revs(&base_ids, &[]); + Ok(Box::new(RevWalkRevset { walk })) + } + } +} diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index 93fc1d4e8..557e0e25b 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -13,8 +13,11 @@ // limitations under the License. use jujube_lib::commit_builder::CommitBuilder; -use jujube_lib::revset::{resolve_symbol, RevsetError}; -use jujube_lib::store::{MillisSinceEpoch, Signature, Timestamp}; +use jujube_lib::repo::RepoRef; +use jujube_lib::revset::{ + evaluate_expression, parse, resolve_symbol, RevsetError, RevsetExpression, +}; +use jujube_lib::store::{CommitId, MillisSinceEpoch, Signature, Timestamp}; use jujube_lib::testutils; use test_case::test_case; @@ -222,3 +225,134 @@ fn test_resolve_symbol_git_refs() { tx.discard(); } + +#[test] +fn test_parse_revset() { + assert_eq!(parse("@"), RevsetExpression::Symbol("@".to_string())); + assert_eq!(parse("foo"), RevsetExpression::Symbol("foo".to_string())); + assert_eq!( + parse(":@"), + RevsetExpression::Parents(Box::new(RevsetExpression::Symbol("@".to_string()))) + ); + assert_eq!( + parse("*:@"), + RevsetExpression::Ancestors(Box::new(RevsetExpression::Symbol("@".to_string()))) + ); +} + +fn resolve_commit_ids(repo: RepoRef, revset_str: &str) -> Vec { + let expression = parse(revset_str); + evaluate_expression(repo, &expression) + .unwrap() + .iter() + .map(|entry| entry.commit_id()) + .collect() +} + +#[test_case(false ; "local store")] +#[test_case(true ; "git store")] +fn test_evaluate_expression_root_and_checkout(use_git: bool) { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, use_git); + + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + let root_commit = repo.store().root_commit(); + let commit1 = testutils::create_random_commit(&settings, &repo).write_to_repo(mut_repo); + + // Can find the root commit + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), "root"), + vec![root_commit.id().clone()] + ); + + // Can find the current checkout + mut_repo.set_checkout(commit1.id().clone()); + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), "@"), + vec![commit1.id().clone()] + ); + + tx.discard(); +} + +#[test_case(false ; "local store")] +#[test_case(true ; "git store")] +fn test_evaluate_expression_parents(use_git: bool) { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, use_git); + + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + let commit1 = testutils::create_random_commit(&settings, &repo).write_to_repo(mut_repo); + let commit2 = testutils::create_random_commit(&settings, &repo) + .set_parents(vec![commit1.id().clone()]) + .write_to_repo(mut_repo); + let commit3 = testutils::create_random_commit(&settings, &repo).write_to_repo(mut_repo); + let commit4 = testutils::create_random_commit(&settings, &repo) + .set_parents(vec![commit2.id().clone(), commit3.id().clone()]) + .write_to_repo(mut_repo); + + // The root commit has no parents + assert_eq!(resolve_commit_ids(mut_repo.as_repo_ref(), ":root"), vec![]); + + // Can find parents of the current checkout + mut_repo.set_checkout(commit2.id().clone()); + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), ":@"), + vec![commit1.id().clone()] + ); + + // Can find parents of a merge commit + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), &format!(":{}", commit4.id().hex())), + vec![commit3.id().clone(), commit2.id().clone(),] + ); + + tx.discard(); +} + +#[test_case(false ; "local store")] +#[test_case(true ; "git store")] +fn test_evaluate_expression_ancestors(use_git: bool) { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, use_git); + + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + let root_commit = repo.store().root_commit(); + let commit1 = testutils::create_random_commit(&settings, &repo).write_to_repo(mut_repo); + let commit2 = testutils::create_random_commit(&settings, &repo) + .set_parents(vec![commit1.id().clone()]) + .write_to_repo(mut_repo); + let commit3 = testutils::create_random_commit(&settings, &repo) + .set_parents(vec![commit2.id().clone()]) + .write_to_repo(mut_repo); + let commit4 = testutils::create_random_commit(&settings, &repo) + .set_parents(vec![commit1.id().clone(), commit3.id().clone()]) + .write_to_repo(mut_repo); + + // The ancestors of the root commit is just the root commit itself + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), "*:root"), + vec![root_commit.id().clone()] + ); + + // Can find ancestors of a specific commit. Commits reachable via multiple paths + // are not repeated. + assert_eq!( + resolve_commit_ids(mut_repo.as_repo_ref(), &format!("*:{}", commit4.id().hex())), + vec![ + commit4.id().clone(), + commit3.id().clone(), + commit2.id().clone(), + commit1.id().clone(), + root_commit.id().clone(), + ] + ); + + tx.discard(); +} diff --git a/src/commands.rs b/src/commands.rs index 8055411d6..c80484e9a 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -42,7 +42,7 @@ use jujube_lib::repo_path::RepoPath; use jujube_lib::revset::RevsetError; use jujube_lib::rewrite::{back_out_commit, merge_commit_trees, rebase_commit}; use jujube_lib::settings::UserSettings; -use jujube_lib::store::{CommitId, Timestamp, TreeValue}; +use jujube_lib::store::{CommitId, StoreError, Timestamp, TreeValue}; use jujube_lib::store_wrapper::StoreWrapper; use jujube_lib::tree::Tree; use jujube_lib::trees::Diff; @@ -76,6 +76,12 @@ impl From for CommandError { } } +impl From for CommandError { + fn from(err: StoreError) -> Self { + CommandError::UserError(format!("Unexpected error from store: {}", err)) + } +} + impl From for CommandError { fn from(err: DiffEditError) -> Self { CommandError::UserError(format!("Failed to edit diff: {}", err)) @@ -127,6 +133,14 @@ fn resolve_single_rev( ) -> Result<(Arc, Commit), CommandError> { // If we're looking up the working copy commit ("@"), make sure that it is up to // date (the lib crate only looks at the checkout in the view). + // TODO: How do we generally figure out if a revset needs to commit the working + // copy? For example, ":@" should ideally not result in a new working copy + // commit, but "::@" should. "foo::" is probably also should, since we would + // otherwise need to evaluate the revset and see if "foo::" includes the + // parent of the current checkout. Other interesting cases include some kind of + // reference pointing to the working copy commit. If it's a + // type of reference that would get updated when the commit gets rewritten, then + // we probably should create a new working copy commit. if revision_str == "@" { let wc = repo.working_copy(); // TODO: Avoid committing every time this function is called. @@ -134,12 +148,7 @@ fn resolve_single_rev( repo = reloaded_repo; } - if revision_str == "@^" { - let commit = repo.store().get_commit(repo.view().checkout()).unwrap(); - assert!(commit.is_open()); - let parents = commit.parents(); - Ok((repo, parents[0].clone())) - } else if revision_str.starts_with("desc(") && revision_str.ends_with(')') { + if revision_str.starts_with("desc(") && revision_str.ends_with(')') { let needle = revision_str[5..revision_str.len() - 1].to_string(); let mut matches = vec![]; let head_ids = skip_uninteresting_heads(repo.as_ref(), &repo.view().heads()); @@ -157,10 +166,26 @@ fn resolve_single_rev( Some(commit) => Ok((repo, commit)), } } else { - Ok(( - repo.clone(), - revset::resolve_symbol(repo.as_repo_ref(), revision_str)?, - )) + let revset_expression = revset::parse(revision_str); + let revset = revset::evaluate_expression(repo.as_repo_ref(), &revset_expression)?; + let mut iter = revset.iter(); + match iter.next() { + None => Err(CommandError::UserError(format!( + "Revset \"{}\" didn't resolve to any revisions", + revision_str + ))), + Some(entry) => { + let commit = repo.store().get_commit(&entry.commit_id())?; + if iter.next().is_some() { + return Err(CommandError::UserError(format!( + "Revset \"{}\" resolved to more than one revision", + revision_str + ))); + } else { + Ok((repo.clone(), commit)) + } + } + } } } @@ -376,7 +401,7 @@ fn get_app<'a, 'b>() -> App<'a, 'b> { .long("source") .short("s") .takes_value(true) - .default_value("@^"), + .default_value(":@"), ) .arg( Arg::with_name("destination") @@ -468,7 +493,7 @@ fn get_app<'a, 'b>() -> App<'a, 'b> { .long("revision") .short("r") .takes_value(true) - .default_value("@^"), + .default_value(":@"), ) .arg( Arg::with_name("remote")