From 0532301e03421923e06f788b869fdd56048697ca Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 25 Mar 2023 12:14:26 +0900 Subject: [PATCH] revset: add latest(candidates, count) predicate This serves the role of limit() in Mercurial. Since revsets in JJ is (conceptually) an unordered set, a "limit" predicate should define its ordering criteria. That's why the added predicate is named as "latest". Closes #1110 --- CHANGELOG.md | 2 + docs/revsets.md | 2 + lib/src/default_index_store.rs | 2 +- lib/src/default_revset_engine.rs | 55 +++++++++++++++++++-- lib/src/revset.rs | 32 ++++++++++++ lib/tests/test_revset.rs | 84 ++++++++++++++++++++++++++++++++ tests/test_revset_output.rs | 10 ++++ 7 files changed, 183 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08d54ad7b..baf9d5623 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 to the configured user. `jj describe` also gained a `--no-edit` option to avoid opening the editor. +* Added `latest(x[, n])` revset function to select the latest `n` commits. + ### Fixed bugs * Modify/delete conflicts now include context lines diff --git a/docs/revsets.md b/docs/revsets.md index 8fb31579f..496dc9232 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -108,6 +108,8 @@ revsets (expressions) as arguments. If `x` was not specified, it selects all visible heads (as if you had said `heads(all())`). * `roots(x)`: Commits in `x` that are not descendants of other commits in `x`. +* `latest(x[, count])`: Latest `count` commits in `x`, based on committer + timestamp. The default `count` is 1. * `merges()`: Merge commits. * `description(needle)`: Commits with the given string in their description. diff --git a/lib/src/default_index_store.rs b/lib/src/default_index_store.rs index 38b98c391..a07cb8fd8 100644 --- a/lib/src/default_index_store.rs +++ b/lib/src/default_index_store.rs @@ -1131,7 +1131,7 @@ pub struct IndexStats { } #[derive(Clone, Eq, PartialEq)] -pub struct IndexEntryByPosition<'a>(IndexEntry<'a>); +pub struct IndexEntryByPosition<'a>(pub IndexEntry<'a>); impl Ord for IndexEntryByPosition<'_> { fn cmp(&self, other: &Self) -> Ordering { diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index a59ba17a8..43487229d 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -13,13 +13,13 @@ // limitations under the License. use std::cmp::{Ordering, Reverse}; -use std::collections::HashSet; +use std::collections::{BinaryHeap, HashSet}; use std::iter::Peekable; use itertools::Itertools; -use crate::backend::{ChangeId, CommitId, ObjectId}; -use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; +use crate::backend::{ChangeId, CommitId, MillisSinceEpoch, ObjectId}; +use crate::default_index_store::{CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition}; use crate::default_revset_graph_iterator::RevsetGraphIterator; use crate::index::{HexPrefix, PrefixResolution}; use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher}; @@ -661,6 +661,10 @@ fn internal_evaluate<'index>( } Ok(revset_for_commit_ids(repo, &commit_ids)) } + RevsetExpression::Latest { candidates, count } => { + let candidate_set = internal_evaluate(repo, candidates)?; + Ok(take_latest_revset(repo, candidate_set.as_ref(), *count)) + } RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset { candidates: internal_evaluate(repo, &RevsetExpression::All)?, predicate: build_predicate_fn(repo, predicate), @@ -722,6 +726,51 @@ fn revset_for_commit_ids<'index>( Box::new(EagerRevset { index_entries }) } +fn take_latest_revset<'index>( + repo: &dyn Repo, + candidate_set: &dyn InternalRevset<'index>, + count: usize, +) -> Box + 'index> { + if count == 0 { + return Box::new(EagerRevset::empty()); + } + + #[derive(Clone, Eq, Ord, PartialEq, PartialOrd)] + struct Item<'a> { + timestamp: MillisSinceEpoch, + entry: IndexEntryByPosition<'a>, // tie-breaker + } + + let store = repo.store(); + let make_rev_item = |entry: IndexEntry<'index>| { + let commit = store.get_commit(&entry.commit_id()).unwrap(); + Reverse(Item { + timestamp: commit.committer().timestamp.timestamp.clone(), + entry: IndexEntryByPosition(entry), + }) + }; + + // Maintain min-heap containing the latest (greatest) count items. For small + // count and large candidate set, this is probably cheaper than building vec + // and applying selection algorithm. + let mut candidate_iter = candidate_set.iter().map(make_rev_item).fuse(); + let mut latest_items = BinaryHeap::from_iter(candidate_iter.by_ref().take(count)); + for item in candidate_iter { + let mut earliest = latest_items.peek_mut().unwrap(); + if earliest.0 < item.0 { + *earliest = item; + } + } + + assert!(latest_items.len() <= count); + let mut index_entries = latest_items + .into_iter() + .map(|item| item.0.entry.0) + .collect_vec(); + index_entries.sort_unstable_by_key(|b| Reverse(b.position())); + Box::new(EagerRevset { index_entries }) +} + type PurePredicateFn<'index> = Box) -> bool + 'index>; impl<'index> ToPredicateFn<'index> for PurePredicateFn<'index> { diff --git a/lib/src/revset.rs b/lib/src/revset.rs index bbe9b841e..5ff9799b7 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -232,6 +232,10 @@ pub enum RevsetExpression { Tags, GitRefs, GitHead, + Latest { + candidates: Rc, + count: usize, + }, Filter(RevsetFilterPredicate), /// Marker for subtree that should be intersected as filter. AsFilter(Rc), @@ -294,6 +298,13 @@ impl RevsetExpression { Rc::new(RevsetExpression::GitHead) } + pub fn latest(self: &Rc, count: usize) -> Rc { + Rc::new(RevsetExpression::Latest { + candidates: self.clone(), + count, + }) + } + pub fn filter(predicate: RevsetFilterPredicate) -> Rc { Rc::new(RevsetExpression::Filter(predicate)) } @@ -835,6 +846,16 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: expect_no_arguments(name, arguments_pair)?; Ok(RevsetExpression::git_head()) }); + map.insert("latest", |name, arguments_pair, state| { + let ([candidates_arg], [count_opt_arg]) = expect_arguments(name, arguments_pair)?; + let candidates = parse_expression_rule(candidates_arg.into_inner(), state)?; + let count = if let Some(count_arg) = count_opt_arg { + parse_function_argument_as_literal("integer", name, count_arg, state)? + } else { + 1 + }; + Ok(candidates.latest(count)) + }); map.insert("merges", |name, arguments_pair, _state| { expect_no_arguments(name, arguments_pair)?; Ok(RevsetExpression::filter( @@ -1139,6 +1160,12 @@ fn try_transform_expression_bottom_up( RevsetExpression::Tags => None, RevsetExpression::GitRefs => None, RevsetExpression::GitHead => None, + RevsetExpression::Latest { candidates, count } => { + transform_rec(candidates, f)?.map(|candidates| RevsetExpression::Latest { + candidates, + count: *count, + }) + } RevsetExpression::Filter(_) => None, RevsetExpression::AsFilter(candidates) => { transform_rec(candidates, f)?.map(RevsetExpression::AsFilter) @@ -2373,6 +2400,11 @@ mod tests { RevsetExpression::branches("".to_owned()).roots() ); + assert_eq!( + optimize(parse("latest(branches() & all(), 2)").unwrap()), + RevsetExpression::branches("".to_owned()).latest(2) + ); + assert_eq!( optimize(parse("present(author(foo) ~ bar)").unwrap()), Rc::new(RevsetExpression::AsFilter(Rc::new( diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index e8b097bd6..05ea39608 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -1506,6 +1506,90 @@ fn test_evaluate_expression_remote_branches(use_git: bool) { ); } +#[test_case(false ; "local backend")] +#[test_case(true ; "git backend")] +fn test_evaluate_expression_latest(use_git: bool) { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(use_git); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings, "test"); + let mut_repo = tx.mut_repo(); + + let mut write_commit_with_committer_timestamp = |msec| { + let builder = create_random_commit(mut_repo, &settings); + let mut committer = builder.committer().clone(); + committer.timestamp.timestamp = MillisSinceEpoch(msec); + builder.set_committer(committer).write().unwrap() + }; + let commit1_t3 = write_commit_with_committer_timestamp(3); + let commit2_t2 = write_commit_with_committer_timestamp(2); + let commit3_t2 = write_commit_with_committer_timestamp(2); + let commit4_t1 = write_commit_with_committer_timestamp(1); + + // Pick the latest entry by default (count = 1) + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all())"), + vec![commit1_t3.id().clone()], + ); + + // Should not panic with count = 0 or empty set + assert_eq!(resolve_commit_ids(mut_repo, "latest(all(), 0)"), vec![]); + assert_eq!(resolve_commit_ids(mut_repo, "latest(none())"), vec![]); + + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all(), 1)"), + vec![commit1_t3.id().clone()], + ); + + // Tie-breaking: pick the later entry in position + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all(), 2)"), + vec![commit3_t2.id().clone(), commit1_t3.id().clone()], + ); + + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all(), 3)"), + vec![ + commit3_t2.id().clone(), + commit2_t2.id().clone(), + commit1_t3.id().clone(), + ], + ); + + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all(), 4)"), + vec![ + commit4_t1.id().clone(), + commit3_t2.id().clone(), + commit2_t2.id().clone(), + commit1_t3.id().clone(), + ], + ); + + assert_eq!( + resolve_commit_ids(mut_repo, "latest(all(), 5)"), + vec![ + commit4_t1.id().clone(), + commit3_t2.id().clone(), + commit2_t2.id().clone(), + commit1_t3.id().clone(), + mut_repo.store().root_commit_id().clone(), + ], + ); + + // Should not panic if count is larger than the candidates size + assert_eq!( + resolve_commit_ids(mut_repo, "latest(~root, 5)"), + vec![ + commit4_t1.id().clone(), + commit3_t2.id().clone(), + commit2_t2.id().clone(), + commit1_t3.id().clone(), + ], + ); +} + #[test_case(false ; "local backend")] #[test_case(true ; "git backend")] fn test_evaluate_expression_merges(use_git: bool) { diff --git a/tests/test_revset_output.rs b/tests/test_revset_output.rs index 066ddf6c8..1fe3ff655 100644 --- a/tests/test_revset_output.rs +++ b/tests/test_revset_output.rs @@ -101,6 +101,16 @@ fn test_bad_function_call() { = Invalid arguments to revset function "heads": Expected 0 to 1 arguments "###); + let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "latest(a, not_an_integer)"]); + insta::assert_snapshot!(stderr, @r###" + Error: Failed to parse revset: --> 1:11 + | + 1 | latest(a, not_an_integer) + | ^------------^ + | + = Invalid arguments to revset function "latest": Expected function argument of type integer + "###); + let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "file()"]); insta::assert_snapshot!(stderr, @r###" Error: Failed to parse revset: --> 1:6