mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-18 10:07:28 +00:00
revset: add diff_contains(text[, files]) to search diffs
The text pattern is applied prior to comparison as we do in Mercurial. This might affect hunk selection, but is much faster than computing diff of full file contents. For example, the following hunk wouldn't be caught by diff_contains("a") because the line "b\n" is filtered out: - a b + a Closes #2933
This commit is contained in:
parent
eabff4c0b4
commit
895eead4b8
5 changed files with 268 additions and 2 deletions
|
@ -63,6 +63,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|||
|
||||
* The `file()` revset function now accepts fileset as argument.
|
||||
|
||||
* New `diff_contains()` revset function can be used to search diffs.
|
||||
|
||||
### Fixed bugs
|
||||
|
||||
* `jj diff --git` no longer shows the contents of binary files.
|
||||
|
|
|
@ -280,6 +280,16 @@ given [string pattern](#string-patterns).
|
|||
Some file patterns might need quoting because the `expression` must also be
|
||||
parsable as a revset. For example, `.` has to be quoted in `file(".")`.
|
||||
|
||||
* `diff_contains(text[, files])`: Commits containing diffs matching the given
|
||||
`text` pattern line by line.
|
||||
|
||||
The search paths can be narrowed by the `files` expression. All modified files
|
||||
are scanned by default, but it is likely to change in future version to
|
||||
respect the command line path arguments.
|
||||
|
||||
For example, `diff_contains("TODO", "src")` will search revisions where "TODO"
|
||||
is added to or removed from files under "src".
|
||||
|
||||
* `conflict()`: Commits with conflicts.
|
||||
|
||||
* `present(x)`: Same as `x`, but evaluated to `none()` if any of the commits
|
||||
|
|
|
@ -20,14 +20,17 @@ use std::collections::{BTreeSet, BinaryHeap, HashSet};
|
|||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, iter};
|
||||
use std::{fmt, iter, str};
|
||||
|
||||
use futures::StreamExt as _;
|
||||
use itertools::Itertools;
|
||||
use pollster::FutureExt as _;
|
||||
|
||||
use super::rev_walk::{EagerRevWalk, PeekableRevWalk, RevWalk, RevWalkBuilder};
|
||||
use super::revset_graph_iterator::RevsetGraphWalk;
|
||||
use crate::backend::{BackendResult, ChangeId, CommitId, MillisSinceEpoch};
|
||||
use crate::backend::{BackendError, BackendResult, ChangeId, CommitId, MillisSinceEpoch};
|
||||
use crate::commit::Commit;
|
||||
use crate::conflicts::{materialized_diff_stream, MaterializedTreeValue};
|
||||
use crate::default_index::{AsCompositeIndex, CompositeIndex, IndexPosition};
|
||||
use crate::graph::GraphEdge;
|
||||
use crate::matchers::{Matcher, Visit};
|
||||
|
@ -37,6 +40,7 @@ use crate::revset::{
|
|||
RevsetFilterPredicate, GENERATION_RANGE_FULL,
|
||||
};
|
||||
use crate::store::Store;
|
||||
use crate::str_util::StringPattern;
|
||||
use crate::{rewrite, union_find};
|
||||
|
||||
type BoxedPredicateFn<'a> = Box<dyn FnMut(&CompositeIndex, IndexPosition) -> bool + 'a>;
|
||||
|
@ -1078,6 +1082,16 @@ fn build_predicate_fn(
|
|||
has_diff_from_parent(&store, index, &commit, matcher.as_ref()).unwrap()
|
||||
})
|
||||
}
|
||||
RevsetFilterPredicate::DiffContains { text, files } => {
|
||||
let text_pattern = text.clone();
|
||||
let files_matcher: Rc<dyn Matcher> = files.to_matcher().into();
|
||||
box_pure_predicate_fn(move |index, pos| {
|
||||
let entry = index.entry_by_pos(pos);
|
||||
let commit = store.get_commit(&entry.commit_id()).unwrap();
|
||||
matches_diff_from_parent(&store, index, &commit, &text_pattern, &*files_matcher)
|
||||
.unwrap()
|
||||
})
|
||||
}
|
||||
RevsetFilterPredicate::HasConflict => box_pure_predicate_fn(move |index, pos| {
|
||||
let entry = index.entry_by_pos(pos);
|
||||
let commit = store.get_commit(&entry.commit_id()).unwrap();
|
||||
|
@ -1115,6 +1129,75 @@ fn has_diff_from_parent(
|
|||
Ok(from_tree.diff(&to_tree, matcher).next().is_some())
|
||||
}
|
||||
|
||||
fn matches_diff_from_parent(
|
||||
store: &Arc<Store>,
|
||||
index: &CompositeIndex,
|
||||
commit: &Commit,
|
||||
text_pattern: &StringPattern,
|
||||
files_matcher: &dyn Matcher,
|
||||
) -> BackendResult<bool> {
|
||||
let parents: Vec<_> = commit.parents().try_collect()?;
|
||||
let from_tree = rewrite::merge_commit_trees_without_repo(store, &index, &parents)?;
|
||||
let to_tree = commit.tree()?;
|
||||
let tree_diff = from_tree.diff_stream(&to_tree, files_matcher);
|
||||
// Conflicts are compared in materialized form. Alternatively, conflict
|
||||
// pairs can be compared one by one. #4062
|
||||
let mut diff_stream = materialized_diff_stream(store, tree_diff);
|
||||
async {
|
||||
while let Some((path, diff)) = diff_stream.next().await {
|
||||
let (left_value, right_value) = diff?;
|
||||
let left_content = to_file_content(&path, left_value)?;
|
||||
let right_content = to_file_content(&path, right_value)?;
|
||||
// Filter lines prior to comparison. This might produce inferior
|
||||
// hunks due to lack of contexts, but is way faster than full diff.
|
||||
let left_lines = match_lines(&left_content, text_pattern);
|
||||
let right_lines = match_lines(&right_content, text_pattern);
|
||||
if left_lines.ne(right_lines) {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
.block_on()
|
||||
}
|
||||
|
||||
fn match_lines<'a: 'b, 'b>(
|
||||
text: &'a [u8],
|
||||
pattern: &'b StringPattern,
|
||||
) -> impl Iterator<Item = &'a [u8]> + 'b {
|
||||
// The pattern is matched line by line so that it can be anchored to line
|
||||
// start/end. For example, exact:"" will match blank lines.
|
||||
text.split_inclusive(|b| *b == b'\n').filter(|line| {
|
||||
let line = line.strip_suffix(b"\n").unwrap_or(line);
|
||||
// TODO: add .matches_bytes() or .to_bytes_matcher()
|
||||
str::from_utf8(line).map_or(false, |line| pattern.matches(line))
|
||||
})
|
||||
}
|
||||
|
||||
fn to_file_content(path: &RepoPath, value: MaterializedTreeValue) -> BackendResult<Vec<u8>> {
|
||||
match value {
|
||||
MaterializedTreeValue::Absent => Ok(vec![]),
|
||||
MaterializedTreeValue::AccessDenied(_) => Ok(vec![]),
|
||||
MaterializedTreeValue::File { id, mut reader, .. } => {
|
||||
let mut content = vec![];
|
||||
reader
|
||||
.read_to_end(&mut content)
|
||||
.map_err(|err| BackendError::ReadFile {
|
||||
path: path.to_owned(),
|
||||
id: id.clone(),
|
||||
source: err.into(),
|
||||
})?;
|
||||
Ok(content)
|
||||
}
|
||||
MaterializedTreeValue::Symlink { id: _, target } => Ok(target.into_bytes()),
|
||||
MaterializedTreeValue::GitSubmodule(_) => Ok(vec![]),
|
||||
MaterializedTreeValue::Conflict { contents, .. } => Ok(contents),
|
||||
MaterializedTreeValue::Tree(id) => {
|
||||
panic!("Unexpected tree with id {id:?} in diff at path {path:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
@ -134,6 +134,11 @@ pub enum RevsetFilterPredicate {
|
|||
Committer(StringPattern),
|
||||
/// Commits modifying the paths specified by the fileset.
|
||||
File(FilesetExpression),
|
||||
/// Commits containing diffs matching the `text` pattern within the `files`.
|
||||
DiffContains {
|
||||
text: StringPattern,
|
||||
files: FilesetExpression,
|
||||
},
|
||||
/// Commits with conflicts
|
||||
HasConflict,
|
||||
/// Custom predicates provided by extensions
|
||||
|
@ -714,6 +719,26 @@ static BUILTIN_FUNCTION_MAP: Lazy<HashMap<&'static str, RevsetFunction>> = Lazy:
|
|||
let expr = FilesetExpression::union_all(file_expressions);
|
||||
Ok(RevsetExpression::filter(RevsetFilterPredicate::File(expr)))
|
||||
});
|
||||
map.insert("diff_contains", |function, context| {
|
||||
let ([text_arg], [files_opt_arg]) = function.expect_arguments()?;
|
||||
let text = expect_string_pattern(text_arg)?;
|
||||
let files = if let Some(files_arg) = files_opt_arg {
|
||||
let ctx = context.workspace.as_ref().ok_or_else(|| {
|
||||
RevsetParseError::with_span(
|
||||
RevsetParseErrorKind::FsPathWithoutWorkspace,
|
||||
files_arg.span,
|
||||
)
|
||||
})?;
|
||||
expect_fileset_expression(files_arg, ctx.path_converter)?
|
||||
} else {
|
||||
// TODO: defaults to CLI path arguments?
|
||||
// https://github.com/martinvonz/jj/issues/2933#issuecomment-1925870731
|
||||
FilesetExpression::all()
|
||||
};
|
||||
Ok(RevsetExpression::filter(
|
||||
RevsetFilterPredicate::DiffContains { text, files },
|
||||
))
|
||||
});
|
||||
map.insert("conflict", |function, _context| {
|
||||
function.expect_no_arguments()?;
|
||||
Ok(RevsetExpression::filter(RevsetFilterPredicate::HasConflict))
|
||||
|
|
|
@ -3012,6 +3012,152 @@ fn test_evaluate_expression_file() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_evaluate_expression_diff_contains() {
|
||||
let settings = testutils::user_settings();
|
||||
let test_workspace = TestWorkspace::init(&settings);
|
||||
let repo = &test_workspace.repo;
|
||||
|
||||
let mut tx = repo.start_transaction(&settings);
|
||||
let mut_repo = tx.mut_repo();
|
||||
|
||||
let empty_clean_inserted_deleted =
|
||||
RepoPath::from_internal_string("empty_clean_inserted_deleted");
|
||||
let blank_clean_inserted_clean = RepoPath::from_internal_string("blank_clean_inserted_clean");
|
||||
let noeol_modified_modified_clean =
|
||||
RepoPath::from_internal_string("noeol_modified_modified_clean");
|
||||
let normal_inserted_modified_removed =
|
||||
RepoPath::from_internal_string("normal_inserted_modified_removed");
|
||||
let tree1 = create_tree(
|
||||
repo,
|
||||
&[
|
||||
(empty_clean_inserted_deleted, ""),
|
||||
(blank_clean_inserted_clean, "\n"),
|
||||
(noeol_modified_modified_clean, "1"),
|
||||
(normal_inserted_modified_removed, "1\n"),
|
||||
],
|
||||
);
|
||||
let tree2 = create_tree(
|
||||
repo,
|
||||
&[
|
||||
(empty_clean_inserted_deleted, ""),
|
||||
(blank_clean_inserted_clean, "\n"),
|
||||
(noeol_modified_modified_clean, "2"),
|
||||
(normal_inserted_modified_removed, "1\n2\n"),
|
||||
],
|
||||
);
|
||||
let tree3 = create_tree(
|
||||
repo,
|
||||
&[
|
||||
(empty_clean_inserted_deleted, "3"),
|
||||
(blank_clean_inserted_clean, "\n3\n"),
|
||||
(noeol_modified_modified_clean, "2 3"),
|
||||
(normal_inserted_modified_removed, "1 3\n2\n"),
|
||||
],
|
||||
);
|
||||
let tree4 = create_tree(
|
||||
repo,
|
||||
&[
|
||||
(empty_clean_inserted_deleted, ""),
|
||||
(blank_clean_inserted_clean, "\n3\n"),
|
||||
(noeol_modified_modified_clean, "2 3"),
|
||||
// normal_inserted_modified_removed
|
||||
],
|
||||
);
|
||||
let commit1 = mut_repo
|
||||
.new_commit(
|
||||
&settings,
|
||||
vec![repo.store().root_commit_id().clone()],
|
||||
tree1.id(),
|
||||
)
|
||||
.write()
|
||||
.unwrap();
|
||||
let commit2 = mut_repo
|
||||
.new_commit(&settings, vec![commit1.id().clone()], tree2.id())
|
||||
.write()
|
||||
.unwrap();
|
||||
let commit3 = mut_repo
|
||||
.new_commit(&settings, vec![commit2.id().clone()], tree3.id())
|
||||
.write()
|
||||
.unwrap();
|
||||
let commit4 = mut_repo
|
||||
.new_commit(&settings, vec![commit3.id().clone()], tree4.id())
|
||||
.write()
|
||||
.unwrap();
|
||||
|
||||
let query = |revset_str: &str| {
|
||||
resolve_commit_ids_in_workspace(
|
||||
mut_repo,
|
||||
revset_str,
|
||||
&test_workspace.workspace,
|
||||
Some(test_workspace.workspace.workspace_root()),
|
||||
)
|
||||
};
|
||||
|
||||
// should match both inserted and deleted lines
|
||||
assert_eq!(
|
||||
query("diff_contains('2')"),
|
||||
vec![
|
||||
commit4.id().clone(),
|
||||
commit3.id().clone(),
|
||||
commit2.id().clone(),
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
query("diff_contains('3')"),
|
||||
vec![commit4.id().clone(), commit3.id().clone()]
|
||||
);
|
||||
assert_eq!(query("diff_contains('2 3')"), vec![commit3.id().clone()]);
|
||||
assert_eq!(
|
||||
query("diff_contains('1 3')"),
|
||||
vec![commit4.id().clone(), commit3.id().clone()]
|
||||
);
|
||||
|
||||
// should match line with eol
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains(exact:'1', {normal_inserted_modified_removed:?})",
|
||||
)),
|
||||
vec![commit3.id().clone(), commit1.id().clone()]
|
||||
);
|
||||
|
||||
// should match line without eol
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains(exact:'1', {noeol_modified_modified_clean:?})",
|
||||
)),
|
||||
vec![commit2.id().clone(), commit1.id().clone()]
|
||||
);
|
||||
|
||||
// exact:'' should match blank line
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains(exact:'', {empty_clean_inserted_deleted:?})",
|
||||
)),
|
||||
vec![]
|
||||
);
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains(exact:'', {blank_clean_inserted_clean:?})",
|
||||
)),
|
||||
vec![commit1.id().clone()]
|
||||
);
|
||||
|
||||
// '' should match anything but clean
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains('', {empty_clean_inserted_deleted:?})",
|
||||
)),
|
||||
vec![commit4.id().clone(), commit3.id().clone()]
|
||||
);
|
||||
assert_eq!(
|
||||
query(&format!(
|
||||
"diff_contains('', {blank_clean_inserted_clean:?})",
|
||||
)),
|
||||
vec![commit3.id().clone(), commit1.id().clone()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_evaluate_expression_conflict() {
|
||||
let settings = testutils::user_settings();
|
||||
|
|
Loading…
Reference in a new issue