ok/jj
1
0
Fork 0
forked from mirrors/jj

backend: learn to store tree-level conflicts

Tree-level conflicts (#1624) will be stored as multiple trees
associated with a single commit. This patch adds support for that in
`backend::Commit` and in the backends.

When the Git backend writes a tree conflict, it creates a special root
tree for the commit. That tree has only the individual trees from the
conflict as subtrees. That way we prevent the trees from getting
GC'd. We also write the tree ids to the extra metadata table
(i.e. outside of the Git repo) so we don't need to load the tree
object to determine if there are conflicts.

I also added new flag to `backend::Commit` indicating whether the
commit is a new-style commit (with support for tree-level
conflicts). That will help with the migration. We will remove it once
we no longer care about old repos. When the flag is set, we know that
a commit with a single tree cannot have conflicts. When the flag is
not set, it's an old-style commit where we have to walk the whole tree
to find conflicts.
This commit is contained in:
Martin von Zweigbergk 2023-06-22 06:45:09 -07:00 committed by Martin von Zweigbergk
parent deb4ae476d
commit 006c764694
10 changed files with 289 additions and 19 deletions

View file

@ -23,6 +23,7 @@ use std::vec::Vec;
use thiserror::Error;
use crate::conflicts;
use crate::content_hash::ContentHash;
use crate::repo_path::{RepoPath, RepoPathComponent};
@ -145,7 +146,14 @@ content_hash! {
pub struct Commit {
pub parents: Vec<CommitId>,
pub predecessors: Vec<CommitId>,
pub root_tree: TreeId,
pub root_tree: conflicts::Conflict<TreeId>,
/// Indicates that there this commit uses the new tree-level conflict format, which means
/// that if `root_tree` is not a conflict, we know that we won't have to walk it to
/// determine if there are conflicts.
// TODO(#1624): Delete this field at some point in the future, when we decide to drop
// support for conflicts in older repos, or maybe after we have provided an upgrade
// mechanism.
pub uses_tree_conflict_format: bool,
pub change_id: ChangeId,
pub description: String,
pub author: Signature,
@ -387,7 +395,8 @@ pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Comm
Commit {
parents: vec![],
predecessors: vec![],
root_tree: empty_tree_id,
root_tree: conflicts::Conflict::resolved(empty_tree_id),
uses_tree_conflict_format: false,
change_id: root_change_id,
description: String::new(),
author: signature.clone(),

View file

@ -103,12 +103,12 @@ impl Commit {
pub fn tree(&self) -> Tree {
self.store
.get_tree(&RepoPath::root(), &self.data.root_tree)
.get_tree(&RepoPath::root(), self.data.root_tree.as_legacy_tree_id())
.unwrap()
}
pub fn tree_id(&self) -> &TreeId {
&self.data.root_tree
self.data.root_tree.as_legacy_tree_id()
}
pub fn change_id(&self) -> &ChangeId {

View file

@ -18,6 +18,7 @@ use std::sync::Arc;
use crate::backend::{self, BackendResult, ChangeId, CommitId, Signature, TreeId};
use crate::commit::Commit;
use crate::conflicts::Conflict;
use crate::repo::{MutableRepo, Repo};
use crate::settings::{JJRng, UserSettings};
@ -43,7 +44,9 @@ impl CommitBuilder<'_> {
let commit = backend::Commit {
parents,
predecessors: vec![],
root_tree: tree_id,
// TODO(#1624): set this when appropriate
root_tree: Conflict::from_legacy_tree_id(tree_id),
uses_tree_conflict_format: false,
change_id,
description: String::new(),
author: signature.clone(),
@ -101,11 +104,11 @@ impl CommitBuilder<'_> {
}
pub fn tree(&self) -> &TreeId {
&self.commit.root_tree
self.commit.root_tree.as_legacy_tree_id()
}
pub fn set_tree(mut self, tree_id: TreeId) -> Self {
self.commit.root_tree = tree_id;
self.commit.root_tree = Conflict::from_legacy_tree_id(tree_id);
self
}

View file

@ -22,6 +22,7 @@ use std::sync::Arc;
use itertools::Itertools;
use crate::backend::{BackendError, BackendResult, FileId, ObjectId, TreeId, TreeValue};
use crate::content_hash::ContentHash;
use crate::diff::{find_line_ranges, Diff, DiffHunk};
use crate::files::{ContentHunk, MergeResult};
use crate::merge::trivial_merge;
@ -53,6 +54,7 @@ impl<T> Conflict<T> {
Conflict { removes, adds }
}
/// Creates a `Conflict` with a single resolved value.
pub fn resolved(value: T) -> Self {
Conflict::new(vec![], vec![value])
}
@ -205,6 +207,32 @@ impl<T> Conflict<Conflict<T>> {
}
}
impl<T: ContentHash> ContentHash for Conflict<T> {
fn hash(&self, state: &mut impl digest::Update) {
self.removes().hash(state);
self.adds().hash(state);
}
}
impl Conflict<TreeId> {
// Creates a resolved conflict for a legacy tree id (same as
// `Conflict::resolved()`).
// TODO(#1624): delete when all callers have been updated to support tree-level
// conflicts
pub fn from_legacy_tree_id(value: TreeId) -> Self {
Conflict {
removes: vec![],
adds: vec![value],
}
}
// TODO(#1624): delete when all callers have been updated to support tree-level
// conflicts
pub fn as_legacy_tree_id(&self) -> &TreeId {
self.as_resolved().unwrap()
}
}
impl Conflict<Option<TreeValue>> {
/// Create a `Conflict` from a `backend::Conflict`, padding with `None` to
/// make sure that there is exactly one more `adds()` than `removes()`.

View file

@ -17,6 +17,7 @@
use std::any::Any;
use std::fmt::{Debug, Error, Formatter};
use std::io::{Cursor, Read};
use std::ops::Deref;
use std::path::Path;
use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, slice};
@ -31,6 +32,7 @@ use crate::backend::{
ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MillisSinceEpoch,
ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue,
};
use crate::conflicts;
use crate::file_util::{IoResultExt as _, PathError};
use crate::lock::FileLock;
use crate::repo_path::{RepoPath, RepoPathComponent};
@ -219,6 +221,9 @@ fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
.map(|oid| CommitId::from_bytes(oid.as_bytes()))
.collect_vec();
let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes());
// If this commit is a conflict, we'll update the root tree later, when we read
// the extra metadata.
let root_tree = conflicts::Conflict::resolved(tree_id);
let description = commit.message().unwrap_or("<no message>").to_owned();
let author = signature_from_git(commit.author());
let committer = signature_from_git(commit.committer());
@ -226,7 +231,9 @@ fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
Commit {
parents,
predecessors: vec![],
root_tree: tree_id,
root_tree,
// If this commit has associated extra metadata, we may set this later.
uses_tree_conflict_format: false,
change_id,
description,
author,
@ -262,8 +269,28 @@ fn signature_to_git(signature: &Signature) -> git2::Signature<'static> {
fn serialize_extras(commit: &Commit) -> Vec<u8> {
let mut proto = crate::protos::git_store::Commit {
change_id: commit.change_id.to_bytes(),
uses_tree_conflict_format: commit.uses_tree_conflict_format,
..Default::default()
};
if commit.root_tree.as_resolved().is_none() {
assert!(commit.uses_tree_conflict_format);
let removes = commit
.root_tree
.removes()
.iter()
.map(|r| r.to_bytes())
.collect_vec();
let adds = commit
.root_tree
.adds()
.iter()
.map(|r| r.to_bytes())
.collect_vec();
let conflict = crate::protos::git_store::TreeConflict { removes, adds };
proto.root_tree = Some(crate::protos::git_store::commit::RootTree::Conflict(
conflict,
));
}
for predecessor in &commit.predecessors {
proto.predecessors.push(predecessor.to_bytes());
}
@ -273,6 +300,28 @@ fn serialize_extras(commit: &Commit) -> Vec<u8> {
fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
commit.change_id = ChangeId::new(proto.change_id);
commit.uses_tree_conflict_format = proto.uses_tree_conflict_format;
match proto.root_tree {
Some(crate::protos::git_store::commit::RootTree::Conflict(proto_conflict)) => {
assert!(commit.uses_tree_conflict_format);
commit.root_tree = conflicts::Conflict::new(
proto_conflict
.removes
.iter()
.map(|id_bytes| TreeId::from_bytes(id_bytes))
.collect(),
proto_conflict
.adds
.iter()
.map(|id_bytes| TreeId::from_bytes(id_bytes))
.collect(),
);
}
Some(crate::protos::git_store::commit::RootTree::Resolved(_)) => {
panic!("found resolved root tree in extras (should only be written to git metadata)");
}
None => {}
}
for predecessor in &proto.predecessors {
commit.predecessors.push(CommitId::from_bytes(predecessor));
}
@ -597,6 +646,9 @@ impl Backend for GitBackend {
// the commit is a branch head, so bulk-import metadata as much as possible.
tracing::debug!("import extra metadata entries");
let mut mut_table = table.start_mutation();
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
if commit.parents != slice::from_ref(&self.root_commit_id) {
import_extra_metadata_entries_from_heads(
@ -615,10 +667,14 @@ impl Backend for GitBackend {
fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> {
let locked_repo = self.repo.lock().unwrap();
let git_tree_id = validate_git_object_id(&contents.root_tree)?;
let git_tree_id = if let Some(tree_id) = contents.root_tree.as_resolved() {
validate_git_object_id(tree_id)?
} else {
write_tree_conflict(locked_repo.deref(), &contents.root_tree)?
};
let git_tree = locked_repo
.find_tree(git_tree_id)
.map_err(|err| map_not_found_err(err, &contents.root_tree))?;
.map_err(|err| map_not_found_err(err, &TreeId::from_bytes(git_tree_id.as_bytes())))?;
let author = signature_to_git(&contents.author);
let mut committer = signature_to_git(&contents.committer);
let message = &contents.description;
@ -705,6 +761,29 @@ impl Backend for GitBackend {
}
}
/// Write a tree conflict as a special tree with `.jjconflict-base-N` and
/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd.
fn write_tree_conflict(
repo: &git2::Repository,
conflict: &conflicts::Conflict<TreeId>,
) -> Result<Oid, BackendError> {
let mut builder = repo.treebuilder(None).unwrap();
let mut add_tree_entry = |name, tree_id: &TreeId| {
let tree_oid = Oid::from_bytes(tree_id.as_bytes()).unwrap();
builder.insert(name, tree_oid, 0o040000).unwrap();
};
for (i, tree_id) in conflict.removes().iter().enumerate() {
add_tree_entry(format!(".jjconflict-base-{i}"), tree_id);
}
for (i, tree_id) in conflict.adds().iter().enumerate() {
add_tree_entry(format!(".jjconflict-side-{i}"), tree_id);
}
builder.write().map_err(|err| BackendError::WriteObject {
object_type: "tree",
source: Box::new(err),
})
}
fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value {
serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect())
}
@ -836,7 +915,11 @@ mod tests {
assert_eq!(&commit.change_id, &change_id);
assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
assert_eq!(commit.predecessors, vec![]);
assert_eq!(commit.root_tree.as_bytes(), root_tree_id.as_bytes());
assert_eq!(
commit.root_tree.as_resolved().unwrap().as_bytes(),
root_tree_id.as_bytes()
);
assert!(!commit.uses_tree_conflict_format);
assert_eq!(commit.description, "git commit message");
assert_eq!(commit.author.name, "git author");
assert_eq!(commit.author.email, "git.author@example.com");
@ -905,7 +988,8 @@ mod tests {
let mut commit = Commit {
parents: vec![],
predecessors: vec![],
root_tree: backend.empty_tree_id().clone(),
root_tree: conflicts::Conflict::resolved(backend.empty_tree_id().clone()),
uses_tree_conflict_format: false,
change_id: ChangeId::from_hex("abc123"),
description: "".to_string(),
author: create_signature(),
@ -957,6 +1041,81 @@ mod tests {
);
}
#[test]
fn write_tree_conflicts() {
let temp_dir = testutils::new_temp_dir();
let store_path = temp_dir.path();
let git_repo_path = temp_dir.path().join("git");
let git_repo = git2::Repository::init(&git_repo_path).unwrap();
let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap();
let crete_tree = |i| {
let blob_id = git_repo.blob(b"content {i}").unwrap();
let mut tree_builder = git_repo.treebuilder(None).unwrap();
tree_builder
.insert(format!("file{i}"), blob_id, 0o100644)
.unwrap();
TreeId::from_bytes(tree_builder.write().unwrap().as_bytes())
};
let root_tree = conflicts::Conflict::new(
vec![crete_tree(0), crete_tree(1)],
vec![crete_tree(2), crete_tree(3), crete_tree(4)],
);
let mut commit = Commit {
parents: vec![backend.root_commit_id().clone()],
predecessors: vec![],
root_tree: root_tree.clone(),
uses_tree_conflict_format: true,
change_id: ChangeId::from_hex("abc123"),
description: "".to_string(),
author: create_signature(),
committer: create_signature(),
};
// When writing a tree-level conflict, the root tree on the git side has the
// individual trees as subtrees.
let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
let read_commit = backend.read_commit(&read_commit_id).unwrap();
assert_eq!(read_commit, commit);
let git_commit = git_repo
.find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
.unwrap();
let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap();
assert!(git_tree.iter().all(|entry| entry.filemode() == 0o040000));
let mut iter = git_tree.iter();
let entry = iter.next().unwrap();
assert_eq!(entry.name(), Some(".jjconflict-base-0"));
assert_eq!(entry.id().as_bytes(), root_tree.removes()[0].as_bytes());
let entry = iter.next().unwrap();
assert_eq!(entry.name(), Some(".jjconflict-base-1"));
assert_eq!(entry.id().as_bytes(), root_tree.removes()[1].as_bytes());
let entry = iter.next().unwrap();
assert_eq!(entry.name(), Some(".jjconflict-side-0"));
assert_eq!(entry.id().as_bytes(), root_tree.adds()[0].as_bytes());
let entry = iter.next().unwrap();
assert_eq!(entry.name(), Some(".jjconflict-side-1"));
assert_eq!(entry.id().as_bytes(), root_tree.adds()[1].as_bytes());
let entry = iter.next().unwrap();
assert_eq!(entry.name(), Some(".jjconflict-side-2"));
assert_eq!(entry.id().as_bytes(), root_tree.adds()[2].as_bytes());
assert!(iter.next().is_none());
// When writing a single tree using the new format, it's represented by a
// regular git tree.
commit.root_tree = conflicts::Conflict::resolved(crete_tree(5));
let read_commit_id = backend.write_commit(commit.clone()).unwrap().0;
let read_commit = backend.read_commit(&read_commit_id).unwrap();
assert_eq!(read_commit, commit);
let git_commit = git_repo
.find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap())
.unwrap();
assert_eq!(
git_commit.tree_id().as_bytes(),
commit.root_tree.adds()[0].as_bytes()
);
}
#[test]
fn commit_has_ref() {
let temp_dir = testutils::new_temp_dir();
@ -972,7 +1131,8 @@ mod tests {
let commit = Commit {
parents: vec![store.root_commit_id().clone()],
predecessors: vec![],
root_tree: store.empty_tree_id().clone(),
root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()),
uses_tree_conflict_format: false,
change_id: ChangeId::new(vec![]),
description: "initial".to_string(),
author: signature.clone(),
@ -995,7 +1155,8 @@ mod tests {
let mut commit1 = Commit {
parents: vec![store.root_commit_id().clone()],
predecessors: vec![],
root_tree: store.empty_tree_id().clone(),
root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()),
uses_tree_conflict_format: false,
change_id: ChangeId::new(vec![]),
description: "initial".to_string(),
author: create_signature(),

View file

@ -30,6 +30,7 @@ use crate::backend::{
ConflictId, ConflictTerm, FileId, MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp,
Tree, TreeId, TreeValue,
};
use crate::conflicts;
use crate::content_hash::blake2b_hash;
use crate::file_util::persist_content_addressed_temp_file;
use crate::repo_path::{RepoPath, RepoPathComponent};
@ -284,7 +285,21 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit {
for predecessor in &commit.predecessors {
proto.predecessors.push(predecessor.to_bytes());
}
proto.root_tree = commit.root_tree.to_bytes();
let conflict = crate::protos::local_store::TreeConflict {
removes: commit
.root_tree
.removes()
.iter()
.map(|id| id.to_bytes())
.collect(),
adds: commit
.root_tree
.adds()
.iter()
.map(|id| id.to_bytes())
.collect(),
};
proto.root_tree = Some(conflict);
proto.change_id = commit.change_id.to_bytes();
proto.description = commit.description.clone();
proto.author = Some(signature_to_proto(&commit.author));
@ -295,12 +310,17 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit {
fn commit_from_proto(proto: crate::protos::local_store::Commit) -> Commit {
let parents = proto.parents.into_iter().map(CommitId::new).collect();
let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect();
let root_tree = TreeId::new(proto.root_tree);
let conflict = proto.root_tree.unwrap();
let root_tree = conflicts::Conflict::new(
conflict.removes.into_iter().map(TreeId::new).collect(),
conflict.adds.into_iter().map(TreeId::new).collect(),
);
let change_id = ChangeId::new(proto.change_id);
Commit {
parents,
predecessors,
root_tree,
uses_tree_conflict_format: true,
change_id,
description: proto.description,
author: signature_from_proto(proto.author.unwrap_or_default()),

View file

@ -16,10 +16,22 @@ syntax = "proto3";
package git_store;
message TreeConflict {
repeated bytes removes = 1;
repeated bytes adds = 2;
}
message Commit {
repeated bytes predecessors = 2;
bytes change_id = 4;
oneof root_tree {
bytes resolved = 3;
TreeConflict conflict = 1;
}
// TODO(#1624): delete when we assume that all commits use this format
bool uses_tree_conflict_format = 10;
bool is_open = 8 [deprecated = true];
bool is_pruned = 9 [deprecated = true];
}

View file

@ -1,14 +1,38 @@
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TreeConflict {
#[prost(bytes = "vec", repeated, tag = "1")]
pub removes: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(bytes = "vec", repeated, tag = "2")]
pub adds: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Commit {
#[prost(bytes = "vec", repeated, tag = "2")]
pub predecessors: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(bytes = "vec", tag = "4")]
pub change_id: ::prost::alloc::vec::Vec<u8>,
/// TODO(#1624): delete when we assume that all commits use this format
#[prost(bool, tag = "10")]
pub uses_tree_conflict_format: bool,
#[deprecated]
#[prost(bool, tag = "8")]
pub is_open: bool,
#[deprecated]
#[prost(bool, tag = "9")]
pub is_pruned: bool,
#[prost(oneof = "commit::RootTree", tags = "3, 1")]
pub root_tree: ::core::option::Option<commit::RootTree>,
}
/// Nested message and enum types in `Commit`.
pub mod commit {
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Oneof)]
pub enum RootTree {
#[prost(bytes, tag = "3")]
Resolved(::prost::alloc::vec::Vec<u8>),
#[prost(message, tag = "1")]
Conflict(super::TreeConflict),
}
}

View file

@ -39,10 +39,15 @@ message Tree {
repeated Entry entries = 1;
}
message TreeConflict {
repeated bytes removes = 1;
repeated bytes adds = 2;
}
message Commit {
repeated bytes parents = 1;
repeated bytes predecessors = 2;
bytes root_tree = 3;
TreeConflict root_tree = 3;
bytes change_id = 4;
string description = 5;

View file

@ -46,13 +46,21 @@ pub mod tree {
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TreeConflict {
#[prost(bytes = "vec", repeated, tag = "1")]
pub removes: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(bytes = "vec", repeated, tag = "2")]
pub adds: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Commit {
#[prost(bytes = "vec", repeated, tag = "1")]
pub parents: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(bytes = "vec", repeated, tag = "2")]
pub predecessors: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(bytes = "vec", tag = "3")]
pub root_tree: ::prost::alloc::vec::Vec<u8>,
#[prost(message, optional, tag = "3")]
pub root_tree: ::core::option::Option<TreeConflict>,
#[prost(bytes = "vec", tag = "4")]
pub change_id: ::prost::alloc::vec::Vec<u8>,
#[prost(string, tag = "5")]