forked from mirrors/jj
Tree-level conflicts (#1624) will be stored as multiple trees associated with a single commit. This patch adds support for that in `backend::Commit` and in the backends. When the Git backend writes a tree conflict, it creates a special root tree for the commit. That tree has only the individual trees from the conflict as subtrees. That way we prevent the trees from getting GC'd. We also write the tree ids to the extra metadata table (i.e. outside of the Git repo) so we don't need to load the tree object to determine if there are conflicts. I also added a new flag to `backend::Commit` indicating whether the commit is a new-style commit (with support for tree-level conflicts). That will help with the migration. We will remove it once we no longer care about old repos. When the flag is set, we know that a commit with a single tree cannot have conflicts. When the flag is not set, it's an old-style commit where we have to walk the whole tree to find conflicts.
451 lines
13 KiB
451 lines
13 KiB
// Copyright 2020 The Jujutsu Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::BTreeMap;
use std::fmt::{Debug, Error, Formatter};
use std::io::Read;
use std::result::Result;
use std::vec::Vec;
use thiserror::Error;
use crate::conflicts;
use crate::content_hash::ContentHash;
use crate::repo_path::{RepoPath, RepoPathComponent};
/// Common interface of the byte-string object IDs declared in this module
/// (`CommitId`, `ChangeId`, `TreeId`, ...), which implement it through
/// `impl_id_type!`.
pub trait ObjectId {
/// Builds an ID that takes ownership of `value`.
fn new(value: Vec<u8>) -> Self;
/// Returns a string naming the kind of object this ID refers to.
// NOTE(review): presumably the text used for the `object_type` fields of
// `BackendError` -- confirm against the implementation.
fn object_type(&self) -> String;
/// Builds an ID from a borrowed byte slice.
fn from_bytes(bytes: &[u8]) -> Self;
/// Borrows the ID as a byte slice.
fn as_bytes(&self) -> &[u8];
/// Returns the ID as an owned byte vector.
fn to_bytes(&self) -> Vec<u8>;
/// Builds an ID from a hexadecimal string.
// NOTE(review): returns `Self` rather than a `Result`, so invalid hex input
// presumably panics -- confirm against the implementation.
fn from_hex(hex: &str) -> Self;
/// Returns the ID as a hexadecimal string.
fn hex(&self) -> String;
/// Declares an ID newtype: `id_type!(pub Foo)` expands to a tuple struct
/// `pub struct Foo(Vec<u8>)` declared inside `content_hash!` and deriving
/// equality, ordering, `Clone` and `Hash`.
macro_rules! id_type {
// `$vis` is the visibility of the generated struct, `$name` its identifier.
($vis:vis $name:ident) => {
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
$vis struct $name(Vec<u8>);
/// Generates the trait implementations shared by every ID type: a `Debug`
/// impl and an implementation of `crate::backend::ObjectId` for `$name`.
macro_rules! impl_id_type {
($name:ident) => {
impl Debug for $name {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
// The trait is named by its full path so the expansion does not depend on
// which names are imported at the call site.
impl crate::backend::ObjectId for $name {
fn new(value: Vec<u8>) -> Self {
fn object_type(&self) -> String {
fn from_bytes(bytes: &[u8]) -> Self {
fn as_bytes(&self) -> &[u8] {
fn to_bytes(&self) -> Vec<u8> {
fn from_hex(hex: &str) -> Self {
fn hex(&self) -> String {
// Public ID newtypes, each wrapping a `Vec<u8>` (see `id_type!`). They are the
// ID types taken and returned by the methods of the `Backend` trait.
id_type!(pub CommitId);
id_type!(pub ChangeId);
id_type!(pub TreeId);
id_type!(pub FileId);
id_type!(pub SymlinkId);
id_type!(pub ConflictId);
pub enum Phase {
content_hash! {
/// A point in time stored as a signed millisecond count (`i64`).
// NOTE(review): `Timestamp::from_datetime` fills this from chrono's
// `timestamp_millis()`, so the epoch is presumably the Unix epoch -- confirm.
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);
content_hash! {
/// An instant in time together with the time zone offset it was recorded in.
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct Timestamp {
// the instant itself, in milliseconds
pub timestamp: MillisSinceEpoch,
// time zone offset in minutes
pub tz_offset: i32,
impl Timestamp {
// NOTE(review): presumably returns the current time as a `Timestamp` --
// confirm against the implementation.
pub fn now() -> Self {
/// Converts a `chrono::DateTime` whose time zone has a fixed offset into a
/// `Timestamp`.
///
/// The instant is taken from `datetime.timestamp_millis()`. The zone offset
/// is taken from `local_minus_utc()` and divided by 60 to obtain the minutes
/// stored in `tz_offset`; the integer division discards any sub-minute part.
pub fn from_datetime<Tz: chrono::TimeZone<Offset = chrono::offset::FixedOffset>>(
datetime: chrono::DateTime<Tz>,
) -> Self {
Self {
timestamp: MillisSinceEpoch(datetime.timestamp_millis()),
tz_offset: datetime.offset().local_minus_utc() / 60,
content_hash! {
/// Name, email address and timestamp of a person, as recorded in the
/// `author` and `committer` fields of `Commit`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Signature {
pub name: String,
pub email: String,
pub timestamp: Timestamp,
content_hash! {
/// Commit data as read from and written to a `Backend` (see
/// `Backend::read_commit` and `Backend::write_commit`).
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Commit {
pub parents: Vec<CommitId>,
pub predecessors: Vec<CommitId>,
/// Root tree of the commit. It is a `conflicts::Conflict<TreeId>` rather than a plain
/// `TreeId` so that a tree-level conflict can be stored as several trees.
pub root_tree: conflicts::Conflict<TreeId>,
/// Indicates that this commit uses the new tree-level conflict format, which means
/// that if `root_tree` is not a conflict, we know that we won't have to walk it to
/// determine if there are conflicts.
// TODO(#1624): Delete this field at some point in the future, when we decide to drop
// support for conflicts in older repos, or maybe after we have provided an upgrade
// mechanism.
pub uses_tree_conflict_format: bool,
pub change_id: ChangeId,
pub description: String,
pub author: Signature,
pub committer: Signature,
content_hash! {
/// One term of a `Conflict`: a single `TreeValue` listed in either its
/// `adds` or its `removes`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ConflictTerm {
pub value: TreeValue,
content_hash! {
/// A conflict between tree values, stored as terms to remove and terms to add.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
// A conflict is represented by a list of positive and negative states that need to be applied.
// In a simple 3-way merge of B and C with merge base A, the conflict will be
// { adds: [B, C], removes: [A] }. Also note that a conflict of the form
// { adds: [A], removes: [] } is the same as non-conflict A.
pub removes: Vec<ConflictTerm>,
pub adds: Vec<ConflictTerm>,
/// Error that may occur during backend initialization.
///
/// Wraps the underlying cause as a boxed `Send + Sync` error in its public
/// field.
#[derive(Debug, Error)]
pub struct BackendInitError(pub Box<dyn std::error::Error + Send + Sync>);
/// Error that may occur during backend loading.
///
/// Wraps the underlying cause as a boxed `Send + Sync` error in its public
/// field.
#[derive(Debug, Error)]
pub struct BackendLoadError(pub Box<dyn std::error::Error + Send + Sync>);
/// Commit-backend error that may occur after the backend is loaded.
///
/// This is the error type of `BackendResult`. Most variants carry the type of
/// the object involved and its hash as strings so that the message can name
/// the object.
#[derive(Debug, Error)]
pub enum BackendError {
// An ID had `actual` bytes where `expected` bytes were required.
"Invalid hash length for object of type {object_type} (expected {expected} bytes, got \
{actual} bytes): {hash}"
InvalidHashLength {
expected: usize,
actual: usize,
object_type: String,
hash: String,
/// The hash was rejected as invalid; `source` holds the underlying cause.
#[error("Invalid hash for object of type {object_type} with hash {hash}: {source}")]
InvalidHash {
object_type: String,
hash: String,
source: Box<dyn std::error::Error + Send + Sync>,
/// Converting the object's bytes to a `String` failed with a `FromUtf8Error`.
#[error("Invalid UTF-8 for object {hash} of type {object_type}: {source}")]
InvalidUtf8 {
object_type: String,
hash: String,
source: std::string::FromUtf8Error,
/// The requested object could not be found.
#[error("Object {hash} of type {object_type} not found: {source}")]
ObjectNotFound {
object_type: String,
hash: String,
source: Box<dyn std::error::Error + Send + Sync>,
/// Reading the object failed.
#[error("Error when reading object {hash} of type {object_type}: {source}")]
ReadObject {
object_type: String,
hash: String,
source: Box<dyn std::error::Error + Send + Sync>,
/// Writing an object failed. There is no `hash` field, and `object_type` is
/// a `&'static str` here rather than a `String` as in the other variants.
#[error("Could not write object of type {object_type}: {source}")]
WriteObject {
object_type: &'static str,
source: Box<dyn std::error::Error + Send + Sync>,
/// Any other error, wrapped as-is.
#[error("Error: {0}")]
Other(Box<dyn std::error::Error + Send + Sync>),
/// Result type whose error is `BackendError`; returned by the fallible methods
/// of the `Backend` trait.
pub type BackendResult<T> = Result<T, BackendError>;
/// The value a `Tree` stores under an entry name.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum TreeValue {
/// A file, identified by `id`, together with its executable flag.
File { id: FileId, executable: bool },
/// Hand-written `ContentHash` implementation for `TreeValue`.
impl ContentHash for TreeValue {
/// Feeds this value into `state`, with one match arm per variant.
fn hash(&self, state: &mut impl digest::Update) {
// Bring the variant names into scope for the match below.
use TreeValue::*;
match self {
File { id, executable } => {
Symlink(id) => {
Tree(id) => {
GitSubmodule(id) => {
Conflict(id) => {
/// A borrowed view of one `Tree` entry: a name paired with its value. Both
/// references share the lifetime `'a`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
name: &'a RepoPathComponent,
value: &'a TreeValue,
impl<'a> TreeEntry<'a> {
/// Pairs a borrowed name with a borrowed value.
pub fn new(name: &'a RepoPathComponent, value: &'a TreeValue) -> Self {
TreeEntry { name, value }
/// Returns the entry's name with the original lifetime `'a`, not the
/// lifetime of the `&self` borrow.
pub fn name(&self) -> &'a RepoPathComponent {
/// Returns the entry's value with the original lifetime `'a`.
pub fn value(&self) -> &'a TreeValue {
/// Iterator over the entries of a single `Tree`, returned by `Tree::entries`.
/// It wraps an iterator over the tree's `BTreeMap`.
pub struct TreeEntriesNonRecursiveIterator<'a> {
iter: std::collections::btree_map::Iter<'a, RepoPathComponent, TreeValue>,
impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
type Item = TreeEntry<'a>;
/// Yields the next entry, converting a `(name, value)` pair into a
/// `TreeEntry`.
fn next(&mut self) -> Option<Self::Item> {
.map(|(name, value)| TreeEntry { name, value })
content_hash! {
/// A tree object: a map from entry name to `TreeValue`, kept in a `BTreeMap`
/// and therefore ordered by name.
#[derive(Default, PartialEq, Eq, Debug, Clone)]
pub struct Tree {
entries: BTreeMap<RepoPathComponent, TreeValue>,
impl Tree {
pub fn is_empty(&self) -> bool {
/// Iterates over the names of the entries.
pub fn names(&self) -> impl Iterator<Item = &RepoPathComponent> {
/// Iterates over the entries of this tree as `TreeEntry` values, wrapping
/// the underlying map iterator.
pub fn entries(&self) -> TreeEntriesNonRecursiveIterator {
TreeEntriesNonRecursiveIterator {
iter: self.entries.iter(),
/// Stores `value` under `name`, replacing any value already stored there.
pub fn set(&mut self, name: RepoPathComponent, value: TreeValue) {
self.entries.insert(name, value);
pub fn remove(&mut self, name: &RepoPathComponent) {
/// Stores `value` under `name` when it is `Some`; the `None` case is the
/// "remove" half of the name.
pub fn set_or_remove(&mut self, name: &RepoPathComponent, value: Option<TreeValue>) {
match value {
None => {
Some(value) => {
// The map owns its keys, so the borrowed name is cloned.
self.entries.insert(name.clone(), value);
/// Looks up `name` and returns the match, if any, as a `TreeEntry`.
pub fn entry(&self, name: &RepoPathComponent) -> Option<TreeEntry> {
.map(|(name, value)| TreeEntry { name, value })
/// Looks up `name` and returns a reference to its value, if any.
pub fn value(&self, name: &RepoPathComponent) -> Option<&TreeValue> {
/// Calculates the length of the common prefix of two byte slices. The length
/// returned is a number of hexadecimal digits, not a number of bytes.
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
// Keep going only while the paired items are equal.
.take_while(|(a, b)| a == b)
/// Iterates over `bytes` one hexadecimal digit (4 bits) at a time, yielding
/// `2 * bytes.len()` values in the range `0..=15`. For each byte the high
/// half comes first, then the low half.
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ {
(0..bytes.len() * 2).map(|i| {
// Digits `2k` and `2k + 1` both come from byte `k`.
let v = bytes[i / 2];
if i & 1 == 0 {
// Even index: upper four bits.
v >> 4
} else {
// Odd index: lower four bits.
v & 0xf
/// Builds the `Commit` value for the root commit.
///
/// The commit has no parents and no predecessors, an empty description, and
/// a resolved (non-conflict) root tree of `empty_tree_id`. Its change ID is
/// `root_change_id`, and the author and committer are the same signature
/// with empty name and email.
pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Commit {
// Millisecond 0 with a zero time zone offset.
let timestamp = Timestamp {
timestamp: MillisSinceEpoch(0),
tz_offset: 0,
let signature = Signature {
name: String::new(),
email: String::new(),
Commit {
parents: vec![],
predecessors: vec![],
root_tree: conflicts::Conflict::resolved(empty_tree_id),
// The root commit is not marked as using the tree-level conflict format.
uses_tree_conflict_format: false,
change_id: root_change_id,
description: String::new(),
// `signature` is cloned for the author and then moved into the committer.
author: signature.clone(),
committer: signature,
/// Interface of a commit storage backend: reading and writing files,
/// symlinks, trees, conflicts and commits by ID.
///
/// Implementations must be `Send + Sync + Debug`. Every read and write method
/// returns a `BackendResult`.
pub trait Backend: Send + Sync + Debug {
/// Returns `self` as `&dyn Any` so that callers can downcast to the
/// concrete backend type.
fn as_any(&self) -> &dyn Any;
/// A unique name that identifies this backend. Written to
/// `.jj/repo/store/backend` when the repo is created.
fn name(&self) -> &str;
/// The length of commit IDs in bytes.
fn commit_id_length(&self) -> usize;
/// The length of change IDs in bytes.
fn change_id_length(&self) -> usize;
/// Opens the file with the given `id` and returns a reader for its contents.
// NOTE(review): how implementations use `path` in the read/write methods
// is not specified in this trait -- confirm per backend.
fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>>;
/// Writes the data read from `contents` as a file and returns its ID.
fn write_file(&self, path: &RepoPath, contents: &mut dyn Read) -> BackendResult<FileId>;
/// Reads the symlink with the given `id` and returns its target as a string.
fn read_symlink(&self, path: &RepoPath, id: &SymlinkId) -> BackendResult<String>;
/// Writes a symlink pointing at `target` and returns its ID.
fn write_symlink(&self, path: &RepoPath, target: &str) -> BackendResult<SymlinkId>;
/// The ID of the root commit.
fn root_commit_id(&self) -> &CommitId;
/// The change ID of the root commit.
fn root_change_id(&self) -> &ChangeId;
/// The ID of the empty tree.
fn empty_tree_id(&self) -> &TreeId;
/// Reads the tree with the given `id`.
fn read_tree(&self, path: &RepoPath, id: &TreeId) -> BackendResult<Tree>;
/// Writes `contents` as a tree and returns its ID.
fn write_tree(&self, path: &RepoPath, contents: &Tree) -> BackendResult<TreeId>;
/// Reads the conflict with the given `id`.
fn read_conflict(&self, path: &RepoPath, id: &ConflictId) -> BackendResult<Conflict>;
/// Writes `contents` as a conflict and returns its ID.
fn write_conflict(&self, path: &RepoPath, contents: &Conflict) -> BackendResult<ConflictId>;
/// Reads the commit with the given `id`.
fn read_commit(&self, id: &CommitId) -> BackendResult<Commit>;
/// Writes a commit and returns its ID and the commit itself. The commit
/// should contain the data that was actually written, which may differ
/// from the data passed in. For example, the backend may change the
/// committer name to an authenticated user's name, or the backend's
/// timestamps may have less precision than the millisecond precision in
/// `Commit`.
fn write_commit(&self, contents: Commit) -> BackendResult<(CommitId, Commit)>;