backend: let backend decide length of change id

As mentioned in the previous commit, our internal backend at Google
uses a 32-byte long change id. This commit will make us able to use
that.
This commit is contained in:
Martin von Zweigbergk 2023-02-06 10:15:01 -08:00 committed by Martin von Zweigbergk
parent e6693d0f68
commit d1dc22d957
10 changed files with 104 additions and 59 deletions

View file

@ -97,6 +97,10 @@ impl Backend for JitBackend {
self.inner.commit_id_length()
}
fn change_id_length(&self) -> usize {
self.inner.change_id_length()
}
fn git_repo(&self) -> Option<Repository> {
self.inner.git_repo()
}

View file

@ -23,8 +23,6 @@ use thiserror::Error;
use crate::content_hash::ContentHash;
use crate::repo_path::{RepoPath, RepoPathComponent};
pub const CHANGE_ID_HASH_LENGTH: usize = 16;
pub trait ObjectId {
fn new(value: Vec<u8>) -> Self;
fn object_type(&self) -> String;
@ -378,6 +376,9 @@ pub trait Backend: Send + Sync + Debug {
/// The length of commit IDs in bytes.
fn commit_id_length(&self) -> usize;
/// The length of change IDs in bytes.
fn change_id_length(&self) -> usize;
fn git_repo(&self) -> Option<git2::Repository>;
fn read_file(&self, path: &RepoPath, id: &FileId) -> BackendResult<Box<dyn Read>>;

View file

@ -37,11 +37,12 @@ impl CommitBuilder<'_> {
let signature = settings.signature();
assert!(!parents.is_empty());
let rng = settings.get_rng();
let change_id = rng.new_change_id(mut_repo.store().change_id_length());
let commit = backend::Commit {
parents,
predecessors: vec![],
root_tree: tree_id,
change_id: rng.new_change_id(),
change_id,
description: String::new(),
author: signature.clone(),
committer: signature,
@ -100,7 +101,9 @@ impl CommitBuilder<'_> {
}
pub fn generate_new_change_id(mut self) -> Self {
self.commit.change_id = self.rng.new_change_id();
self.commit.change_id = self
.rng
.new_change_id(self.mut_repo.store().change_id_length());
self
}

View file

@ -25,12 +25,13 @@ use prost::Message;
use crate::backend::{
make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
ConflictId, ConflictPart, FileId, MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp,
Tree, TreeId, TreeValue, CHANGE_ID_HASH_LENGTH,
Tree, TreeId, TreeValue,
};
use crate::repo_path::{RepoPath, RepoPathComponent};
use crate::stacked_table::{ReadonlyTable, TableSegment, TableStore};
const HASH_LENGTH: usize = 20;
const CHANGE_ID_LENGTH: usize = 16;
/// Ref namespace used only for preventing GC.
pub const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
const CONFLICT_SUFFIX: &str = ".jjconflict";
@ -47,7 +48,7 @@ pub struct GitBackend {
impl GitBackend {
fn new(repo: git2::Repository, extra_metadata_store: TableStore) -> Self {
let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_HASH_LENGTH]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
let empty_tree_id = TreeId::from_hex("4b825dc642cb6eb9a060e54bf8d69288fbee4904");
GitBackend {
repo: Mutex::new(repo),
@ -194,6 +195,10 @@ impl Backend for GitBackend {
HASH_LENGTH
}
fn change_id_length(&self) -> usize {
CHANGE_ID_LENGTH
}
fn git_repo(&self) -> Option<git2::Repository> {
let path = self.repo.lock().unwrap().path().to_owned();
Some(git2::Repository::open(path).unwrap())

View file

@ -159,13 +159,14 @@ impl<'a> IndexRef<'a> {
struct CommitGraphEntry<'a> {
data: &'a [u8],
commit_id_length: usize,
change_id_length: usize,
}
// TODO: Add pointers to ancestors further back, like a skip list. Clear the
// lowest set bit to determine which generation number the pointers point to.
impl CommitGraphEntry<'_> {
fn size(commit_id_length: usize) -> usize {
36 + commit_id_length
fn size(commit_id_length: usize, change_id_length: usize) -> usize {
20 + commit_id_length + change_id_length
}
fn generation_number(&self) -> u32 {
@ -191,11 +192,14 @@ impl CommitGraphEntry<'_> {
// to better cache locality when walking it; ability to quickly find all
// commits associated with a change id.
fn change_id(&self) -> ChangeId {
ChangeId::new(self.data[20..36].to_vec())
ChangeId::new(self.data[20..20 + self.change_id_length].to_vec())
}
fn commit_id(&self) -> CommitId {
CommitId::from_bytes(&self.data[36..36 + self.commit_id_length])
CommitId::from_bytes(
&self.data
[20 + self.change_id_length..20 + self.change_id_length + self.commit_id_length],
)
}
}
@ -256,6 +260,7 @@ pub struct ReadonlyIndex {
num_parent_commits: u32,
name: String,
commit_id_length: usize,
change_id_length: usize,
commit_graph_entry_size: usize,
commit_lookup_entry_size: usize,
// Number of commits not counting the parent file
@ -374,16 +379,18 @@ pub struct MutableIndex {
parent_file: Option<Arc<ReadonlyIndex>>,
num_parent_commits: u32,
commit_id_length: usize,
change_id_length: usize,
graph: Vec<MutableGraphEntry>,
lookup: BTreeMap<CommitId, IndexPosition>,
}
impl MutableIndex {
pub(crate) fn full(commit_id_length: usize) -> Self {
pub(crate) fn full(commit_id_length: usize, change_id_length: usize) -> Self {
Self {
parent_file: None,
num_parent_commits: 0,
commit_id_length,
change_id_length,
graph: vec![],
lookup: BTreeMap::new(),
}
@ -392,10 +399,12 @@ impl MutableIndex {
pub fn incremental(parent_file: Arc<ReadonlyIndex>) -> Self {
let num_parent_commits = parent_file.num_parent_commits + parent_file.num_local_commits;
let commit_id_length = parent_file.commit_id_length;
let change_id_length = parent_file.change_id_length;
Self {
parent_file: Some(parent_file),
num_parent_commits,
commit_id_length,
change_id_length,
graph: vec![],
lookup: BTreeMap::new(),
}
@ -531,7 +540,7 @@ impl MutableIndex {
buf.write_u32::<LittleEndian>(parent1_pos.0).unwrap();
buf.write_u32::<LittleEndian>(parent_overflow_pos).unwrap();
assert_eq!(entry.change_id.as_bytes().len(), 16);
assert_eq!(entry.change_id.as_bytes().len(), self.change_id_length);
buf.write_all(entry.change_id.as_bytes()).unwrap();
assert_eq!(entry.commit_id.as_bytes().len(), self.commit_id_length);
@ -576,7 +585,7 @@ impl MutableIndex {
maybe_parent_file = parent_file.parent_file.clone();
}
None => {
squashed = MutableIndex::full(self.commit_id_length);
squashed = MutableIndex::full(self.commit_id_length, self.change_id_length);
break;
}
}
@ -599,6 +608,7 @@ impl MutableIndex {
}
let commit_id_length = self.commit_id_length;
let change_id_length = self.change_id_length;
let buf = self.maybe_squash_with_ancestors().serialize();
let mut hasher = Blake2b512::new();
@ -612,14 +622,19 @@ impl MutableIndex {
persist_content_addressed_temp_file(temp_file, index_file_path)?;
let mut cursor = Cursor::new(&buf);
ReadonlyIndex::load_from(&mut cursor, dir, index_file_id_hex, commit_id_length).map_err(
|err| match err {
IndexLoadError::IndexCorrupt(err) => {
panic!("Just-created index file is corrupt: {err}")
}
IndexLoadError::IoError(err) => err,
},
ReadonlyIndex::load_from(
&mut cursor,
dir,
index_file_id_hex,
commit_id_length,
change_id_length,
)
.map_err(|err| match err {
IndexLoadError::IndexCorrupt(err) => {
panic!("Just-created index file is corrupt: {err}")
}
IndexLoadError::IoError(err) => err,
})
}
pub fn num_commits(&self) -> u32 {
@ -1544,6 +1559,7 @@ impl ReadonlyIndex {
dir: PathBuf,
name: String,
commit_id_length: usize,
change_id_length: usize,
) -> Result<Arc<ReadonlyIndex>, IndexLoadError> {
let parent_filename_len = file.read_u32::<LittleEndian>()?;
let num_parent_commits;
@ -1554,8 +1570,13 @@ impl ReadonlyIndex {
let parent_filename = String::from_utf8(parent_filename_bytes).unwrap();
let parent_file_path = dir.join(&parent_filename);
let mut index_file = File::open(parent_file_path).unwrap();
let parent_file =
ReadonlyIndex::load_from(&mut index_file, dir, parent_filename, commit_id_length)?;
let parent_file = ReadonlyIndex::load_from(
&mut index_file,
dir,
parent_filename,
commit_id_length,
change_id_length,
)?;
num_parent_commits = parent_file.num_parent_commits + parent_file.num_local_commits;
maybe_parent_file = Some(parent_file);
} else {
@ -1566,7 +1587,7 @@ impl ReadonlyIndex {
let num_parent_overflow_entries = file.read_u32::<LittleEndian>()?;
let mut data = vec![];
file.read_to_end(&mut data)?;
let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length);
let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length, change_id_length);
let graph_size = (num_commits as usize) * commit_graph_entry_size;
let commit_lookup_entry_size = CommitLookupEntry::size(commit_id_length);
let lookup_size = (num_commits as usize) * commit_lookup_entry_size;
@ -1583,6 +1604,7 @@ impl ReadonlyIndex {
num_parent_commits,
name,
commit_id_length,
change_id_length,
commit_graph_entry_size,
commit_lookup_entry_size,
num_local_commits: num_commits,
@ -1663,6 +1685,7 @@ impl ReadonlyIndex {
CommitGraphEntry {
data: &self.graph[offset..offset + self.commit_graph_entry_size],
commit_id_length: self.commit_id_length,
change_id_length: self.change_id_length,
}
}
@ -1723,7 +1746,7 @@ mod tests {
#[test_case(true; "file")]
fn index_empty(on_disk: bool) {
let temp_dir = testutils::new_temp_dir();
let index = MutableIndex::full(3);
let index = MutableIndex::full(3, 16);
let mut _saved_index = None;
let index = if on_disk {
_saved_index = Some(index.save_in(temp_dir.path().to_owned()).unwrap());
@ -1752,7 +1775,7 @@ mod tests {
fn index_root_commit(on_disk: bool) {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
let id_0 = CommitId::from_hex("000000");
let change_id0 = new_change_id();
index.add_commit_data(id_0.clone(), change_id0.clone(), &[]);
@ -1791,7 +1814,7 @@ mod tests {
#[should_panic(expected = "parent commit is not indexed")]
fn index_missing_parent_commit() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
let id_0 = CommitId::from_hex("000000");
let id_1 = CommitId::from_hex("111111");
index.add_commit_data(id_1, new_change_id(), &[id_0]);
@ -1804,7 +1827,7 @@ mod tests {
fn index_multiple_commits(incremental: bool, on_disk: bool) {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 5
// |\
// 4 | 3
@ -1912,7 +1935,7 @@ mod tests {
fn index_many_parents(on_disk: bool) {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 6
// /|\
// / | \
@ -1977,7 +2000,7 @@ mod tests {
fn resolve_prefix() {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// Create some commits with different various common prefixes.
let id_0 = CommitId::from_hex("000000");
@ -2047,7 +2070,7 @@ mod tests {
fn neighbor_commit_ids() {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// Create some commits with different various common prefixes.
let id_0 = CommitId::from_hex("000001");
@ -2175,7 +2198,7 @@ mod tests {
fn shortest_unique_commit_id_prefix() {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// Create some commits with different various common prefixes.
let id_0 = CommitId::from_hex("000001");
@ -2226,7 +2249,7 @@ mod tests {
#[test]
fn test_is_ancestor() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 5
// |\
// 4 | 3
@ -2263,7 +2286,7 @@ mod tests {
#[test]
fn test_common_ancestors() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 5
// |\
// 4 |
@ -2345,7 +2368,7 @@ mod tests {
#[test]
fn test_common_ancestors_criss_cross() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 3 4
// |X|
// 1 2
@ -2370,7 +2393,7 @@ mod tests {
#[test]
fn test_common_ancestors_merge_with_ancestor() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 4 5
// |\ /|
// 1 2 3
@ -2397,7 +2420,7 @@ mod tests {
#[test]
fn test_walk_revs() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 5
// |\
// 4 | 3
@ -2476,7 +2499,7 @@ mod tests {
#[test]
fn test_walk_revs_filter_by_generation() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 8 6
// | |
// 7 5
@ -2558,7 +2581,7 @@ mod tests {
#[test]
fn test_heads() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
// 5
// |\
// 4 | 3

View file

@ -54,7 +54,11 @@ impl IndexStore {
let op_id_hex = op.id().hex();
let op_id_file = self.dir.join("operations").join(op_id_hex);
if op_id_file.exists() {
match self.load_index_at_operation(store.commit_id_length(), op.id()) {
match self.load_index_at_operation(
store.commit_id_length(),
store.change_id_length(),
op.id(),
) {
Err(IndexLoadError::IndexCorrupt(_)) => {
// If the index was corrupt (maybe it was written in a different format),
// we just reindex.
@ -78,6 +82,7 @@ impl IndexStore {
fn load_index_at_operation(
&self,
commit_id_length: usize,
change_id_length: usize,
op_id: &OperationId,
) -> Result<Arc<ReadonlyIndex>, IndexLoadError> {
let op_id_file = self.dir.join("operations").join(op_id.hex());
@ -94,6 +99,7 @@ impl IndexStore {
self.dir.clone(),
index_file_id_hex,
commit_id_length,
change_id_length,
)
}
@ -105,6 +111,7 @@ impl IndexStore {
let view = operation.view();
let operations_dir = self.dir.join("operations");
let commit_id_length = store.commit_id_length();
let change_id_length = store.change_id_length();
let mut new_heads = view.heads().clone();
let mut parent_op_id: Option<OperationId> = None;
for op in dag_walk::bfs(
@ -127,11 +134,11 @@ impl IndexStore {
match parent_op_id {
None => {
maybe_parent_file = None;
data = MutableIndex::full(commit_id_length);
data = MutableIndex::full(commit_id_length, change_id_length);
}
Some(parent_op_id) => {
let parent_file = self
.load_index_at_operation(commit_id_length, &parent_op_id)
.load_index_at_operation(commit_id_length, change_id_length, &parent_op_id)
.unwrap();
maybe_parent_file = Some(parent_file.clone());
data = MutableIndex::incremental(parent_file)

View file

@ -25,12 +25,15 @@ use tempfile::{NamedTempFile, PersistError};
use crate::backend::{
make_root_commit, Backend, BackendError, BackendResult, ChangeId, Commit, CommitId, Conflict,
ConflictId, ConflictPart, FileId, MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp,
Tree, TreeId, TreeValue, CHANGE_ID_HASH_LENGTH,
Tree, TreeId, TreeValue,
};
use crate::content_hash::blake2b_hash;
use crate::file_util::persist_content_addressed_temp_file;
use crate::repo_path::{RepoPath, RepoPathComponent};
const COMMIT_ID_LENGTH: usize = 64;
const CHANGE_ID_LENGTH: usize = 16;
impl From<std::io::Error> for BackendError {
fn from(err: std::io::Error) -> Self {
BackendError::Other(err.to_string())
@ -89,8 +92,8 @@ impl LocalBackend {
}
pub fn load(store_path: &Path) -> Self {
let root_commit_id = CommitId::from_bytes(&[0; 64]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_HASH_LENGTH]);
let root_commit_id = CommitId::from_bytes(&[0; COMMIT_ID_LENGTH]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310");
LocalBackend {
path: store_path.to_path_buf(),
@ -127,7 +130,11 @@ impl Backend for LocalBackend {
}
fn commit_id_length(&self) -> usize {
64
COMMIT_ID_LENGTH
}
fn change_id_length(&self) -> usize {
CHANGE_ID_LENGTH
}
fn git_repo(&self) -> Option<git2::Repository> {

View file

@ -3684,7 +3684,7 @@ mod tests {
#[test]
fn test_revset_combinator() {
let mut new_change_id = change_id_generator();
let mut index = MutableIndex::full(3);
let mut index = MutableIndex::full(3, 16);
let id_0 = CommitId::from_hex("000000");
let id_1 = CommitId::from_hex("111111");
let id_2 = CommitId::from_hex("222222");

View file

@ -19,7 +19,7 @@ use chrono::DateTime;
use rand::prelude::*;
use rand_chacha::ChaCha20Rng;
use crate::backend::{ChangeId, ObjectId, Signature, Timestamp, CHANGE_ID_HASH_LENGTH};
use crate::backend::{ChangeId, ObjectId, Signature, Timestamp};
#[derive(Debug, Clone)]
pub struct UserSettings {
@ -198,19 +198,10 @@ impl UserSettings {
#[derive(Debug)]
pub struct JJRng(Mutex<ChaCha20Rng>);
impl JJRng {
pub fn new_change_id(&self) -> ChangeId {
let random_bytes: [u8; CHANGE_ID_HASH_LENGTH] = self.gen();
ChangeId::new(random_bytes.into())
}
/// Wraps Rng::gen but only requires an immutable reference. Can be made
/// public if there's a use for it.
fn gen<T>(&self) -> T
where
rand::distributions::Standard: rand::distributions::Distribution<T>,
{
pub fn new_change_id(&self, length: usize) -> ChangeId {
let mut rng = self.0.lock().unwrap();
rng.gen()
let random_bytes = (0..length).map(|_| rng.gen::<u8>()).collect();
ChangeId::new(random_bytes)
}
/// Creates a new RNGs. Could be made public, but we'd like to encourage all

View file

@ -47,6 +47,10 @@ impl Store {
self.backend.commit_id_length()
}
pub fn change_id_length(&self) -> usize {
self.backend.change_id_length()
}
pub fn git_repo(&self) -> Option<git2::Repository> {
self.backend.git_repo()
}