forked from mirrors/jj
index: move segment files to sub directory, add version number
I'm going to introduce breaking changes to the index format. Some of them will affect the file size, so a version number or signature wouldn't strictly be needed to detect those. However, I think it's safer to detect a format change as early as possible. I have no idea whether an embedded version number is the best way. Because segment files are looked up through the operation links, the version number could be stored there and/or the "segments" directory could be versioned. If we want to support multiple format versions and clients, it might be better to split the tables into data chunks (e.g. graph entries, commit id table, change id table) and add a per-chunk version/type tag. I chose the per-file version just because it's simple and should be non-controversial. As I'm going to introduce the format changes pretty soon, this patch doesn't implement data migration. The existing index files will be deleted and new files will be created from scratch. Planned index format changes include: (1) remove the unused "flags" field; (2) inline up to two commit parents; (3) add a sorted change ids table.
This commit is contained in:
parent
4b541e6c93
commit
b0e8e2a1af
5 changed files with 82 additions and 10 deletions
|
@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
* The minimum supported Rust version (MSRV) is now 1.76.0.
|
* The minimum supported Rust version (MSRV) is now 1.76.0.
|
||||||
|
|
||||||
|
* The on-disk index format changed. New index files will be created
|
||||||
|
automatically, but it can fail if the repository is co-located and predates
|
||||||
|
Git GC issues [#815](https://github.com/martinvonz/jj/issues/815). If
|
||||||
|
reindexing fails, you'll need to clean up the corrupted operation history with
|
||||||
|
`jj op abandon ..<bad operation ID>`.
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
* Templates now support logical operators: `||`, `&&`, `!`
|
* Templates now support logical operators: `||`, `&&`, `!`
|
||||||
|
|
|
@ -31,7 +31,9 @@ use tempfile::NamedTempFile;
|
||||||
|
|
||||||
use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
|
use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
|
||||||
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
|
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
|
||||||
use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment};
|
use super::readonly::{
|
||||||
|
DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION,
|
||||||
|
};
|
||||||
use crate::backend::{ChangeId, CommitId};
|
use crate::backend::{ChangeId, CommitId};
|
||||||
use crate::commit::Commit;
|
use crate::commit::Commit;
|
||||||
use crate::file_util::persist_content_addressed_temp_file;
|
use crate::file_util::persist_content_addressed_temp_file;
|
||||||
|
@ -275,6 +277,7 @@ impl MutableIndexSegment {
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes());
|
||||||
self.serialize_parent_filename(&mut buf);
|
self.serialize_parent_filename(&mut buf);
|
||||||
let local_entries_offset = buf.len();
|
let local_entries_offset = buf.len();
|
||||||
self.serialize_local_entries(&mut buf);
|
self.serialize_local_entries(&mut buf);
|
||||||
|
|
|
@ -72,6 +72,9 @@ impl ReadonlyIndexLoadError {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Current format version of the index segment file.
///
/// Serialized as a little-endian `u32` at the very start of each segment
/// file. When a segment file is loaded, a version field that differs from
/// this value is rejected as invalid data ("unsupported file format
/// version").
|
||||||
|
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 1;
|
||||||
|
|
||||||
struct CommitGraphEntry<'a> {
|
struct CommitGraphEntry<'a> {
|
||||||
data: &'a [u8],
|
data: &'a [u8],
|
||||||
commit_id_length: usize,
|
commit_id_length: usize,
|
||||||
|
@ -145,6 +148,7 @@ impl CommitLookupEntry<'_> {
|
||||||
///
|
///
|
||||||
/// File format:
|
/// File format:
|
||||||
/// ```text
|
/// ```text
|
||||||
|
/// u32: file format version
|
||||||
/// u32: parent segment file name length (0 means root)
|
/// u32: parent segment file name length (0 means root)
|
||||||
/// <length number of bytes>: parent segment file name
|
/// <length number of bytes>: parent segment file name
|
||||||
///
|
///
|
||||||
|
@ -167,7 +171,6 @@ impl CommitLookupEntry<'_> {
|
||||||
///
|
///
|
||||||
/// Note that u32 fields are 4-byte aligned so long as the parent file name
|
/// Note that u32 fields are 4-byte aligned so long as the parent file name
|
||||||
/// (which is hexadecimal hash) and commit/change ids aren't of exotic length.
|
/// (which is hexadecimal hash) and commit/change ids aren't of exotic length.
|
||||||
// TODO: add a version number
|
|
||||||
// TODO: replace the table by a trie so we don't have to repeat the full commit
|
// TODO: replace the table by a trie so we don't have to repeat the full commit
|
||||||
// ids
|
// ids
|
||||||
// TODO: add a fanout table like git's commit graph has?
|
// TODO: add a fanout table like git's commit graph has?
|
||||||
|
@ -220,6 +223,13 @@ impl ReadonlyIndexSegment {
|
||||||
file.read_exact(&mut buf).map_err(from_io_err)?;
|
file.read_exact(&mut buf).map_err(from_io_err)?;
|
||||||
Ok(u32::from_le_bytes(buf))
|
Ok(u32::from_le_bytes(buf))
|
||||||
};
|
};
|
||||||
|
let format_version = read_u32(file)?;
|
||||||
|
if format_version != INDEX_SEGMENT_FILE_FORMAT_VERSION {
|
||||||
|
return Err(ReadonlyIndexLoadError::invalid_data(
|
||||||
|
&name,
|
||||||
|
format!("unsupported file format version: {format_version}"),
|
||||||
|
));
|
||||||
|
}
|
||||||
let parent_filename_len = read_u32(file)?;
|
let parent_filename_len = read_u32(file)?;
|
||||||
let maybe_parent_file = if parent_filename_len > 0 {
|
let maybe_parent_file = if parent_filename_len > 0 {
|
||||||
let mut parent_filename_bytes = vec![0; parent_filename_len as usize];
|
let mut parent_filename_bytes = vec![0; parent_filename_len as usize];
|
||||||
|
|
|
@ -109,6 +109,8 @@ impl DefaultIndexStore {
|
||||||
file_util::remove_dir_contents(&self.operations_dir())?;
|
file_util::remove_dir_contents(&self.operations_dir())?;
|
||||||
// Remove index segments to save disk space. If raced, new segment file
|
// Remove index segments to save disk space. If raced, new segment file
|
||||||
// will be created by the other process.
|
// will be created by the other process.
|
||||||
|
file_util::remove_dir_contents(&self.segments_dir())?;
|
||||||
|
// jj <= 0.14 created segment files in the top directory
|
||||||
for entry in self.dir.read_dir().context(&self.dir)? {
|
for entry in self.dir.read_dir().context(&self.dir)? {
|
||||||
let entry = entry.context(&self.dir)?;
|
let entry = entry.context(&self.dir)?;
|
||||||
let path = entry.path();
|
let path = entry.path();
|
||||||
|
@ -122,14 +124,20 @@ impl DefaultIndexStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ensure_base_dirs(&self) -> Result<(), PathError> {
|
fn ensure_base_dirs(&self) -> Result<(), PathError> {
|
||||||
let op_dir = self.operations_dir();
|
for dir in [self.operations_dir(), self.segments_dir()] {
|
||||||
file_util::create_or_reuse_dir(&op_dir).context(&op_dir)
|
file_util::create_or_reuse_dir(&dir).context(&dir)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn operations_dir(&self) -> PathBuf {
|
fn operations_dir(&self) -> PathBuf {
|
||||||
self.dir.join("operations")
|
self.dir.join("operations")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the `segments` sub directory of the index store directory.
///
/// Index segment files are saved into and loaded from this directory.
fn segments_dir(&self) -> PathBuf {
|
||||||
|
self.dir.join("segments")
|
||||||
|
}
|
||||||
|
|
||||||
fn load_index_segments_at_operation(
|
fn load_index_segments_at_operation(
|
||||||
&self,
|
&self,
|
||||||
op_id: &OperationId,
|
op_id: &OperationId,
|
||||||
|
@ -140,7 +148,7 @@ impl DefaultIndexStore {
|
||||||
let index_file_id_hex =
|
let index_file_id_hex =
|
||||||
fs::read_to_string(op_id_file).map_err(DefaultIndexStoreError::LoadAssociation)?;
|
fs::read_to_string(op_id_file).map_err(DefaultIndexStoreError::LoadAssociation)?;
|
||||||
ReadonlyIndexSegment::load(
|
ReadonlyIndexSegment::load(
|
||||||
&self.dir,
|
&self.segments_dir(),
|
||||||
index_file_id_hex,
|
index_file_id_hex,
|
||||||
commit_id_length,
|
commit_id_length,
|
||||||
change_id_length,
|
change_id_length,
|
||||||
|
@ -271,7 +279,7 @@ impl DefaultIndexStore {
|
||||||
op_id: &OperationId,
|
op_id: &OperationId,
|
||||||
) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
|
) -> Result<Arc<ReadonlyIndexSegment>, DefaultIndexStoreError> {
|
||||||
let index_segment = mutable_index
|
let index_segment = mutable_index
|
||||||
.squash_and_save_in(&self.dir)
|
.squash_and_save_in(&self.segments_dir())
|
||||||
.map_err(DefaultIndexStoreError::SaveIndex)?;
|
.map_err(DefaultIndexStoreError::SaveIndex)?;
|
||||||
self.associate_file_with_operation(&index_segment, op_id)
|
self.associate_file_with_operation(&index_segment, op_id)
|
||||||
.map_err(|source| DefaultIndexStoreError::AssociateIndex {
|
.map_err(|source| DefaultIndexStoreError::AssociateIndex {
|
||||||
|
|
|
@ -294,10 +294,9 @@ fn test_index_commits_previous_operations() {
|
||||||
let repo = tx.commit("test");
|
let repo = tx.commit("test");
|
||||||
|
|
||||||
// Delete index from disk
|
// Delete index from disk
|
||||||
let index_operations_dir = repo.repo_path().join("index").join("operations");
|
let default_index_store: &DefaultIndexStore =
|
||||||
assert!(index_operations_dir.is_dir());
|
repo.index_store().as_any().downcast_ref().unwrap();
|
||||||
std::fs::remove_dir_all(&index_operations_dir).unwrap();
|
default_index_store.reinit().unwrap();
|
||||||
std::fs::create_dir(&index_operations_dir).unwrap();
|
|
||||||
|
|
||||||
let repo = load_repo_at_head(&settings, repo.repo_path());
|
let repo = load_repo_at_head(&settings, repo.repo_path());
|
||||||
let index = as_readonly_composite(&repo);
|
let index = as_readonly_composite(&repo);
|
||||||
|
@ -586,6 +585,52 @@ fn test_index_commits_incremental_squashed() {
|
||||||
assert_eq!(commits_by_level(&repo), vec![71, 20]);
|
assert_eq!(commits_by_level(&repo), vec![71, 20]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that a repo whose index store has no "segments" directory can still
// be loaded, and that a previously written commit is found in the index
// afterwards.
// NOTE(review): presumably the load path recreates the directory and
// reindexes; that code is not visible in this hunk.
#[test]
|
||||||
|
fn test_reindex_no_segments_dir() {
|
||||||
|
let settings = testutils::user_settings();
|
||||||
|
let test_repo = TestRepo::init();
|
||||||
|
let repo = &test_repo.repo;
|

||||||
|
// Write one commit and make sure it is indexed before tampering.
let mut tx = repo.start_transaction(&settings);
|
||||||
|
let commit_a = write_random_commit(tx.mut_repo(), &settings);
|
||||||
|
let repo = tx.commit("test");
|
||||||
|
assert!(repo.index().has_id(commit_a.id()));
|

||||||
|
// Simulate a store created by jj <= 0.14, which doesn't have the "segments"
// directory, by removing the directory together with its contents.
|
||||||
|
let segments_dir = repo.repo_path().join("index").join("segments");
|
||||||
|
assert!(segments_dir.is_dir());
|
||||||
|
fs::remove_dir_all(&segments_dir).unwrap();
|

||||||
|
// Reload from disk; the commit must still be reachable through the index.
let repo = load_repo_at_head(&settings, repo.repo_path());
|
||||||
|
assert!(repo.index().has_id(commit_a.id()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that a repo can still be loaded, and a previously written commit is
// still found in the index, after every segment file has been overwritten
// with an invalid header.
// NOTE(review): presumably the load failure triggers a reindex; that code is
// not visible in this hunk.
#[test]
|
||||||
|
fn test_reindex_corrupt_segment_files() {
|
||||||
|
let settings = testutils::user_settings();
|
||||||
|
let test_repo = TestRepo::init();
|
||||||
|
let repo = &test_repo.repo;
|

||||||
|
// Write one commit and make sure it is indexed before tampering.
let mut tx = repo.start_transaction(&settings);
|
||||||
|
let commit_a = write_random_commit(tx.mut_repo(), &settings);
|
||||||
|
let repo = tx.commit("test");
|
||||||
|
assert!(repo.index().has_id(commit_a.id()));
|

||||||
|
// Corrupt the index files
|
||||||
|
let segments_dir = repo.repo_path().join("index").join("segments");
|
||||||
|
for entry in segments_dir.read_dir().unwrap() {
|
||||||
|
let entry = entry.unwrap();
|
||||||
|
// The 16 zero bytes written below cover four zeroed u32 header fields.
// A zero version field does not match the current format version (1).
// u32: file format version
|
||||||
|
// u32: parent segment file name length (0 means root)
|
||||||
|
// u32: number of local entries
|
||||||
|
// u32: number of overflow parent entries
|
||||||
|
fs::write(entry.path(), b"\0".repeat(16)).unwrap()
|
||||||
|
}
|

||||||
|
// Reload from disk; the commit must still be reachable through the index.
let repo = load_repo_at_head(&settings, repo.repo_path());
|
||||||
|
assert!(repo.index().has_id(commit_a.id()));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_reindex_from_merged_operation() {
|
fn test_reindex_from_merged_operation() {
|
||||||
let settings = testutils::user_settings();
|
let settings = testutils::user_settings();
|
||||||
|
|
Loading…
Reference in a new issue