diff --git a/CHANGELOG.md b/CHANGELOG.md index 003b3cccc..36fe4f2e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * The minimum supported Rust version (MSRV) is now 1.76.0. +* The on-disk index format changed. New index files will be created + automatically, but this can fail if the repository is co-located and predates + Git GC issues [#815](https://github.com/martinvonz/jj/issues/815). If + reindexing fails, you'll need to clean up the corrupted operation history + with `jj op abandon ..<bad operation ID>`. + ### New features * Templates now support logical operators: `||`, `&&`, `!` diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 7e6734ed7..c1ce03014 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -31,7 +31,9 @@ use tempfile::NamedTempFile; use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment}; use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec}; -use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment}; +use super::readonly::{ + DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION, +}; use crate::backend::{ChangeId, CommitId}; use crate::commit::Commit; use crate::file_util::persist_content_addressed_temp_file; @@ -275,6 +277,7 @@ impl MutableIndexSegment { } let mut buf = Vec::new(); + buf.extend(INDEX_SEGMENT_FILE_FORMAT_VERSION.to_le_bytes()); self.serialize_parent_filename(&mut buf); let local_entries_offset = buf.len(); self.serialize_local_entries(&mut buf); diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index 3bca5639c..1ff1041a9 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -72,6 +72,9 @@ impl ReadonlyIndexLoadError { } } +/// Current format version of the index segment file. 
+pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 1; + struct CommitGraphEntry<'a> { data: &'a [u8], commit_id_length: usize, @@ -145,6 +148,7 @@ impl CommitLookupEntry<'_> { /// /// File format: /// ```text +/// u32: file format version /// u32: parent segment file name length (0 means root) /// : parent segment file name /// @@ -167,7 +171,6 @@ impl CommitLookupEntry<'_> { /// /// Note that u32 fields are 4-byte aligned so long as the parent file name /// (which is hexadecimal hash) and commit/change ids aren't of exotic length. -// TODO: add a version number // TODO: replace the table by a trie so we don't have to repeat the full commit // ids // TODO: add a fanout table like git's commit graph has? @@ -220,6 +223,13 @@ impl ReadonlyIndexSegment { file.read_exact(&mut buf).map_err(from_io_err)?; Ok(u32::from_le_bytes(buf)) }; + let format_version = read_u32(file)?; + if format_version != INDEX_SEGMENT_FILE_FORMAT_VERSION { + return Err(ReadonlyIndexLoadError::invalid_data( + &name, + format!("unsupported file format version: {format_version}"), + )); + } let parent_filename_len = read_u32(file)?; let maybe_parent_file = if parent_filename_len > 0 { let mut parent_filename_bytes = vec![0; parent_filename_len as usize]; diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs index 70ff612cc..ee96793a1 100644 --- a/lib/src/default_index/store.rs +++ b/lib/src/default_index/store.rs @@ -109,6 +109,8 @@ impl DefaultIndexStore { file_util::remove_dir_contents(&self.operations_dir())?; // Remove index segments to save disk space. If raced, new segment file // will be created by the other process. + file_util::remove_dir_contents(&self.segments_dir())?; + // jj <= 0.14 created segment files in the top directory for entry in self.dir.read_dir().context(&self.dir)? 
{ let entry = entry.context(&self.dir)?; let path = entry.path(); @@ -122,14 +124,20 @@ impl DefaultIndexStore { } fn ensure_base_dirs(&self) -> Result<(), PathError> { - let op_dir = self.operations_dir(); - file_util::create_or_reuse_dir(&op_dir).context(&op_dir) + for dir in [self.operations_dir(), self.segments_dir()] { + file_util::create_or_reuse_dir(&dir).context(&dir)?; + } + Ok(()) } fn operations_dir(&self) -> PathBuf { self.dir.join("operations") } + fn segments_dir(&self) -> PathBuf { + self.dir.join("segments") + } + fn load_index_segments_at_operation( &self, op_id: &OperationId, @@ -140,7 +148,7 @@ impl DefaultIndexStore { let index_file_id_hex = fs::read_to_string(op_id_file).map_err(DefaultIndexStoreError::LoadAssociation)?; ReadonlyIndexSegment::load( - &self.dir, + &self.segments_dir(), index_file_id_hex, commit_id_length, change_id_length, @@ -271,7 +279,7 @@ impl DefaultIndexStore { op_id: &OperationId, ) -> Result, DefaultIndexStoreError> { let index_segment = mutable_index - .squash_and_save_in(&self.dir) + .squash_and_save_in(&self.segments_dir()) .map_err(DefaultIndexStoreError::SaveIndex)?; self.associate_file_with_operation(&index_segment, op_id) .map_err(|source| DefaultIndexStoreError::AssociateIndex { diff --git a/lib/tests/test_index.rs b/lib/tests/test_index.rs index 3abc321ab..da5c1ba9e 100644 --- a/lib/tests/test_index.rs +++ b/lib/tests/test_index.rs @@ -294,10 +294,9 @@ fn test_index_commits_previous_operations() { let repo = tx.commit("test"); // Delete index from disk - let index_operations_dir = repo.repo_path().join("index").join("operations"); - assert!(index_operations_dir.is_dir()); - std::fs::remove_dir_all(&index_operations_dir).unwrap(); - std::fs::create_dir(&index_operations_dir).unwrap(); + let default_index_store: &DefaultIndexStore = + repo.index_store().as_any().downcast_ref().unwrap(); + default_index_store.reinit().unwrap(); let repo = load_repo_at_head(&settings, repo.repo_path()); let index = 
as_readonly_composite(&repo); @@ -586,6 +585,52 @@ fn test_index_commits_incremental_squashed() { assert_eq!(commits_by_level(&repo), vec![71, 20]); } +#[test] +fn test_reindex_no_segments_dir() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let commit_a = write_random_commit(tx.mut_repo(), &settings); + let repo = tx.commit("test"); + assert!(repo.index().has_id(commit_a.id())); + + // jj <= 0.14 doesn't have "segments" directory + let segments_dir = repo.repo_path().join("index").join("segments"); + assert!(segments_dir.is_dir()); + fs::remove_dir_all(&segments_dir).unwrap(); + + let repo = load_repo_at_head(&settings, repo.repo_path()); + assert!(repo.index().has_id(commit_a.id())); +} + +#[test] +fn test_reindex_corrupt_segment_files() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let commit_a = write_random_commit(tx.mut_repo(), &settings); + let repo = tx.commit("test"); + assert!(repo.index().has_id(commit_a.id())); + + // Corrupt the index files + let segments_dir = repo.repo_path().join("index").join("segments"); + for entry in segments_dir.read_dir().unwrap() { + let entry = entry.unwrap(); + // u32: file format version + // u32: parent segment file name length (0 means root) + // u32: number of local entries + // u32: number of overflow parent entries + fs::write(entry.path(), b"\0".repeat(16)).unwrap() + } + + let repo = load_repo_at_head(&settings, repo.repo_path()); + assert!(repo.index().has_id(commit_a.id())); +} + #[test] fn test_reindex_from_merged_operation() { let settings = testutils::user_settings();