From f9e9058b9b0e351da4859aac1a219e180707b986 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Thu, 28 Dec 2023 11:47:23 +0900 Subject: [PATCH] index: show bad operation id if commit lookup failed during reindexing My jj repo contains such head commits, and "jj debug reindex" fails. To address this problem, we'll probably need to implement GC, and the user will discard operations before the first bad op id. --- lib/src/default_index/store.rs | 51 ++++++++++++++++++++----------- lib/tests/test_index.rs | 37 +++++++++++++++++++++- lib/testutils/src/test_backend.rs | 4 +++ 3 files changed, 74 insertions(+), 18 deletions(-) diff --git a/lib/src/default_index/store.rs b/lib/src/default_index/store.rs index 8560fab33..110752f09 100644 --- a/lib/src/default_index/store.rs +++ b/lib/src/default_index/store.rs @@ -15,6 +15,7 @@ #![allow(missing_docs)] use std::any::Any; +use std::collections::HashSet; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -64,8 +65,11 @@ pub enum DefaultIndexStoreError { LoadIndex(ReadonlyIndexLoadError), #[error("Failed to write commit index file: {0}")] SaveIndex(#[source] io::Error), - #[error("Failed to index commits: {0}")] - IndexCommits(#[source] BackendError), + #[error("Failed to index commits at operation {op_id}: {source}", op_id = op_id.hex())] + IndexCommits { + op_id: OperationId, + source: BackendError, + }, #[error(transparent)] OpStore(#[from] OpStoreError), } @@ -142,7 +146,11 @@ impl DefaultIndexStore { let operations_dir = self.dir.join("operations"); let commit_id_length = store.commit_id_length(); let change_id_length = store.change_id_length(); - let mut new_heads = view.heads().clone(); + let mut visited_heads: HashSet = view.heads().clone(); + let mut historical_heads: Vec<(CommitId, OperationId)> = visited_heads + .iter() + .map(|commit_id| (commit_id.clone(), operation.id().clone())) + .collect(); let mut parent_op_id: Option = None; for op in dag_walk::dfs_ok( [Ok(operation.clone())], @@ -158,8 +166,10 @@ impl DefaultIndexStore { parent_op_id = Some(op.id().clone()); } // TODO: no need to walk ancestors of the parent_op_id operation - for head in op.view()?.heads() { - new_heads.insert(head.clone()); + for commit_id in op.view()?.heads() { + if visited_heads.insert(commit_id.clone()) { + historical_heads.push((commit_id.clone(), op.id().clone())); + } } } let maybe_parent_file; @@ -182,7 +192,7 @@ impl DefaultIndexStore { tracing::info!( ?maybe_parent_file, - new_heads_count = new_heads.len(), + heads_count = historical_heads.len(), "indexing commits reachable from historical heads" ); // Build a list of ancestors of heads where parents and predecessors come after @@ -192,23 +202,30 @@ impl DefaultIndexStore { .as_ref() .map_or(false, |segment| segment.as_composite().has_id(id)) }; + let get_commit_with_op = |commit_id: &CommitId, op_id: &OperationId| { + let op_id = op_id.clone(); + match store.get_commit(commit_id) { + // Propagate head's op_id to report possible source of an error. + // The op_id doesn't have to be included in the sort key, but + // that wouldn't matter since the commit should be unique. + Ok(commit) => Ok((CommitByCommitterTimestamp(commit), op_id)), + Err(source) => Err(DefaultIndexStoreError::IndexCommits { op_id, source }), + } + }; let commits = dag_walk::topo_order_reverse_ord_ok( - new_heads + historical_heads .iter() - .filter(|&id| !parent_file_has_id(id)) - .map(|id| store.get_commit(id)) - .map_ok(CommitByCommitterTimestamp), - |CommitByCommitterTimestamp(commit)| commit.id().clone(), - |CommitByCommitterTimestamp(commit)| { + .filter(|&(commit_id, _)| !parent_file_has_id(commit_id)) + .map(|(commit_id, op_id)| get_commit_with_op(commit_id, op_id)), + |(CommitByCommitterTimestamp(commit), _)| commit.id().clone(), + |(CommitByCommitterTimestamp(commit), op_id)| { itertools::chain(commit.parent_ids(), commit.predecessor_ids()) .filter(|&id| !parent_file_has_id(id)) - .map(|id| store.get_commit(id)) - .map_ok(CommitByCommitterTimestamp) + .map(|commit_id| get_commit_with_op(commit_id, op_id)) .collect_vec() }, - ) - .map_err(DefaultIndexStoreError::IndexCommits)?; - for CommitByCommitterTimestamp(commit) in commits.iter().rev() { + )?; + for (CommitByCommitterTimestamp(commit), _) in commits.iter().rev() { mutable_index.add_commit(commit); } diff --git a/lib/tests/test_index.rs b/lib/tests/test_index.rs index 9c6bffccc..d9b57c6d4 100644 --- a/lib/tests/test_index.rs +++ b/lib/tests/test_index.rs @@ -15,15 +15,18 @@ use std::fs; use std::sync::Arc; +use assert_matches::assert_matches; use jj_lib::backend::{CommitId, ObjectId as _}; use jj_lib::commit::Commit; use jj_lib::commit_builder::CommitBuilder; use jj_lib::default_index::{ - AsCompositeIndex as _, CompositeIndex, DefaultMutableIndex, DefaultReadonlyIndex, IndexPosition, + AsCompositeIndex as _, CompositeIndex, DefaultIndexStore, DefaultIndexStoreError, + DefaultMutableIndex, DefaultReadonlyIndex, IndexPosition, }; use jj_lib::index::Index as _; use jj_lib::repo::{MutableRepo, ReadonlyRepo, Repo}; use jj_lib::settings::UserSettings; +use testutils::test_backend::TestBackend; use testutils::{ commit_transactions, create_random_commit, load_repo_at_head, write_random_commit, CommitGraphBuilder, TestRepo, @@ -582,6 +585,38 @@ fn test_reindex_from_merged_operation() { assert_eq!(index.num_commits(), 4); } +#[test] +fn test_reindex_missing_commit() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let missing_commit = write_random_commit(tx.mut_repo(), &settings); + let repo = tx.commit("test"); + let bad_op_id = repo.op_id(); + + let mut tx = repo.start_transaction(&settings); + tx.mut_repo().remove_head(missing_commit.id()); + let repo = tx.commit("test"); + + // Remove historical head commit to simulate bad GC. + let test_backend: &TestBackend = repo.store().backend_impl().downcast_ref().unwrap(); + test_backend.remove_commit_unchecked(missing_commit.id()); + let repo = load_repo_at_head(&settings, repo.repo_path()); // discard cache + assert!(repo.store().get_commit(missing_commit.id()).is_err()); + + // Reindexing error should include the operation id where the commit + // couldn't be found. + let default_index_store: &DefaultIndexStore = + repo.index_store().as_any().downcast_ref().unwrap(); + default_index_store.reinit().unwrap(); + let err = default_index_store + .build_index_at_operation(repo.operation(), repo.store()) + .unwrap_err(); + assert_matches!(err, DefaultIndexStoreError::IndexCommits { op_id, .. } if op_id == *bad_op_id); +} + /// Test that .jj/repo/index/type is created when the repo is created, and that /// it is created when an old repo is loaded. #[test] diff --git a/lib/testutils/src/test_backend.rs b/lib/testutils/src/test_backend.rs index c0657194e..a686f9164 100644 --- a/lib/testutils/src/test_backend.rs +++ b/lib/testutils/src/test_backend.rs @@ -100,6 +100,10 @@ impl TestBackend { fn locked_data(&self) -> MutexGuard<'_, TestBackendData> { self.data.lock().unwrap() } + + pub fn remove_commit_unchecked(&self, id: &CommitId) { + self.locked_data().commits.remove(id); + } } impl Debug for TestBackend {