index: transparently reindex if the index is corrupt

I'm about to change the index format (to remove predecessor
information), which will break existing index files. Let's prepare for
that by having `IndexStore` reindex the repo if it fails to read the index.
This commit is contained in:
Martin von Zweigbergk 2021-10-16 23:57:59 -07:00
parent 6722a79e27
commit 0354b8721b
2 changed files with 35 additions and 7 deletions

View file

@ -29,6 +29,7 @@ use blake2::{Blake2b, Digest};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use itertools::Itertools; use itertools::Itertools;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
use thiserror::Error;
use crate::backend::{ChangeId, CommitId}; use crate::backend::{ChangeId, CommitId};
use crate::commit::Commit; use crate::commit::Commit;
@ -219,6 +220,14 @@ impl CommitLookupEntry<'_> {
} }
} }
/// Errors that can occur while loading an index file.
///
/// Distinguishing corruption from plain I/O failures lets callers react
/// differently to each, e.g. by rebuilding the index when it is corrupt.
#[derive(Error, Debug)]
pub enum IndexLoadError {
    /// The index file's contents did not have the expected layout (for
    /// example, the data length did not match the size computed from the
    /// header). The payload is the name of the offending index file.
    #[error("Index file '{0}' is corrupt.")]
    IndexCorrupt(String),
    /// An underlying I/O operation failed while reading the index file.
    /// Converted automatically from `io::Error` via `#[from]`, so `?` can be
    /// used on I/O results inside functions returning this error type.
    #[error("I/O error while loading index file: {0}")]
    IoError(#[from] io::Error),
}
// File format: // File format:
// u32: number of entries // u32: number of entries
// u32: number of parent overflow entries // u32: number of parent overflow entries
@ -612,7 +621,14 @@ impl MutableIndex {
persist_content_addressed_temp_file(temp_file, &index_file_path)?; persist_content_addressed_temp_file(temp_file, &index_file_path)?;
let mut cursor = Cursor::new(&buf); let mut cursor = Cursor::new(&buf);
ReadonlyIndex::load_from(&mut cursor, dir, index_file_id_hex, hash_length) ReadonlyIndex::load_from(&mut cursor, dir, index_file_id_hex, hash_length).map_err(|err| {
match err {
IndexLoadError::IndexCorrupt(err) => {
panic!("Just-created index file is corrupt: {}", err)
}
IndexLoadError::IoError(err) => err,
}
})
} }
pub fn num_commits(&self) -> u32 { pub fn num_commits(&self) -> u32 {
@ -1389,7 +1405,7 @@ impl ReadonlyIndex {
dir: PathBuf, dir: PathBuf,
name: String, name: String,
hash_length: usize, hash_length: usize,
) -> io::Result<Arc<ReadonlyIndex>> { ) -> Result<Arc<ReadonlyIndex>, IndexLoadError> {
let parent_filename_len = file.read_u32::<LittleEndian>()?; let parent_filename_len = file.read_u32::<LittleEndian>()?;
let num_parent_commits; let num_parent_commits;
let maybe_parent_file; let maybe_parent_file;
@ -1420,7 +1436,9 @@ impl ReadonlyIndex {
let predecessor_overflow_size = (num_predecessor_overflow_entries as usize) * 4; let predecessor_overflow_size = (num_predecessor_overflow_entries as usize) * 4;
let expected_size = let expected_size =
graph_size + lookup_size + parent_overflow_size + predecessor_overflow_size; graph_size + lookup_size + parent_overflow_size + predecessor_overflow_size;
assert_eq!(data.len(), expected_size); if data.len() != expected_size {
return Err(IndexLoadError::IndexCorrupt(name));
}
let overflow_predecessor = data.split_off(graph_size + lookup_size + parent_overflow_size); let overflow_predecessor = data.split_off(graph_size + lookup_size + parent_overflow_size);
let overflow_parent = data.split_off(graph_size + lookup_size); let overflow_parent = data.split_off(graph_size + lookup_size);
let lookup = data.split_off(graph_size); let lookup = data.split_off(graph_size);

View file

@ -26,7 +26,7 @@ use crate::backend::CommitId;
use crate::commit::Commit; use crate::commit::Commit;
use crate::dag_walk; use crate::dag_walk;
use crate::file_util::persist_content_addressed_temp_file; use crate::file_util::persist_content_addressed_temp_file;
use crate::index::{MutableIndex, ReadonlyIndex}; use crate::index::{IndexLoadError, MutableIndex, ReadonlyIndex};
use crate::op_store::OperationId; use crate::op_store::OperationId;
use crate::operation::Operation; use crate::operation::Operation;
use crate::store::Store; use crate::store::Store;
@ -54,8 +54,18 @@ impl IndexStore {
let op_id_hex = op.id().hex(); let op_id_hex = op.id().hex();
let op_id_file = self.dir.join("operations").join(&op_id_hex); let op_id_file = self.dir.join("operations").join(&op_id_hex);
if op_id_file.exists() { if op_id_file.exists() {
self.load_index_at_operation(store.hash_length(), op.id()) match self.load_index_at_operation(store.hash_length(), op.id()) {
.unwrap() Err(IndexLoadError::IndexCorrupt(_)) => {
// If the index was corrupt (maybe it was written in a different format),
// we just reindex.
// TODO: Move this message to a callback or something.
println!("The index was corrupt (maybe the format has changed). Reindexing...");
std::fs::remove_dir_all(self.dir.join("operations")).unwrap();
std::fs::create_dir(self.dir.join("operations")).unwrap();
self.index_at_operation(store, op).unwrap()
}
result => result.unwrap(),
}
} else { } else {
self.index_at_operation(store, op).unwrap() self.index_at_operation(store, op).unwrap()
} }
@ -69,7 +79,7 @@ impl IndexStore {
&self, &self,
hash_length: usize, hash_length: usize,
op_id: &OperationId, op_id: &OperationId,
) -> io::Result<Arc<ReadonlyIndex>> { ) -> Result<Arc<ReadonlyIndex>, IndexLoadError> {
let op_id_file = self.dir.join("operations").join(op_id.hex()); let op_id_file = self.dir.join("operations").join(op_id.hex());
let mut buf = vec![]; let mut buf = vec![];
File::open(op_id_file) File::open(op_id_file)