First draft of graceful corruption restoration

This commit is contained in:
Mikayla Maki 2022-12-01 14:16:38 -08:00
parent b8d423555b
commit 189a820113
3 changed files with 103 additions and 26 deletions

View file

@ -2,6 +2,7 @@ pub mod kvp;
// Re-export // Re-export
pub use anyhow; pub use anyhow;
use anyhow::Context;
pub use indoc::indoc; pub use indoc::indoc;
pub use lazy_static; pub use lazy_static;
pub use smol; pub use smol;
@ -14,9 +15,13 @@ use sqlez_macros::sql;
use std::fs::{create_dir_all, remove_dir_all}; use std::fs::{create_dir_all, remove_dir_all};
use std::path::Path; use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use util::{async_iife, ResultExt};
use util::channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME}; use util::channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
use util::paths::DB_DIR; use util::paths::DB_DIR;
// TODO: Add a savepoint to the thread safe connection initialization and migrations
const CONNECTION_INITIALIZE_QUERY: &'static str = sql!( const CONNECTION_INITIALIZE_QUERY: &'static str = sql!(
PRAGMA synchronous=NORMAL; PRAGMA synchronous=NORMAL;
PRAGMA busy_timeout=1; PRAGMA busy_timeout=1;
@ -28,31 +33,90 @@ const DB_INITIALIZE_QUERY: &'static str = sql!(
PRAGMA journal_mode=WAL; PRAGMA journal_mode=WAL;
); );
const FALLBACK_DB_NAME: &'static str = "FALLBACK_MEMORY_DB";
lazy_static::lazy_static! { lazy_static::lazy_static! {
static ref DB_WIPED: AtomicBool = AtomicBool::new(false); static ref DB_WIPED: AtomicBool = AtomicBool::new(false);
} }
/// Open or create a database at the given directory path. /// Open or create a database at the given directory path.
pub async fn open_db<M: Migrator>() -> ThreadSafeConnection<M> { pub async fn open_db<M: Migrator>() -> ThreadSafeConnection<M> {
// Use 0 for now. Will implement incrementing and clearing of old db files soon TM let db_dir = (*DB_DIR).join(Path::new(&format!("0-{}", *RELEASE_CHANNEL_NAME)));
let current_db_dir = (*DB_DIR).join(Path::new(&format!("0-{}", *RELEASE_CHANNEL_NAME)));
// If WIPE_DB, delete 0-{channel}
if *RELEASE_CHANNEL == ReleaseChannel::Dev if *RELEASE_CHANNEL == ReleaseChannel::Dev
&& std::env::var("WIPE_DB").is_ok() && std::env::var("WIPE_DB").is_ok()
&& !DB_WIPED.load(Ordering::Acquire) && !DB_WIPED.load(Ordering::Acquire)
{ {
remove_dir_all(&current_db_dir).ok(); remove_dir_all(&db_dir).ok();
DB_WIPED.store(true, Ordering::Relaxed); DB_WIPED.store(true, Ordering::Release);
} }
create_dir_all(&current_db_dir).expect("Should be able to create the database directory"); let connection = async_iife!({
let db_path = current_db_dir.join(Path::new("db.sqlite")); // If no db folder, create one at 0-{channel}
create_dir_all(&db_dir).context("Could not create db directory")?;
let db_path = db_dir.join(Path::new("db.sqlite"));
ThreadSafeConnection::<M>::builder(db_path.to_string_lossy().as_ref(), true) // Try building a connection
if let Some(connection) = ThreadSafeConnection::<M>::builder(db_path.to_string_lossy().as_ref(), true)
.with_db_initialization_query(DB_INITIALIZE_QUERY)
.with_connection_initialize_query(CONNECTION_INITIALIZE_QUERY)
.build()
.await
.log_err() {
return Ok(connection)
}
let backup_timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect(
"System clock is set before the unix timestamp, Zed does not support this region of spacetime"
)
.as_millis();
// If failed, move 0-{channel} to {current unix timestamp}-{channel}
let backup_db_dir = (*DB_DIR).join(Path::new(&format!(
"{}{}",
backup_timestamp,
*RELEASE_CHANNEL_NAME
)));
std::fs::rename(&db_dir, backup_db_dir)
.context("Failed clean up corrupted database, panicking.")?;
// TODO: Set a constant with the failed timestamp and error so we can notify the user
// Create a new 0-{channel}
create_dir_all(&db_dir).context("Should be able to create the database directory")?;
let db_path = db_dir.join(Path::new("db.sqlite"));
// Try again
ThreadSafeConnection::<M>::builder(db_path.to_string_lossy().as_ref(), true)
.with_db_initialization_query(DB_INITIALIZE_QUERY)
.with_connection_initialize_query(CONNECTION_INITIALIZE_QUERY)
.build()
.await
}).await.log_err();
if let Some(connection) = connection {
return connection;
}
// TODO: Set another constant so that we can escalate the notification
// If still failed, create an in memory db with a known name
open_fallback_db().await
}
async fn open_fallback_db<M: Migrator>() -> ThreadSafeConnection<M> {
ThreadSafeConnection::<M>::builder(FALLBACK_DB_NAME, false)
.with_db_initialization_query(DB_INITIALIZE_QUERY) .with_db_initialization_query(DB_INITIALIZE_QUERY)
.with_connection_initialize_query(CONNECTION_INITIALIZE_QUERY) .with_connection_initialize_query(CONNECTION_INITIALIZE_QUERY)
.build() .build()
.await .await
.expect(
"Fallback in memory database failed. Likely initialization queries or migrations have fundamental errors",
)
} }
#[cfg(any(test, feature = "test-support"))] #[cfg(any(test, feature = "test-support"))]
@ -66,6 +130,7 @@ pub async fn open_test_db<M: Migrator>(db_name: &str) -> ThreadSafeConnection<M>
.with_write_queue_constructor(locking_queue()) .with_write_queue_constructor(locking_queue())
.build() .build()
.await .await
.unwrap()
} }
/// Implements a basic DB wrapper for a given domain /// Implements a basic DB wrapper for a given domain

View file

@ -1,3 +1,4 @@
use anyhow::Context;
use futures::{channel::oneshot, Future, FutureExt}; use futures::{channel::oneshot, Future, FutureExt};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use parking_lot::{Mutex, RwLock}; use parking_lot::{Mutex, RwLock};
@ -72,7 +73,7 @@ impl<M: Migrator> ThreadSafeConnectionBuilder<M> {
self self
} }
pub async fn build(self) -> ThreadSafeConnection<M> { pub async fn build(self) -> anyhow::Result<ThreadSafeConnection<M>> {
self.connection self.connection
.initialize_queues(self.write_queue_constructor); .initialize_queues(self.write_queue_constructor);
@ -81,26 +82,33 @@ impl<M: Migrator> ThreadSafeConnectionBuilder<M> {
self.connection self.connection
.write(move |connection| { .write(move |connection| {
if let Some(db_initialize_query) = db_initialize_query { if let Some(db_initialize_query) = db_initialize_query {
connection.exec(db_initialize_query).expect(&format!( connection.exec(db_initialize_query).with_context(|| {
"Db initialize query failed to execute: {}", format!(
db_initialize_query "Db initialize query failed to execute: {}",
))() db_initialize_query
.unwrap(); )
})?()?;
} }
let mut failure_result = None; // Retry failed migrations in case they were run in parallel from different
// processes. This gives a best attempt at migrating before bailing
let mut migration_result =
anyhow::Result::<()>::Err(anyhow::anyhow!("Migration never run"));
for _ in 0..MIGRATION_RETRIES { for _ in 0..MIGRATION_RETRIES {
failure_result = Some(M::migrate(connection)); migration_result = connection
if failure_result.as_ref().unwrap().is_ok() { .with_savepoint("thread_safe_multi_migration", || M::migrate(connection));
if migration_result.is_ok() {
break; break;
} }
} }
failure_result.unwrap().expect("Migration failed"); migration_result
}) })
.await; .await?;
self.connection Ok(self.connection)
} }
} }
@ -240,10 +248,6 @@ impl<D: Domain> Clone for ThreadSafeConnection<D> {
} }
} }
// TODO:
// 1. When migration or initialization fails, move the corrupted db to a holding place and create a new one
// 2. If the new db also fails, downgrade to a shared in memory db
// 3. In either case notify the user about what went wrong
impl<M: Migrator> Deref for ThreadSafeConnection<M> { impl<M: Migrator> Deref for ThreadSafeConnection<M> {
type Target = Connection; type Target = Connection;
@ -265,7 +269,7 @@ pub fn locking_queue() -> WriteQueueConstructor {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use indoc::indoc; use indoc::indoc;
use lazy_static::__Deref; use std::ops::Deref;
use std::thread; use std::thread;
use crate::{domain::Domain, thread_safe_connection::ThreadSafeConnection}; use crate::{domain::Domain, thread_safe_connection::ThreadSafeConnection};
@ -295,7 +299,8 @@ mod test {
PRAGMA foreign_keys=TRUE; PRAGMA foreign_keys=TRUE;
PRAGMA case_sensitive_like=TRUE; PRAGMA case_sensitive_like=TRUE;
"}); "});
let _ = smol::block_on(builder.build()).deref();
let _ = smol::block_on(builder.build()).unwrap().deref();
})); }));
} }
@ -341,6 +346,6 @@ mod test {
ThreadSafeConnection::<TestWorkspace>::builder("wild_zed_lost_failure", false) ThreadSafeConnection::<TestWorkspace>::builder("wild_zed_lost_failure", false)
.with_connection_initialize_query("PRAGMA FOREIGN_KEYS=true"); .with_connection_initialize_query("PRAGMA FOREIGN_KEYS=true");
smol::block_on(builder.build()); smol::block_on(builder.build()).unwrap();
} }
} }

View file

@ -223,6 +223,13 @@ macro_rules! iife {
}; };
} }
#[macro_export]
macro_rules! async_iife {
($block:block) => {
(|| async move { $block })()
};
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;