Simplify interning by restricting deletes

This commit is contained in:
Jack Rickard 2022-09-25 22:33:37 +01:00
parent 79823c7f65
commit c58ff26d87
No known key found for this signature in database
GPG key ID: 88084D7D08A72C8A

View file

@ -32,12 +32,12 @@ pub struct InternedIngredient<Id: InternedId, Data: InternedData> {
/// Maps from data to the existing interned id for that data. /// Maps from data to the existing interned id for that data.
/// ///
/// Deadlock requirement: We access `key_map` while holding lock on `value_map`, but not vice versa. /// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
key_map: FxDashMap<Data, Id>, key_map: FxDashMap<Data, Id>,
/// Maps from an interned id to its data. /// Maps from an interned id to its data.
/// ///
/// Deadlock requirement: We access `key_map` while holding lock on `value_map`, but not vice versa. /// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
value_map: FxDashMap<Id, Box<Data>>, value_map: FxDashMap<Id, Box<Data>>,
/// counter for the next id. /// counter for the next id.
@ -89,31 +89,18 @@ where
return *id; return *id;
} }
let next_id = self.counter.fetch_add(1); match self.key_map.entry(data.clone()) {
let next_id = Id::from_id(crate::id::Id::from_u32(next_id));
// Insert the data into the value map. This temporarily breaks the
// invariant that value_map and key_map are opposites, this is fine
// because bno-one has the value next_id so it's not observable.
let old_value = self.value_map.insert(next_id, Box::new(data.clone()));
assert!(
old_value.is_none(),
"next_id is guaranteed to be unique, bar overflow"
);
match self.key_map.entry(data) {
// Data has been interned by a racing call, use that ID instead // Data has been interned by a racing call, use that ID instead
dashmap::mapref::entry::Entry::Occupied(entry) => { dashmap::mapref::entry::Entry::Occupied(entry) => *entry.get(),
// Return the interned ID set by the thread that beat us here. // We won any races so should intern the data
let id = *entry.get();
// Release the lock on key_map so we don't deadlock.
drop(entry);
// Fixup the broken invariant by removing the vale we just
// optimistically added.
self.value_map.remove(&next_id);
id
}
// We won any races so can complete the interning
dashmap::mapref::entry::Entry::Vacant(entry) => { dashmap::mapref::entry::Entry::Vacant(entry) => {
// Fixup the broken invariant by adding next_id to the key_map. let next_id = self.counter.fetch_add(1);
let next_id = Id::from_id(crate::id::Id::from_u32(next_id));
let old_value = self.value_map.insert(next_id, Box::new(data));
assert!(
old_value.is_none(),
"next_id is guaranteed to be unique, bar overflow"
);
entry.insert(next_id); entry.insert(next_id);
next_id next_id
} }
@ -161,33 +148,32 @@ where
/// ///
/// # Warning /// # Warning
/// ///
/// This should only be used when you are certain that the given `id` has not (and will not) /// This should only be used when you are certain that:
/// be used in the current revision. More specifically, this is used when a query `Q` executes /// 1. The given `id` has not (and will not) be used in the current revision.
/// and we can compare the entities `E_now` that it produced in this revision vs the entities /// 2. The interned data corresponding to `id` will not be interned in this revision.
/// `E_prev` it produced in the last revision. Any missing entities `E_prev - E_new` can be ///
/// deleted. /// More specifically, this is used when a query `Q` executes and we can compare the
/// entities `E_now` that it produced in this revision vs the entities `E_prev` it
/// produced in the last revision. Any missing entities `E_prev - E_new` can be deleted.
/// ///
/// If you are wrong about this, it should not be unsafe, but unpredictable results may occur. /// If you are wrong about this, it should not be unsafe, but unpredictable results may occur.
pub(crate) fn delete_index(&self, id: Id) { pub(crate) fn delete_index(&self, id: Id) {
match self.value_map.entry(id) { let (_, key) = self
dashmap::mapref::entry::Entry::Vacant(_) => { .value_map
panic!("No entry for id `{:?}`", id); .remove(&id)
} .unwrap_or_else(|| panic!("No entry for id `{:?}`", id));
dashmap::mapref::entry::Entry::Occupied(entry) => {
self.key_map.remove(entry.get());
// Careful: even though `id` ought not to have been used in this revision, self.key_map.remove(&key);
// we don't know that for sure since users could have leaked things. If they did, // Careful: even though `id` ought not to have been used in this revision,
// they may have stray references into `data`. So push the box onto the // we don't know that for sure since users could have leaked things. If they did,
// "to be deleted" queue. // they may have stray references into `data`. So push the box onto the
// // "to be deleted" queue.
// To avoid this, we could include some kind of atomic counter in the `Box` that //
// gets set whenever `data` executes, so we can track if the data was accessed since // To avoid this, we could include some kind of atomic counter in the `Box` that
// the last time an `&mut self` method was called. But that'd take extra storage // gets set whenever `data` executes, so we can track if the data was accessed since
// and doesn't obviously seem worth it. // the last time an `&mut self` method was called. But that'd take extra storage
self.deleted_entries.push(entry.remove()); // and doesn't obviously seem worth it.
} self.deleted_entries.push(key);
}
} }
pub(crate) fn clear_deleted_indices(&mut self) { pub(crate) fn clear_deleted_indices(&mut self) {