diff --git a/src/interned.rs b/src/interned.rs index 8a0d8dfa..d7a70deb 100644 --- a/src/interned.rs +++ b/src/interned.rs @@ -494,7 +494,20 @@ where map.retain(|key, intern_index| { let discard = match strategy.discard_if { DiscardIf::Never => false, - DiscardIf::Outdated => match values[intern_index.index()] { + + // NB: Interned keys *never* discard keys unless they + // are outdated, regardless of the sweep strategy. This is + // because interned queries are not deterministic; + // if we were to remove a value from the current revision, + // and the query were later executed again, it would not necessarily + // produce the same intern key the second time. This would wreak + // havoc. See the test `discard_during_same_revision` for an example. + // + // Keys that have not (yet) been accessed during this + // revision don't have this problem. Anything + // dependent on them would regard itself as dirty if + // they are removed and also be forced to re-execute. + DiscardIf::Always | DiscardIf::Outdated => match values[intern_index.index()] { InternValue::Present { accessed_at, .. } => accessed_at < revision_now, InternValue::Free { .. } => { @@ -504,7 +517,6 @@ where ); } }, - DiscardIf::Always => true, }; if discard { diff --git a/tests/gc/db.rs b/tests/gc/db.rs index 777e9f09..b81dae8c 100644 --- a/tests/gc/db.rs +++ b/tests/gc/db.rs @@ -1,7 +1,8 @@ use crate::group; +use crate::interned; use crate::log::{HasLog, Log}; -#[salsa::database(group::Gc)] +#[salsa::database(group::Gc, interned::Intern)] #[derive(Default)] pub(crate) struct DatabaseImpl { runtime: salsa::Runtime, diff --git a/tests/gc/interned.rs b/tests/gc/interned.rs new file mode 100644 index 00000000..ed9a3e6c --- /dev/null +++ b/tests/gc/interned.rs @@ -0,0 +1,57 @@ +use crate::db; +use salsa::{Database, SweepStrategy}; + +#[salsa::query_group(Intern)] +pub(crate) trait InternDatabase { + #[salsa::interned] + fn intern_str(&self, x: &'static str) -> u32; + + fn repeat_intern1(&self, x: &'static str) -> u32; + + fn repeat_intern2(&self, x: &'static str) -> u32; +} + +fn repeat_intern1(db: &impl InternDatabase, x: &'static str) -> u32 { + db.intern_str(x) +} + +fn repeat_intern2(db: &impl InternDatabase, x: &'static str) -> u32 { + db.intern_str(x) +} + +/// This test highlights the difference between *interned queries* and +/// other non-input queries -- in particular, their results are not +/// *deterministic*. Therefore, we cannot GC values that were created +/// in the current revision; that might cause us to re-execute the +/// query twice on the same key during the same revision, which could +/// yield different results each time, wreaking havoc. This test +/// exercises precisely that scenario. +#[test] +fn discard_during_same_revision() { + let db = db::DatabaseImpl::default(); + + // This will assign index 0 for "foo". + let foo1a = db.repeat_intern1("foo"); + + // If we are not careful, this would remove the interned key for + // "foo". + db.query(InternStrQuery).sweep( + SweepStrategy::default() + .discard_everything() + .sweep_all_revisions(), + ); + + // This would then reuse index 0 for "bar". + let bar1 = db.intern_str("bar"); + + // And here we would assign index *1* to "foo". + let foo2 = db.repeat_intern2("foo"); + + // But we would still have a cached result, *from the same + // revision*, with the value 0. So that's inconsistent. + let foo1b = db.repeat_intern1("foo"); + + assert_ne!(foo2, bar1); + assert_eq!(foo1a, foo1b); + assert_eq!(foo1b, foo2); +} diff --git a/tests/gc/main.rs b/tests/gc/main.rs index 1cc88349..ff9dedfe 100644 --- a/tests/gc/main.rs +++ b/tests/gc/main.rs @@ -13,5 +13,6 @@ mod db; mod derived_tests; mod discard_values; mod group; +mod interned; mod log; mod shallow_constant_tests;