switch to the "random LRU" strategy

Niko Matsakis 2019-06-15 10:39:34 -04:00
parent d80b3dd879
commit 7988b5e295
5 changed files with 354 additions and 242 deletions
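
The diff replaces the doubly-linked LRU list with a probabilistic, zone-based structure: all cached slots live in a flat `Vec` that is split into a small green zone (the hottest entries), a yellow zone, and a red zone. Recording a use promotes a slot toward the green zone by swapping places with a randomly chosen occupant, and inserting into a full list evicts a randomly chosen red-zone entry; membership is tracked per slot by an atomic index, with `std::usize::MAX` meaning "not in the list". As a standalone sketch (not part of the diff; the helper name `zone_sizes` is illustrative), the zone-sizing arithmetic from `set_lru_capacity` works out like this:

// Sketch of the zone-sizing arithmetic used by `Lru::set_lru_capacity` below.
// Returns (green, yellow, red) zone lengths for a requested capacity.
fn zone_sizes(len: usize) -> (usize, usize, usize) {
    if len == 0 {
        return (0, 0, 0); // a capacity of 0 disables LRU caching entirely
    }
    // Each zone needs at least one slot, so the effective length is >= 3.
    let len = std::cmp::max(len, 3);
    let green = std::cmp::max(len / 10, 1); // hottest ~10%
    let yellow = std::cmp::max(len / 5, 1); // next ~20%
    let red = len - yellow - green;         // remaining ~70%
    (green, yellow, red)
}

fn main() {
    // A capacity of 100 splits into 10 green, 20 yellow, and 70 red slots.
    assert_eq!(zone_sizes(100), (10, 20, 70));
    // Tiny capacities are clamped so that every zone keeps at least one slot.
    assert_eq!(zone_sizes(1), (1, 1, 1));
}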

View file

@@ -9,20 +9,19 @@ description = "A generic framework for on-demand, incrementalized computation (e
readme = "README.md"
[dependencies]
arc-swap = "0.3"
derive-new = "0.5.5"
indexmap = "1.0.1"
linked-hash-map = "0.5.2"
lock_api = "0.2.0"
log = "0.4.5"
parking_lot = "0.8.0"
rustc-hash = "1.0"
smallvec = "0.6.5"
salsa-macros = { version = "0.13.0", path = "components/salsa-macros" }
rand = "0.6"
[dev-dependencies]
diff = "0.1.0"
env_logger = "0.5.13"
rand = "0.5.5"
linked-hash-map = "0.5.2"
[workspace]

View file

@@ -8,13 +8,9 @@ use crate::plumbing::QueryStorageOps;
use crate::runtime::Revision;
use crate::runtime::StampedValue;
use crate::{Database, SweepStrategy};
use linked_hash_map::LinkedHashMap;
use parking_lot::Mutex;
use parking_lot::RwLock;
use rustc_hash::{FxHashMap, FxHasher};
use std::hash::BuildHasherDefault;
use rustc_hash::FxHashMap;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
mod slot;
@@ -38,10 +34,7 @@ where
DB: Database,
MP: MemoizationPolicy<DB, Q>,
{
// `lru_cap` logically belongs to `QueryMap`, but we store it outside, so
// that we can read it without acquiring the lock.
lru_cap: AtomicUsize,
lru_list: Mutex<Lru<Slot<DB, Q, MP>>>,
lru_list: Lru<Slot<DB, Q, MP>>,
slot_map: RwLock<FxHashMap<Q::Key, Arc<Slot<DB, Q, MP>>>>,
policy: PhantomData<MP>,
}
@@ -97,8 +90,6 @@ where
}
}
type LinkedHashSet<T> = LinkedHashMap<T, (), BuildHasherDefault<FxHasher>>;
impl<DB, Q, MP> Default for DerivedStorage<DB, Q, MP>
where
Q: QueryFunction<DB>,
@@ -107,9 +98,8 @@ where
{
fn default() -> Self {
DerivedStorage {
lru_cap: AtomicUsize::new(0),
slot_map: RwLock::new(FxHashMap::default()),
lru_list: Mutex::default(),
lru_list: Default::default(),
policy: PhantomData,
}
}
@@ -151,12 +141,8 @@ where
let slot = self.slot(key, database_key);
let StampedValue { value, changed_at } = slot.read(db)?;
let lru_cap = self.lru_cap.load(Ordering::Relaxed);
if lru_cap > 0 {
let evicted = self.lru_list.lock().record_use(slot, lru_cap);
if let Some(evicted) = evicted {
evicted.evict();
}
if let Some(evicted) = self.lru_list.record_use(&slot) {
evicted.evict();
}
db.salsa_runtime()
@@ -214,16 +200,6 @@ where
MP: MemoizationPolicy<DB, Q>,
{
fn set_lru_capacity(&self, new_capacity: usize) {
let mut lru_list = self.lru_list.lock();
if new_capacity == 0 {
lru_list.clear();
} else {
while lru_list.len() > new_capacity {
if let Some(evicted) = lru_list.pop_lru() {
evicted.evict();
}
}
}
self.lru_cap.store(new_capacity, Ordering::SeqCst);
self.lru_list.set_lru_capacity(new_capacity);
}
}

View file

@@ -1,6 +1,6 @@
use crate::debug::TableEntry;
use crate::derived::MemoizationPolicy;
use crate::lru::LruLinks;
use crate::lru::LruIndex;
use crate::lru::LruNode;
use crate::plumbing::CycleDetected;
use crate::plumbing::DatabaseKey;
@@ -36,7 +36,7 @@ where
database_key: DB::DatabaseKey,
state: RwLock<QueryState<DB, Q>>,
policy: PhantomData<MP>,
lru_links: LruLinks<Self>,
lru_index: LruIndex,
}
/// Defines the "current state" of query's memoized results.
@@ -111,7 +111,7 @@ where
key,
database_key,
state: RwLock::new(QueryState::NotComputed),
lru_links: LruLinks::default(),
lru_index: LruIndex::default(),
policy: PhantomData,
}
}
@@ -917,7 +917,7 @@ where
DB: Database,
MP: MemoizationPolicy<DB, Q>,
{
fn links(&self) -> &LruLinks<Self> {
&self.lru_links
fn lru_index(&self) -> &LruIndex {
&self.lru_index
}
}

View file

@@ -1,6 +1,11 @@
use arc_swap::ArcSwapOption;
use arc_swap::Lease;
use parking_lot::Mutex;
use rand::rngs::SmallRng;
use rand::FromEntropy;
use rand::Rng;
use rand::SeedableRng;
use std::fmt::Debug;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
mod test;
@@ -23,18 +28,28 @@ pub(crate) struct Lru<Node>
where
Node: LruNode,
{
len: usize,
head: Option<Arc<Node>>,
tail: Option<Arc<Node>>,
green_zone: AtomicUsize,
data: Mutex<LruData<Node>>,
}
#[derive(Debug)]
struct LruData<Node> {
end_red_zone: usize,
end_yellow_zone: usize,
end_green_zone: usize,
rng: SmallRng,
entries: Vec<Arc<Node>>,
}
pub(crate) trait LruNode: Sized + Debug {
fn links(&self) -> &LruLinks<Self>;
fn lru_index(&self) -> &LruIndex;
}
pub(crate) struct LruLinks<Node> {
prev: ArcSwapOption<Node>,
next: ArcSwapOption<Node>,
#[derive(Debug)]
pub(crate) struct LruIndex {
/// Index in the appropriate LRU list, or std::usize::MAX if not a
/// member.
index: AtomicUsize,
}
impl<Node> Default for Lru<Node>
@@ -42,20 +57,7 @@ where
Node: LruNode,
{
fn default() -> Self {
Lru {
len: 0,
head: None,
tail: None,
}
}
}
impl<Node> Drop for Lru<Node>
where
Node: LruNode,
{
fn drop(&mut self) {
self.clear();
Lru::new()
}
}
@@ -63,115 +65,278 @@ impl<Node> Lru<Node>
where
Node: LruNode,
{
/// Removes everything from the list.
pub fn clear(&mut self) {
// Not terribly efficient at the moment.
while self.pop_lru().is_some() {}
/// Creates a new LRU list where LRU caching is disabled.
pub fn new() -> Self {
Lru {
green_zone: AtomicUsize::new(0),
data: Mutex::new(LruData::new()),
}
}
/// Current number of entries
pub fn len(&self) -> usize {
self.len
#[cfg_attr(not(test), allow(dead_code))]
fn with_seed(seed: &str) -> Self {
Lru {
green_zone: AtomicUsize::new(0),
data: Mutex::new(LruData::with_seed(seed)),
}
}
pub fn record_use(&mut self, node: Arc<Node>, capacity: usize) -> Option<Arc<Node>> {
if self.promote(node) > capacity {
return self.pop_lru();
/// Adjust the total number of nodes permitted to have a value at
/// once. If `len` is zero, this disables LRU caching completely.
pub fn set_lru_capacity(&self, len: usize) {
let mut data = self.data.lock();
// We require each zone to have at least 1 slot. Therefore,
// the length cannot be just 1 or 2.
if len == 0 {
self.green_zone.store(0, Ordering::Release);
data.resize(0, 0, 0);
} else {
None
let len = std::cmp::max(len, 3);
// Top 10% is the green zone. This must be at least length 1.
let green_zone = std::cmp::max(len / 10, 1);
// Next 20% is the yellow zone.
let yellow_zone = std::cmp::max(len / 5, 1);
// Remaining 70% is the red zone.
let red_zone = len - yellow_zone - green_zone;
// We need quick access to the green zone.
self.green_zone.store(green_zone, Ordering::Release);
// Resize existing array.
data.resize(green_zone, yellow_zone, red_zone);
}
}
/// Removes the least-recently-used item in the list.
pub fn pop_lru(&mut self) -> Option<Arc<Node>> {
log::debug!("pop_lru(self={:?})", self);
let node = self.tail.take()?;
debug_assert!(node.links().next.load().is_none());
self.tail = node.links().prev.swap(None);
if let Some(new_tail) = &self.tail {
new_tail.links().next.store(None);
self.len -= 1;
} else {
self.head = None;
}
Some(node)
}
/// Records that `node` was used. This may displace an old node (if the LRU limits are exceeded).
pub fn record_use(&self, node: &Arc<Node>) -> Option<Arc<Node>> {
log::debug!("record_use(node={:?})", node);
/// Makes `node` the most-recently-used item in the list, adding
/// it to the list if it was not already a member.
pub fn promote(&mut self, node: Arc<Node>) -> usize {
log::debug!("promote(node={:?})", node);
let node = node.clone();
let node_links = node.links();
// First: check if the node is already in the linked list and has neighbors.
// If so, let's unlink it.
{
let old_prev = node_links.prev.lease().into_option();
let old_next = node_links.next.lease().into_option();
log::debug!("promote: old_prev={:?}", old_prev);
log::debug!("promote: old_next={:?}", old_next);
match (old_prev, old_next) {
(Some(old_prev), Some(old_next)) => {
// Node is in the middle of the list.
old_prev.links().next.store(Some(Lease::upgrade(&old_next)));
old_next
.links()
.prev
.store(Some(Lease::into_upgrade(old_prev)));
self.len -= 1;
}
(None, Some(_)) => {
// Node is already at the head of the list. Nothing to do here.
return self.len;
}
(Some(old_prev), None) => {
// Node is at the tail of the (non-empty) list.
old_prev.links().next.store(None);
self.tail = Some(Lease::into_upgrade(old_prev));
self.len -= 1;
}
(None, None) => {
// Node is either not in the list *or* at the head of a singleton list.
if let Some(head) = &self.head {
if Arc::ptr_eq(head, &node) {
// Node is at the head.
return self.len;
}
}
}
}
// Load green zone length and check if the LRU cache is even enabled.
let green_zone = self.green_zone.load(Ordering::Acquire);
log::debug!("record_use: green_zone={}", green_zone);
if green_zone == 0 {
return None;
}
// At this point, the node's links are stale but the node is not a member
// of the list.
let current_head: Option<Arc<Node>> = self.head.clone();
if let Some(current_head) = &current_head {
current_head.links().prev.store(Some(node.clone()));
// Find current index of list (if any) and the current length
// of our green zone.
let index = node.lru_index().load();
log::debug!("record_use: index={}", index);
// Already a member of the list, and in the green zone -- nothing to do!
if index < green_zone {
return None;
}
node_links.next.store(current_head);
node_links.prev.store(None);
if self.len == 0 {
self.tail = Some(node.clone());
}
self.head = Some(node);
self.len += 1;
return self.len;
self.data.lock().record_use(node)
}
}
impl<Node> Default for LruLinks<Node> {
impl<Node> LruData<Node>
where
Node: LruNode,
{
fn new() -> Self {
Self::with_rng(SmallRng::from_entropy())
}
#[cfg_attr(not(test), allow(dead_code))]
fn with_seed(seed_str: &str) -> Self {
Self::with_rng(rng_with_seed(seed_str))
}
fn with_rng(rng: SmallRng) -> Self {
LruData {
end_yellow_zone: 0,
end_green_zone: 0,
end_red_zone: 0,
entries: Vec::new(),
rng,
}
}
fn green_zone(&self) -> std::ops::Range<usize> {
0..self.end_green_zone
}
fn yellow_zone(&self) -> std::ops::Range<usize> {
self.end_green_zone..self.end_yellow_zone
}
fn red_zone(&self) -> std::ops::Range<usize> {
self.end_yellow_zone..self.end_red_zone
}
fn resize(&mut self, len_green_zone: usize, len_yellow_zone: usize, len_red_zone: usize) {
self.end_green_zone = len_green_zone;
self.end_yellow_zone = self.end_green_zone + len_yellow_zone;
self.end_red_zone = self.end_yellow_zone + len_red_zone;
let entries = std::mem::replace(&mut self.entries, Vec::with_capacity(self.end_red_zone));
log::debug!("green_zone = {:?}", self.green_zone());
log::debug!("yellow_zone = {:?}", self.yellow_zone());
log::debug!("red_zone = {:?}", self.red_zone());
// We expect to resize when the LRU cache is basically empty.
// So just forget all the old LRU indices to start.
for entry in entries {
entry.lru_index().clear();
}
}
/// Records that a node was used. If it is already a member of the
/// LRU list, it is promoted to the green zone (unless it's
/// already there). Otherwise, it is added to the list first and
/// *then* promoted to the green zone. Adding a new node to the
/// list may displace an old member of the red zone, in which case
/// that is returned.
fn record_use(&mut self, node: &Arc<Node>) -> Option<Arc<Node>> {
log::debug!("record_use(node={:?})", node);
// NB: When this is invoked, we have typically already loaded
// the LRU index (to check if it is in green zone). But that
// check was done outside the lock and -- for all we know --
// the index may have changed since. So we always reload.
let index = node.lru_index().load();
if index < self.end_green_zone {
None
} else if index < self.end_yellow_zone {
self.promote_yellow_to_green(node, index);
None
} else if index < self.end_red_zone {
self.promote_red_to_green(node, index);
None
} else {
self.insert_new(node)
}
}
/// Inserts a node that is not yet a member of the LRU list. If
/// the list is at capacity, this can displace an existing member.
fn insert_new(&mut self, node: &Arc<Node>) -> Option<Arc<Node>> {
debug_assert!(!node.lru_index().is_in_lru());
// Easy case: we still have capacity. Push it, and then promote
// it up to the appropriate zone.
let len = self.entries.len();
if len < self.end_red_zone {
self.entries.push(node.clone());
node.lru_index().store(len);
log::debug!("inserted node {:?} at {}", node, len);
return self.record_use(node);
}
// Harder case: no capacity. Create some by evicting somebody from red
// zone and then promoting.
let victim_index = self.pick_index(self.red_zone());
let victim_node = std::mem::replace(&mut self.entries[victim_index], node.clone());
log::debug!("evicting red node {:?} from {}", victim_node, victim_index);
victim_node.lru_index().clear();
self.promote_red_to_green(node, victim_index);
Some(victim_node)
}
/// Promotes the node `node`, stored at `red_index` (in the red
/// zone), into a green index, demoting yellow/green nodes at
/// random.
///
/// NB: It is not required that `node.lru_index()` is up-to-date
/// when entering this method.
fn promote_red_to_green(&mut self, node: &Arc<Node>, red_index: usize) {
debug_assert!(self.red_zone().contains(&red_index));
// Pick a yellow at random and switch places with it.
//
// Subtle: we do not update `node.lru_index` *yet* -- we're
// going to invoke `self.promote_yellow_to_green` next, and it will get
// updated then.
let yellow_index = self.pick_index(self.yellow_zone());
log::debug!(
"demoting yellow node {:?} from {} to red at {}",
self.entries[yellow_index],
yellow_index,
red_index,
);
self.entries.swap(yellow_index, red_index);
self.entries[red_index].lru_index().store(red_index);
// Now move ourselves up into the green zone.
self.promote_yellow_to_green(node, yellow_index);
}
/// Promotes the node `node`, stored at `yellow_index` (in the
/// yellow zone), into a green index, demoting a green node at
/// random to replace it.
///
/// NB: It is not required that `node.lru_index()` is up-to-date
/// when entering this method.
fn promote_yellow_to_green(&mut self, node: &Arc<Node>, yellow_index: usize) {
debug_assert!(self.yellow_zone().contains(&yellow_index));
// Pick a green at random and switch places with it.
let green_index = self.pick_index(self.green_zone());
log::debug!(
"demoting green node {:?} from {} to yellow at {}",
self.entries[green_index],
green_index,
yellow_index
);
self.entries.swap(green_index, yellow_index);
self.entries[yellow_index].lru_index().store(yellow_index);
node.lru_index().store(green_index);
log::debug!("promoted {:?} to green index {}", node, green_index);
}
fn pick_index(&mut self, zone: std::ops::Range<usize>) -> usize {
let end_index = std::cmp::min(zone.end, self.entries.len());
self.rng.gen_range(zone.start, end_index)
}
}
impl Default for LruIndex {
fn default() -> Self {
Self {
prev: ArcSwapOption::default(),
next: ArcSwapOption::default(),
index: AtomicUsize::new(std::usize::MAX),
}
}
}
impl<Node> std::fmt::Debug for LruLinks<Node> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(fmt, "LruLinks {{ .. }}")
impl LruIndex {
fn load(&self) -> usize {
self.index.load(Ordering::Acquire) // see note on ordering below
}
fn store(&self, value: usize) {
self.index.store(value, Ordering::Release) // see note on ordering below
}
fn clear(&self) {
self.store(std::usize::MAX);
}
fn is_in_lru(&self) -> bool {
self.load() != std::usize::MAX
}
}
fn rng_with_seed(seed_str: &str) -> SmallRng {
let mut seed: [u8; 16] = [0; 16];
for (i, &b) in seed_str.as_bytes().iter().take(16).enumerate() {
seed[i] = b;
}
SmallRng::from_seed(seed)
}
// A note on ordering:
//
// I chose to use AcqRel for the ordering but I don't think it's
// strictly needed. All writes occur under a lock, so they should be
// ordered w/r/t one another. As for the reads, they can occur
// outside the lock, but they don't themselves enable dependent reads
// -- if the reads are out of bounds, we would acquire a lock.
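
The note above concerns `LruIndex`: each slot carries an `AtomicUsize` holding its position in the entries vector, with `std::usize::MAX` as the "not a member" sentinel; writes happen under the `LruData` lock with `Release`, while the lock-free fast-path read in `record_use` uses `Acquire`. A minimal self-contained sketch of that pattern (the `Index` type here is a stand-in, not the salsa code itself):

use std::sync::atomic::{AtomicUsize, Ordering};

// Stand-in for the `LruIndex` introduced in this file.
struct Index(AtomicUsize);

impl Index {
    fn new() -> Self {
        Index(AtomicUsize::new(std::usize::MAX)) // MAX == not in the LRU list
    }
    fn store(&self, i: usize) {
        self.0.store(i, Ordering::Release) // writers hold the LRU lock
    }
    fn load(&self) -> usize {
        self.0.load(Ordering::Acquire) // readers may not hold the lock
    }
    fn is_in_lru(&self) -> bool {
        self.load() != std::usize::MAX
    }
}

fn main() {
    let idx = Index::new();
    assert!(!idx.is_in_lru());
    idx.store(3); // slot now occupies index 3 of the entries vector
    assert!(idx.is_in_lru());
    idx.store(std::usize::MAX); // cleared on eviction or resize
    assert!(!idx.is_in_lru());
}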

View file

@@ -1,117 +1,89 @@
#![cfg(test)]
use super::*;
use linked_hash_map::LinkedHashMap;
#[derive(Debug)]
struct TestNode {
data: usize,
links: LruLinks<TestNode>,
id: usize,
index: LruIndex,
}
impl TestNode {
fn new(data: usize) -> Arc<Self> {
fn new(id: usize) -> Arc<Self> {
Arc::new(TestNode {
data,
links: LruLinks::default(),
id,
index: Default::default(),
})
}
}
impl LruNode for TestNode {
fn links(&self) -> &LruLinks<TestNode> {
&self.links
fn lru_index(&self) -> &LruIndex {
&self.index
}
}
#[test]
fn queue() {
let mut lru = Lru::default();
let n1 = TestNode::new(1);
let n2 = TestNode::new(2);
let n3 = TestNode::new(3);
const LRU_SEED: &str = "Hello, Rustaceans";
const PICK_SEED: &str = "Wippity WIP";
assert!(lru.pop_lru().is_none());
/// Randomly requests nodes and compares the performance of a
/// *perfect* LRU vs our more approximate version. Since all the
/// random number generators use fixed seeds, these results are
/// reproducible. Returns (oracle_hits, lru_hits) -- i.e., the number
/// of times that the oracle had something in cache vs the number of
/// times that our LRU did.
fn compare(num_nodes: usize, capacity: usize, requests: usize) -> (usize, usize) {
// Remember the clock each time we access a given element.
let mut last_access: Vec<usize> = (0..num_nodes).map(|_| 0).collect();
assert_eq!(lru.promote(n1.clone()), 1);
assert_eq!(lru.promote(n2.clone()), 2);
assert_eq!(lru.promote(n3.clone()), 3);
// Use a linked hash map as our *oracle* -- we track each node we
// requested and (as the value) the clock in which we requested
// it. When the capacity is exceeded, we can pop the oldest.
let mut oracle = LinkedHashMap::new();
assert!(Arc::ptr_eq(&n1, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n2, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n3, &lru.pop_lru().unwrap()));
assert!(lru.pop_lru().is_none());
let lru = Lru::with_seed(LRU_SEED);
lru.set_lru_capacity(capacity);
let nodes: Vec<_> = (0..num_nodes).map(|i| TestNode::new(i)).collect();
let mut oracle_hits = 0;
let mut lru_hits = 0;
let mut pick_rng = super::rng_with_seed(PICK_SEED);
for clock in (0..requests).map(|n| n + 1) {
let request_id: usize = pick_rng.gen_range(0, num_nodes);
last_access[request_id] = clock;
if oracle.contains_key(&request_id) {
oracle_hits += 1;
}
if nodes[request_id].index.is_in_lru() {
lru_hits += 1;
}
// maintain the oracle LRU
oracle.insert(request_id, ());
if oracle.len() > capacity {
oracle.pop_front().unwrap();
}
// maintain our own version
if let Some(lru_evicted) = lru.record_use(&nodes[request_id]) {
assert!(!lru_evicted.index.is_in_lru());
}
}
println!("oracle_hits = {}", oracle_hits);
println!("lru_hits = {}", lru_hits);
(oracle_hits, lru_hits)
}
#[test]
fn promote_last() {
let mut lru = Lru::default();
let n1 = TestNode::new(1);
let n2 = TestNode::new(2);
let n3 = TestNode::new(3);
assert_eq!(lru.promote(n1.clone()), 1);
assert_eq!(lru.promote(n2.clone()), 2);
assert_eq!(lru.promote(n3.clone()), 3);
assert_eq!(lru.promote(n1.clone()), 3);
assert!(Arc::ptr_eq(&n2, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n3, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n1, &lru.pop_lru().unwrap()));
assert!(lru.pop_lru().is_none());
}
#[test]
fn promote_middle() {
let mut lru = Lru::default();
let n1 = TestNode::new(1);
let n2 = TestNode::new(2);
let n3 = TestNode::new(3);
assert_eq!(lru.promote(n1.clone()), 1);
assert_eq!(lru.promote(n2.clone()), 2);
assert_eq!(lru.promote(n3.clone()), 3);
assert_eq!(lru.promote(n2.clone()), 3);
assert!(Arc::ptr_eq(&n1, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n3, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n2, &lru.pop_lru().unwrap()));
assert!(&lru.pop_lru().is_none());
}
#[test]
fn promote_head() {
let mut lru = Lru::default();
let n1 = TestNode::new(1);
let n2 = TestNode::new(2);
let n3 = TestNode::new(3);
assert_eq!(lru.promote(n1.clone()), 1);
assert_eq!(lru.promote(n2.clone()), 2);
assert_eq!(lru.promote(n3.clone()), 3);
assert_eq!(lru.promote(n3.clone()), 3);
assert!(Arc::ptr_eq(&n1, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n2, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n3, &lru.pop_lru().unwrap()));
assert!(&lru.pop_lru().is_none());
}
#[test]
fn promote_rev() {
let mut lru = Lru::default();
let n1 = TestNode::new(1);
let n2 = TestNode::new(2);
let n3 = TestNode::new(3);
assert_eq!(lru.promote(n1.clone()), 1);
assert_eq!(lru.promote(n2.clone()), 2);
assert_eq!(lru.promote(n3.clone()), 3);
assert_eq!(lru.promote(n3.clone()), 3);
assert_eq!(lru.promote(n2.clone()), 3);
assert_eq!(lru.promote(n1.clone()), 3);
assert!(Arc::ptr_eq(&n3, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n2, &lru.pop_lru().unwrap()));
assert!(Arc::ptr_eq(&n1, &lru.pop_lru().unwrap()));
assert!(&lru.pop_lru().is_none());
fn scenario_a() {
let (oracle_hits, lru_hits) = compare(1000, 100, 10000);
assert_eq!(oracle_hits, 993);
assert_eq!(lru_hits, 973);
}
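
Spelled out, scenario A issues 10,000 uniformly random requests over 1,000 nodes with a capacity of 100, so even a perfect LRU stays near the 10% baseline hit rate; the randomized version recovers roughly 98% of the oracle's hits. The arithmetic behind those two assertions (plain Rust, not part of the test file):

fn main() {
    let (oracle_hits, lru_hits, requests) = (993.0_f64, 973.0_f64, 10_000.0_f64);
    println!("oracle hit rate = {:.2}%", 100.0 * oracle_hits / requests); // 9.93%
    println!("lru hit rate    = {:.2}%", 100.0 * lru_hits / requests);    // 9.73%
    println!("lru vs oracle   = {:.1}%", 100.0 * lru_hits / oracle_hits); // 98.0%
}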