Skip to content

feat(cache): a variant of sieve, with lazy op #13904

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/common/cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ heapsize = ["heapsize_"]
bytes = { workspace = true }
hashbrown = "0.14"
hashlink = "0.8"
indexmap = "1.9.2"

[target.'cfg(not(target_os = "macos"))'.dependencies]
heapsize_ = { package = "heapsize", version = "0.4.2", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion src/common/cache/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::Meter;
/// A trait for a cache.
pub trait Cache<K, V, S, M>
where
K: Eq + Hash,
K: Eq + Hash + Clone,
S: BuildHasher,
M: Meter<K, V>,
{
Expand Down
124 changes: 94 additions & 30 deletions src/common/cache/src/cache/lru.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ use std::hash::Hash;
use hashbrown::hash_map::DefaultHashBuilder;
use hashlink::linked_hash_map;
use hashlink::LinkedHashMap;
use indexmap::IndexMap;

use crate::cache::Cache;
use crate::meter::count_meter::Count;
Expand All @@ -69,18 +70,20 @@ use crate::meter::count_meter::CountableMeter;
/// An LRU cache.
#[derive(Clone)]
pub struct LruCache<
K: Eq + Hash,
K: Eq + Hash + Clone,
V,
S: BuildHasher = DefaultHashBuilder,
M: CountableMeter<K, V> = Count,
> {
map: LinkedHashMap<K, V, S>,
visited: IndexMap<K, bool>,
hand: u64,
current_measure: M::Measure,
max_capacity: u64,
meter: M,
}

impl<K: Eq + Hash, V> LruCache<K, V> {
impl<K: Eq + Hash + Clone, V> LruCache<K, V> {
/// Creates an empty cache that can hold at most `capacity` items.
///
/// # Examples
Expand All @@ -92,14 +95,16 @@ impl<K: Eq + Hash, V> LruCache<K, V> {
pub fn new(capacity: u64) -> Self {
LruCache {
map: LinkedHashMap::new(),
visited: IndexMap::new(),
hand: 0,
current_measure: (),
max_capacity: capacity,
meter: Count,
}
}
}

impl<K: Eq + Hash, V, M: CountableMeter<K, V>> LruCache<K, V, DefaultHashBuilder, M> {
impl<K: Eq + Hash + Clone, V, M: CountableMeter<K, V>> LruCache<K, V, DefaultHashBuilder, M> {
/// Creates an empty cache that can hold at most `capacity` as measured by `meter`.
///
/// You can implement the [`Meter`][meter] trait to allow custom metrics.
Expand Down Expand Up @@ -136,33 +141,79 @@ impl<K: Eq + Hash, V, M: CountableMeter<K, V>> LruCache<K, V, DefaultHashBuilder
pub fn with_meter(capacity: u64, meter: M) -> LruCache<K, V, DefaultHashBuilder, M> {
LruCache {
map: LinkedHashMap::new(),
visited: IndexMap::new(),
hand: 0,
current_measure: Default::default(),
max_capacity: capacity,
meter,
}
}
}

impl<K: Eq + Hash, V, S: BuildHasher> LruCache<K, V, S, Count> {
impl<K: Eq + Hash + Clone, V, S: BuildHasher> LruCache<K, V, S, Count> {
/// Creates an empty cache that can hold at most `capacity` items with the given hash builder.
pub fn with_hasher(capacity: u64, hash_builder: S) -> LruCache<K, V, S, Count> {
LruCache {
map: LinkedHashMap::with_hasher(hash_builder),
visited: IndexMap::new(),
hand: 0,
current_measure: (),
max_capacity: capacity,
meter: Count,
}
}
}

impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
impl<K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> LruCache<K, V, S, M> {
    /// Returns the slot in `visited` at which the next eviction scan begins.
    ///
    /// The stored hand is rewound to slot 0 when it has advanced past
    /// `len - len / 5`, or when it no longer addresses an existing slot
    /// (entries were removed, or the previous scan ran off the end).
    ///
    /// NOTE(review): the `len - len / 5` rewind threshold is carried over
    /// unchanged; its rationale is not visible in this file -- confirm.
    fn scan_start(&self) -> usize {
        let length = self.visited.len() as u64;
        if self.hand > length - length / 5 || self.hand >= length {
            0
        } else {
            // `hand < length` here, and `length` came from a `usize`.
            self.hand as usize
        }
    }

    /// Picks the key to evict next and advances the hand to its slot.
    ///
    /// Starting at the hand, every entry whose visited flag is set has the
    /// flag cleared and is skipped; the first entry found with a cleared flag
    /// is the candidate. If the end of the map is reached the scan wraps
    /// around to slot 0, so a non-empty cache always yields a candidate.
    ///
    /// Returns `None` only when the cache holds no entries.
    fn find_evict_candidate(&mut self) -> Option<K> {
        let start = self.scan_start();

        // First leg: from the hand to the end of the map.
        let mut slot = start;
        for (key, visited) in self.visited.iter_mut().skip(start) {
            if !*visited {
                self.hand = slot as u64;
                return Some(key.clone());
            }
            *visited = false;
            slot += 1;
        }

        // Second leg: wrap around to the front. Every flag in `start..` was
        // cleared by the first leg, so slot `start` is a hit at the latest.
        for (slot, (key, visited)) in self.visited.iter_mut().enumerate() {
            if !*visited {
                self.hand = slot as u64;
                return Some(key.clone());
            }
            *visited = false;
        }

        // Only reachable when `visited` is empty.
        self.hand = 0;
        None
    }

    /// Reports the key `find_evict_candidate` would pick, without clearing
    /// any visited flag and without moving the hand.
    ///
    /// Returns `None` only when the cache holds no entries.
    fn peek_evict_candidate(&self) -> Option<K> {
        let start = self.scan_start();
        self.visited
            .iter()
            .skip(start)
            .chain(self.visited.iter().take(start))
            .find(|(_, visited)| !**visited)
            // Every entry is flagged: a mutating scan would clear one full
            // revolution of flags and come to rest on the slot it started at.
            .or_else(|| self.visited.iter().nth(start))
            .map(|(key, _)| key.clone())
    }
}

impl<K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
for LruCache<K, V, S, M>
{
/// Creates an empty cache that can hold at most `capacity` as measured by `meter` with the
/// given hash builder.
fn with_meter_and_hasher(capacity: u64, meter: M, hash_builder: S) -> Self {
LruCache {
map: LinkedHashMap::with_hasher(hash_builder),
visited: IndexMap::new(),
hand: 0,
current_measure: Default::default(),
max_capacity: capacity,
meter,
Expand Down Expand Up @@ -195,13 +246,10 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
match self.map.raw_entry_mut().from_key(k) {
linked_hash_map::RawEntryMut::Occupied(mut occupied) => {
occupied.to_back();
Some(occupied.into_mut())
}
linked_hash_map::RawEntryMut::Vacant(_) => None,
if let Some(v) = self.visited.get_mut(k) {
*v = true;
}
self.map.get(k)
}

/// Returns a reference to the value corresponding to the key in the cache or `None` if it is
Expand Down Expand Up @@ -244,7 +292,11 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
/// assert_eq!(cache.peek_by_policy(), Some((&1, &"a")));
/// ```
fn peek_by_policy(&self) -> Option<(&K, &V)> {
self.map.front()
if let Some(old_key) = self.peek_evict_candidate() {
self.map.get_key_value(&old_key)
} else {
None
}
}

/// Checks if the map contains the given key.
Expand Down Expand Up @@ -285,12 +337,17 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
fn put(&mut self, k: K, v: V) -> Option<V> {
let new_size = self.meter.measure(&k, &v);
self.current_measure = self.meter.add(self.current_measure, new_size);
if let Some(old) = self.map.get(&k) {
self.current_measure = self
.meter
.sub(self.current_measure, self.meter.measure(&k, old));
match self.map.get(&k) {
Some(old) => {
self.current_measure = self
.meter
.sub(self.current_measure, self.meter.measure(&k, old));
}
None => {
self.visited.insert(k.clone(), false);
}
}
let old_val = self.map.insert(k, v);
let old_val = self.map.replace(k, v);
while self.size() > self.capacity() {
self.pop_by_policy();
}
Expand Down Expand Up @@ -319,6 +376,7 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
Q: Hash + Eq + ?Sized,
{
self.map.remove(k).map(|v| {
self.visited.remove(k);
self.current_measure = self
.meter
.sub(self.current_measure, self.meter.measure(k, &v));
Expand All @@ -343,12 +401,17 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
/// ```
#[inline]
fn pop_by_policy(&mut self) -> Option<(K, V)> {
self.map.pop_front().map(|(k, v)| {
self.current_measure = self
.meter
.sub(self.current_measure, self.meter.measure(&k, &v));
(k, v)
})
if let Some(old_key) = self.find_evict_candidate() {
self.map.remove_entry(&old_key).map(|(k, v)| {
self.visited.remove(&old_key);
self.current_measure = self
.meter
.sub(self.current_measure, self.meter.measure(&k, &v));
(k, v)
})
} else {
None
}
}

/// Sets the size of the key-value pairs the cache can hold, as measured by the `Meter` used by
Expand Down Expand Up @@ -427,11 +490,12 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Cache<K, V, S, M>
/// Removes all key-value pairs from the cache.
///
/// Besides the stored entries this also drops every visited flag, rewinds the
/// eviction hand to slot 0 and resets the running measure to its default.
fn clear(&mut self) {
    self.map.clear();
    self.visited.clear();
    // Without this the next eviction scan would start at an offset that was
    // computed against the previous contents.
    self.hand = 0;
    self.current_measure = Default::default();
}
}

impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> LruCache<K, V, S, M> {
impl<K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> LruCache<K, V, S, M> {
/// Returns an iterator over the cache's key-value pairs in least- to most-recently-used order.
///
/// Accessing the cache through the iterator does _not_ affect the cache's LRU state.
Expand Down Expand Up @@ -490,7 +554,7 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> LruCache<K, V, S,
}
}

impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Extend<(K, V)>
impl<K: Eq + Hash + Clone + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> Extend<(K, V)>
for LruCache<K, V, S, M>
{
fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
Expand All @@ -500,15 +564,15 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> Extend<(K, V)>
}
}

impl<K: fmt::Debug + Eq + Hash, V: fmt::Debug, S: BuildHasher, M: CountableMeter<K, V>> fmt::Debug
for LruCache<K, V, S, M>
impl<K: fmt::Debug + Eq + Hash + Clone, V: fmt::Debug, S: BuildHasher, M: CountableMeter<K, V>>
fmt::Debug for LruCache<K, V, S, M>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    // Render as a map, listing entries in the reverse of `iter()` order.
    let mut rendered = f.debug_map();
    rendered.entries(self.iter().rev());
    rendered.finish()
}
}

impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
impl<K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
for LruCache<K, V, S, M>
{
type Item = (K, V);
Expand All @@ -519,7 +583,7 @@ impl<K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
}
}

impl<'a, K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
impl<'a, K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
for &'a LruCache<K, V, S, M>
{
type Item = (&'a K, &'a V);
Expand All @@ -529,7 +593,7 @@ impl<'a, K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
}
}

impl<'a, K: Eq + Hash, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
impl<'a, K: Eq + Hash + Clone, V, S: BuildHasher, M: CountableMeter<K, V>> IntoIterator
for &'a mut LruCache<K, V, S, M>
{
type Item = (&'a K, &'a mut V);
Expand Down
22 changes: 11 additions & 11 deletions src/common/cache/tests/it/cache/lru.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ fn test_debug() {
cache.put(3, 30);
assert_eq!(format!("{:?}", cache), "{3: 30, 2: 20, 1: 10}");
cache.put(2, 22);
assert_eq!(format!("{:?}", cache), "{2: 22, 3: 30, 1: 10}");
assert_eq!(format!("{:?}", cache), "{3: 30, 2: 22, 1: 10}");
cache.put(6, 60);
assert_eq!(format!("{:?}", cache), "{6: 60, 2: 22, 3: 30}");
assert_eq!(format!("{:?}", cache), "{6: 60, 3: 30, 2: 22}");
cache.get(&3);
assert_eq!(format!("{:?}", cache), "{3: 30, 6: 60, 2: 22}");
assert_eq!(format!("{:?}", cache), "{6: 60, 3: 30, 2: 22}");
cache.set_capacity(2);
assert_eq!(format!("{:?}", cache), "{3: 30, 6: 60}");
assert_eq!(format!("{:?}", cache), "{3: 30, 2: 22}");
}

#[test]
Expand All @@ -115,7 +115,7 @@ fn test_remove() {
cache.put(8, 80);
assert!(cache.get(&5).is_none());
assert_eq!(cache.get(&6), Some(&60));
assert_eq!(cache.get(&7), Some(&70));
assert_eq!(cache.get(&7), None);
assert_eq!(cache.get(&8), Some(&80));
}

Expand All @@ -139,24 +139,24 @@ fn test_iter() {
cache.put(4, 40);
cache.put(5, 50);
assert_eq!(cache.iter().collect::<Vec<_>>(), [
(&2, &20),
(&3, &30),
(&4, &40),
(&5, &50)
]);
assert_eq!(cache.iter_mut().collect::<Vec<_>>(), [
(&2, &mut 20),
(&3, &mut 30),
(&4, &mut 40),
(&5, &mut 50)
]);
assert_eq!(cache.iter().rev().collect::<Vec<_>>(), [
(&5, &50),
(&4, &40),
(&3, &30)
(&3, &30),
(&2, &20)
]);
assert_eq!(cache.iter_mut().rev().collect::<Vec<_>>(), [
(&5, &mut 50),
(&4, &mut 40),
(&3, &mut 30)
(&3, &mut 30),
(&2, &mut 20)
]);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ fn test_evict_until_enough_space() {

// insert a single slice whose size is bigger than file1 and less than file1 + file2
c.insert_single_slice("file4", &[3; 2]).unwrap();
assert_eq!(c.size(), 3);
assert_eq!(c.size(), 4);
// file1 and file2 MUST be evicted
assert!(!c.contains_key("file1"));
assert!(!c.contains_key("file2"));
assert!(!c.contains_key("file3"));
// file3 MUST be kept
assert!(c.contains_key("file3"));
assert!(c.contains_key("file2"));
}