diff --git a/.gitignore b/.gitignore index 707f8413..cbad8b17 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ .vscode/ .idea/ **/*~ + +tmp/ diff --git a/Cargo.lock b/Cargo.lock index f1286f1b..2e432d5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -124,9 +124,9 @@ dependencies = [ [[package]] name = "candid" -version = "0.10.3" +version = "0.10.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "182543fbc03b4ad0bfc384e6b68346e0b0aad0b11d075b71b4fcaa5d07f8862c" +checksum = "a253bab4a9be502c82332b60cbeee6202ad0692834efeec95fae9f29db33d692" dependencies = [ "anyhow", "binread", @@ -147,9 +147,9 @@ dependencies = [ [[package]] name = "candid_derive" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970c220da8aa2fa6f7ef5dbbf3ea5b620a59eb3ac107cfb95ae8c6eebdfb7a08" +checksum = "3de398570c386726e7a59d9887b68763c481477f9a043fb998a2e09d428df1a9" dependencies = [ "lazy_static", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 7a4c0ab8..350b9246 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,11 @@ version = "0.6.8" ic_principal = { version = "0.1.1", default-features = false } # An optional dependency to benchmark parts of the code. canbench-rs = { version = "0.1.7", optional = true } +ic-cdk.workspace = true [dev-dependencies] candid.workspace = true hex = "0.4.3" -ic-cdk.workspace = true ic-cdk-macros.workspace = true maplit = "1.0.2" proptest = "1" diff --git a/src/btreemap.rs b/src/btreemap.rs index e72567e4..2ef9e7b6 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -49,9 +49,14 @@ //! ---------------------------------------- //! ``` mod allocator; +pub(crate) mod cache; mod iter; mod node; -use crate::btreemap::iter::{IterInternal, KeysIter, ValuesIter}; + +use crate::btreemap::{ + cache::Cache, + iter::{IterInternal, KeysIter, ValuesIter}, +}; use crate::{ storable::Bound as StorableBound, types::{Address, NULL}, @@ -61,6 +66,7 @@ use allocator::Allocator; pub use iter::Iter; use node::{DerivedPageSize, Entry, Node, NodeType, PageSize, Version}; use std::borrow::Cow; +use std::cell::RefCell; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; @@ -105,6 +111,9 @@ where // A marker to communicate to the Rust compiler that we own these types. _phantom: PhantomData<(K, V)>, + + // A cache for storing recently accessed nodes. + node_cache: RefCell>>, } #[derive(PartialEq, Debug)] @@ -214,6 +223,7 @@ where version: Version::V2(page_size), length: 0, _phantom: PhantomData, + node_cache: RefCell::new(Cache::new()), }; btree.save_header(); @@ -241,6 +251,7 @@ where }), length: 0, _phantom: PhantomData, + node_cache: RefCell::new(Cache::new()), }; btree.save_header(); @@ -290,6 +301,7 @@ where version, length: header.length, _phantom: PhantomData, + node_cache: RefCell::new(Cache::new()), } } @@ -572,6 +584,7 @@ where self.length = 0; self.allocator.clear(); self.save_header(); + self.node_cache.borrow_mut().clear(); } /// Returns the first key-value pair in the map. The key in this @@ -1081,6 +1094,9 @@ where /// [1, 2, 3, 4, 5, 6, 7] (stored in the `into` node) /// `source` is deallocated. fn merge(&mut self, source: Node, mut into: Node, median: Entry) -> Node { + let mut node_cache = self.node_cache.borrow_mut(); + node_cache.remove(&source.address()); + node_cache.remove(&into.address()); into.merge(source, median, &mut self.allocator); into } @@ -1093,21 +1109,35 @@ where } } - /// Deallocates a node. - #[inline] fn deallocate_node(&mut self, node: Node) { + self.node_cache.borrow_mut().remove(&node.address()); node.deallocate(self.allocator_mut()); } - /// Loads a node from memory. - #[inline] fn load_node(&self, address: Address) -> Node { Node::load(address, self.version.page_size(), self.memory()) + + // TODO: debug if this brings any performance improvement. + // self.node_cache.get(address).unwrap_or_else(|| { + // let node = Node::load(address, self.version.page_size(), self.memory()); + // self.node_cache.insert(address, &node); + // node + // }) + } + + fn load_cached_node(&self, address: Address) -> Node { + let mut node_cache = self.node_cache.borrow_mut(); + node_cache.get(&address).unwrap_or_else(|| { + let node = Node::load(address, self.version.page_size(), self.memory()); + if node_cache.capacity() > 0 { + node_cache.insert(address, node.clone()); + } + node + }) } - /// Saves the node to memory. - #[inline] fn save_node(&mut self, node: &mut Node) { + self.node_cache.borrow_mut().remove(&node.address()); node.save(self.allocator_mut()); } diff --git a/src/btreemap/cache.rs b/src/btreemap/cache.rs new file mode 100644 index 00000000..e056a8e5 --- /dev/null +++ b/src/btreemap/cache.rs @@ -0,0 +1,447 @@ +use std::collections::BTreeMap; + +#[allow(non_upper_case_globals)] +const KiB: usize = 1024; +#[allow(non_upper_case_globals)] +const MiB: usize = 1024 * KiB; +#[allow(non_upper_case_globals)] +const GiB: usize = 1024 * MiB; + +const DEFAULT_CAPACITY: usize = 0; +const DEFAULT_SIZE_LIMIT: usize = 3 * GiB; + +pub trait ByteSize { + /// Returns the size (in bytes) of the value. + fn byte_size(&self) -> usize { + std::mem::size_of_val(self) + } +} + +/// Incrementing counter used for tracking the order of usage. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Default)] +struct Counter(u64); + +/// Runtime statistics for the cache. +#[derive(Default, Debug, Copy, Clone)] +pub struct CacheStats { + hits: u64, + misses: u64, +} + +impl CacheStats { + #[inline] + pub fn hits(&self) -> u64 { + self.hits + } + + #[inline] + pub fn misses(&self) -> u64 { + self.misses + } + + #[inline] + pub fn total(&self) -> u64 { + self.hits + self.misses + } + + #[inline] + pub fn hit_ratio(&self) -> f64 { + self.hits as f64 / (self.hits + self.misses).max(1) as f64 + } +} + +/// Cache with eviction policy that minimizes duplication of keys and values. +#[derive(Debug, Default, Clone)] +pub struct Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + cache: BTreeMap, + capacity: usize, + /// Tracks the cumulative bytes for all entries (including duplicated key storage). + size: usize, + size_limit: usize, + counter: Counter, + lru_order: BTreeMap, + usage: BTreeMap, + stats: CacheStats, +} + +impl Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + /// Computes the total overhead of an entry: + /// 3 times the key size (stored in cache, usage, and lru_order) plus the value size. + fn entry_overhead(key_size: usize, value_size: usize) -> usize { + 3 * key_size + value_size + 2 * std::mem::size_of::() + } + + /// Creates a new cache with the given capacity. + pub fn new() -> Self { + crate::debug::print("ABC cache new"); + Self { + cache: BTreeMap::new(), + capacity: DEFAULT_CAPACITY, + size: 0, + size_limit: DEFAULT_SIZE_LIMIT, + counter: Counter(0), + lru_order: BTreeMap::new(), + usage: BTreeMap::new(), + stats: CacheStats::default(), + } + } + + /// Creates a new cache with the given capacity and size limit. + pub fn with_capacity(self, capacity: usize) -> Self { + let mut this = self.clone(); + this.capacity = capacity; + this + } + + /// Creates a new cache with the given size limit. + pub fn with_size_limit(self, size_limit: usize) -> Self { + let mut this = self.clone(); + this.size_limit = size_limit; + this + } + + /// Clears all entries and resets statistics. + pub fn clear(&mut self) { + self.cache.clear(); + self.lru_order.clear(); + self.usage.clear(); + self.size = 0; + self.counter = Counter(0); + self.stats = CacheStats::default(); + } + + /// Retrieves the value for the given key (if any) and updates its LRU status. + pub fn get(&mut self, key: &K) -> Option { + if self.capacity() == 0 || self.size_limit() == 0 { + return None; + } + if let Some(value) = self.cache.get(key).cloned() { + self.touch(key.clone()); + self.stats.hits += 1; + return Some(value); + } + self.stats.misses += 1; + None + } + + /// Inserts the given key and value. + /// If adding this entry would exceed the capacity or size limit, evicts LRU entries. + pub fn insert(&mut self, key: K, value: V) { + if self.capacity() == 0 || self.size_limit() == 0 { + return; + } + let (key_size, value_size) = (key.byte_size(), value.byte_size()); + let overhead = Self::entry_overhead(key_size, value_size); + while self.len() + 1 > self.capacity() || self.size + overhead > self.size_limit() { + self.evict_one(); + } + if self.cache.insert(key.clone(), value).is_none() { + self.size = self.size.saturating_add(key_size + value_size); + } + self.touch(key); + } + + /// Removes the entry associated with the given key. + pub fn remove(&mut self, key: &K) { + if self.capacity() == 0 || self.size_limit() == 0 { + return; + } + if let Some(value) = self.cache.remove(key) { + let (key_size, value_size) = (key.byte_size(), value.byte_size()); + let overhead = Self::entry_overhead(key_size, value_size); + self.size = self.size.saturating_sub(overhead); + } + if let Some(counter) = self.usage.remove(key) { + self.lru_order.remove(&counter); + } + } + + /// Returns the number of entries in the cache. + #[inline] + pub fn len(&self) -> usize { + self.cache.len() + } + + /// Returns the total size in bytes of all entries (including duplicate key storage). + #[inline] + pub fn size(&self) -> usize { + self.size + } + + /// Returns the configured size limit (in bytes). + #[inline] + pub fn size_limit(&self) -> usize { + self.size_limit + } + + /// Sets a new size limit, evicting entries as necessary. + pub fn set_size_limit(&mut self, size_limit: usize) { + self.size_limit = size_limit; + if size_limit == 0 { + self.clear(); + } else { + while self.size() > size_limit { + self.evict_one(); + } + } + } + + /// Returns the cache capacity (number of entries). + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Sets a new capacity, evicting entries as needed. + pub fn set_capacity(&mut self, capacity: usize) { + self.capacity = capacity; + if self.capacity == 0 { + self.clear(); + } else { + while self.len() > self.capacity { + self.evict_one(); + } + } + } + + /// Evicts a single entry using the LRU policy. + /// Returns the key that was evicted. + fn evict_one(&mut self) -> Option { + // Find the least-recently used entry. + if let Some((&old_counter, old_key)) = self.lru_order.iter().next() { + let old_key = old_key.clone(); + let key_size = old_key.byte_size(); + let overhead = Self::entry_overhead(key_size, 0); + self.size = self.size.saturating_sub(overhead); + if let Some(v) = self.cache.remove(&old_key) { + self.size = self.size.saturating_sub(v.byte_size()); + } + self.lru_order.remove(&old_counter); + self.usage.remove(&old_key); + return Some(old_key); + } + None + } + + /// Updates the LRU order for the given key. + /// If the key is already in the LRU maps, its old counter is replaced. + /// For a new key, the overhead for the key (in usage and lru_order) is added. + fn touch(&mut self, key: K) { + self.counter.0 += 1; + let new_counter = self.counter; + let delta: usize = key.byte_size() + std::mem::size_of::(); + // Update usage: if key was present, remove its old LRU overhead. + if let Some(old_counter) = self.usage.insert(key.clone(), new_counter) { + if self.lru_order.remove(&old_counter).is_some() { + self.size = self.size.saturating_sub(delta); + } + } else { + // New key in usage. + self.size = self.size.saturating_add(delta); + } + // Insert into lru_order. If newly inserted, add the overhead. + if self.lru_order.insert(new_counter, key).is_none() { + self.size = self.size.saturating_add(delta); + } + } + + /// Returns the current cache statistics. + pub fn stats(&self) -> CacheStats { + self.stats + } + + /// Resets the cache statistics. + pub fn reset_stats(&mut self) { + self.stats = CacheStats::default(); + } +} + +impl Drop for Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + fn drop(&mut self) { + crate::debug::print(&format!("ABC cache len : {}", self.len())); + crate::debug::print(&format!("ABC cache size : {}", self.size())); + crate::debug::print(&format!( + "ABC cache hit ratio : {:>.1} %", + self.stats().hit_ratio() * 100.0 + )); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::size_of; + + impl ByteSize for u32 {} + impl ByteSize for u64 {} + + /// Helper: returns the expected overhead (in bytes) for an entry with key type u32 and value type u64. + /// Calculation: 3 * size_of(u32) + size_of(u64) + fn entry_size() -> usize { + 3 * size_of::() + size_of::() + 2 * size_of::() + } + + #[test] + fn test_insert_and_get() { + let mut cache: Cache = Cache::new().with_capacity(5); + cache.insert(1, 100); + cache.insert(2, 200); + + // Test that values can be retrieved. + assert_eq!(cache.get(&1), Some(100)); + assert_eq!(cache.get(&2), Some(200)); + + // Test stats: two successful gets. + let stats = cache.stats(); + assert_eq!(stats.hits(), 2); + assert_eq!(stats.misses(), 0); + } + + #[test] + fn test_miss() { + let mut cache: Cache = Cache::new().with_capacity(5); + // Attempt to retrieve a key that was never inserted. + assert_eq!(cache.get(&1), None); + + let stats = cache.stats(); + assert_eq!(stats.hits(), 0); + assert_eq!(stats.misses(), 1); + } + + #[test] + fn test_cache_size_tracking() { + // Allow at most two entries. + let mut cache: Cache = Cache::new() + .with_capacity(5) + .with_size_limit(2 * entry_size()); + + // Insert first entry. + cache.insert(1, 100); + assert_eq!(cache.size(), entry_size()); + assert_eq!(cache.get(&1), Some(100)); + + // Insert the same entry again should not change the overall size. + cache.insert(1, 100); + assert_eq!(cache.size(), entry_size()); + assert_eq!(cache.get(&1), Some(100)); + + // Insert second entry. + cache.insert(2, 200); + assert_eq!(cache.size(), 2 * entry_size()); + assert_eq!(cache.get(&1), Some(100)); + assert_eq!(cache.get(&2), Some(200)); + + // Inserting a third entry should trigger eviction (LRU policy) so that the size remains unchanged. + cache.insert(3, 300); + assert_eq!(cache.size(), 2 * entry_size()); + // Expect the least-recently used entry (key 1) to be evicted. + assert_eq!(cache.get(&1), None); + assert_eq!(cache.get(&2), Some(200)); + assert_eq!(cache.get(&3), Some(300)); + + // Remove an entry. + cache.remove(&2); + assert_eq!(cache.size(), entry_size()); + cache.remove(&3); + assert_eq!(cache.size(), 0); + } + + #[test] + fn test_eviction_by_capacity() { + let mut cache: Cache = Cache::new().with_capacity(3); + cache.insert(1, 10); + cache.insert(2, 20); + cache.insert(3, 30); + + // Inserting a fourth entry should evict the LRU entry. + cache.insert(4, 40); + + // Expect key 1 to be evicted. + assert_eq!(cache.get(&1), None); + // The other keys should be available. + assert_eq!(cache.get(&2), Some(20)); + assert_eq!(cache.get(&3), Some(30)); + assert_eq!(cache.get(&4), Some(40)); + } + + #[test] + fn test_eviction_by_size_limit() { + // Set a size limit to allow only two entries. + let mut cache: Cache = Cache::new() + .with_capacity(10) + .with_size_limit(2 * entry_size()); + + cache.insert(1, 10); + cache.insert(2, 20); + + // Inserting another entry should trigger eviction due to the size limit. + cache.insert(3, 30); + + // Expect that one entry is evicted (key 1, as the LRU). + assert_eq!(cache.get(&1), None); + assert_eq!(cache.get(&2), Some(20)); + assert_eq!(cache.get(&3), Some(30)); + } + + #[test] + fn test_remove() { + let mut cache: Cache = Cache::new().with_capacity(5); + cache.insert(1, 10); + cache.insert(2, 20); + + // Remove key 1. + cache.remove(&1); + assert_eq!(cache.get(&1), None); + // Removing a non-existent key should be safe. + cache.remove(&3); + // Key 2 should still be retrievable. + assert_eq!(cache.get(&2), Some(20)); + } + + #[test] + fn test_clear() { + let mut cache: Cache = Cache::new().with_capacity(5); + cache.insert(1, 10); + cache.insert(2, 20); + cache.insert(3, 30); + + cache.clear(); + assert_eq!(cache.len(), 0); + assert_eq!(cache.get(&1), None); + assert_eq!(cache.get(&2), None); + assert_eq!(cache.get(&3), None); + } + + #[test] + fn test_stats() { + let mut cache: Cache = Cache::new().with_capacity(3); + // Initially, no hits or misses. + let stats = cache.stats(); + assert_eq!(stats.hits(), 0); + assert_eq!(stats.misses(), 0); + + cache.insert(1, 10); + cache.insert(2, 20); + + // One hit. + let _ = cache.get(&1); + // One miss. + let _ = cache.get(&3); + + let stats = cache.stats(); + assert_eq!(stats.hits(), 1); + assert_eq!(stats.misses(), 1); + } +} diff --git a/src/btreemap/node.rs b/src/btreemap/node.rs index 50af80e0..e6bc4367 100644 --- a/src/btreemap/node.rs +++ b/src/btreemap/node.rs @@ -64,6 +64,13 @@ pub struct Node { overflows: Vec
, } +impl crate::btreemap::cache::ByteSize for Node { + fn byte_size(&self) -> usize { + // The size is not counting overflow pages, but that's close enough. + self.page_size().get() as usize + } +} + impl Node { /// Loads a node from memory at the given address. pub fn load(address: Address, page_size: PageSize, memory: &M) -> Self { @@ -448,6 +455,19 @@ impl Node { } } +impl Clone for Node { + fn clone(&self) -> Self { + Self { + address: self.address, + keys_and_encoded_values: self.keys_and_encoded_values.clone(), + children: self.children.clone(), + node_type: self.node_type, + version: self.version, + overflows: self.overflows.clone(), + } + } +} + // A transient data structure for reading/writing metadata into/from stable memory. #[repr(C, packed)] struct NodeHeader { @@ -464,7 +484,7 @@ impl NodeHeader { } /// The value in a K/V pair. -#[derive(Debug)] +#[derive(Clone, Debug)] enum Value { /// The value's encoded bytes. ByVal(Vec), diff --git a/src/debug.rs b/src/debug.rs new file mode 100644 index 00000000..cf1e1df6 --- /dev/null +++ b/src/debug.rs @@ -0,0 +1,112 @@ +use std::cell::RefCell; +use std::collections::BTreeMap; + +thread_local! { + static STATS: RefCell> = const { RefCell::new(BTreeMap::new()) }; +} + +pub struct InstructionCounter { + name: &'static str, +} + +#[derive(Clone, Copy)] +pub struct Stats { + start_instructions: Option, + running_count: u64, + total_instructions: u64, + call_count: u64, +} + +impl Stats { + pub fn instructions(&self) -> u64 { + self.total_instructions + } + pub fn calls(&self) -> u64 { + self.call_count + } +} + +impl std::fmt::Debug for Stats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Stats: ")?; + writeln!(f, "total_instructions : {}, ", self.total_instructions)?; + writeln!(f, "call_count : {}", self.call_count)?; + Ok(()) + } +} + +impl InstructionCounter { + pub fn new(name: &'static str) -> Self { + let now = instruction_count(); + STATS.with(|c| { + let mut stats = c.borrow_mut(); + let entry = stats.entry(name).or_insert(Stats { + start_instructions: Some(now), + running_count: 0, + total_instructions: 0, + call_count: 0, + }); + if entry.running_count == 0 { + entry.start_instructions = Some(now); + } + entry.running_count += 1; + }); + Self { name } + } +} + +impl Drop for InstructionCounter { + fn drop(&mut self) { + STATS.with(|c| { + let mut stats = c.borrow_mut(); + let entry = stats.entry(self.name).or_insert_with(|| { + panic!("InstructionCounter not initialized"); + }); + entry.running_count -= 1; + if entry.running_count == 0 { + let now = instruction_count(); + let elapsed = now + - entry + .start_instructions + .expect("start_instructions is None"); + entry.start_instructions = None; + entry.total_instructions += elapsed; + } + entry.call_count += 1; + }); + } +} + +pub fn get_stats() -> Vec<(&'static str, Stats)> { + STATS.with(|c| { + let stats = c.borrow(); + let mut stats_vec: Vec<_> = stats.iter().map(|(&k, &v)| (k, v)).collect(); + stats_vec.sort_by(|a, b| b.1.total_instructions.cmp(&a.1.total_instructions)); + stats_vec + }) +} + +fn instruction_count() -> u64 { + #[cfg(target_arch = "wasm32")] + { + ic_cdk::api::performance_counter(0) + } + + #[cfg(not(target_arch = "wasm32"))] + { + // Consider using cpu time here. + 0 + } +} + +pub fn print(msg: &str) { + #[cfg(target_arch = "wasm32")] + { + ic_cdk::api::print(msg); + } + + #[cfg(not(target_arch = "wasm32"))] + { + println!("{msg}"); + } +} diff --git a/src/lib.rs b/src/lib.rs index e1d0e1d2..64cf2a72 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,9 @@ pub use storable::Storable; use types::Address; pub use vec_mem::VectorMemory; +#[cfg(feature = "canbench-rs")] +mod debug; // TODO: remove debug code. + #[cfg(target_arch = "wasm32")] pub type DefaultMemoryImpl = Ic0StableMemory; diff --git a/src/types.rs b/src/types.rs index 0c9bbf7f..a5c8e8b5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,7 +3,7 @@ use core::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign}; pub const NULL: Address = Address(0); #[repr(C, packed)] -#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq)] +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] pub struct Address(u64); impl From for Address { @@ -53,6 +53,12 @@ impl AddAssign for Address { } } +impl crate::btreemap::cache::ByteSize for Address { + fn byte_size(&self) -> usize { + std::mem::size_of_val(self) + } +} + #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)] pub struct Bytes(u64);