Improve

jedisct1 · jedisct1 · commit c415c7815e4a · 2025-05-14T00:56:19.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 zig-cache
 *~
 Cargo.lock
+
+**/.claude/settings.local.json
diff --git a/README.md b/README.md
@@ -44,9 +44,11 @@ assert_eq!(cache.len(), 1);
 assert_eq!(cache.capacity(), 100000);
 ```
 
-## Thread-safe Cache Example
+## Thread-safe Cache Examples
 
-You can also use the thread-safe wrapper `SyncSieveCache` for concurrent access:
+### Basic Thread-safe Cache
+
+You can use the thread-safe wrapper `SyncSieveCache` for concurrent access:
 
 ```rust
 use sieve_cache::SyncSieveCache;
@@ -62,7 +64,7 @@ cache.insert("foo".to_string(), "foocontent".to_string());
 // Access the cache from another thread.
 let handle = thread::spawn(move || {
     cache_clone.insert("bar".to_string(), "barcontent".to_string());
-    
+
     // Retrieve a value from the cache. Returns a clone of the value.
     assert_eq!(cache_clone.get(&"foo".to_string()), Some("foocontent".to_string()));
 });
@@ -87,3 +89,55 @@ cache.with_lock(|inner_cache| {
 });
 ```
 
+### Sharded Thread-safe Cache
+
+For higher concurrency, you can use the sharded implementation `ShardedSieveCache`, which uses multiple internal locks to reduce contention:
+
+```rust
+use sieve_cache::ShardedSieveCache;
+use std::thread;
+use std::sync::Arc;
+
+// Create a sharded cache with default shard count (16)
+let cache = Arc::new(ShardedSieveCache::new(100000).unwrap());
+
+// Or specify a custom number of shards
+// let cache = Arc::new(ShardedSieveCache::with_shards(100000, 32).unwrap());
+
+// Insert key/value pairs from the main thread
+cache.insert("foo".to_string(), "foocontent".to_string());
+
+// Create multiple worker threads
+let mut handles = vec![];
+for i in 0..8 {
+    let cache_clone = Arc::clone(&cache);
+    let handle = thread::spawn(move || {
+        // Each thread inserts multiple values
+        for j in 0..100 {
+            let key = format!("key_thread{}_item{}", i, j);
+            let value = format!("value_{}", j);
+            cache_clone.insert(key, value);
+        }
+    });
+    handles.push(handle);
+}
+
+// Wait for all threads to complete
+for handle in handles {
+    handle.join().unwrap();
+}
+
+// Perform operations specific to a key's shard
+cache.with_key_lock(&"foo", |shard| {
+    shard.insert("related_key1".to_string(), "value1".to_string());
+    shard.insert("related_key2".to_string(), "value2".to_string());
+});
+
+// Get the number of entries across all shards
+assert_eq!(cache.len(), 803); // 800 from threads + 1 "foo" + 2 related keys
+
+// Get cache statistics
+println!("Cache has {} shards with total capacity {}",
+         cache.num_shards(), cache.capacity());
+```
+
diff --git a/benches/criterion.rs b/benches/criterion.rs
@@ -2,9 +2,12 @@
 extern crate criterion;
 
 use criterion::{black_box, Criterion};
-use rand::rng;
-use rand_distr::{Distribution, Normal, Uniform};
-use sieve_cache::SieveCache;
+use rand::prelude::*;
+use rand::thread_rng;
+use rand_distr::{Distribution, Normal};
+use sieve_cache::{ShardedSieveCache, SieveCache, SyncSieveCache};
+use std::sync::Arc;
+use std::thread;
 
 fn bench_sequence(c: &mut Criterion) {
     c.bench_function("bench_sequence", |b| {
@@ -27,18 +30,18 @@ fn bench_sequence(c: &mut Criterion) {
 fn bench_composite(c: &mut Criterion) {
     c.bench_function("bench_composite", |b| {
         let mut cache: SieveCache<u64, (Vec<u8>, u64)> = SieveCache::new(68).unwrap();
-        let mut rng = rng();
-        let uniform = Uniform::new(0, 100).unwrap();
-        let mut rand_iter = uniform.sample_iter(&mut rng);
+        let mut rng = thread_rng();
+
         b.iter(|| {
             for _ in 1..1000 {
-                let n = rand_iter.next().unwrap();
+                let n = rng.gen_range(0..100);
                 black_box(cache.insert(n, (vec![0u8; 12], n)));
             }
         });
+
         b.iter(|| {
             for _ in 1..1000 {
-                let n = rand_iter.next().unwrap();
+                let n = rng.gen_range(0..100);
                 black_box(cache.get(&n));
             }
         });
@@ -53,28 +56,131 @@ fn bench_composite_normal(c: &mut Criterion) {
         let mut cache: SieveCache<u64, (Vec<u8>, u64)> = SieveCache::new(SIGMA as usize).unwrap();
 
         // This should roughly cover all elements (within 3-sigma)
-        let mut rng = rng();
+        let mut rng = thread_rng();
         let normal = Normal::new(50.0, SIGMA).unwrap();
-        let mut rand_iter = normal.sample_iter(&mut rng).map(|x| (x as u64) % 100);
+
         b.iter(|| {
             for _ in 1..1000 {
-                let n = rand_iter.next().unwrap();
+                let sample = normal.sample(&mut rng);
+                let n = (sample as u64) % 100;
                 black_box(cache.insert(n, (vec![0u8; 12], n)));
             }
         });
+
         b.iter(|| {
             for _ in 1..1000 {
-                let n = rand_iter.next().unwrap();
+                let sample = normal.sample(&mut rng);
+                let n = (sample as u64) % 100;
                 black_box(cache.get(&n));
             }
         });
     });
 }
 
+// Benchmark comparing SyncSieveCache and ShardedSieveCache under concurrent multi-threaded access
+fn bench_concurrent_access(c: &mut Criterion) {
+    let mut group = c.benchmark_group("concurrent_access");
+
+    // Parameters shared by all three variants below (key range is 0..1000 in every worker)
+    const CACHE_SIZE: usize = 10000;
+    const NUM_THREADS: usize = 8;
+    const OPS_PER_THREAD: usize = 1000;
+
+    // SyncSieveCache: cache creation and thread spawn/join happen inside b.iter on every iteration
+    group.bench_function("sync_cache", |b| {
+        b.iter(|| {
+            let cache = Arc::new(SyncSieveCache::new(CACHE_SIZE).unwrap());
+            let mut handles = Vec::with_capacity(NUM_THREADS);
+
+            for _ in 0..NUM_THREADS {
+                let cache_clone = Arc::clone(&cache);
+                let handle = thread::spawn(move || {
+                    let mut rng = thread_rng();
+
+                    for _ in 0..OPS_PER_THREAD {
+                        let key = rng.gen_range(0..1000);
+                        if rng.gen::<bool>() {
+                            black_box(cache_clone.insert(key, key));
+                        } else {
+                            black_box(cache_clone.get(&key));
+                        }
+                    }
+                });
+                handles.push(handle);
+            }
+
+            for handle in handles {
+                handle.join().unwrap();
+            }
+        });
+    });
+
+    // Same workload with ShardedSieveCache::new (default shard count; 16 according to the README)
+    group.bench_function("sharded_cache", |b| {
+        b.iter(|| {
+            let cache = Arc::new(ShardedSieveCache::new(CACHE_SIZE).unwrap());
+            let mut handles = Vec::with_capacity(NUM_THREADS);
+
+            for _ in 0..NUM_THREADS {
+                let cache_clone = Arc::clone(&cache);
+                let handle = thread::spawn(move || {
+                    let mut rng = thread_rng();
+
+                    for _ in 0..OPS_PER_THREAD {
+                        let key = rng.gen_range(0..1000);
+                        if rng.gen::<bool>() {
+                            black_box(cache_clone.insert(key, key));
+                        } else {
+                            black_box(cache_clone.get(&key));
+                        }
+                    }
+                });
+                handles.push(handle);
+            }
+
+            for handle in handles {
+                handle.join().unwrap();
+            }
+        });
+    });
+
+    // Same workload with ShardedSieveCache::with_shards and an explicit shard count of 32
+    group.bench_function("sharded_cache_32_shards", |b| {
+        b.iter(|| {
+            let cache = Arc::new(ShardedSieveCache::with_shards(CACHE_SIZE, 32).unwrap());
+            let mut handles = Vec::with_capacity(NUM_THREADS);
+
+            for _ in 0..NUM_THREADS {
+                let cache_clone = Arc::clone(&cache);
+                let handle = thread::spawn(move || {
+                    let mut rng = thread_rng();
+
+                    for _ in 0..OPS_PER_THREAD {
+                        let key = rng.gen_range(0..1000);
+                        if rng.gen::<bool>() {
+                            black_box(cache_clone.insert(key, key));
+                        } else {
+                            black_box(cache_clone.get(&key));
+                        }
+                    }
+                });
+                handles.push(handle);
+            }
+
+            for handle in handles {
+                handle.join().unwrap();
+            }
+        });
+    });
+
+    group.finish();
+}
+
 criterion_group!(
     benches,
     bench_sequence,
     bench_composite,
-    bench_composite_normal
+    bench_composite_normal,
+    bench_concurrent_access
 );
 criterion_main!(benches);
diff --git a/src/lib.rs b/src/lib.rs
@@ -4,7 +4,10 @@ use std::borrow::Borrow;
 use std::hash::Hash;
 use std::{collections::HashMap, ptr::NonNull};
 
+mod sharded;
 mod sync;
+
+pub use sharded::ShardedSieveCache;
 pub use sync::SyncSieveCache;
 
 struct Node<K: Eq + Hash + Clone, V> {
diff --git a/src/sharded.rs b/src/sharded.rs