Skip to content

Commit 576ca1e

Browse files
committed
Merge branch 'reuse-raw-iter'
2 parents 611daf0 + 3da7807 commit 576ca1e

File tree

13 files changed

+583
-165
lines changed

13 files changed

+583
-165
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ keywords = ["hash", "no_std", "hashmap", "amortized"]
1313
categories = ["data-structures", "no-std"]
1414

1515
[badges]
16-
azure-devops = { project = "jonhoo/jonhoo", pipeline = "arrav", build = "26" }
16+
azure-devops = { project = "jonhoo/jonhoo", pipeline = "griddle", build = "26" }
1717
codecov = { repository = "jonhoo/griddle", branch = "master", service = "github" }
1818
maintenance = { status = "experimental" }
1919

2020
[dependencies]
21-
hashbrown = { version = "0.8", default-features = false, features = ["raw"] }
21+
hashbrown = { version = "0.8.1", default-features = false, features = ["raw"] }
2222

2323
# For external trait impls
2424
rayon_ = { version = "1.0", optional = true, package = "rayon" }

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pauses at all.
2323
Where resizing becomes a problem is in applications that use maps to
2424
keep ever-growing state where tail latency is important. At large scale,
2525
it is simply not okay for one map insert to take 30 milliseconds when
26-
most take single-digit **micro**seconds. Worse yet, these resize pauses
26+
most take below a **micro**second. Worse yet, these resize pauses
2727
can compound to create [significant spikes] in tail latency.
2828

2929
This crate implements a technique referred to as "incremental resizing",
@@ -62,9 +62,9 @@ just runs lots of inserts back-to-back, and measures how long each one
6262
takes. The problem quickly becomes apparent:
6363

6464
```console
65-
$ cargo bench --bench vroom > data.txt
66-
hashbrown::HashMap max: 25.481194ms, mean: 1.349µs
67-
griddle::HashMap max: 1.700794ms, mean: 1.362µs
65+
$ cargo bench --bench vroom > vroom.dat
66+
hashbrown::HashMap max: 38.335088ms, mean: 94ns
67+
griddle::HashMap max: 1.846561ms, mean: 126ns
6868
```
6969

7070
You can see that the standard library implementation (through

benches/vroom.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,16 @@ use griddle::HashMap as IncrHashMap;
22
use hashbrown::HashMap;
33
use std::time::{Duration, Instant};
44

5-
const N: u32 = 1 << 21;
5+
const N: u32 = 1 << 22;
66

77
fn main() {
88
let mut hm = HashMap::new();
9-
let mut t = Instant::now();
109
let mut mx = 0.0f64;
1110
let mut sum = Duration::new(0, 0);
1211
for i in 0..N {
12+
let t = Instant::now();
1313
hm.insert(i, i);
14-
let t2 = Instant::now();
15-
let took = t2.duration_since(t);
16-
t = t2;
14+
let took = t.elapsed();
1715
mx = mx.max(took.as_secs_f64());
1816
sum += took;
1917
println!("{} hashbrown {} ms", i, took.as_secs_f64() * 1000.0);
@@ -25,14 +23,12 @@ fn main() {
2523
);
2624

2725
let mut hm = IncrHashMap::new();
28-
let mut t = Instant::now();
2926
let mut mx = 0.0f64;
3027
let mut sum = Duration::new(0, 0);
3128
for i in 0..N {
29+
let t = Instant::now();
3230
hm.insert(i, i);
33-
let t2 = Instant::now();
34-
let took = t2.duration_since(t);
35-
t = t2;
31+
let took = t.elapsed();
3632
mx = mx.max(took.as_secs_f64());
3733
sum += took;
3834
println!("{} griddle {} ms", i, took.as_secs_f64() * 1000.0);

codecov.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ coverage:
88
default:
99
threshold: 1%
1010

11-
# Tests and Java files aren't important for coverage
11+
# Tests aren't important for coverage
1212
ignore:
1313
- "tests"
14-
- "jsr166"
1514

1615
# Make less noisy comments
1716
comment:

misc/vroom.plt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ set output 'vroom.png'
66
set title "Insertion time"
77
set xlabel "Index"
88
set ylabel "Latency [ms]"
9-
set xrange [0:2e6]
9+
set xrange [0:4194304]
1010

1111
set style line 1 lc rgb '#1b9e77'
1212
set style line 2 lc rgb '#d95f02'

misc/vroom.png

2.88 KB
Loading

src/external_trait_impls/rayon/raw.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl<T> RawTable<T> {
5353
pub unsafe fn par_iter(&self) -> RawParIter<T> {
5454
RawParIter {
5555
iter: self.main().par_iter(),
56-
leftovers: self.leftovers().map(|t| t.par_iter()),
56+
leftovers: self.leftovers().map(|t| t.iter().into()),
5757
}
5858
}
5959
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#![no_std]
5454
#![warn(missing_docs)]
5555
#![warn(rust_2018_idioms)]
56+
#![warn(rustdoc)]
5657

5758
#[cfg(test)]
5859
#[macro_use]

src/map.rs

Lines changed: 175 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::raw::{Bucket, RawIntoIter, RawIter, RawTable};
1+
use crate::raw::{Bucket, RawDrain, RawIntoIter, RawIter, RawTable};
22
use crate::TryReserveError;
33
use core::borrow::Borrow;
44
use core::fmt::{self, Debug};
@@ -189,19 +189,27 @@ pub struct HashMap<K, V, S = DefaultHashBuilder> {
189189
pub(crate) table: RawTable<(K, V)>,
190190
}
191191

192-
impl<K: Clone, V: Clone, S: Clone> Clone for HashMap<K, V, S> {
192+
impl<K: Clone + Hash, V: Clone, S: Clone + BuildHasher> Clone for HashMap<K, V, S> {
193193
fn clone(&self) -> Self {
194+
let hash_builder = self.hash_builder.clone();
195+
let table = self
196+
.table
197+
.clone_with_hasher(|x| make_hash(&hash_builder, &x.0));
194198
HashMap {
195-
hash_builder: self.hash_builder.clone(),
196-
table: self.table.clone(),
199+
hash_builder,
200+
table,
197201
}
198202
}
199203

200204
fn clone_from(&mut self, source: &Self) {
201-
self.table.clone_from(&source.table);
205+
// NOTE: Since we may re-hash leftovers on clone, we need the new hash builder straight
206+
// away, unlike hashbrown which can get away with just cloning after. We don't want to
207+
// change self.hash_builder yet though, in case the cloning panics.
208+
let hash_builder = source.hash_builder.clone();
202209

203-
// Update hash_builder only if we successfully cloned all elements.
204-
self.hash_builder.clone_from(&source.hash_builder);
210+
self.table
211+
.clone_from_with_hasher(&source.table, |x| make_hash(&hash_builder, &x.0));
212+
self.hash_builder = hash_builder;
205213
}
206214
}
207215

@@ -531,6 +539,35 @@ impl<K, V, S> HashMap<K, V, S> {
531539
self.len() == 0
532540
}
533541

542+
/// Clears the map, returning all key-value pairs as an iterator. Keeps the
543+
/// allocated memory for reuse.
544+
///
545+
/// # Examples
546+
///
547+
/// ```
548+
/// use griddle::HashMap;
549+
///
550+
/// let mut a = HashMap::new();
551+
/// a.insert(1, "a");
552+
/// a.insert(2, "b");
553+
///
554+
/// for (k, v) in a.drain().take(1) {
555+
/// assert!(k == 1 || k == 2);
556+
/// assert!(v == "a" || v == "b");
557+
/// }
558+
///
559+
/// assert!(a.is_empty());
560+
/// ```
561+
#[cfg_attr(feature = "inline-more", inline)]
562+
pub fn drain(&mut self) -> Drain<'_, K, V> {
563+
// Here we tie the lifetime of self to the iter.
564+
unsafe {
565+
Drain {
566+
inner: self.table.drain(),
567+
}
568+
}
569+
}
570+
534571
/// Retains only the elements specified by the predicate.
535572
///
536573
/// In other words, remove all pairs `(k, v)` such that `f(&k,&mut v)` returns `false`.
@@ -553,11 +590,7 @@ impl<K, V, S> HashMap<K, V, S> {
553590
for item in self.table.iter() {
554591
let &mut (ref key, ref mut value) = item.as_mut();
555592
if !f(key, value) {
556-
// Erase the element from the table first since drop might panic.
557-
// But read it before the erase in case erase invalidates the memory.
558-
let v = item.read();
559-
self.table.erase_no_drop(&item);
560-
drop(v);
593+
self.table.erase(item);
561594
}
562595
}
563596
}
@@ -994,10 +1027,7 @@ where
9941027
unsafe {
9951028
let hash = make_hash(&self.hash_builder, &k);
9961029
if let Some(item) = self.table.find(hash, |x| k.eq(x.0.borrow())) {
997-
// Read the item before the erase, in case erase reclaims memory.
998-
let v = item.read();
999-
self.table.erase_no_drop(&item);
1000-
Some(v)
1030+
Some(self.table.remove(item))
10011031
} else {
10021032
None
10031033
}
@@ -1203,6 +1233,28 @@ impl<K, V: Debug> fmt::Debug for Values<'_, K, V> {
12031233
}
12041234
}
12051235

1236+
/// A draining iterator over the entries of a `HashMap`.
1237+
///
1238+
/// This `struct` is created by the [`drain`] method on [`HashMap`]. See its
1239+
/// documentation for more.
1240+
///
1241+
/// [`drain`]: struct.HashMap.html#method.drain
1242+
/// [`HashMap`]: struct.HashMap.html
1243+
pub struct Drain<'a, K, V> {
1244+
inner: RawDrain<'a, (K, V)>,
1245+
}
1246+
1247+
impl<K, V> Drain<'_, K, V> {
1248+
/// Returns a iterator of references over the remaining items.
1249+
#[cfg_attr(feature = "inline-more", inline)]
1250+
pub(super) fn iter(&self) -> Iter<'_, K, V> {
1251+
Iter {
1252+
inner: self.inner.iter(),
1253+
marker: PhantomData,
1254+
}
1255+
}
1256+
}
1257+
12061258
/// A mutable iterator over the values of a `HashMap`.
12071259
///
12081260
/// This `struct` is created by the [`values_mut`] method on [`HashMap`]. See its
@@ -1489,6 +1541,36 @@ where
14891541
}
14901542
}
14911543

1544+
impl<'a, K, V> Iterator for Drain<'a, K, V> {
1545+
type Item = (K, V);
1546+
1547+
#[cfg_attr(feature = "inline-more", inline)]
1548+
fn next(&mut self) -> Option<(K, V)> {
1549+
self.inner.next()
1550+
}
1551+
#[cfg_attr(feature = "inline-more", inline)]
1552+
fn size_hint(&self) -> (usize, Option<usize>) {
1553+
self.inner.size_hint()
1554+
}
1555+
}
1556+
impl<K, V> ExactSizeIterator for Drain<'_, K, V> {
1557+
#[cfg_attr(feature = "inline-more", inline)]
1558+
fn len(&self) -> usize {
1559+
self.inner.len()
1560+
}
1561+
}
1562+
impl<K, V> FusedIterator for Drain<'_, K, V> {}
1563+
1564+
impl<K, V> fmt::Debug for Drain<'_, K, V>
1565+
where
1566+
K: fmt::Debug,
1567+
V: fmt::Debug,
1568+
{
1569+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1570+
f.debug_list().entries(self.iter()).finish()
1571+
}
1572+
}
1573+
14921574
impl<'a, K, V, S> Entry<'a, K, V, S> {
14931575
/// Sets the value of the entry, and returns an OccupiedEntry.
14941576
///
@@ -1719,12 +1801,7 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> {
17191801
/// ```
17201802
#[cfg_attr(feature = "inline-more", inline)]
17211803
pub fn remove_entry(self) -> (K, V) {
1722-
unsafe {
1723-
// Read the item before the erase, in case erase reclaims memory.
1724-
let v = self.elem.read();
1725-
self.table.table.erase_no_drop(&self.elem);
1726-
v
1727-
}
1804+
unsafe { self.table.table.remove(self.elem) }
17281805
}
17291806

17301807
/// Gets a reference to the value in the entry.
@@ -2084,6 +2161,11 @@ fn assert_covariance() {
20842161
fn values_val<'a, 'new>(v: Values<'a, u8, &'static str>) -> Values<'a, u8, &'new str> {
20852162
v
20862163
}
2164+
fn drain<'new>(
2165+
d: Drain<'static, &'static str, &'static str>,
2166+
) -> Drain<'new, &'new str, &'new str> {
2167+
d
2168+
}
20872169
}
20882170

20892171
#[cfg(test)]
@@ -2421,6 +2503,7 @@ mod test_map {
24212503
#[test]
24222504
fn test_empty_iter() {
24232505
let mut m: HashMap<i32, bool> = HashMap::new();
2506+
assert_eq!(m.drain().next(), None);
24242507
assert_eq!(m.keys().next(), None);
24252508
assert_eq!(m.values().next(), None);
24262509
assert_eq!(m.values_mut().next(), None);
@@ -3094,4 +3177,74 @@ mod test_map {
30943177
}
30953178
}
30963179
}
3180+
3181+
#[test]
3182+
#[cfg(feature = "raw")]
3183+
fn test_into_iter_refresh() {
3184+
use core::hash::{BuildHasher, Hash, Hasher};
3185+
3186+
#[cfg(miri)]
3187+
const N: usize = 32;
3188+
#[cfg(not(miri))]
3189+
const N: usize = 128;
3190+
3191+
let mut rng = rand::thread_rng();
3192+
for n in 0..N {
3193+
let mut m = HashMap::new();
3194+
for i in 0..n {
3195+
assert!(m.insert(i, 2 * i).is_none());
3196+
}
3197+
let hasher = m.hasher().clone();
3198+
3199+
let mut it = unsafe { m.table.iter() };
3200+
assert_eq!(it.len(), n);
3201+
3202+
let mut i = 0;
3203+
let mut left = n;
3204+
let mut removed = Vec::new();
3205+
loop {
3206+
// occasionally remove some elements
3207+
if i < n && rng.gen_bool(0.1) {
3208+
let mut hsh = hasher.build_hasher();
3209+
i.hash(&mut hsh);
3210+
let hash = hsh.finish();
3211+
3212+
unsafe {
3213+
let e = m.table.find(hash, |q| q.0.eq(&i));
3214+
if let Some(e) = e {
3215+
it.reflect_remove(&e);
3216+
let t = m.table.remove(e);
3217+
removed.push(t);
3218+
left -= 1;
3219+
} else {
3220+
assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed);
3221+
let e = m
3222+
.table
3223+
.insert(hash, (i, 2 * i), |x| super::make_hash(&hasher, &x.0));
3224+
it.reflect_insert(&e);
3225+
if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) {
3226+
removed.swap_remove(p);
3227+
}
3228+
left += 1;
3229+
}
3230+
}
3231+
}
3232+
3233+
let e = it.next();
3234+
if e.is_none() {
3235+
break;
3236+
}
3237+
assert!(i < n);
3238+
let t = unsafe { e.unwrap().as_ref() };
3239+
assert!(!removed.contains(t));
3240+
let (k, v) = t;
3241+
assert_eq!(*v, 2 * k);
3242+
i += 1;
3243+
}
3244+
assert!(i <= n);
3245+
3246+
// just for safety:
3247+
assert_eq!(m.table.len(), left);
3248+
}
3249+
}
30973250
}

0 commit comments

Comments
 (0)