
Commit f3fd89d

Merge pull request #175 from jonhoo/reuse-raw-iter
2 parents 53644bd + 0ce28cb

4 files changed: +251 -11 lines changed

src/external_trait_impls/rayon/raw.rs

Lines changed: 7 additions & 1 deletion
@@ -1,5 +1,5 @@
 use crate::raw::Bucket;
-use crate::raw::{RawIterRange, RawTable};
+use crate::raw::{RawIter, RawIterRange, RawTable};
 use crate::scopeguard::guard;
 use alloc::alloc::dealloc;
 use core::marker::PhantomData;
@@ -15,6 +15,12 @@ pub struct RawParIter<T> {
     iter: RawIterRange<T>,
 }
 
+impl<T> From<RawIter<T>> for RawParIter<T> {
+    fn from(it: RawIter<T>) -> Self {
+        RawParIter { iter: it.iter }
+    }
+}
+
 impl<T> ParallelIterator for RawParIter<T> {
     type Item = Bucket<T>;
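This new `From` impl is the piece that lets a sequential `RawIter` be handed off to rayon and finished in parallel from its current position. A minimal sketch of the intent, assuming a build with the `raw` and `rayon` features; the `hashbrown::raw::rayon::RawParIter` import path is an assumption, not shown in this diff:

// Sketch: convert a sequential RawIter into a parallel RawParIter.
// Assumes the `raw` and `rayon` cargo features; the RawParIter path
// below is an assumption.
use hashbrown::raw::{rayon::RawParIter, RawTable};
use rayon::iter::ParallelIterator;

fn count_even(table: &RawTable<u64>) -> usize {
    // SAFETY: `table` outlives both iterators and is not mutated concurrently.
    let par: RawParIter<u64> = unsafe { table.iter() }.into();
    // Parallel iteration resumes from the RawIter's current position.
    par.map(|b| unsafe { b.read() }).filter(|&v| v % 2 == 0).count()
}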

src/map.rs

Lines changed: 70 additions & 0 deletions
@@ -3894,4 +3894,74 @@ mod test_map {
         assert!(m.raw_entry().from_hash(1, |k| k.0 == 1).is_some());
         assert!(m.raw_entry().from_hash(2, |k| k.0 == 2).is_none());
     }
+
+    #[test]
+    #[cfg(feature = "raw")]
+    fn test_into_iter_refresh() {
+        use core::hash::{BuildHasher, Hash, Hasher};
+
+        #[cfg(miri)]
+        const N: usize = 32;
+        #[cfg(not(miri))]
+        const N: usize = 128;
+
+        let mut rng = rand::thread_rng();
+        for n in 0..N {
+            let mut m = HashMap::new();
+            for i in 0..n {
+                assert!(m.insert(i, 2 * i).is_none());
+            }
+            let hasher = m.hasher().clone();
+
+            let mut it = unsafe { m.table.iter() };
+            assert_eq!(it.len(), n);
+
+            let mut i = 0;
+            let mut left = n;
+            let mut removed = Vec::new();
+            loop {
+                // occasionally remove some elements
+                if i < n && rng.gen_bool(0.1) {
+                    let mut hsh = hasher.build_hasher();
+                    i.hash(&mut hsh);
+                    let hash = hsh.finish();
+
+                    unsafe {
+                        let e = m.table.find(hash, |q| q.0.eq(&i));
+                        if let Some(e) = e {
+                            it.reflect_remove(&e);
+                            let t = m.table.remove(e);
+                            removed.push(t);
+                            left -= 1;
+                        } else {
+                            assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed);
+                            let e = m
+                                .table
+                                .insert(hash, (i, 2 * i), |x| super::make_hash(&hasher, &x.0));
+                            it.reflect_insert(&e);
+                            if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) {
+                                removed.swap_remove(p);
+                            }
+                            left += 1;
+                        }
+                    }
+                }
+
+                let e = it.next();
+                if e.is_none() {
+                    break;
+                }
+                assert!(i < n);
+                let t = unsafe { e.unwrap().as_ref() };
+                assert!(!removed.contains(t));
+                let (k, v) = t;
+                assert_eq!(*v, 2 * k);
+                i += 1;
+            }
+            assert!(i <= n);
+
+            // just for safety:
+            assert_eq!(m.table.len(), left);
+        }
+    }
 }
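The test computes each key's hash by hand before calling the raw `find`/`insert` methods. That BuildHasher → Hasher → finish sequence is the same one any caller of the raw API needs; distilled into a standalone, purely illustrative helper:

// Standalone illustration of the hashing pattern used in the test above.
// `hash_one` is a hypothetical helper, not part of this commit.
use core::hash::{BuildHasher, Hash, Hasher};
use std::collections::hash_map::RandomState;

fn hash_one<B: BuildHasher, K: Hash>(builder: &B, key: &K) -> u64 {
    let mut state = builder.build_hasher();
    key.hash(&mut state);
    state.finish()
}

fn main() {
    let s = RandomState::new();
    // The same builder must be used for inserts and lookups; otherwise the
    // computed hashes (and thus probe sequences) won't match.
    assert_eq!(hash_one(&s, &42u64), hash_one(&s, &42u64));
}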

src/raw/bitmask.rs

Lines changed: 14 additions & 0 deletions
@@ -25,6 +25,20 @@ impl BitMask {
         BitMask(self.0 ^ BITMASK_MASK)
     }
 
+    /// Flip the bit in the mask for the entry at the given index.
+    ///
+    /// Returns the bit's previous state.
+    #[inline]
+    #[allow(clippy::cast_ptr_alignment)]
+    #[cfg(feature = "raw")]
+    pub unsafe fn flip(&mut self, index: usize) -> bool {
+        // NOTE: The + BITMASK_STRIDE - 1 is to set the high bit.
+        let mask = 1 << (index * BITMASK_STRIDE + BITMASK_STRIDE - 1);
+        self.0 ^= mask;
+        // The bit was set if the bit is now 0.
+        self.0 & mask == 0
+    }
+
     /// Returns a new `BitMask` with the lowest bit removed.
     #[inline]
     #[must_use]
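The `+ BITMASK_STRIDE - 1` offset targets the high bit of each entry's slot in the mask. A standalone sketch of that arithmetic under the generic (non-SIMD) layout, where the stride is 8 bits per entry and the high bit of each byte marks a full bucket; the constant and values here are illustrative:

// Sketch of the flip arithmetic under the generic (non-SIMD) layout.
const BITMASK_STRIDE: usize = 8;

fn flip(bits: &mut u64, index: usize) -> bool {
    // index * 8 + 7 addresses the high bit of the index-th byte.
    let mask = 1u64 << (index * BITMASK_STRIDE + BITMASK_STRIDE - 1);
    *bits ^= mask;
    // After the XOR, the bit reads 0 exactly when it was previously 1.
    *bits & mask == 0
}

fn main() {
    let mut bits = 0x80u64; // entry 0 marked full
    assert!(flip(&mut bits, 0));  // was full; now cleared
    assert!(!flip(&mut bits, 1)); // was empty; now set
    assert_eq!(bits, 0x8000);
}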

src/raw/mod.rs

Lines changed: 160 additions & 10 deletions
@@ -1002,19 +1002,56 @@
     }
 
     /// Returns an iterator which removes all elements from the table without
-    /// freeing the memory. It is up to the caller to ensure that the `RawTable`
-    /// outlives the `RawDrain`. Because we cannot make the `next` method unsafe
-    /// on the `RawDrain`, we have to make the `drain` method unsafe.
+    /// freeing the memory.
+    ///
+    /// It is up to the caller to ensure that the `RawTable` outlives the `RawDrain`.
+    /// Because we cannot make the `next` method unsafe on the `RawDrain`,
+    /// we have to make the `drain` method unsafe.
     #[cfg_attr(feature = "inline-more", inline)]
     pub unsafe fn drain(&mut self) -> RawDrain<'_, T> {
+        let iter = self.iter();
+        self.drain_iter_from(iter)
+    }
+
+    /// Returns an iterator which removes all elements from the table without
+    /// freeing the memory.
+    ///
+    /// It is up to the caller to ensure that the `RawTable` outlives the `RawDrain`.
+    /// Because we cannot make the `next` method unsafe on the `RawDrain`,
+    /// we have to make the `drain` method unsafe.
+    ///
+    /// Iteration starts at the provided iterator's current location.
+    /// You must ensure that the iterator covers all items that remain in the table.
+    #[cfg_attr(feature = "inline-more", inline)]
+    pub unsafe fn drain_iter_from(&mut self, iter: RawIter<T>) -> RawDrain<'_, T> {
+        debug_assert_eq!(iter.len(), self.len());
         RawDrain {
-            iter: self.iter(),
+            iter,
             table: ManuallyDrop::new(mem::replace(self, Self::new())),
             orig_table: NonNull::from(self),
             marker: PhantomData,
         }
     }
 
+    /// Returns an iterator which consumes all elements from the table.
+    ///
+    /// It is up to the caller to ensure that the `RawTable` outlives the `RawIntoIter`.
+    /// Because we cannot make the `next` method unsafe on the `RawIntoIter`,
+    /// we have to make the `into_iter_from` method unsafe.
+    ///
+    /// Iteration starts at the provided iterator's current location.
+    /// You must ensure that the iterator covers all items that remain in the table.
+    pub unsafe fn into_iter_from(self, iter: RawIter<T>) -> RawIntoIter<T> {
+        debug_assert_eq!(iter.len(), self.len());
+
+        let alloc = self.into_alloc();
+        RawIntoIter {
+            iter,
+            alloc,
+            marker: PhantomData,
+        }
+    }
+
     /// Converts the table into a raw allocation. The contents of the table
     /// should be dropped using a `RawIter` before freeing the allocation.
     #[cfg_attr(feature = "inline-more", inline)]
@@ -1250,12 +1287,7 @@ impl<T> IntoIterator for RawTable<T> {
     fn into_iter(self) -> RawIntoIter<T> {
         unsafe {
             let iter = self.iter();
-            let alloc = self.into_alloc();
-            RawIntoIter {
-                iter,
-                alloc,
-                marker: PhantomData,
-            }
+            self.into_iter_from(iter)
         }
     }
 }
@@ -1408,6 +1440,124 @@ pub struct RawIter<T> {
     items: usize,
 }
 
+impl<T> RawIter<T> {
+    /// Refresh the iterator so that it reflects a removal from the given bucket.
+    ///
+    /// For the iterator to remain valid, this method must be called once
+    /// for each removed bucket before `next` is called again.
+    ///
+    /// This method should be called _before_ the removal is made. It is not necessary to call this
+    /// method if you are removing an item that this iterator yielded in the past.
+    #[cfg(feature = "raw")]
+    pub fn reflect_remove(&mut self, b: &Bucket<T>) {
+        self.reflect_toggle_full(b, false);
+    }
+
+    /// Refresh the iterator so that it reflects an insertion into the given bucket.
+    ///
+    /// For the iterator to remain valid, this method must be called once
+    /// for each insert before `next` is called again.
+    ///
+    /// This method does not guarantee that an insertion of a bucket with a greater
+    /// index than the last one yielded will be reflected in the iterator.
+    ///
+    /// This method should be called _after_ the given insert is made.
+    #[cfg(feature = "raw")]
+    pub fn reflect_insert(&mut self, b: &Bucket<T>) {
+        self.reflect_toggle_full(b, true);
+    }
+
+    /// Refresh the iterator so that it reflects a change to the state of the given bucket.
+    #[cfg(feature = "raw")]
+    fn reflect_toggle_full(&mut self, b: &Bucket<T>, is_insert: bool) {
+        unsafe {
+            if b.as_ptr() > self.iter.data.as_ptr() {
+                // The iterator has already passed the bucket's group.
+                // So the toggle isn't relevant to this iterator.
+                return;
+            }
+
+            if self.iter.next_ctrl < self.iter.end
+                && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr()
+            {
+                // The iterator has not yet reached the bucket's group.
+                // We don't need to reload anything, but we do need to adjust the item count.
+
+                if cfg!(debug_assertions) {
+                    // Double-check that the user isn't lying to us by checking the bucket state.
+                    // To do that, we need to find its control byte. We know that self.iter.data is
+                    // at self.iter.next_ctrl - Group::WIDTH, so we work from there:
+                    let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr());
+                    let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset);
+                    // This method should be called _before_ a removal, or _after_ an insert,
+                    // so in both cases the ctrl byte should indicate that the bucket is full.
+                    assert!(is_full(*ctrl));
+                }
+
+                if is_insert {
+                    self.items += 1;
+                } else {
+                    self.items -= 1;
+                }
+
+                return;
+            }
+
+            // The iterator is at the bucket group that the toggled bucket is in.
+            // We need to do two things:
+            //
+            //  - Determine if the iterator already yielded the toggled bucket.
+            //    If it did, we're done.
+            //  - Otherwise, update the iterator's cached group so that it won't
+            //    yield a to-be-removed bucket, or _will_ yield a to-be-added bucket.
+            //    We'll also need to update the item count accordingly.
+            if let Some(index) = self.iter.current_group.lowest_set_bit() {
+                let next_bucket = self.iter.data.next_n(index);
+                if b.as_ptr() > next_bucket.as_ptr() {
+                    // The toggled bucket is "before" the bucket the iterator would yield next. We
+                    // therefore don't need to do anything --- the iterator has already passed the
+                    // bucket in question.
+                    //
+                    // The item count must already be correct, since a removal or insert "prior" to
+                    // the iterator's position wouldn't affect the item count.
+                } else {
+                    // The removed bucket is an upcoming bucket. We need to make sure it does _not_
+                    // get yielded, and also that it's no longer included in the item count.
+                    //
+                    // NOTE: We can't just reload the group here, both since that might reflect
+                    // inserts we've already passed, and because that might inadvertently unset the
+                    // bits for _other_ removals. If we do that, we'd have to also decrement the
+                    // item count for those other bits that we unset. But the presumably subsequent
+                    // call to reflect for those buckets might _also_ decrement the item count.
+                    // Instead, we _just_ flip the bit for the particular bucket the caller asked
+                    // us to reflect.
+                    let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr());
+                    let was_full = self.iter.current_group.flip(our_bit);
+                    debug_assert_ne!(was_full, is_insert);
+
+                    if is_insert {
+                        self.items += 1;
+                    } else {
+                        self.items -= 1;
+                    }
+
+                    if cfg!(debug_assertions) {
+                        if b.as_ptr() == next_bucket.as_ptr() {
+                            // The removed bucket should no longer be next
+                            debug_assert_ne!(self.iter.current_group.lowest_set_bit(), Some(index));
+                        } else {
+                            // We should not have changed what bucket comes next.
+                            debug_assert_eq!(self.iter.current_group.lowest_set_bit(), Some(index));
+                        }
+                    }
+                }
+            } else {
+                // We must have already iterated past the removed item.
+            }
+        }
+    }
+}
+
 impl<T> Clone for RawIter<T> {
     #[cfg_attr(feature = "inline-more", inline)]
     fn clone(&self) -> Self {
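Together, `reflect_remove`/`reflect_insert` and `drain_iter_from` let a caller remove entries mid-iteration and then drain whatever remains through the same iterator, without restarting from the front. A sketch under the `raw` feature; the helper function and its key/value types are illustrative, not part of this commit:

// Sketch (requires the `raw` cargo feature): remove one entry mid-iteration,
// then drain the rest through the same iterator. `drain_all_but` is a
// hypothetical helper.
use hashbrown::raw::RawTable;

unsafe fn drain_all_but(table: &mut RawTable<(u64, u64)>, hash: u64, key: u64) -> Vec<(u64, u64)> {
    let mut iter = table.iter();
    if let Some(bucket) = table.find(hash, |&(k, _)| k == key) {
        // The removal must be reflected *before* it happens, or the iterator
        // (and drain_iter_from's length check) becomes invalid.
        iter.reflect_remove(&bucket);
        table.remove(bucket);
    }
    // `iter` still covers every item left in the table, satisfying the
    // debug_assert_eq!(iter.len(), self.len()) in drain_iter_from.
    table.drain_iter_from(iter).collect()
}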
