Skip to content

Commit 37e519a

Browse files
authored
Merge pull request #308 from cuviper/extract_if
Add map and set `extract_if`
2 parents 4d7618f + b19d84e commit 37e519a

File tree

7 files changed

+360
-5
lines changed

7 files changed

+360
-5
lines changed

src/map.rs

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ mod tests;
1616
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
1717
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
1818
pub use self::iter::{
19-
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice, Values, ValuesMut,
19+
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice,
20+
Values, ValuesMut,
2021
};
2122
pub use self::mutable::MutableEntryKey;
2223
pub use self::mutable::MutableKeys;
@@ -36,7 +37,7 @@ use alloc::vec::Vec;
3637
#[cfg(feature = "std")]
3738
use std::collections::hash_map::RandomState;
3839

39-
use self::core::IndexMapCore;
40+
pub(crate) use self::core::{ExtractCore, IndexMapCore};
4041
use crate::util::{third, try_simplify_range};
4142
use crate::{Bucket, Equivalent, GetDisjointMutError, HashValue, TryReserveError};
4243

@@ -303,6 +304,55 @@ impl<K, V, S> IndexMap<K, V, S> {
303304
Drain::new(self.core.drain(range))
304305
}
305306

307+
/// Creates an iterator which uses a closure to determine if an element should be removed,
308+
/// for all elements in the given range.
309+
///
310+
/// If the closure returns true, the element is removed from the map and yielded.
311+
/// If the closure returns false, or panics, the element remains in the map and will not be
312+
/// yielded.
313+
///
/// Elements that remain in the map keep their relative order.
///
314+
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
315+
/// whether you choose to keep or remove it.
316+
///
317+
/// The range may be any type that implements [`RangeBounds<usize>`],
318+
/// including all of the `std::ops::Range*` types, or even a tuple pair of
319+
/// `Bound` start and end values. To check the entire map, use `RangeFull`
320+
/// like `map.extract_if(.., predicate)`.
321+
///
322+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
323+
/// or the iteration short-circuits, then the remaining elements will be retained.
324+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
325+
///
326+
/// [`retain`]: IndexMap::retain
327+
///
328+
/// ***Panics*** if the starting point is greater than the end point or if
329+
/// the end point is greater than the length of the map.
330+
///
331+
/// # Examples
332+
///
333+
/// Splitting a map into even and odd keys, reusing the original map:
334+
///
335+
/// ```
336+
/// use indexmap::IndexMap;
337+
///
338+
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
339+
/// let extracted: IndexMap<i32, i32> = map.extract_if(.., |k, _v| k % 2 == 0).collect();
340+
///
341+
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
342+
/// let odds = map.keys().copied().collect::<Vec<_>>();
343+
///
344+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
345+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
346+
/// ```
347+
#[track_caller]
348+
pub fn extract_if<F, R>(&mut self, range: R, pred: F) -> ExtractIf<'_, K, V, F>
349+
where
350+
F: FnMut(&K, &mut V) -> bool,
351+
R: RangeBounds<usize>,
352+
{
353+
// The range is resolved eagerly while constructing the iterator (via
// `IndexMapCore::extract`), not lazily on the first call to `next`.
ExtractIf::new(&mut self.core, range, pred)
354+
}
355+
306356
/// Splits the collection into two at the given index.
307357
///
308358
/// Returns a newly allocated map containing the elements in the range

src/map/core.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//! However, we should probably not let this show in the public API or docs.
99
1010
mod entry;
11+
mod extract;
1112

1213
pub mod raw_entry_v1;
1314

@@ -23,6 +24,7 @@ type Indices = hash_table::HashTable<usize>;
2324
type Entries<K, V> = Vec<Bucket<K, V>>;
2425

2526
pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
27+
pub(crate) use extract::ExtractCore;
2628

2729
/// Core of the map that does not depend on S
2830
#[derive(Debug)]
@@ -157,6 +159,7 @@ impl<K, V> IndexMapCore<K, V> {
157159

158160
/// Returns the number of entries, as reported by the index table.
#[inline]
159161
pub(crate) fn len(&self) -> usize {
162+
// In debug builds, check that the entries vector and the index table agree on length.
debug_assert_eq!(self.entries.len(), self.indices.len());
160163
self.indices.len()
161164
}
162165

src/map/core/extract.rs

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#![allow(unsafe_code)]
2+
3+
use super::{Bucket, IndexMapCore};
4+
use crate::util::simplify_range;
5+
6+
use core::ops::RangeBounds;
7+
8+
impl<K, V> IndexMapCore<K, V> {
9+
/// Starts an in-place extraction over `range` and returns the cursor that drives it.
///
/// The range is resolved against the current number of entries by `simplify_range`.
/// The entries vector then has its *length* (not its contents) cut back to `range.start`,
/// and the returned [`ExtractCore`] starts with both of its cursors at `range.start`.
/// Its `Drop` impl sets the final length and reindexes if anything was removed.
///
/// Panics if the entries vector and the index table disagree on length.
#[track_caller]
10+
pub(crate) fn extract<R>(&mut self, range: R) -> ExtractCore<'_, K, V>
11+
where
12+
R: RangeBounds<usize>,
13+
{
14+
let range = simplify_range(range, self.entries.len());
15+
16+
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
17+
// Then the worst `set_len` can do is leak items if `ExtractCore` doesn't drop.
18+
assert_eq!(self.entries.len(), self.indices.len());
19+
unsafe {
20+
self.entries.set_len(range.start);
21+
}
22+
ExtractCore {
23+
map: self,
24+
new_len: range.start,
25+
current: range.start,
26+
end: range.end,
27+
}
28+
}
29+
}
30+
31+
/// Cursor state for an in-progress extraction from an `IndexMapCore`.
///
/// Created by `IndexMapCore::extract`. While it is alive, the entries vector's length has
/// been set down to the start of the range; the `Drop` impl sets the final length.
pub(crate) struct ExtractCore<'a, K, V> {
32+
// The map being extracted from, borrowed mutably for the cursor's lifetime.
map: &'a mut IndexMapCore<K, V>,
33+
// Write cursor: `entries[..new_len]` are the items kept so far.
new_len: usize,
34+
// Read cursor: index of the next item to hand to the predicate.
current: usize,
35+
// Exclusive end of the range being examined.
end: usize,
36+
}
37+
38+
impl<K, V> Drop for ExtractCore<'_, K, V> {
39+
/// Closes the gap left by extracted items, sets the final length of the entries vector,
/// and rebuilds the hash table if the number of entries changed.
fn drop(&mut self) {
40+
// `extract` asserted that `indices.len()` equalled the entry count before it changed
// the entries length, and nothing in this file touches `indices` during extraction,
// so this is taken as the original number of entries.
let old_len = self.map.indices.len();
41+
let mut new_len = self.new_len;
42+
43+
debug_assert!(new_len <= self.current);
44+
debug_assert!(self.current <= self.end);
45+
debug_assert!(self.current <= old_len);
46+
debug_assert!(old_len <= self.map.entries.capacity());
47+
48+
// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
49+
// So `entries[new_len..current]` were extracted, but the rest before and after are valid.
50+
unsafe {
51+
if new_len == self.current {
52+
// Nothing was extracted, so any remaining items can be left in place.
53+
new_len = old_len;
54+
} else if self.current < old_len {
55+
// Need to shift the remaining items down.
56+
let tail_len = old_len - self.current;
57+
let base = self.map.entries.as_mut_ptr();
58+
let src = base.add(self.current);
59+
let dest = base.add(new_len);
60+
// `copy_to` permits overlapping source and destination, which can happen here
// when the tail is longer than the gap.
src.copy_to(dest, tail_len);
61+
new_len += tail_len;
62+
}
63+
// Implicit remaining case: items were extracted and `current == old_len`, so there
// is no tail to move and `new_len` is already the final length.
self.map.entries.set_len(new_len);
64+
}
65+
66+
if new_len != old_len {
67+
// We don't keep track of *which* items were extracted, so reindex everything.
68+
self.map.rebuild_hash_table();
69+
}
70+
}
71+
}
72+
73+
impl<K, V> ExtractCore<'_, K, V> {
74+
/// Advances through the range until `pred` accepts a bucket, and returns that bucket.
///
/// Each bucket at `current` is passed to `pred` by mutable reference. If `pred` returns
/// true, the bucket is moved out and returned. Otherwise it is kept, being copied down to
/// index `new_len` if earlier extractions left a gap. Returns `None` once `current`
/// reaches `end`.
///
/// If `pred` panics, neither cursor has been advanced for that bucket, so it remains
/// among the items at `current..` that `Drop` keeps.
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
75+
where
76+
F: FnMut(&mut Bucket<K, V>) -> bool,
77+
{
78+
debug_assert!(self.end <= self.map.entries.capacity());
79+
80+
let base = self.map.entries.as_mut_ptr();
81+
while self.current < self.end {
82+
// SAFETY: We're maintaining both indices within bounds of the original entries, so
83+
// 0..new_len and current..indices.len() are always valid items for our Drop to keep.
84+
unsafe {
85+
let item = base.add(self.current);
86+
if pred(&mut *item) {
87+
// Extract it!
88+
// Advance past the slot before reading it out, so the slot falls into the
// `new_len..current` gap.
self.current += 1;
89+
return Some(item.read());
90+
} else {
91+
// Keep it, shifting it down if needed.
92+
if self.new_len != self.current {
93+
debug_assert!(self.new_len < self.current);
94+
let dest = base.add(self.new_len);
95+
// Source and destination are distinct single slots (`new_len < current`),
// so the non-overlapping copy applies.
item.copy_to_nonoverlapping(dest, 1);
96+
}
97+
self.current += 1;
98+
self.new_len += 1;
99+
}
100+
}
101+
}
102+
None
103+
}
104+
105+
/// Returns how many items in the range have not yet been passed to the predicate.
pub(crate) fn remaining(&self) -> usize {
106+
self.end - self.current
107+
}
108+
}

src/map/iter.rs

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::core::IndexMapCore;
2-
use super::{Bucket, IndexMap, Slice};
1+
use super::{Bucket, ExtractCore, IndexMap, IndexMapCore, Slice};
32

43
use alloc::vec::{self, Vec};
54
use core::fmt;
@@ -774,3 +773,58 @@ where
774773
.finish()
775774
}
776775
}
776+
777+
/// An extracting iterator for `IndexMap`.
778+
///
779+
/// This `struct` is created by [`IndexMap::extract_if()`].
780+
/// See its documentation for more.
781+
pub struct ExtractIf<'a, K, V, F> {
782+
// Core cursor that performs the in-place extraction on the map's entries.
inner: ExtractCore<'a, K, V>,
783+
// User predicate; called with each key and a mutable reference to its value.
pred: F,
784+
}
785+
786+
impl<K, V, F> ExtractIf<'_, K, V, F> {
787+
/// Builds the iterator by starting an extraction over `range` on `core` and pairing the
/// resulting cursor with `pred`.
///
/// `core.extract(range)` runs immediately, so the range is resolved at construction time.
#[track_caller]
788+
pub(super) fn new<R>(core: &mut IndexMapCore<K, V>, range: R, pred: F) -> ExtractIf<'_, K, V, F>
789+
where
790+
R: RangeBounds<usize>,
791+
F: FnMut(&K, &mut V) -> bool,
792+
{
793+
ExtractIf {
794+
inner: core.extract(range),
795+
pred,
796+
}
797+
}
798+
}
799+
800+
impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
801+
where
802+
F: FnMut(&K, &mut V) -> bool,
803+
{
804+
type Item = (K, V);
805+
806+
/// Returns the next `(key, value)` pair for which the predicate returns true, or `None`
/// once the range has been fully examined.
fn next(&mut self) -> Option<Self::Item> {
807+
self.inner
808+
.extract_if(|bucket| {
809+
// Adapt the bucket-level callback to the user's `(&K, &mut V)` predicate.
let (key, value) = bucket.ref_mut();
810+
(self.pred)(key, value)
811+
})
812+
.map(Bucket::key_value)
813+
}
814+
815+
/// The lower bound is 0 because the predicate may reject every remaining item; the upper
/// bound is the number of items not yet examined.
fn size_hint(&self) -> (usize, Option<usize>) {
816+
(0, Some(self.inner.remaining()))
817+
}
818+
}
819+
820+
// Fused: `next` returns `None` only once the core cursor has reached the end of the range,
// and the cursor never moves backwards, so later calls keep returning `None`.
impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}
821+
822+
impl<K, V, F> fmt::Debug for ExtractIf<'_, K, V, F>
823+
where
824+
K: fmt::Debug,
825+
V: fmt::Debug,
826+
{
827+
/// Formats as an opaque `ExtractIf` struct; no fields are printed.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
828+
f.debug_struct("ExtractIf").finish_non_exhaustive()
829+
}
830+
}

src/set.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ mod slice;
88
mod tests;
99

1010
pub use self::iter::{
11-
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
11+
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
1212
};
1313
pub use self::mutable::MutableValues;
1414
pub use self::slice::Slice;
@@ -249,6 +249,52 @@ impl<T, S> IndexSet<T, S> {
249249
Drain::new(self.map.core.drain(range))
250250
}
251251

252+
/// Creates an iterator which uses a closure to determine if a value should be removed,
253+
/// for all values in the given range.
254+
///
255+
/// If the closure returns true, then the value is removed and yielded.
256+
/// If the closure returns false, or panics, the value remains in the set and will not be
257+
/// yielded by the iterator.
258+
///
259+
/// The range may be any type that implements [`RangeBounds<usize>`],
260+
/// including all of the `std::ops::Range*` types, or even a tuple pair of
261+
/// `Bound` start and end values. To check the entire set, use `RangeFull`
262+
/// like `set.extract_if(.., predicate)`.
263+
///
264+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
265+
/// or the iteration short-circuits, then the remaining elements will be retained.
266+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
267+
///
268+
/// [`retain`]: IndexSet::retain
269+
///
270+
/// ***Panics*** if the starting point is greater than the end point or if
271+
/// the end point is greater than the length of the set.
272+
///
273+
/// # Examples
274+
///
275+
/// Splitting a set into even and odd values, reusing the original set:
276+
///
277+
/// ```
278+
/// use indexmap::IndexSet;
279+
///
280+
/// let mut set: IndexSet<i32> = (0..8).collect();
281+
/// let extracted: IndexSet<i32> = set.extract_if(.., |v| v % 2 == 0).collect();
282+
///
283+
/// let evens = extracted.into_iter().collect::<Vec<_>>();
284+
/// let odds = set.into_iter().collect::<Vec<_>>();
285+
///
286+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
287+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
288+
/// ```
289+
#[track_caller]
290+
pub fn extract_if<F, R>(&mut self, range: R, pred: F) -> ExtractIf<'_, T, F>
291+
where
292+
F: FnMut(&T) -> bool,
293+
R: RangeBounds<usize>,
294+
{
295+
// The set is backed by a map with unit values, so extraction runs on that map's core.
ExtractIf::new(&mut self.map.core, range, pred)
296+
}
297+
252298
/// Splits the collection into two at the given index.
253299
///
254300
/// Returns a newly allocated set containing the elements in the range

src/set/iter.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::map::{ExtractCore, IndexMapCore};
2+
13
use super::{Bucket, IndexSet, Slice};
24

35
use alloc::vec::{self, Vec};
@@ -626,3 +628,54 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
626628
fmt::Debug::fmt(&self.0, f)
627629
}
628630
}
631+
632+
/// An extracting iterator for `IndexSet`.
633+
///
634+
/// This `struct` is created by [`IndexSet::extract_if()`].
635+
/// See its documentation for more.
636+
pub struct ExtractIf<'a, T, F> {
637+
// Core cursor over the backing map, whose values are the unit type.
inner: ExtractCore<'a, T, ()>,
638+
// User predicate; called with a shared reference to each value of the set.
pred: F,
639+
}
640+
641+
impl<T, F> ExtractIf<'_, T, F> {
642+
/// Builds the iterator by starting an extraction over `range` on `core` and pairing the
/// resulting cursor with `pred`.
///
/// `core.extract(range)` runs immediately, so the range is resolved at construction time.
#[track_caller]
643+
pub(super) fn new<R>(core: &mut IndexMapCore<T, ()>, range: R, pred: F) -> ExtractIf<'_, T, F>
644+
where
645+
R: RangeBounds<usize>,
646+
F: FnMut(&T) -> bool,
647+
{
648+
ExtractIf {
649+
inner: core.extract(range),
650+
pred,
651+
}
652+
}
653+
}
654+
655+
impl<T, F> Iterator for ExtractIf<'_, T, F>
656+
where
657+
F: FnMut(&T) -> bool,
658+
{
659+
type Item = T;
660+
661+
/// Returns the next value for which the predicate returns true, or `None` once the range
/// has been fully examined.
fn next(&mut self) -> Option<Self::Item> {
662+
self.inner
663+
// Only the bucket's key is shown to the predicate; the set's value type is `()`.
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
664+
.map(Bucket::key)
665+
}
666+
667+
/// The lower bound is 0 because the predicate may reject every remaining item; the upper
/// bound is the number of items not yet examined.
fn size_hint(&self) -> (usize, Option<usize>) {
668+
(0, Some(self.inner.remaining()))
669+
}
670+
}
671+
672+
// Fused: `next` returns `None` only once the core cursor has reached the end of the range,
// and the cursor never moves backwards, so later calls keep returning `None`.
impl<T, F> FusedIterator for ExtractIf<'_, T, F> where F: FnMut(&T) -> bool {}
673+
674+
impl<T, F> fmt::Debug for ExtractIf<'_, T, F>
675+
where
676+
T: fmt::Debug,
677+
{
678+
/// Formats as an opaque `ExtractIf` struct; no fields are printed.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
679+
f.debug_struct("ExtractIf").finish_non_exhaustive()
680+
}
681+
}

0 commit comments

Comments
 (0)