Skip to content

Commit f0eece4

Browse files
committed
Auto merge of #529 - ToMe25:set_ops_assign, r=Amanieu
Implement XxxAssign operations on HashSets This PR primarily implements the XxxAssign operation traits for `HashSet`. My primary motivation to do so is for convenience, but depending on the situation they can provide a significant performance improvement as well.\* In my tests, which may not be ideal because I don't have much benchmarking experience, the assigning operations are, with the exception of `Sub`, a minimum of 25% faster.\* Note that when swapping the large and the small set around, some of these are significantly slower than the non-assigning variant. Therefore using them is likely only worth it performance wise, if you already know which set is larger, and the right one of the sets just so happens to be the one you don't need to keep. \* Results may have changed due to #530 being merged Here my exact benchmark results, done with the newly added benchmark suit: <!DOCTYPE html> VER | LSIZE | SSIZE | OP | NS/ITER | DIFF (%) | COMMENT -- | -- | -- | -- | -- | -- | -- 1 | 1000 | 100 | and | 5,682.88 |   |   1 | 1000 | 100 | or | 41,427.82 |   |   1 | 1000 | 100 | xor | 57,404.27 |   |   1 | 1000 | 100 | subls | 56,262.53 |   |   1 | 1000 | 100 | subsl | 751.42 |   |   1 | 1000 | 2 | and | 100.16 |   |   1 | 1000 | 2 | or | 40,435.09 |   |   1 | 1000 | 2 | xor | 59,058.05 |   |   1 | 1000 | 2 | subls | 58,668.34 |   |   1 | 1000 | 2 | subsl | 18.89 |   |   1 | 1000 | 100 | or_ass | 32,888.49 | -20.61% | unconditional insert 2 | 1000 | 100 | or_ass | 29,397.04 | -29.04% | !contains insert 3 | 1000 | 100 | or_ass | 32,399.65 | -21.79% | extend iter().cloned() 4 | 1000 | 100 | or_ass | 30,693.33 | -25.91% | get_or_insert_owned 5 | 1000 | 100 | or_ass | 33,722.59 | -18.60% | calc intersection; extend rhs.iter() !intersection contains; Requires S: Clone 1 | 1000 | 100 | add_ass | 30,114.17 | -26.66% | !contains insert 1 | 1000 | 100 | xor_ass | 32,309.85 | -43.72% | contains remove else insert 2 | 1000 | 100 | xor_ass | 40,058.48 | -30.22% | extract_if rhs 
contains; extend !removed contains 3 | 1000 | 100 | xor_ass | 31,801.04 | -44.60% | raw_entry().from_key() replace_entry_with / insert 4 | 1000 | 100 | xor_ass | 31,935.07 | -44.37% | raw_entry().from_key_hashed_nocheck() replace_entry_with / insert_hashed_nocheck 5 | 1000 | 100 | xor_ass | 31,843.33 | -44.53% | self.map.table.get.is_none self.map.table.insert else self.map.table.remove_entry 1 | 1000 | 100 | subls_ass | 33,366.13 | -40.70% | contains remove 1 | 1000 | 100 | subsl_ass | 10,686.02 | 1322.11% | contains remove 2 | 1000 | 100 | subls_ass | 36,351.69 | -35.39% | retain !contains 2 | 1000 | 100 | subsl_ass | 3,939.67 | 424.30% | retain !contains 3 | 1000 | 100 | subls_ass | 32,012.82 | -43.10% | unconditional remove 3 | 1000 | 100 | subsl_ass | 9,908.76 | 1218.67% | unconditional remove 4 | 1000 | 100 | subls_ass | 36,232.13 | -35.60% | self.map.retain !contains 4 | 1000 | 100 | subsl_ass | 3,939.35 | 424.25% | self.map.retain !contains 5 | 1000 | 100 | subls_ass | 31,879.32 | -43.34% | if rhs smaller self unconditional remove else retain !contains 5 | 1000 | 100 | subsl_ass | 3,946.98 | 425.27% | if rhs smaller self unconditional remove else retain !contains 1 | 1000 | 2 | add_ass | 28,324.95 | -29.27% |   2 | 1000 | 2 | or_ass | 28,322.62 | -29.96% |   1 | 1000 | 2 | xor_ass | 29,107.31 | -50.71% |   3 | 1000 | 2 | xor_ass | 29,026.82 | -50.85% |   1 | 1000 | 2 | subls_ass | 29,310.04 | -50.04% |   1 | 1000 | 2 | subsl_ass | 4,212.56 | 22200.48% |   2 | 1000 | 2 | subls_ass | 34,074.85 | -41.92% |   2 | 1000 | 2 | subsl_ass | 66.43 | 251.67% |   3 | 1000 | 2 | subls_ass | 29,340.86 | -49.99% |   3 | 1000 | 2 | subsl_ass | 5,972.25 | 31515.93% |   5 | 1000 | 2 | subls_ass | 29,460.49 | -49.78% |   5 | 1000 | 2 | subsl_ass | 65.32 | 245.79% |   In addition to the Assigning operators this PR changes a few more things: * It changes the allocator bound on the non-assigning set operations to `A: Allocator + Default`. 
* I also added a benchmark suite for the set operations.
2 parents 65c553d + 481ef39 commit f0eece4

File tree

2 files changed

+307
-13
lines changed

2 files changed

+307
-13
lines changed

benches/set_ops.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
//! This file contains benchmarks for the ops traits implemented by HashSet.
2+
//! Each test is intended to have a defined larger and smaller set,
3+
//! but using a larger size for the "small" set works just as well.
4+
//!
5+
//! Each assigning test is done in the configuration that is faster. Cheating, I know.
6+
//! The exception to this is Sub, because there the result differs. So I made two benchmarks for Sub.
7+
8+
#![feature(test)]
9+
10+
extern crate test;
11+
12+
use hashbrown::HashSet;
13+
use test::Bencher;
14+
15+
/// The number of items to generate for the larger of the sets.
const LARGE_SET_SIZE: usize = 1000;

/// The number of items to generate for the smaller of the sets.
const SMALL_SET_SIZE: usize = 100;

/// The number of keys present in both sets.
///
/// Half the size of the *smaller* set, so the overlap can actually fit in
/// both sets no matter which of the two size constants is larger.
// NOTE: name kept as `OVERLAPP` (sic) so existing benches keep compiling.
const OVERLAPP: usize = {
    // Pick the smaller of the two sizes. The previous version indexed with
    // `(LARGE_SET_SIZE < SMALL_SET_SIZE)`, which selected the *larger* size
    // and made the documented overlap count impossible to satisfy.
    let smaller = if LARGE_SET_SIZE < SMALL_SET_SIZE {
        LARGE_SET_SIZE
    } else {
        SMALL_SET_SIZE
    };
    smaller / 2
};
24+
25+
/// Builds a set holding the `end - start` distinct strings
/// `"key{start}"` through `"key{end - 1}"` (empty when `start >= end`).
fn create_set(start: usize, end: usize) -> HashSet<String> {
    let mut set = HashSet::new();
    for nr in start..end {
        set.insert(format!("key{}", nr));
    }
    set
}
29+
30+
#[bench]
31+
fn set_ops_bit_or(b: &mut Bencher) {
32+
let large_set = create_set(0, LARGE_SET_SIZE);
33+
let small_set = create_set(
34+
LARGE_SET_SIZE - OVERLAPP,
35+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
36+
);
37+
b.iter(|| &large_set | &small_set)
38+
}
39+
40+
#[bench]
41+
fn set_ops_bit_and(b: &mut Bencher) {
42+
let large_set = create_set(0, LARGE_SET_SIZE);
43+
let small_set = create_set(
44+
LARGE_SET_SIZE - OVERLAPP,
45+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
46+
);
47+
b.iter(|| &large_set & &small_set)
48+
}
49+
50+
#[bench]
51+
fn set_ops_bit_xor(b: &mut Bencher) {
52+
let large_set = create_set(0, LARGE_SET_SIZE);
53+
let small_set = create_set(
54+
LARGE_SET_SIZE - OVERLAPP,
55+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
56+
);
57+
b.iter(|| &large_set ^ &small_set)
58+
}
59+
60+
#[bench]
61+
fn set_ops_sub_large_small(b: &mut Bencher) {
62+
let large_set = create_set(0, LARGE_SET_SIZE);
63+
let small_set = create_set(
64+
LARGE_SET_SIZE - OVERLAPP,
65+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
66+
);
67+
b.iter(|| &large_set - &small_set)
68+
}
69+
70+
#[bench]
71+
fn set_ops_sub_small_large(b: &mut Bencher) {
72+
let large_set = create_set(0, LARGE_SET_SIZE);
73+
let small_set = create_set(
74+
LARGE_SET_SIZE - OVERLAPP,
75+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
76+
);
77+
b.iter(|| &small_set - &large_set)
78+
}
79+
80+
#[bench]
81+
fn set_ops_bit_or_assign(b: &mut Bencher) {
82+
let large_set = create_set(0, LARGE_SET_SIZE);
83+
let small_set = create_set(
84+
LARGE_SET_SIZE - OVERLAPP,
85+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
86+
);
87+
b.iter(|| {
88+
let mut set = large_set.clone();
89+
set |= &small_set;
90+
set
91+
});
92+
}
93+
94+
#[bench]
95+
fn set_ops_bit_and_assign(b: &mut Bencher) {
96+
let large_set = create_set(0, LARGE_SET_SIZE);
97+
let small_set = create_set(
98+
LARGE_SET_SIZE - OVERLAPP,
99+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
100+
);
101+
b.iter(|| {
102+
let mut set = small_set.clone();
103+
set &= &large_set;
104+
set
105+
});
106+
}
107+
108+
#[bench]
109+
fn set_ops_bit_xor_assign(b: &mut Bencher) {
110+
let large_set = create_set(0, LARGE_SET_SIZE);
111+
let small_set = create_set(
112+
LARGE_SET_SIZE - OVERLAPP,
113+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
114+
);
115+
b.iter(|| {
116+
let mut set = large_set.clone();
117+
set ^= &small_set;
118+
set
119+
});
120+
}
121+
122+
#[bench]
123+
fn set_ops_sub_assign_large_small(b: &mut Bencher) {
124+
let large_set = create_set(0, LARGE_SET_SIZE);
125+
let small_set = create_set(
126+
LARGE_SET_SIZE - OVERLAPP,
127+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
128+
);
129+
b.iter(|| {
130+
let mut set = large_set.clone();
131+
set -= &small_set;
132+
set
133+
});
134+
}
135+
136+
#[bench]
137+
fn set_ops_sub_assign_small_large(b: &mut Bencher) {
138+
let large_set = create_set(0, LARGE_SET_SIZE);
139+
let small_set = create_set(
140+
LARGE_SET_SIZE - OVERLAPP,
141+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
142+
);
143+
b.iter(|| {
144+
let mut set = small_set.clone();
145+
set -= &large_set;
146+
set
147+
});
148+
}

src/set.rs

Lines changed: 159 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use alloc::borrow::ToOwned;
55
use core::fmt;
66
use core::hash::{BuildHasher, Hash};
77
use core::iter::{Chain, FusedIterator};
8-
use core::ops::{BitAnd, BitOr, BitXor, Sub};
8+
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};
99

1010
use super::map::{self, DefaultHashBuilder, HashMap, Keys};
1111
use crate::raw::{Allocator, Global, RawExtractIf};
@@ -1410,9 +1410,9 @@ impl<T, S, A> BitOr<&HashSet<T, S, A>> for &HashSet<T, S, A>
14101410
where
14111411
T: Eq + Hash + Clone,
14121412
S: BuildHasher + Default,
1413-
A: Allocator,
1413+
A: Allocator + Default,
14141414
{
1415-
type Output = HashSet<T, S>;
1415+
type Output = HashSet<T, S, A>;
14161416

14171417
/// Returns the union of `self` and `rhs` as a new `HashSet<T, S>`.
14181418
///
@@ -1434,7 +1434,7 @@ where
14341434
/// }
14351435
/// assert_eq!(i, expected.len());
14361436
/// ```
1437-
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
1437+
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
14381438
self.union(rhs).cloned().collect()
14391439
}
14401440
}
@@ -1443,9 +1443,9 @@ impl<T, S, A> BitAnd<&HashSet<T, S, A>> for &HashSet<T, S, A>
14431443
where
14441444
T: Eq + Hash + Clone,
14451445
S: BuildHasher + Default,
1446-
A: Allocator,
1446+
A: Allocator + Default,
14471447
{
1448-
type Output = HashSet<T, S>;
1448+
type Output = HashSet<T, S, A>;
14491449

14501450
/// Returns the intersection of `self` and `rhs` as a new `HashSet<T, S>`.
14511451
///
@@ -1467,17 +1467,18 @@ where
14671467
/// }
14681468
/// assert_eq!(i, expected.len());
14691469
/// ```
1470-
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
1470+
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
14711471
self.intersection(rhs).cloned().collect()
14721472
}
14731473
}
14741474

1475-
impl<T, S> BitXor<&HashSet<T, S>> for &HashSet<T, S>
1475+
impl<T, S, A> BitXor<&HashSet<T, S, A>> for &HashSet<T, S, A>
14761476
where
14771477
T: Eq + Hash + Clone,
14781478
S: BuildHasher + Default,
1479+
A: Allocator + Default,
14791480
{
1480-
type Output = HashSet<T, S>;
1481+
type Output = HashSet<T, S, A>;
14811482

14821483
/// Returns the symmetric difference of `self` and `rhs` as a new `HashSet<T, S>`.
14831484
///
@@ -1499,17 +1500,18 @@ where
14991500
/// }
15001501
/// assert_eq!(i, expected.len());
15011502
/// ```
1502-
fn bitxor(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
1503+
fn bitxor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
15031504
self.symmetric_difference(rhs).cloned().collect()
15041505
}
15051506
}
15061507

1507-
impl<T, S> Sub<&HashSet<T, S>> for &HashSet<T, S>
1508+
impl<T, S, A> Sub<&HashSet<T, S, A>> for &HashSet<T, S, A>
15081509
where
15091510
T: Eq + Hash + Clone,
15101511
S: BuildHasher + Default,
1512+
A: Allocator + Default,
15111513
{
1512-
type Output = HashSet<T, S>;
1514+
type Output = HashSet<T, S, A>;
15131515

15141516
/// Returns the difference of `self` and `rhs` as a new `HashSet<T, S>`.
15151517
///
@@ -1531,11 +1533,155 @@ where
15311533
/// }
15321534
/// assert_eq!(i, expected.len());
15331535
/// ```
1534-
fn sub(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
1536+
fn sub(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
15351537
self.difference(rhs).cloned().collect()
15361538
}
15371539
}
15381540

1541+
impl<T, S, A> BitOrAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1542+
where
1543+
T: Eq + Hash + Clone,
1544+
S: BuildHasher,
1545+
A: Allocator,
1546+
{
1547+
/// Modifies this set to contain the union of `self` and `rhs`.
1548+
///
1549+
/// # Examples
1550+
///
1551+
/// ```
1552+
/// use hashbrown::HashSet;
1553+
///
1554+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1555+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1556+
///
1557+
/// a |= &b;
1558+
///
1559+
/// let mut i = 0;
1560+
/// let expected = [1, 2, 3, 4, 5];
1561+
/// for x in &a {
1562+
/// assert!(expected.contains(x));
1563+
/// i += 1;
1564+
/// }
1565+
/// assert_eq!(i, expected.len());
1566+
/// ```
1567+
fn bitor_assign(&mut self, rhs: &HashSet<T, S, A>) {
1568+
for item in rhs {
1569+
if !self.contains(item) {
1570+
self.insert(item.clone());
1571+
}
1572+
}
1573+
}
1574+
}
1575+
1576+
impl<T, S, A> BitAndAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1577+
where
1578+
T: Eq + Hash + Clone,
1579+
S: BuildHasher,
1580+
A: Allocator,
1581+
{
1582+
/// Modifies this set to contain the intersection of `self` and `rhs`.
1583+
///
1584+
/// # Examples
1585+
///
1586+
/// ```
1587+
/// use hashbrown::HashSet;
1588+
///
1589+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1590+
/// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect();
1591+
///
1592+
/// a &= &b;
1593+
///
1594+
/// let mut i = 0;
1595+
/// let expected = [2, 3];
1596+
/// for x in &a {
1597+
/// assert!(expected.contains(x));
1598+
/// i += 1;
1599+
/// }
1600+
/// assert_eq!(i, expected.len());
1601+
/// ```
1602+
fn bitand_assign(&mut self, rhs: &HashSet<T, S, A>) {
1603+
self.retain(|item| rhs.contains(item));
1604+
}
1605+
}
1606+
1607+
impl<T, S, A> BitXorAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1608+
where
1609+
T: Eq + Hash + Clone,
1610+
S: BuildHasher,
1611+
A: Allocator,
1612+
{
1613+
/// Modifies this set to contain the symmetric difference of `self` and `rhs`.
1614+
///
1615+
/// # Examples
1616+
///
1617+
/// ```
1618+
/// use hashbrown::HashSet;
1619+
///
1620+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1621+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1622+
///
1623+
/// a ^= &b;
1624+
///
1625+
/// let mut i = 0;
1626+
/// let expected = [1, 2, 4, 5];
1627+
/// for x in &a {
1628+
/// assert!(expected.contains(x));
1629+
/// i += 1;
1630+
/// }
1631+
/// assert_eq!(i, expected.len());
1632+
/// ```
1633+
fn bitxor_assign(&mut self, rhs: &HashSet<T, S, A>) {
1634+
for item in rhs {
1635+
let entry = self.map.raw_entry_mut().from_key(item);
1636+
match entry {
1637+
map::RawEntryMut::Occupied(e) => {
1638+
e.remove();
1639+
}
1640+
map::RawEntryMut::Vacant(e) => {
1641+
e.insert(item.to_owned(), ());
1642+
}
1643+
};
1644+
}
1645+
}
1646+
}
1647+
1648+
impl<T, S, A> SubAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1649+
where
1650+
T: Eq + Hash + Clone,
1651+
S: BuildHasher,
1652+
A: Allocator,
1653+
{
1654+
/// Modifies this set to contain the difference of `self` and `rhs`.
1655+
///
1656+
/// # Examples
1657+
///
1658+
/// ```
1659+
/// use hashbrown::HashSet;
1660+
///
1661+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1662+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1663+
///
1664+
/// a -= &b;
1665+
///
1666+
/// let mut i = 0;
1667+
/// let expected = [1, 2];
1668+
/// for x in &a {
1669+
/// assert!(expected.contains(x));
1670+
/// i += 1;
1671+
/// }
1672+
/// assert_eq!(i, expected.len());
1673+
/// ```
1674+
fn sub_assign(&mut self, rhs: &HashSet<T, S, A>) {
1675+
if rhs.len() < self.len() {
1676+
for item in rhs {
1677+
self.remove(item);
1678+
}
1679+
} else {
1680+
self.retain(|item| !rhs.contains(item));
1681+
}
1682+
}
1683+
}
1684+
15391685
/// An iterator over the items of a `HashSet`.
15401686
///
15411687
/// This `struct` is created by the [`iter`] method on [`HashSet`].

0 commit comments

Comments
 (0)