Skip to content

Commit f0eece4

Browse files
committed
Auto merge of #529 - ToMe25:set_ops_assign, r=Amanieu
Implement XxxAssign operations on HashSets This PR primarily implements the XxxAssign operation traits for `HashSet`. My primary motivation to do so is for convenience, but depending on the situation they can provide a significant performance improvement as well.\* In my tests, which may not be ideal because I don't have much benchmarking experience, the assigning operations are, with the exception of `Sub`, a minimum of 25% faster.\* Note that when swapping the large and the small set around, some of these are significantly slower than the non-assigning variant. Therefore using them is likely only worth it performance wise, if you already know which set is larger, and the right one of the sets just so happens to be the one you don't need to keep. \* Results may have changed due to #530 being merged Here my exact benchmark results, done with the newly added benchmark suit: <!DOCTYPE html> VER | LSIZE | SSIZE | OP | NS/ITER | DIFF (%) | COMMENT -- | -- | -- | -- | -- | -- | -- 1 | 1000 | 100 | and | 5,682.88 |   |   1 | 1000 | 100 | or | 41,427.82 |   |   1 | 1000 | 100 | xor | 57,404.27 |   |   1 | 1000 | 100 | subls | 56,262.53 |   |   1 | 1000 | 100 | subsl | 751.42 |   |   1 | 1000 | 2 | and | 100.16 |   |   1 | 1000 | 2 | or | 40,435.09 |   |   1 | 1000 | 2 | xor | 59,058.05 |   |   1 | 1000 | 2 | subls | 58,668.34 |   |   1 | 1000 | 2 | subsl | 18.89 |   |   1 | 1000 | 100 | or_ass | 32,888.49 | -20.61% | unconditional insert 2 | 1000 | 100 | or_ass | 29,397.04 | -29.04% | !contains insert 3 | 1000 | 100 | or_ass | 32,399.65 | -21.79% | extend iter().cloned() 4 | 1000 | 100 | or_ass | 30,693.33 | -25.91% | get_or_insert_owned 5 | 1000 | 100 | or_ass | 33,722.59 | -18.60% | calc intersection; extend rhs.iter() !intersection contains; Requires S: Clone 1 | 1000 | 100 | add_ass | 30,114.17 | -26.66% | !contains insert 1 | 1000 | 100 | xor_ass | 32,309.85 | -43.72% | contains remove else insert 2 | 1000 | 100 | xor_ass | 40,058.48 | -30.22% | extract_if rhs 
contains; extend !removed contains 3 | 1000 | 100 | xor_ass | 31,801.04 | -44.60% | raw_entry().from_key() replace_entry_with / insert 4 | 1000 | 100 | xor_ass | 31,935.07 | -44.37% | raw_entry().from_key_hashed_nocheck() replace_entry_with / insert_hashed_nocheck 5 | 1000 | 100 | xor_ass | 31,843.33 | -44.53% | self.map.table.get.is_none self.map.table.insert else self.map.table.remove_entry 1 | 1000 | 100 | subls_ass | 33,366.13 | -40.70% | contains remove 1 | 1000 | 100 | subsl_ass | 10,686.02 | 1322.11% | contains remove 2 | 1000 | 100 | subls_ass | 36,351.69 | -35.39% | retain !contains 2 | 1000 | 100 | subsl_ass | 3,939.67 | 424.30% | retain !contains 3 | 1000 | 100 | subls_ass | 32,012.82 | -43.10% | unconditional remove 3 | 1000 | 100 | subsl_ass | 9,908.76 | 1218.67% | unconditional remove 4 | 1000 | 100 | subls_ass | 36,232.13 | -35.60% | self.map.retain !contains 4 | 1000 | 100 | subsl_ass | 3,939.35 | 424.25% | self.map.retain !contains 5 | 1000 | 100 | subls_ass | 31,879.32 | -43.34% | if rhs smaller self unconditional remove else retain !contains 5 | 1000 | 100 | subsl_ass | 3,946.98 | 425.27% | if rhs smaller self unconditional remove else retain !contains 1 | 1000 | 2 | add_ass | 28,324.95 | -29.27% |   2 | 1000 | 2 | or_ass | 28,322.62 | -29.96% |   1 | 1000 | 2 | xor_ass | 29,107.31 | -50.71% |   3 | 1000 | 2 | xor_ass | 29,026.82 | -50.85% |   1 | 1000 | 2 | subls_ass | 29,310.04 | -50.04% |   1 | 1000 | 2 | subsl_ass | 4,212.56 | 22200.48% |   2 | 1000 | 2 | subls_ass | 34,074.85 | -41.92% |   2 | 1000 | 2 | subsl_ass | 66.43 | 251.67% |   3 | 1000 | 2 | subls_ass | 29,340.86 | -49.99% |   3 | 1000 | 2 | subsl_ass | 5,972.25 | 31515.93% |   5 | 1000 | 2 | subls_ass | 29,460.49 | -49.78% |   5 | 1000 | 2 | subsl_ass | 65.32 | 245.79% |   In addition to the Assigning operators this PR changes a few more things: * It changes the allocator bound on the non-assigning set operations to `A: Allocator + Default`. 
* I also added a benchmark suite for the set operations.
2 parents 65c553d + 481ef39 commit f0eece4

File tree

2 files changed

+307
-13
lines changed

2 files changed

+307
-13
lines changed

benches/set_ops.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
//! This file contains benchmarks for the ops traits implemented by HashSet.
2+
//! Each test is intended to have a defined larger and smaller set,
3+
//! but using a larger size for the "small" set works just as well.
4+
//!
5+
//! Each assigning test is done in the configuration that is faster. Cheating, I know.
6+
//! The exception to this is Sub, because there the result differs. So I made two benchmarks for Sub.
7+
8+
#![feature(test)]
9+
10+
extern crate test;
11+
12+
use hashbrown::HashSet;
13+
use test::Bencher;
14+
15+
/// The number of items to generate for the larger of the sets.
const LARGE_SET_SIZE: usize = 1000;

/// The number of items to generate for the smaller of the sets.
const SMALL_SET_SIZE: usize = 100;

/// The number of keys present in both sets.
///
/// Half the size of the *smaller* set, so the overlap can actually fit in
/// both sets no matter which of the two size constants is larger.
// NOTE: name kept as `OVERLAPP` (sic) so existing benches keep compiling.
const OVERLAPP: usize = {
    // Pick the smaller of the two sizes. The previous version indexed with
    // `(LARGE_SET_SIZE < SMALL_SET_SIZE)`, which selected the *larger* size
    // and made the documented overlap count impossible to satisfy.
    let smaller = if LARGE_SET_SIZE < SMALL_SET_SIZE {
        LARGE_SET_SIZE
    } else {
        SMALL_SET_SIZE
    };
    smaller / 2
};
24+
25+
/// Builds a set holding the `end - start` distinct strings
/// `"key{start}"` through `"key{end - 1}"` (empty when `start >= end`).
fn create_set(start: usize, end: usize) -> HashSet<String> {
    let mut set = HashSet::new();
    for nr in start..end {
        set.insert(format!("key{}", nr));
    }
    set
}
29+
30+
#[bench]
31+
fn set_ops_bit_or(b: &mut Bencher) {
32+
let large_set = create_set(0, LARGE_SET_SIZE);
33+
let small_set = create_set(
34+
LARGE_SET_SIZE - OVERLAPP,
35+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
36+
);
37+
b.iter(|| &large_set | &small_set)
38+
}
39+
40+
#[bench]
41+
fn set_ops_bit_and(b: &mut Bencher) {
42+
let large_set = create_set(0, LARGE_SET_SIZE);
43+
let small_set = create_set(
44+
LARGE_SET_SIZE - OVERLAPP,
45+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
46+
);
47+
b.iter(|| &large_set & &small_set)
48+
}
49+
50+
#[bench]
51+
fn set_ops_bit_xor(b: &mut Bencher) {
52+
let large_set = create_set(0, LARGE_SET_SIZE);
53+
let small_set = create_set(
54+
LARGE_SET_SIZE - OVERLAPP,
55+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
56+
);
57+
b.iter(|| &large_set ^ &small_set)
58+
}
59+
60+
#[bench]
61+
fn set_ops_sub_large_small(b: &mut Bencher) {
62+
let large_set = create_set(0, LARGE_SET_SIZE);
63+
let small_set = create_set(
64+
LARGE_SET_SIZE - OVERLAPP,
65+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
66+
);
67+
b.iter(|| &large_set - &small_set)
68+
}
69+
70+
#[bench]
71+
fn set_ops_sub_small_large(b: &mut Bencher) {
72+
let large_set = create_set(0, LARGE_SET_SIZE);
73+
let small_set = create_set(
74+
LARGE_SET_SIZE - OVERLAPP,
75+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
76+
);
77+
b.iter(|| &small_set - &large_set)
78+
}
79+
80+
#[bench]
81+
fn set_ops_bit_or_assign(b: &mut Bencher) {
82+
let large_set = create_set(0, LARGE_SET_SIZE);
83+
let small_set = create_set(
84+
LARGE_SET_SIZE - OVERLAPP,
85+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
86+
);
87+
b.iter(|| {
88+
let mut set = large_set.clone();
89+
set |= &small_set;
90+
set
91+
});
92+
}
93+
94+
#[bench]
95+
fn set_ops_bit_and_assign(b: &mut Bencher) {
96+
let large_set = create_set(0, LARGE_SET_SIZE);
97+
let small_set = create_set(
98+
LARGE_SET_SIZE - OVERLAPP,
99+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
100+
);
101+
b.iter(|| {
102+
let mut set = small_set.clone();
103+
set &= &large_set;
104+
set
105+
});
106+
}
107+
108+
#[bench]
109+
fn set_ops_bit_xor_assign(b: &mut Bencher) {
110+
let large_set = create_set(0, LARGE_SET_SIZE);
111+
let small_set = create_set(
112+
LARGE_SET_SIZE - OVERLAPP,
113+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
114+
);
115+
b.iter(|| {
116+
let mut set = large_set.clone();
117+
set ^= &small_set;
118+
set
119+
});
120+
}
121+
122+
#[bench]
123+
fn set_ops_sub_assign_large_small(b: &mut Bencher) {
124+
let large_set = create_set(0, LARGE_SET_SIZE);
125+
let small_set = create_set(
126+
LARGE_SET_SIZE - OVERLAPP,
127+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
128+
);
129+
b.iter(|| {
130+
let mut set = large_set.clone();
131+
set -= &small_set;
132+
set
133+
});
134+
}
135+
136+
#[bench]
137+
fn set_ops_sub_assign_small_large(b: &mut Bencher) {
138+
let large_set = create_set(0, LARGE_SET_SIZE);
139+
let small_set = create_set(
140+
LARGE_SET_SIZE - OVERLAPP,
141+
LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAPP,
142+
);
143+
b.iter(|| {
144+
let mut set = small_set.clone();
145+
set -= &large_set;
146+
set
147+
});
148+
}

src/set.rs

Lines changed: 159 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use alloc::borrow::ToOwned;
55
use core::fmt;
66
use core::hash::{BuildHasher, Hash};
77
use core::iter::{Chain, FusedIterator};
8-
use core::ops::{BitAnd, BitOr, BitXor, Sub};
8+
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};
99

1010
use super::map::{self, DefaultHashBuilder, HashMap, Keys};
1111
use crate::raw::{Allocator, Global, RawExtractIf};
@@ -1410,9 +1410,9 @@ impl<T, S, A> BitOr<&HashSet<T, S, A>> for &HashSet<T, S, A>
14101410
where
14111411
T: Eq + Hash + Clone,
14121412
S: BuildHasher + Default,
1413-
A: Allocator,
1413+
A: Allocator + Default,
14141414
{
1415-
type Output = HashSet<T, S>;
1415+
type Output = HashSet<T, S, A>;
14161416

14171417
/// Returns the union of `self` and `rhs` as a new `HashSet<T, S>`.
14181418
///
@@ -1434,7 +1434,7 @@ where
14341434
/// }
14351435
/// assert_eq!(i, expected.len());
14361436
/// ```
1437-
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
1437+
fn bitor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
14381438
self.union(rhs).cloned().collect()
14391439
}
14401440
}
@@ -1443,9 +1443,9 @@ impl<T, S, A> BitAnd<&HashSet<T, S, A>> for &HashSet<T, S, A>
14431443
where
14441444
T: Eq + Hash + Clone,
14451445
S: BuildHasher + Default,
1446-
A: Allocator,
1446+
A: Allocator + Default,
14471447
{
1448-
type Output = HashSet<T, S>;
1448+
type Output = HashSet<T, S, A>;
14491449

14501450
/// Returns the intersection of `self` and `rhs` as a new `HashSet<T, S>`.
14511451
///
@@ -1467,17 +1467,18 @@ where
14671467
/// }
14681468
/// assert_eq!(i, expected.len());
14691469
/// ```
1470-
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S> {
1470+
fn bitand(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
14711471
self.intersection(rhs).cloned().collect()
14721472
}
14731473
}
14741474

1475-
impl<T, S> BitXor<&HashSet<T, S>> for &HashSet<T, S>
1475+
impl<T, S, A> BitXor<&HashSet<T, S, A>> for &HashSet<T, S, A>
14761476
where
14771477
T: Eq + Hash + Clone,
14781478
S: BuildHasher + Default,
1479+
A: Allocator + Default,
14791480
{
1480-
type Output = HashSet<T, S>;
1481+
type Output = HashSet<T, S, A>;
14811482

14821483
/// Returns the symmetric difference of `self` and `rhs` as a new `HashSet<T, S>`.
14831484
///
@@ -1499,17 +1500,18 @@ where
14991500
/// }
15001501
/// assert_eq!(i, expected.len());
15011502
/// ```
1502-
fn bitxor(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
1503+
fn bitxor(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
15031504
self.symmetric_difference(rhs).cloned().collect()
15041505
}
15051506
}
15061507

1507-
impl<T, S> Sub<&HashSet<T, S>> for &HashSet<T, S>
1508+
impl<T, S, A> Sub<&HashSet<T, S, A>> for &HashSet<T, S, A>
15081509
where
15091510
T: Eq + Hash + Clone,
15101511
S: BuildHasher + Default,
1512+
A: Allocator + Default,
15111513
{
1512-
type Output = HashSet<T, S>;
1514+
type Output = HashSet<T, S, A>;
15131515

15141516
/// Returns the difference of `self` and `rhs` as a new `HashSet<T, S>`.
15151517
///
@@ -1531,11 +1533,155 @@ where
15311533
/// }
15321534
/// assert_eq!(i, expected.len());
15331535
/// ```
1534-
fn sub(self, rhs: &HashSet<T, S>) -> HashSet<T, S> {
1536+
fn sub(self, rhs: &HashSet<T, S, A>) -> HashSet<T, S, A> {
15351537
self.difference(rhs).cloned().collect()
15361538
}
15371539
}
15381540

1541+
impl<T, S, A> BitOrAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1542+
where
1543+
T: Eq + Hash + Clone,
1544+
S: BuildHasher,
1545+
A: Allocator,
1546+
{
1547+
/// Modifies this set to contain the union of `self` and `rhs`.
1548+
///
1549+
/// # Examples
1550+
///
1551+
/// ```
1552+
/// use hashbrown::HashSet;
1553+
///
1554+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1555+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1556+
///
1557+
/// a |= &b;
1558+
///
1559+
/// let mut i = 0;
1560+
/// let expected = [1, 2, 3, 4, 5];
1561+
/// for x in &a {
1562+
/// assert!(expected.contains(x));
1563+
/// i += 1;
1564+
/// }
1565+
/// assert_eq!(i, expected.len());
1566+
/// ```
1567+
fn bitor_assign(&mut self, rhs: &HashSet<T, S, A>) {
1568+
for item in rhs {
1569+
if !self.contains(item) {
1570+
self.insert(item.clone());
1571+
}
1572+
}
1573+
}
1574+
}
1575+
1576+
impl<T, S, A> BitAndAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1577+
where
1578+
T: Eq + Hash + Clone,
1579+
S: BuildHasher,
1580+
A: Allocator,
1581+
{
1582+
/// Modifies this set to contain the intersection of `self` and `rhs`.
1583+
///
1584+
/// # Examples
1585+
///
1586+
/// ```
1587+
/// use hashbrown::HashSet;
1588+
///
1589+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1590+
/// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect();
1591+
///
1592+
/// a &= &b;
1593+
///
1594+
/// let mut i = 0;
1595+
/// let expected = [2, 3];
1596+
/// for x in &a {
1597+
/// assert!(expected.contains(x));
1598+
/// i += 1;
1599+
/// }
1600+
/// assert_eq!(i, expected.len());
1601+
/// ```
1602+
fn bitand_assign(&mut self, rhs: &HashSet<T, S, A>) {
1603+
self.retain(|item| rhs.contains(item));
1604+
}
1605+
}
1606+
1607+
impl<T, S, A> BitXorAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1608+
where
1609+
T: Eq + Hash + Clone,
1610+
S: BuildHasher,
1611+
A: Allocator,
1612+
{
1613+
/// Modifies this set to contain the symmetric difference of `self` and `rhs`.
1614+
///
1615+
/// # Examples
1616+
///
1617+
/// ```
1618+
/// use hashbrown::HashSet;
1619+
///
1620+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1621+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1622+
///
1623+
/// a ^= &b;
1624+
///
1625+
/// let mut i = 0;
1626+
/// let expected = [1, 2, 4, 5];
1627+
/// for x in &a {
1628+
/// assert!(expected.contains(x));
1629+
/// i += 1;
1630+
/// }
1631+
/// assert_eq!(i, expected.len());
1632+
/// ```
1633+
fn bitxor_assign(&mut self, rhs: &HashSet<T, S, A>) {
1634+
for item in rhs {
1635+
let entry = self.map.raw_entry_mut().from_key(item);
1636+
match entry {
1637+
map::RawEntryMut::Occupied(e) => {
1638+
e.remove();
1639+
}
1640+
map::RawEntryMut::Vacant(e) => {
1641+
e.insert(item.to_owned(), ());
1642+
}
1643+
};
1644+
}
1645+
}
1646+
}
1647+
1648+
impl<T, S, A> SubAssign<&HashSet<T, S, A>> for HashSet<T, S, A>
1649+
where
1650+
T: Eq + Hash + Clone,
1651+
S: BuildHasher,
1652+
A: Allocator,
1653+
{
1654+
/// Modifies this set to contain the difference of `self` and `rhs`.
1655+
///
1656+
/// # Examples
1657+
///
1658+
/// ```
1659+
/// use hashbrown::HashSet;
1660+
///
1661+
/// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
1662+
/// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
1663+
///
1664+
/// a -= &b;
1665+
///
1666+
/// let mut i = 0;
1667+
/// let expected = [1, 2];
1668+
/// for x in &a {
1669+
/// assert!(expected.contains(x));
1670+
/// i += 1;
1671+
/// }
1672+
/// assert_eq!(i, expected.len());
1673+
/// ```
1674+
fn sub_assign(&mut self, rhs: &HashSet<T, S, A>) {
1675+
if rhs.len() < self.len() {
1676+
for item in rhs {
1677+
self.remove(item);
1678+
}
1679+
} else {
1680+
self.retain(|item| !rhs.contains(item));
1681+
}
1682+
}
1683+
}
1684+
15391685
/// An iterator over the items of a `HashSet`.
15401686
///
15411687
/// This `struct` is created by the [`iter`] method on [`HashSet`].

0 commit comments

Comments
 (0)