diff --git a/Cargo.toml b/Cargo.toml index 9c2e0ccee..51d01c829 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ default = ["ahash", "inline-more", "allocator-api2"] nightly = ["allocator-api2?/nightly", "bumpalo/allocator_api"] rustc-internal-api = [] + rustc-dep-of-std = [ "nightly", "core", @@ -59,6 +60,7 @@ rustc-dep-of-std = [ "alloc", "rustc-internal-api", ] + raw = [] # Enables usage of `#[inline]` on far more functions than by default in this @@ -66,6 +68,22 @@ raw = [] # time cost. inline-more = [] +# If no overflow-tracker is selected, then the default is none. +# +# A single tracker can be selected at any time, selecting two or more is an error. + +# Bloom filter overflow-tracker, ala boost::unordered_flat_map. +overflow-tracker-bloom-1-u8 = [] + +# Bloom filter overflow-tracker, with more accuracy. +overflow-tracker-bloom-1-u16 = [] + +# Counter overflow-tracker, ala F14. +overflow-tracker-counter-u8 = [] + +# Hybrid overflow-tracker, mixing a counter and a bloom filter. +overflow-tracker-hybrid = [] + [package.metadata.docs.rs] features = ["nightly", "rayon", "serde", "raw"] rustdoc-args = ["--generate-link-to-definition"] diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs index 6576b3c5c..d7ae9ed5e 100644 --- a/src/raw/bitmask.rs +++ b/src/raw/bitmask.rs @@ -54,21 +54,6 @@ impl BitMask { } } - /// Returns the number of trailing zeroes in the `BitMask`. - #[inline] - pub(crate) fn trailing_zeros(self) -> usize { - // ARM doesn't have a trailing_zeroes instruction, and instead uses - // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM - // versions (pre-ARMv7) don't have RBIT and need to emulate it - // instead. Since we only have 1 bit set in each byte on ARM, we can - // use swap_bytes (REV) + leading_zeroes instead. - if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { - self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE - } else { - self.0.trailing_zeros() as usize / BITMASK_STRIDE - } - } - /// Same as above but takes a `NonZeroBitMaskWord`. #[inline] fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { @@ -80,12 +65,6 @@ impl BitMask { nonzero.trailing_zeros() as usize / BITMASK_STRIDE } } - - /// Returns the number of leading zeroes in the `BitMask`. - #[inline] - pub(crate) fn leading_zeros(self) -> usize { - self.0.leading_zeros() as usize / BITMASK_STRIDE - } } impl IntoIterator for BitMask { diff --git a/src/raw/generic.rs b/src/raw/generic.rs index c668b0642..c0cd571df 100644 --- a/src/raw/generic.rs +++ b/src/raw/generic.rs @@ -69,13 +69,6 @@ impl Group { &ALIGNED_BYTES.bytes } - /// Loads a group of bytes starting at the given address. - #[inline] - #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub(crate) unsafe fn load(ptr: *const u8) -> Self { - Group(ptr::read_unaligned(ptr.cast())) - } - /// Loads a group of bytes starting at the given address, which must be /// aligned to `mem::align_of::()`. #[inline] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 22c01f5e9..ec7a96ac1 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -3,10 +3,9 @@ use crate::scopeguard::{guard, ScopeGuard}; use crate::TryReserveError; use core::iter::FusedIterator; use core::marker::PhantomData; -use core::mem; use core::mem::MaybeUninit; use core::ptr::NonNull; -use core::{hint, ptr}; +use core::{hint, mem, ptr}; cfg_if! { // Use the SSE2 implementation if possible: it allows us to scan 16 buckets @@ -41,12 +40,15 @@ cfg_if! 
{ } mod alloc; + pub(crate) use self::alloc::{do_alloc, Allocator, Global}; mod bitmask; +mod overflow; use self::bitmask::BitMaskIter; use self::imp::Group; +use self::overflow::OverflowTracker; // Branch prediction hint. This is currently only available on nightly but it // consistently improves performance by 10-15%. @@ -110,6 +112,9 @@ const EMPTY: u8 = 0b1111_1111; /// Control byte value for a deleted bucket. const DELETED: u8 = 0b1000_0000; +/// Size of the tracker, to avoid repeated calls to `mem::size_of::()` which is chunky. +const OVERFLOW_TRACKER_SIZE: usize = mem::size_of::(); + /// Checks whether a control byte represents a full bucket (top bit is clear). #[inline] fn is_full(ctrl: u8) -> bool { @@ -166,11 +171,32 @@ fn h2(hash: u64) -> u8 { /// Proof that the probe will visit every group in the table: /// struct ProbeSeq { - pos: usize, + // Index of the first element of the group. + group: usize, stride: usize, } impl ProbeSeq { + fn with_hash(hash: u64, bucket_mask: usize) -> Self { + debug_assert!((bucket_mask + 1).is_power_of_two(), "{bucket_mask}"); + + // This is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + let group = h1(hash) & bucket_mask; + + Self { + group: group / Group::WIDTH * Group::WIDTH, + stride: 0, + } + } + + fn with_displacement(index: usize, displacement: u8) -> Self { + Self { + group: index / Group::WIDTH * Group::WIDTH, + stride: Group::WIDTH * (displacement as usize), + } + } + #[inline] fn move_next(&mut self, bucket_mask: usize) { // We should have found an empty bucket by now and ended the probe. @@ -179,9 +205,22 @@ impl ProbeSeq { "Went past end of probe sequence" ); + debug_assert_eq!(0, self.group % Group::WIDTH, "{}", self.group); + self.stride += Group::WIDTH; - self.pos += self.stride; - self.pos &= bucket_mask; + + self.group += self.stride; + self.group &= bucket_mask; + } + + #[inline] + fn move_prev(&mut self, bucket_mask: usize) { + debug_assert_eq!(0, self.group % Group::WIDTH, "{}", self.group); + + self.group = self.group.wrapping_sub(self.stride); + self.group &= bucket_mask; + + self.stride -= Group::WIDTH; } } @@ -257,11 +296,23 @@ impl TableLayout { debug_assert!(buckets.is_power_of_two()); let TableLayout { size, ctrl_align } = self; + // Manual layout calculation since Layout methods are not yet stable. let ctrl_offset = size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; + // No special consideration for alignment is necessary, as `OverflowTracker` has a lower alignment than `Group`. + debug_assert!(mem::align_of::() <= ctrl_align); + + let len = len.checked_add(OVERFLOW_TRACKER_SIZE * Self::number_groups(buckets))?; + + let len = if OverflowTracker::TRACK_REMOVALS { + len.checked_add(buckets / 2)? + } else { + len + }; + // We need an additional check to ensure that the allocation doesn't // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295). if len > isize::MAX as usize - (ctrl_align - 1) { @@ -273,6 +324,12 @@ impl TableLayout { ctrl_offset, )) } + + #[inline] + fn number_groups(buckets: usize) -> usize { + // Allocate one more group, to match the extra control bytes allocated. + (buckets + Group::WIDTH - 1) / Group::WIDTH + 1 + } } /// A reference to an empty bucket into which an can be inserted. @@ -797,7 +854,7 @@ struct RawTableInner { // number of buckets in the table. 
bucket_mask: usize, - // [Padding], T1, T2, ..., Tlast, C1, C2, ... + // [Padding], T1, T2, ..., Tlast, C1, C2, ..., Clast, O1, O2, ..., Olast, D1, D2, ..., Dlast // ^ points here ctrl: NonNull, @@ -857,7 +914,7 @@ impl RawTable { /// Allocates a new hash table with the given number of buckets. /// - /// The control bytes are left uninitialized. + /// The control bytes and overflow-tracking bytes are left uninitialized. #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( alloc: A, @@ -1046,7 +1103,8 @@ impl RawTable { // Avoid `Option::map` because it bloats LLVM IR. if let Some(bucket) = self.find(hash, eq) { unsafe { - self.erase(bucket); + self.erase_no_drop(bucket); + bucket.drop(); } true } else { @@ -1382,10 +1440,12 @@ impl RawTable { F: FnOnce(T) -> Option, { let index = self.bucket_index(&bucket); - let old_ctrl = *self.table.ctrl(index); debug_assert!(self.is_bucket_full(index)); + let old_growth_left = self.table.growth_left; - let item = self.remove(bucket).0; + let old_ctrl = self.table.half_erase(index); + let item = bucket.read(); + if let Some(new_item) = f(item) { self.table.growth_left = old_growth_left; self.table.set_ctrl(index, old_ctrl); @@ -1393,6 +1453,7 @@ impl RawTable { self.bucket(index).write(new_item); true } else { + self.table.untrack_overflow_trail(index, old_ctrl); false } } @@ -1786,10 +1847,17 @@ impl RawTableInner { capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; let result = Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + // SAFETY: We checked that the table is allocated and therefore the table already has // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + result + .overflow(0) + .write_bytes(0, result.num_overflow_trackers()); + result + .displacement(0) + .write_bytes(0, result.num_displacement_bytes()); Ok(result) } @@ -1825,7 +1893,7 @@ impl RawTableInner { /// bytes outside the range of the table are filled with [`EMPTY`] entries. These will unfortunately /// trigger a match of [`RawTableInner::find_insert_slot_in_group`] function. This is because /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking - /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. + /// (`(probe_seq.group + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. /// We detect this situation here and perform a second scan starting at the beginning of the table. /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the /// trailing control bytes (containing [`EMPTY`] bytes). @@ -1867,7 +1935,12 @@ impl RawTableInner { unsafe fn fix_insert_slot(&self, mut index: usize) -> InsertSlot { // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`. 
if unlikely(self.is_bucket_full(index)) { - debug_assert!(self.bucket_mask < Group::WIDTH); + debug_assert!( + self.bucket_mask < Group::WIDTH, + "{} >= {}", + self.bucket_mask, + Group::WIDTH + ); // SAFETY: // // * Since the caller of this function ensures that the control bytes are properly @@ -1879,7 +1952,7 @@ impl RawTableInner { // * Because the caller of this function ensures that the index was provided by the // `self.find_insert_slot_in_group()` function, so for for tables larger than the // group width (self.buckets() >= Group::WIDTH), we will never end up in the given - // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` + // branch, since `(probe_seq.group + bit) & self.bucket_mask` in `find_insert_slot_in_group` // cannot return a full bucket index. For tables smaller than the group width, calling // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside // the range of the table are filled with EMPTY bytes (and we know for sure that there @@ -1905,9 +1978,9 @@ impl RawTableInner { let bit = group.match_empty_or_deleted().lowest_set_bit(); if likely(bit.is_some()) { - // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // This is the same as `(probe_seq.group + bit) % self.buckets()` because the number // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask) + Some((probe_seq.group + bit.unwrap()) & self.bucket_mask) } else { None } @@ -1957,28 +2030,28 @@ impl RawTableInner { let mut insert_slot = None; let h2_hash = h2(hash); - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask` and also because mumber of // buckets is a power of two (see `self.probe_seq` function). // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new). - let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; for bit in group.match_byte(h2_hash) { - let index = (probe_seq.pos + bit) & self.bucket_mask; + let index = (probe_seq.group + bit) & self.bucket_mask; if likely(eq(index)) { return Ok(index); @@ -1991,9 +2064,15 @@ impl RawTableInner { insert_slot = self.find_insert_slot_in_group(&group, &probe_seq); } - // Only stop the search if the group contains at least one empty element. 
- // Otherwise, the element that we are looking for might be in a following group. - if likely(group.match_empty().any_bit_set()) { + // The search ends with a slot if: + // + // - There is an empty slot available, hence the element that we are looking for did not overflow. + // - The overflow tracker indicates the absence of overflow, and there is a deleted slot available. + if likely( + group.match_empty().any_bit_set() + || (group.match_empty_or_deleted().any_bit_set() + && !self.may_have_overflowed(probe_seq.group, h2_hash)), + ) { // We must have found a insert slot by now, since the current group contains at // least one. For tables smaller than the group width, there will still be an // empty element in the current (and only) group due to the load factor. @@ -2066,6 +2145,7 @@ impl RawTableInner { unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, u8) { // SAFETY: Caller of this function ensures that the control bytes are properly initialized. let index: usize = self.find_insert_slot(hash).index; + // SAFETY: // 1. The `find_insert_slot` function either returns an `index` less than or // equal to `self.buckets() = self.bucket_mask + 1` of the table, or never @@ -2074,6 +2154,8 @@ impl RawTableInner { // allocated let old_ctrl = *self.ctrl(index); self.set_ctrl_h2(index, hash); + self.track_overflow_trail(InsertSlot { index }, hash); + (index, old_ctrl) } @@ -2107,24 +2189,24 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn find_insert_slot(&self, hash: u64) -> InsertSlot { - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask` and also because mumber of // buckets is a power of two (see `self.probe_seq` function). // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new). 
- let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; let index = self.find_insert_slot_in_group(&group, &probe_seq); if likely(index.is_some()) { @@ -2165,36 +2247,39 @@ impl RawTableInner { #[inline(always)] unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { let h2_hash = h2(hash); - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask`. // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new_in). - let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; for bit in group.match_byte(h2_hash) { - // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // This is the same as `(probe_seq.group + bit) % self.buckets()` because the number // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - let index = (probe_seq.pos + bit) & self.bucket_mask; + let index = (probe_seq.group + bit) & self.bucket_mask; if likely(eq(index)) { return Some(index); } } - if likely(group.match_empty().any_bit_set()) { + if likely( + group.match_empty().any_bit_set() + || !self.may_have_overflowed(probe_seq.group, h2_hash), + ) { return None; } @@ -2203,6 +2288,7 @@ impl RawTableInner { } /// Prepares for rehashing data in place (that is, without allocating new memory). + /// /// Converts all full index `control bytes` to `DELETED` and all `DELETED` control /// bytes to `EMPTY`, i.e. performs the following conversion: /// @@ -2210,6 +2296,8 @@ impl RawTableInner { /// - `DELETED` control bytes -> `EMPTY`; /// - `FULL` control bytes -> `DELETED`. /// + /// Erases all overflow trackers. + /// /// This function does not make any changes to the `data` parts of the table, /// or any changes to the the `items` or `growth_left` field of the table. /// @@ -2272,6 +2360,11 @@ impl RawTableInner { self.ctrl(0) .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } + + self.overflow(0) + .write_bytes(0, self.num_overflow_trackers()); + self.displacement(0) + .write_bytes(0, self.num_displacement_bytes()); } /// Returns an iterator over every element in the table. @@ -2596,21 +2689,6 @@ impl RawTableInner { self.ctrl.cast() } - /// Returns an iterator-like object for a probe sequence on the table. 
- /// - /// This iterator never terminates, but is guaranteed to visit each bucket - /// group exactly once. The loop using `probe_seq` must terminate upon - /// reaching a group containing an empty bucket. - #[inline] - fn probe_seq(&self, hash: u64) -> ProbeSeq { - ProbeSeq { - // This is the same as `hash as usize % self.buckets()` because the number - // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - pos: h1(hash) & self.bucket_mask, - stride: 0, - } - } - /// Returns the index of a bucket for which a value must be inserted if there is enough rooom /// in the table, otherwise returns error #[cfg(feature = "raw")] @@ -2631,13 +2709,15 @@ impl RawTableInner { self.growth_left -= usize::from(special_is_empty(old_ctrl)); self.set_ctrl_h2(index, hash); self.items += 1; + + self.track_overflow_trail(InsertSlot { index }, hash); } #[inline] fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { - let probe_seq_pos = self.probe_seq(hash).pos; + let probe_seq_pos = ProbeSeq::with_hash(hash, self.bucket_mask).group; let probe_index = - |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + |group: usize| (group.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; probe_index(i) == probe_index(new_i) } @@ -2737,7 +2817,7 @@ impl RawTableInner { #[inline] unsafe fn set_ctrl(&mut self, index: usize, ctrl: u8) { // Replicate the first Group::WIDTH control bytes at the end of - // the array without using a branch. If the tables smaller than + // the array without using a branch. If the table is smaller than // the group width (self.buckets() < Group::WIDTH), // `index2 = Group::WIDTH + index`, otherwise `index2` is: // @@ -2797,6 +2877,195 @@ impl RawTableInner { self.ctrl.as_ptr().add(index) } + /// Returns an aligned group. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline] + unsafe fn load_group(&self, index: usize) -> Group { + debug_assert_eq!(0, index % Group::WIDTH, "{index}"); + + Group::load_aligned(self.ctrl(index)) + } + + /// Returns whether a given element may have overflowed the current index, based on its `h2`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn may_have_overflowed(&self, index: usize, h2: u8) -> bool { + let tracker = self.overflow(index) as *const OverflowTracker; + + (*tracker).may_have_overflowed(h2) + } + + /// Marks an element with hash `hash` has having overflowed from its initial group until `slot`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn track_overflow_trail(&mut self, slot: InsertSlot, hash: u64) { + #[inline(never)] + unsafe fn track(this: &mut RawTableInner, slot: InsertSlot, hash: u64) { + let h2_hash = h2(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, this.bucket_mask); + let mut displacement = 0usize; + + while probe_seq.group / Group::WIDTH != slot.index / Group::WIDTH { + let tracker = this.overflow(probe_seq.group); + + (*tracker).add(h2_hash); + displacement += 1; + + probe_seq.move_next(this.bucket_mask); + } + + if !OverflowTracker::TRACK_REMOVALS { + return; + } + + if displacement > 0xF { + return; + } + + this.set_displacement(slot.index, displacement as u8); + } + + let probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); + + // Insertion at ideal group, no overflow to track. 
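+ //
+ // Otherwise, `track` adds `h2(hash)` to the tracker of every group the probe sequence passes
+ // through before reaching `slot`'s group and, when removals are tracked and the displacement
+ // fits in a nibble, records that displacement for `slot` so that `untrack_overflow_trail` can
+ // undo the marks on removal.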
+ if probe_seq.group / Group::WIDTH == slot.index / Group::WIDTH { + return; + } + + track(self, slot, hash); + } + + /// Removes mark of an element with hash `hash` has having overflowed from its initial group until `slot`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn untrack_overflow_trail(&mut self, index: usize, h2_hash: u8) { + #[inline(never)] + unsafe fn untrack(this: &mut RawTableInner, index: usize, h2_hash: u8, displacement: u8) { + this.set_displacement(index, 0); + + let mut probe_seq = ProbeSeq::with_displacement(index, displacement); + + for _ in 0..displacement { + probe_seq.move_prev(this.bucket_mask); + + let tracker = this.overflow(probe_seq.group); + + (*tracker).remove(h2_hash); + } + + debug_assert_eq!(0, probe_seq.stride); + } + + if !OverflowTracker::TRACK_REMOVALS { + return; + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::untrack_overflow_trail`]. + let displacement = self.get_displacement(index); + + if likely(displacement == 0) { + return; + } + + untrack(self, index, h2_hash, displacement); + } + + /// Returns a pointer to an `OverflowTracker`. + /// + /// The `index` is that of the _element_, not that of the group of the element. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn overflow(&self, index: usize) -> *mut OverflowTracker { + // ZST is special-cased. + #![allow(clippy::zst_offset)] + + debug_assert!(index / Group::WIDTH < self.num_overflow_trackers()); + + if OVERFLOW_TRACKER_SIZE == 0 { + return invalid_mut(mem::align_of::()); + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::overflow`] + let ctrl_end = self.ctrl.as_ptr().add(self.num_ctrl_bytes()); + + let overflow_start: *mut OverflowTracker = ctrl_end as *mut OverflowTracker; + + overflow_start.add(index / Group::WIDTH) + } + + /// Returns the displacement. + /// + /// # Safety + /// + /// See `displacement`. + #[inline(always)] + unsafe fn get_displacement(&self, index: usize) -> u8 { + debug_assert!(OverflowTracker::TRACK_REMOVALS); + + let pair = self.displacement(index); + + if index % 2 == 0 { + *pair & 0xF + } else { + *pair >> 4 + } + } + + /// Sets the displacement. + /// + /// # Safety + /// + /// See `displacement`. + #[inline] + unsafe fn set_displacement(&mut self, index: usize, displacement: u8) { + debug_assert!(OverflowTracker::TRACK_REMOVALS); + debug_assert!(displacement <= 0xF, "{displacement}"); + + let pair = self.displacement(index); + + *pair = if index % 2 == 0 { + (*pair & 0xF0) | displacement + } else { + (displacement << 4) | (*pair & 0xF) + }; + } + + /// Returns a pointer to the displacement pair. + /// + /// # Safety + /// + /// See `ctrl`. 
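+ ///
+ /// Two displacements are packed per byte: the low nibble holds the displacement of the even
+ /// index, the high nibble that of the odd index. For example (illustrative values), a pair
+ /// byte of `0b0011_0001` means index `2k` has displacement `1` and index `2k + 1` has
+ /// displacement `3`.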
+ #[inline(always)] + unsafe fn displacement(&self, index: usize) -> *mut u8 { + debug_assert!(index <= self.bucket_mask); + + if !OverflowTracker::TRACK_REMOVALS { + return invalid_mut(mem::align_of::()); + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::displacement`] + let ctrl_end = self.ctrl.as_ptr().add(self.num_ctrl_bytes()); + + let overflow_end = ctrl_end.add(OVERFLOW_TRACKER_SIZE * self.num_overflow_trackers()); + + overflow_end.add(index / 2) + } + #[inline] fn buckets(&self) -> usize { self.bucket_mask + 1 @@ -2818,6 +3087,20 @@ impl RawTableInner { self.bucket_mask + 1 + Group::WIDTH } + #[inline] + fn num_overflow_trackers(&self) -> usize { + (self.bucket_mask + Group::WIDTH) / Group::WIDTH + 1 + } + + #[inline] + fn num_displacement_bytes(&self) -> usize { + if OverflowTracker::TRACK_REMOVALS { + (self.bucket_mask + 1) / 2 + } else { + 0 + } + } + #[inline] fn is_empty_singleton(&self) -> bool { self.bucket_mask == 0 @@ -3186,13 +3469,14 @@ impl RawTableInner { // are properly initialized. let new_i = guard.find_insert_slot(hash).index; - // Probing works by scanning through all of the control - // bytes in groups, which may not be aligned to the group - // size. If both the new and old position fall within the - // same unaligned group, then there is no benefit in moving + // Probing works by scanning through all of the control bytes + // in groups. If both the new and old position fall within + // the same group, then there is no benefit in moving // it and we can just continue to the next item. if likely(guard.is_in_same_group(i, new_i, hash)) { guard.set_ctrl_h2(i, hash); + guard.track_overflow_trail(InsertSlot { index: i }, hash); + continue 'outer; } @@ -3201,6 +3485,8 @@ impl RawTableInner { // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); + guard.track_overflow_trail(InsertSlot { index: new_i }, hash); + if prev_ctrl == EMPTY { guard.set_ctrl(i, EMPTY); // If the target slot is empty, simply move the current @@ -3333,6 +3619,10 @@ impl RawTableInner { if !self.is_empty_singleton() { unsafe { self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + self.overflow(0) + .write_bytes(0, self.num_overflow_trackers()); + self.displacement(0) + .write_bytes(0, self.num_displacement_bytes()); } } self.items = 0; @@ -3374,16 +3664,57 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn erase(&mut self, index: usize) { + let prev_ctrl = self.half_erase(index); + + self.untrack_overflow_trail(index, prev_ctrl); + } + + /// Erases the [`Bucket`]'s control byte at the given index so that it does not + /// triggered as full, decreases the `items` of the table and, if it can be done, + /// increases `self.growth_left`. + /// + /// This function does NOT adjust overflow/displacements. If actually removing the + /// element, the caller of this function must call `untrack_overflow_trail`. See + /// `erase` for unconditional removals. + /// + /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it + /// does not make any changes to the `data` parts of the table. The caller of this + /// function must take care to properly drop the `data`, otherwise calling this + /// function may result in a memory leak. 
+ /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * It must be the full control byte at the given position; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// Calling this function on a table with no elements is unspecified, but calling subsequent + /// functions is likely to result in [`undefined behavior`] due to overflow subtraction + /// (`self.items -= 1 cause overflow when self.items == 0`). + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`RawTableInner::buckets`]: RawTableInner::buckets + /// [`Bucket::as_ptr`]: Bucket::as_ptr + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn half_erase(&mut self, index: usize) -> u8 { debug_assert!(self.is_bucket_full(index)); - // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because - // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; // SAFETY: // - The caller must uphold the safety contract for `erase` method; - // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask` - let empty_before = Group::load(self.ctrl(index_before)).match_empty(); - let empty_after = Group::load(self.ctrl(index)).match_empty(); + let empty = self + .load_group(index / Group::WIDTH * Group::WIDTH) + .match_empty(); // Inserting and searching in the map is performed by two key functions: // @@ -3408,28 +3739,28 @@ impl RawTableInner { // function may stumble upon an `EMPTY` byte before finding the desired element and stop // searching. // - // Thus it is necessary to check all bytes after and before the erased element. If we are in - // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes - // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as - // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there - // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble - // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well. - // - // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index` - // and given all of the above, tables smaller than the group width (self.buckets() < Group::WIDTH) - // cannot have `DELETED` bytes. + // Thus it is necessary to check all bytes in the group of the erased element. If the group is + // composed only of `FULL` and `DELETED` bytes, then we must mark our byte as `DELETED` in order + // for the `find_inner` function to go further. On the other hand, if there is at least one `EMPTY` + // slot in the group, then the `find_inner` function will still stumble upon it, so we can safely + // mark our erased byte as `EMPTY` as well. 
// - // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while - // `trailing_zeros` refers to the bytes at the beginning of a group. - let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { - DELETED - } else { + // Note that overflow tracking does not alter this picture, since overflow tracking is only checked + // in the absence of `EMPTY` control byte in the group. + let ctrl = if empty.any_bit_set() { self.growth_left += 1; EMPTY + } else { + DELETED }; + // SAFETY: the caller must uphold the safety contract for `erase` method. + let prev_ctrl = *self.ctrl(index); self.set_ctrl(index, ctrl); + self.items -= 1; + + prev_ctrl } } @@ -3538,6 +3869,7 @@ impl Clone for RawTable { trait RawTableClone { unsafe fn clone_from_spec(&mut self, source: &Self); } + impl RawTableClone for RawTable { default_fn! { #[cfg_attr(feature = "inline-more", inline)] @@ -3546,10 +3878,19 @@ impl RawTableClone for RawTable { } } } + #[cfg(feature = "nightly")] impl RawTableClone for RawTable { #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_spec(&mut self, source: &Self) { + source.table.displacement(0).copy_to_nonoverlapping( + self.table.displacement(0), + self.table.num_displacement_bytes(), + ); + source + .table + .overflow(0) + .copy_to_nonoverlapping(self.table.overflow(0), self.table.num_overflow_trackers()); source .table .ctrl(0) @@ -3571,7 +3912,15 @@ impl RawTable { /// - The control bytes are not initialized yet. #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_impl(&mut self, source: &Self) { - // Copy the control bytes unchanged. We do this in a single pass + // Copy the displacements, overflow trackers & control bytes unchanged. We do this in a single pass. + source.table.displacement(0).copy_to_nonoverlapping( + self.table.displacement(0), + self.table.num_displacement_bytes(), + ); + source + .table + .overflow(0) + .copy_to_nonoverlapping(self.table.overflow(0), self.table.num_overflow_trackers()); source .table .ctrl(0) @@ -3675,6 +4024,7 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable { } } } + #[cfg(not(feature = "nightly"))] impl Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] @@ -4316,6 +4666,7 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawIntoIter { } } } + #[cfg(not(feature = "nightly"))] impl Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] @@ -4473,13 +4824,14 @@ impl RawIterHash { } } } + impl RawIterHashInner { #[cfg_attr(feature = "inline-more", inline)] #[cfg(feature = "raw")] unsafe fn new(table: &RawTableInner, hash: u64) -> Self { let h2_hash = h2(hash); - let probe_seq = table.probe_seq(hash); - let group = Group::load(table.ctrl(probe_seq.pos)); + let probe_seq = ProbeSeq::with_hash(hash, table.bucket_mask); + let group = table.load_group(probe_seq.group); let bitmask = group.match_byte(h2_hash).into_iter(); RawIterHashInner { @@ -4519,7 +4871,7 @@ impl Iterator for RawIterHashInner { unsafe { loop { if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.bucket_mask; + let index = (self.probe_seq.group + bit) & self.bucket_mask; return Some(index); } if likely(self.group.match_empty().any_bit_set()) { @@ -4529,11 +4881,11 @@ impl Iterator for RawIterHashInner { // Can't use `RawTableInner::ctrl` here as we don't have // an actual `RawTableInner` reference to use. 
- let index = self.probe_seq.pos;
+ let index = self.probe_seq.group;
 debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
- let group_ctrl = self.ctrl.as_ptr().add(index);
+ let group_ctrl = self.ctrl.as_ptr().add(index / Group::WIDTH * Group::WIDTH);
- self.group = Group::load(group_ctrl);
+ self.group = Group::load_aligned(group_ctrl);
 self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
 }
 }
@@ -4562,6 +4914,42 @@ impl RawExtractIf<'_, T, A> {
 }
 }
+#[cfg(test)]
+mod test_probe_seq {
+ use super::*;
+
+ #[test]
+ fn move_next_prev() {
+ const BUCKET_MASK: usize = if Group::WIDTH == 16 { 255 } else { 127 };
+
+ const EXPECTED_PROBE_SEQUENCE: [usize; 5] = if Group::WIDTH == 16 {
+ [160, 176, 208, 0, 64]
+ } else {
+ [80, 88, 104, 0, 32]
+ };
+
+ let mut probe = ProbeSeq::with_hash(10 * Group::WIDTH as u64, BUCKET_MASK);
+
+ for group in EXPECTED_PROBE_SEQUENCE {
+ assert_eq!(group, probe.group);
+
+ probe.move_next(BUCKET_MASK);
+ }
+
+ for (i, group) in EXPECTED_PROBE_SEQUENCE.into_iter().enumerate() {
+ let mut rev_probe = ProbeSeq::with_displacement(group, i as u8);
+
+ assert_eq!(group, rev_probe.group);
+
+ for k in (0..i).rev() {
+ rev_probe.move_prev(BUCKET_MASK);
+
+ assert_eq!(EXPECTED_PROBE_SEQUENCE[k], rev_probe.group);
+ }
+ }
+ }
+}
+
 #[cfg(test)]
 mod test_map {
 use super::*;
@@ -4642,6 +5030,15 @@ mod test_map {
 .ctrl(0)
 .write_bytes(EMPTY, table.table.num_ctrl_bytes());
+ table
+ .table
+ .overflow(0)
+ .write_bytes(0, table.table.num_overflow_trackers());
+ table
+ .table
+ .displacement(0)
+ .write_bytes(0, table.table.num_displacement_bytes());
+
 // SAFETY: table.capacity() is guaranteed to be smaller than table.buckets()
 table.table.ctrl(0).write_bytes(0, table.capacity());
diff --git a/src/raw/neon.rs b/src/raw/neon.rs
index 44e82d57d..af0c99f36 100644
--- a/src/raw/neon.rs
+++ b/src/raw/neon.rs
@@ -40,13 +40,6 @@ impl Group {
 &ALIGNED_BYTES.bytes
 }
- /// Loads a group of bytes starting at the given address.
- #[inline]
- #[allow(clippy::cast_ptr_alignment)] // unaligned load
- pub(crate) unsafe fn load(ptr: *const u8) -> Self {
- Group(neon::vld1_u8(ptr))
- }
-
 /// Loads a group of bytes starting at the given address, which must be
 /// aligned to `mem::align_of::<Group>()`.
 #[inline]
diff --git a/src/raw/overflow.rs b/src/raw/overflow.rs
new file mode 100644
index 000000000..ea2fe45b3
--- /dev/null
+++ b/src/raw/overflow.rs
@@ -0,0 +1,241 @@
+//! Overflow tracking, for finer-grained probing.
+//!
+//! This module defines an `OverflowTracker`, selected via Cargo features.
+//!
+//! An `OverflowTracker` tracks, in some fashion, whether elements inserted into the hash-table overflowed the group
+//! paired with the `OverflowTracker`. During a look-up, the `OverflowTracker` can then indicate whether further probing
+//! is necessary or not, though on a probabilistic basis: it can indicate "no" with certainty, but only a "maybe"
+//! otherwise.
+//!
+//! Which `OverflowTracker` to choose depends on the workload.
+//!
+//! Elements to consider:
+//!
+//! - No overflow: no tracker! If insertion never overflows nor fully fills a group, then any overflow tracking is
+//! pure overhead.
+//! - No removal: no counter! If the hash-table is append-only, then counters (which allow clean-up on removal) are
+//! pure overhead.
+//! - Bloom is good! Bloom-filter based trackers are expected to perform better than pure-counter trackers.
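+//!
+//! For illustration, a sketch (not the exact code) of the probe-termination test that look-ups in
+//! `raw/mod.rs` can perform once every group is paired with a tracker; here `group`, `h2_hash` and
+//! `tracker` stand for the values obtained at the current probe position:
+//!
+//! ```ignore
+//! // Probing may stop as soon as either the group still has an EMPTY slot (nothing was ever
+//! // pushed past it), or its tracker is certain that no element with this `h2` overflowed it.
+//! let stop_probing = group.match_empty().any_bit_set() || !tracker.may_have_overflowed(h2_hash);
+//! ```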
+ +pub use imp::OverflowTracker; + +#[cfg(not(any( + feature = "overflow-tracker-counter-u8", + feature = "overflow-tracker-bloom-1-u8", + feature = "overflow-tracker-bloom-1-u16", + feature = "overflow-tracker-hybrid" +)))] +mod imp { + /// An intangible `OverflowTracker` which tracks nothing. + pub struct OverflowTracker(()); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, _h2: u8) {} + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, _h2: u8) -> bool { + true + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-counter-u8")] +mod imp { + /// A counter based `OverflowTracker`. + /// + /// The counter tracks the number of elements which overflowed, and were not yet removed. If a great number of + /// elements overflow, the counter saturates, and removal is no longer tracked. + /// + /// This strategy is used in Facebook's F14 hash-table. + pub struct OverflowTracker(u8); + + impl OverflowTracker { + /// Tracks removal in a best-effort fashion. + /// + /// If the tracker overflows, removals can no longer be tracked, and calling `remove` has no effect. + pub const TRACK_REMOVALS: bool = true; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, _h2: u8) { + self.0 = self.0.saturating_add(1); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + // The counter is saturated, an unknown number of additions may have been ignored, and thus removals can no + // longer be tracked. + if self.0 == u8::MAX { + return; + } + + self.0 -= 1; + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, _h2: u8) -> bool { + self.0 > 0 + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-bloom-1-u8")] +mod imp { + /// A bloom-filter based `OverflowTracker`. + /// + /// The filter tracks whether an element with the same "reduced" hash has ever overflowed. It cannot distinguish + /// between different elements with the same "reduced" hash, and thus cannot track removals. + /// + /// This strategy is used in Boost's `std::unordered_flat_map`. + pub struct OverflowTracker(u8); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.0 |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. 
+ #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, h2: u8) -> bool { + (self.0 & Self::mask(h2)) != 0 + } + + #[inline(always)] + fn mask(h2: u8) -> u8 { + 1u8 << (h2 & 0x7) + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-bloom-1-u16")] +mod imp { + /// A bloom-filter based `OverflowTracker`. + /// + /// The filter tracks whether an element with the same "reduced" hash has ever overflowed. It cannot distinguish + /// between different elements with the same "reduced" hash, and thus cannot track removals. + /// + /// This tracker uses twice as many bits as Boost's `std::unordered_map` in an attempt to improve accuracy. + pub struct OverflowTracker(u16); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.0 |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, h2: u8) -> bool { + (self.0 & Self::mask(h2)) != 0 + } + + #[inline(always)] + fn mask(h2: u8) -> u16 { + 1u16 << (h2 & 0xF) + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-hybrid")] +mod imp { + /// A hybrid counter and bloom-filter based `OverflowTracker`. + /// + /// This combines both a counter and a filter. This allows tracking removals coarsely, while also tracking elements + /// in a more fine-grained fashion than with a pure counter. + pub struct OverflowTracker { + counter: u8, + filter: u8, + } + + impl OverflowTracker { + /// Tracks removal in a best-effort fashion. + /// + /// If the tracker overflows, removals can no longer be tracked, and calling `remove` has no effect. + pub const TRACK_REMOVALS: bool = true; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.counter = self.counter.saturating_add(1); + self.filter |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + // The counter is saturated, an unknown number of additions may have been ignored, and thus removals can no + // longer be tracked. + if self.counter == u8::MAX { + return; + } + + self.counter -= 1; + + if self.counter == 0 { + self.filter = 0; + } + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. 
+ #[inline(always)]
+ pub fn may_have_overflowed(&self, h2: u8) -> bool {
+ (self.filter & Self::mask(h2)) != 0
+ }
+
+ #[inline(always)]
+ fn mask(h2: u8) -> u8 {
+ 1u8 << (h2 & 0x7)
+ }
+ }
+} // mod imp
diff --git a/src/raw/sse2.rs b/src/raw/sse2.rs
index 956ba5d26..cc742a585 100644
--- a/src/raw/sse2.rs
+++ b/src/raw/sse2.rs
@@ -46,13 +46,6 @@ impl Group {
 &ALIGNED_BYTES.bytes
 }
- /// Loads a group of bytes starting at the given address.
- #[inline]
- #[allow(clippy::cast_ptr_alignment)] // unaligned load
- pub(crate) unsafe fn load(ptr: *const u8) -> Self {
- Group(x86::_mm_loadu_si128(ptr.cast()))
- }
-
 /// Loads a group of bytes starting at the given address, which must be
 /// aligned to `mem::align_of::<Group>()`.
 #[inline]
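// Illustrative sketch only (not part of the patch): the behavioural contract shared by the
// `OverflowTracker` implementations above, written against the counter tracker. It assumes the
// crate is built with `--features overflow-tracker-counter-u8`, that the test module lives inside
// `src/raw/mod.rs` (so `super::overflow::OverflowTracker` resolves), and it zero-initialises the
// tracker the same way the table does (zeroed bytes); the patch itself exposes no constructor.
#[cfg(all(test, feature = "overflow-tracker-counter-u8"))]
mod overflow_tracker_contract {
    use super::overflow::OverflowTracker;

    #[test]
    fn add_then_remove_round_trip() {
        // A zeroed tracker reports a definite "no overflow" for every h2.
        let mut tracker: OverflowTracker = unsafe { core::mem::zeroed() };
        assert!(!tracker.may_have_overflowed(0x2A));

        // Recording an overflow turns the answer into "maybe" for that h2.
        tracker.add(0x2A);
        assert!(tracker.may_have_overflowed(0x2A));

        // The counter tracker supports removals, so undoing the insertion restores the definite "no".
        assert!(OverflowTracker::TRACK_REMOVALS);
        tracker.remove(0x2A);
        assert!(!tracker.may_have_overflowed(0x2A));
    }
}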