diff --git a/Cargo.toml b/Cargo.toml index 9c2e0ccee..51d01c829 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ default = ["ahash", "inline-more", "allocator-api2"] nightly = ["allocator-api2?/nightly", "bumpalo/allocator_api"] rustc-internal-api = [] + rustc-dep-of-std = [ "nightly", "core", @@ -59,6 +60,7 @@ rustc-dep-of-std = [ "alloc", "rustc-internal-api", ] + raw = [] # Enables usage of `#[inline]` on far more functions than by default in this @@ -66,6 +68,22 @@ raw = [] # time cost. inline-more = [] +# If no overflow-tracker is selected, then the default is none. +# +# A single tracker can be selected at any time, selecting two or more is an error. + +# Bloom filter overflow-tracker, ala boost::unordered_flat_map. +overflow-tracker-bloom-1-u8 = [] + +# Bloom filter overflow-tracker, with more accuracy. +overflow-tracker-bloom-1-u16 = [] + +# Counter overflow-tracker, ala F14. +overflow-tracker-counter-u8 = [] + +# Hybrid overflow-tracker, mixing a counter and a bloom filter. +overflow-tracker-hybrid = [] + [package.metadata.docs.rs] features = ["nightly", "rayon", "serde", "raw"] rustdoc-args = ["--generate-link-to-definition"] diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs index 6576b3c5c..d7ae9ed5e 100644 --- a/src/raw/bitmask.rs +++ b/src/raw/bitmask.rs @@ -54,21 +54,6 @@ impl BitMask { } } - /// Returns the number of trailing zeroes in the `BitMask`. - #[inline] - pub(crate) fn trailing_zeros(self) -> usize { - // ARM doesn't have a trailing_zeroes instruction, and instead uses - // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM - // versions (pre-ARMv7) don't have RBIT and need to emulate it - // instead. Since we only have 1 bit set in each byte on ARM, we can - // use swap_bytes (REV) + leading_zeroes instead. - if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 { - self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE - } else { - self.0.trailing_zeros() as usize / BITMASK_STRIDE - } - } - /// Same as above but takes a `NonZeroBitMaskWord`. #[inline] fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize { @@ -80,12 +65,6 @@ impl BitMask { nonzero.trailing_zeros() as usize / BITMASK_STRIDE } } - - /// Returns the number of leading zeroes in the `BitMask`. - #[inline] - pub(crate) fn leading_zeros(self) -> usize { - self.0.leading_zeros() as usize / BITMASK_STRIDE - } } impl IntoIterator for BitMask { diff --git a/src/raw/generic.rs b/src/raw/generic.rs index c668b0642..c0cd571df 100644 --- a/src/raw/generic.rs +++ b/src/raw/generic.rs @@ -69,13 +69,6 @@ impl Group { &ALIGNED_BYTES.bytes } - /// Loads a group of bytes starting at the given address. - #[inline] - #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub(crate) unsafe fn load(ptr: *const u8) -> Self { - Group(ptr::read_unaligned(ptr.cast())) - } - /// Loads a group of bytes starting at the given address, which must be /// aligned to `mem::align_of::()`. #[inline] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 22c01f5e9..ec7a96ac1 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -3,10 +3,9 @@ use crate::scopeguard::{guard, ScopeGuard}; use crate::TryReserveError; use core::iter::FusedIterator; use core::marker::PhantomData; -use core::mem; use core::mem::MaybeUninit; use core::ptr::NonNull; -use core::{hint, ptr}; +use core::{hint, mem, ptr}; cfg_if! { // Use the SSE2 implementation if possible: it allows us to scan 16 buckets @@ -41,12 +40,15 @@ cfg_if! 
{ } mod alloc; + pub(crate) use self::alloc::{do_alloc, Allocator, Global}; mod bitmask; +mod overflow; use self::bitmask::BitMaskIter; use self::imp::Group; +use self::overflow::OverflowTracker; // Branch prediction hint. This is currently only available on nightly but it // consistently improves performance by 10-15%. @@ -110,6 +112,9 @@ const EMPTY: u8 = 0b1111_1111; /// Control byte value for a deleted bucket. const DELETED: u8 = 0b1000_0000; +/// Size of the tracker, to avoid repeated calls to `mem::size_of::()` which is chunky. +const OVERFLOW_TRACKER_SIZE: usize = mem::size_of::(); + /// Checks whether a control byte represents a full bucket (top bit is clear). #[inline] fn is_full(ctrl: u8) -> bool { @@ -166,11 +171,32 @@ fn h2(hash: u64) -> u8 { /// Proof that the probe will visit every group in the table: /// struct ProbeSeq { - pos: usize, + // Index of the first element of the group. + group: usize, stride: usize, } impl ProbeSeq { + fn with_hash(hash: u64, bucket_mask: usize) -> Self { + debug_assert!((bucket_mask + 1).is_power_of_two(), "{bucket_mask}"); + + // This is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + let group = h1(hash) & bucket_mask; + + Self { + group: group / Group::WIDTH * Group::WIDTH, + stride: 0, + } + } + + fn with_displacement(index: usize, displacement: u8) -> Self { + Self { + group: index / Group::WIDTH * Group::WIDTH, + stride: Group::WIDTH * (displacement as usize), + } + } + #[inline] fn move_next(&mut self, bucket_mask: usize) { // We should have found an empty bucket by now and ended the probe. @@ -179,9 +205,22 @@ impl ProbeSeq { "Went past end of probe sequence" ); + debug_assert_eq!(0, self.group % Group::WIDTH, "{}", self.group); + self.stride += Group::WIDTH; - self.pos += self.stride; - self.pos &= bucket_mask; + + self.group += self.stride; + self.group &= bucket_mask; + } + + #[inline] + fn move_prev(&mut self, bucket_mask: usize) { + debug_assert_eq!(0, self.group % Group::WIDTH, "{}", self.group); + + self.group = self.group.wrapping_sub(self.stride); + self.group &= bucket_mask; + + self.stride -= Group::WIDTH; } } @@ -257,11 +296,23 @@ impl TableLayout { debug_assert!(buckets.is_power_of_two()); let TableLayout { size, ctrl_align } = self; + // Manual layout calculation since Layout methods are not yet stable. let ctrl_offset = size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; + // No special consideration for alignment is necessary, as `OverflowTracker` has a lower alignment than `Group`. + debug_assert!(mem::align_of::() <= ctrl_align); + + let len = len.checked_add(OVERFLOW_TRACKER_SIZE * Self::number_groups(buckets))?; + + let len = if OverflowTracker::TRACK_REMOVALS { + len.checked_add(buckets / 2)? + } else { + len + }; + // We need an additional check to ensure that the allocation doesn't // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295). if len > isize::MAX as usize - (ctrl_align - 1) { @@ -273,6 +324,12 @@ impl TableLayout { ctrl_offset, )) } + + #[inline] + fn number_groups(buckets: usize) -> usize { + // Allocate one more group, to match the extra control bytes allocated. + (buckets + Group::WIDTH - 1) / Group::WIDTH + 1 + } } /// A reference to an empty bucket into which an can be inserted. @@ -797,7 +854,7 @@ struct RawTableInner { // number of buckets in the table. 
bucket_mask: usize, - // [Padding], T1, T2, ..., Tlast, C1, C2, ... + // [Padding], T1, T2, ..., Tlast, C1, C2, ..., Clast, O1, O2, ..., Olast, D1, D2, ..., Dlast // ^ points here ctrl: NonNull, @@ -857,7 +914,7 @@ impl RawTable { /// Allocates a new hash table with the given number of buckets. /// - /// The control bytes are left uninitialized. + /// The control bytes and overflow-tracking bytes are left uninitialized. #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( alloc: A, @@ -1046,7 +1103,8 @@ impl RawTable { // Avoid `Option::map` because it bloats LLVM IR. if let Some(bucket) = self.find(hash, eq) { unsafe { - self.erase(bucket); + self.erase_no_drop(bucket); + bucket.drop(); } true } else { @@ -1382,10 +1440,12 @@ impl RawTable { F: FnOnce(T) -> Option, { let index = self.bucket_index(&bucket); - let old_ctrl = *self.table.ctrl(index); debug_assert!(self.is_bucket_full(index)); + let old_growth_left = self.table.growth_left; - let item = self.remove(bucket).0; + let old_ctrl = self.table.half_erase(index); + let item = bucket.read(); + if let Some(new_item) = f(item) { self.table.growth_left = old_growth_left; self.table.set_ctrl(index, old_ctrl); @@ -1393,6 +1453,7 @@ impl RawTable { self.bucket(index).write(new_item); true } else { + self.table.untrack_overflow_trail(index, old_ctrl); false } } @@ -1786,10 +1847,17 @@ impl RawTableInner { capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; let result = Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + // SAFETY: We checked that the table is allocated and therefore the table already has // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + result + .overflow(0) + .write_bytes(0, result.num_overflow_trackers()); + result + .displacement(0) + .write_bytes(0, result.num_displacement_bytes()); Ok(result) } @@ -1825,7 +1893,7 @@ impl RawTableInner { /// bytes outside the range of the table are filled with [`EMPTY`] entries. These will unfortunately /// trigger a match of [`RawTableInner::find_insert_slot_in_group`] function. This is because /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking - /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. + /// (`(probe_seq.group + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. /// We detect this situation here and perform a second scan starting at the beginning of the table. /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the /// trailing control bytes (containing [`EMPTY`] bytes). @@ -1867,7 +1935,12 @@ impl RawTableInner { unsafe fn fix_insert_slot(&self, mut index: usize) -> InsertSlot { // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`. 
if unlikely(self.is_bucket_full(index)) { - debug_assert!(self.bucket_mask < Group::WIDTH); + debug_assert!( + self.bucket_mask < Group::WIDTH, + "{} >= {}", + self.bucket_mask, + Group::WIDTH + ); // SAFETY: // // * Since the caller of this function ensures that the control bytes are properly @@ -1879,7 +1952,7 @@ impl RawTableInner { // * Because the caller of this function ensures that the index was provided by the // `self.find_insert_slot_in_group()` function, so for for tables larger than the // group width (self.buckets() >= Group::WIDTH), we will never end up in the given - // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` + // branch, since `(probe_seq.group + bit) & self.bucket_mask` in `find_insert_slot_in_group` // cannot return a full bucket index. For tables smaller than the group width, calling // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside // the range of the table are filled with EMPTY bytes (and we know for sure that there @@ -1905,9 +1978,9 @@ impl RawTableInner { let bit = group.match_empty_or_deleted().lowest_set_bit(); if likely(bit.is_some()) { - // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // This is the same as `(probe_seq.group + bit) % self.buckets()` because the number // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask) + Some((probe_seq.group + bit.unwrap()) & self.bucket_mask) } else { None } @@ -1957,28 +2030,28 @@ impl RawTableInner { let mut insert_slot = None; let h2_hash = h2(hash); - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask` and also because mumber of // buckets is a power of two (see `self.probe_seq` function). // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new). - let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; for bit in group.match_byte(h2_hash) { - let index = (probe_seq.pos + bit) & self.bucket_mask; + let index = (probe_seq.group + bit) & self.bucket_mask; if likely(eq(index)) { return Ok(index); @@ -1991,9 +2064,15 @@ impl RawTableInner { insert_slot = self.find_insert_slot_in_group(&group, &probe_seq); } - // Only stop the search if the group contains at least one empty element. 
- // Otherwise, the element that we are looking for might be in a following group. - if likely(group.match_empty().any_bit_set()) { + // The search ends with a slot if: + // + // - There is an empty slot available, hence the element that we are looking for did not overflow. + // - The overflow tracker indicates the absence of overflow, and there is a deleted slot available. + if likely( + group.match_empty().any_bit_set() + || (group.match_empty_or_deleted().any_bit_set() + && !self.may_have_overflowed(probe_seq.group, h2_hash)), + ) { // We must have found a insert slot by now, since the current group contains at // least one. For tables smaller than the group width, there will still be an // empty element in the current (and only) group due to the load factor. @@ -2066,6 +2145,7 @@ impl RawTableInner { unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, u8) { // SAFETY: Caller of this function ensures that the control bytes are properly initialized. let index: usize = self.find_insert_slot(hash).index; + // SAFETY: // 1. The `find_insert_slot` function either returns an `index` less than or // equal to `self.buckets() = self.bucket_mask + 1` of the table, or never @@ -2074,6 +2154,8 @@ impl RawTableInner { // allocated let old_ctrl = *self.ctrl(index); self.set_ctrl_h2(index, hash); + self.track_overflow_trail(InsertSlot { index }, hash); + (index, old_ctrl) } @@ -2107,24 +2189,24 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn find_insert_slot(&self, hash: u64) -> InsertSlot { - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask` and also because mumber of // buckets is a power of two (see `self.probe_seq` function). // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new). 
- let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; let index = self.find_insert_slot_in_group(&group, &probe_seq); if likely(index.is_some()) { @@ -2165,36 +2247,39 @@ impl RawTableInner { #[inline(always)] unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { let h2_hash = h2(hash); - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); loop { // SAFETY: // * Caller of this function ensures that the control bytes are properly initialized. // - // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.buckets() - 1` + // * `ProbeSeq.group` cannot be greater than `self.bucket_mask = self.buckets() - 1` // of the table due to masking with `self.bucket_mask`. // - // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to - // call `Group::load` due to the extended control bytes range, which is + // * Even if `ProbeSeq.group` returns `position == self.bucket_mask`, it is safe to + // call `load_group` due to the extended control bytes range, which is // `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control // byte will never be read for the allocated table); // - // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will - // always return "0" (zero), so Group::load will read unaligned `Group::static_empty()` + // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.group` will + // always return "0" (zero), so `load_group` will read unaligned `Group::static_empty()` // bytes, which is safe (see RawTableInner::new_in). - let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; + let group = unsafe { self.load_group(probe_seq.group) }; for bit in group.match_byte(h2_hash) { - // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number + // This is the same as `(probe_seq.group + bit) % self.buckets()` because the number // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - let index = (probe_seq.pos + bit) & self.bucket_mask; + let index = (probe_seq.group + bit) & self.bucket_mask; if likely(eq(index)) { return Some(index); } } - if likely(group.match_empty().any_bit_set()) { + if likely( + group.match_empty().any_bit_set() + || !self.may_have_overflowed(probe_seq.group, h2_hash), + ) { return None; } @@ -2203,6 +2288,7 @@ impl RawTableInner { } /// Prepares for rehashing data in place (that is, without allocating new memory). + /// /// Converts all full index `control bytes` to `DELETED` and all `DELETED` control /// bytes to `EMPTY`, i.e. performs the following conversion: /// @@ -2210,6 +2296,8 @@ impl RawTableInner { /// - `DELETED` control bytes -> `EMPTY`; /// - `FULL` control bytes -> `DELETED`. /// + /// Erases all overflow trackers. + /// /// This function does not make any changes to the `data` parts of the table, /// or any changes to the the `items` or `growth_left` field of the table. /// @@ -2272,6 +2360,11 @@ impl RawTableInner { self.ctrl(0) .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } + + self.overflow(0) + .write_bytes(0, self.num_overflow_trackers()); + self.displacement(0) + .write_bytes(0, self.num_displacement_bytes()); } /// Returns an iterator over every element in the table. @@ -2596,21 +2689,6 @@ impl RawTableInner { self.ctrl.cast() } - /// Returns an iterator-like object for a probe sequence on the table. 
- /// - /// This iterator never terminates, but is guaranteed to visit each bucket - /// group exactly once. The loop using `probe_seq` must terminate upon - /// reaching a group containing an empty bucket. - #[inline] - fn probe_seq(&self, hash: u64) -> ProbeSeq { - ProbeSeq { - // This is the same as `hash as usize % self.buckets()` because the number - // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - pos: h1(hash) & self.bucket_mask, - stride: 0, - } - } - /// Returns the index of a bucket for which a value must be inserted if there is enough rooom /// in the table, otherwise returns error #[cfg(feature = "raw")] @@ -2631,13 +2709,15 @@ impl RawTableInner { self.growth_left -= usize::from(special_is_empty(old_ctrl)); self.set_ctrl_h2(index, hash); self.items += 1; + + self.track_overflow_trail(InsertSlot { index }, hash); } #[inline] fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { - let probe_seq_pos = self.probe_seq(hash).pos; + let probe_seq_pos = ProbeSeq::with_hash(hash, self.bucket_mask).group; let probe_index = - |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + |group: usize| (group.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; probe_index(i) == probe_index(new_i) } @@ -2737,7 +2817,7 @@ impl RawTableInner { #[inline] unsafe fn set_ctrl(&mut self, index: usize, ctrl: u8) { // Replicate the first Group::WIDTH control bytes at the end of - // the array without using a branch. If the tables smaller than + // the array without using a branch. If the table is smaller than // the group width (self.buckets() < Group::WIDTH), // `index2 = Group::WIDTH + index`, otherwise `index2` is: // @@ -2797,6 +2877,195 @@ impl RawTableInner { self.ctrl.as_ptr().add(index) } + /// Returns an aligned group. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline] + unsafe fn load_group(&self, index: usize) -> Group { + debug_assert_eq!(0, index % Group::WIDTH, "{index}"); + + Group::load_aligned(self.ctrl(index)) + } + + /// Returns whether a given element may have overflowed the current index, based on its `h2`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn may_have_overflowed(&self, index: usize, h2: u8) -> bool { + let tracker = self.overflow(index) as *const OverflowTracker; + + (*tracker).may_have_overflowed(h2) + } + + /// Marks an element with hash `hash` has having overflowed from its initial group until `slot`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn track_overflow_trail(&mut self, slot: InsertSlot, hash: u64) { + #[inline(never)] + unsafe fn track(this: &mut RawTableInner, slot: InsertSlot, hash: u64) { + let h2_hash = h2(hash); + let mut probe_seq = ProbeSeq::with_hash(hash, this.bucket_mask); + let mut displacement = 0usize; + + while probe_seq.group / Group::WIDTH != slot.index / Group::WIDTH { + let tracker = this.overflow(probe_seq.group); + + (*tracker).add(h2_hash); + displacement += 1; + + probe_seq.move_next(this.bucket_mask); + } + + if !OverflowTracker::TRACK_REMOVALS { + return; + } + + if displacement > 0xF { + return; + } + + this.set_displacement(slot.index, displacement as u8); + } + + let probe_seq = ProbeSeq::with_hash(hash, self.bucket_mask); + + // Insertion at ideal group, no overflow to track. 
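+ //
+ // Otherwise, `track` adds `h2(hash)` to the tracker of every group the probe sequence passes
+ // through before reaching `slot`'s group and, when removals are tracked and the displacement
+ // fits in a nibble, records that displacement for `slot` so that `untrack_overflow_trail` can
+ // undo the marks on removal.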
+ if probe_seq.group / Group::WIDTH == slot.index / Group::WIDTH { + return; + } + + track(self, slot, hash); + } + + /// Removes mark of an element with hash `hash` has having overflowed from its initial group until `slot`. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn untrack_overflow_trail(&mut self, index: usize, h2_hash: u8) { + #[inline(never)] + unsafe fn untrack(this: &mut RawTableInner, index: usize, h2_hash: u8, displacement: u8) { + this.set_displacement(index, 0); + + let mut probe_seq = ProbeSeq::with_displacement(index, displacement); + + for _ in 0..displacement { + probe_seq.move_prev(this.bucket_mask); + + let tracker = this.overflow(probe_seq.group); + + (*tracker).remove(h2_hash); + } + + debug_assert_eq!(0, probe_seq.stride); + } + + if !OverflowTracker::TRACK_REMOVALS { + return; + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::untrack_overflow_trail`]. + let displacement = self.get_displacement(index); + + if likely(displacement == 0) { + return; + } + + untrack(self, index, h2_hash, displacement); + } + + /// Returns a pointer to an `OverflowTracker`. + /// + /// The `index` is that of the _element_, not that of the group of the element. + /// + /// # Safety + /// + /// See `ctrl`. + #[inline(always)] + unsafe fn overflow(&self, index: usize) -> *mut OverflowTracker { + // ZST is special-cased. + #![allow(clippy::zst_offset)] + + debug_assert!(index / Group::WIDTH < self.num_overflow_trackers()); + + if OVERFLOW_TRACKER_SIZE == 0 { + return invalid_mut(mem::align_of::()); + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::overflow`] + let ctrl_end = self.ctrl.as_ptr().add(self.num_ctrl_bytes()); + + let overflow_start: *mut OverflowTracker = ctrl_end as *mut OverflowTracker; + + overflow_start.add(index / Group::WIDTH) + } + + /// Returns the displacement. + /// + /// # Safety + /// + /// See `displacement`. + #[inline(always)] + unsafe fn get_displacement(&self, index: usize) -> u8 { + debug_assert!(OverflowTracker::TRACK_REMOVALS); + + let pair = self.displacement(index); + + if index % 2 == 0 { + *pair & 0xF + } else { + *pair >> 4 + } + } + + /// Sets the displacement. + /// + /// # Safety + /// + /// See `displacement`. + #[inline] + unsafe fn set_displacement(&mut self, index: usize, displacement: u8) { + debug_assert!(OverflowTracker::TRACK_REMOVALS); + debug_assert!(displacement <= 0xF, "{displacement}"); + + let pair = self.displacement(index); + + *pair = if index % 2 == 0 { + (*pair & 0xF0) | displacement + } else { + (displacement << 4) | (*pair & 0xF) + }; + } + + /// Returns a pointer to the displacement pair. + /// + /// # Safety + /// + /// See `ctrl`. 
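+ ///
+ /// Two displacements are packed per byte: the low nibble holds the displacement of the even
+ /// index, the high nibble that of the odd index. For example (illustrative values), a pair
+ /// byte of `0b0011_0001` means index `2k` has displacement `1` and index `2k + 1` has
+ /// displacement `3`.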
+ #[inline(always)] + unsafe fn displacement(&self, index: usize) -> *mut u8 { + debug_assert!(index <= self.bucket_mask); + + if !OverflowTracker::TRACK_REMOVALS { + return invalid_mut(mem::align_of::()); + } + + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::displacement`] + let ctrl_end = self.ctrl.as_ptr().add(self.num_ctrl_bytes()); + + let overflow_end = ctrl_end.add(OVERFLOW_TRACKER_SIZE * self.num_overflow_trackers()); + + overflow_end.add(index / 2) + } + #[inline] fn buckets(&self) -> usize { self.bucket_mask + 1 @@ -2818,6 +3087,20 @@ impl RawTableInner { self.bucket_mask + 1 + Group::WIDTH } + #[inline] + fn num_overflow_trackers(&self) -> usize { + (self.bucket_mask + Group::WIDTH) / Group::WIDTH + 1 + } + + #[inline] + fn num_displacement_bytes(&self) -> usize { + if OverflowTracker::TRACK_REMOVALS { + (self.bucket_mask + 1) / 2 + } else { + 0 + } + } + #[inline] fn is_empty_singleton(&self) -> bool { self.bucket_mask == 0 @@ -3186,13 +3469,14 @@ impl RawTableInner { // are properly initialized. let new_i = guard.find_insert_slot(hash).index; - // Probing works by scanning through all of the control - // bytes in groups, which may not be aligned to the group - // size. If both the new and old position fall within the - // same unaligned group, then there is no benefit in moving + // Probing works by scanning through all of the control bytes + // in groups. If both the new and old position fall within + // the same group, then there is no benefit in moving // it and we can just continue to the next item. if likely(guard.is_in_same_group(i, new_i, hash)) { guard.set_ctrl_h2(i, hash); + guard.track_overflow_trail(InsertSlot { index: i }, hash); + continue 'outer; } @@ -3201,6 +3485,8 @@ impl RawTableInner { // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); + guard.track_overflow_trail(InsertSlot { index: new_i }, hash); + if prev_ctrl == EMPTY { guard.set_ctrl(i, EMPTY); // If the target slot is empty, simply move the current @@ -3333,6 +3619,10 @@ impl RawTableInner { if !self.is_empty_singleton() { unsafe { self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + self.overflow(0) + .write_bytes(0, self.num_overflow_trackers()); + self.displacement(0) + .write_bytes(0, self.num_displacement_bytes()); } } self.items = 0; @@ -3374,16 +3664,57 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn erase(&mut self, index: usize) { + let prev_ctrl = self.half_erase(index); + + self.untrack_overflow_trail(index, prev_ctrl); + } + + /// Erases the [`Bucket`]'s control byte at the given index so that it does not + /// triggered as full, decreases the `items` of the table and, if it can be done, + /// increases `self.growth_left`. + /// + /// This function does NOT adjust overflow/displacements. If actually removing the + /// element, the caller of this function must call `untrack_overflow_trail`. See + /// `erase` for unconditional removals. + /// + /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it + /// does not make any changes to the `data` parts of the table. The caller of this + /// function must take care to properly drop the `data`, otherwise calling this + /// function may result in a memory leak. 
+ /// + /// # Safety + /// + /// You must observe the following safety rules when calling this function: + /// + /// * The [`RawTableInner`] has already been allocated; + /// + /// * It must be the full control byte at the given position; + /// + /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e. + /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must + /// be no greater than the number returned by the function [`RawTableInner::buckets`]. + /// + /// Calling this function on a table that has not been allocated results in [`undefined behavior`]. + /// + /// Calling this function on a table with no elements is unspecified, but calling subsequent + /// functions is likely to result in [`undefined behavior`] due to overflow subtraction + /// (`self.items -= 1 cause overflow when self.items == 0`). + /// + /// See also [`Bucket::as_ptr`] method, for more information about of properly removing + /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. + /// + /// [`RawTableInner::buckets`]: RawTableInner::buckets + /// [`Bucket::as_ptr`]: Bucket::as_ptr + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + #[inline] + unsafe fn half_erase(&mut self, index: usize) -> u8 { debug_assert!(self.is_bucket_full(index)); - // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because - // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. - let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; // SAFETY: // - The caller must uphold the safety contract for `erase` method; - // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask` - let empty_before = Group::load(self.ctrl(index_before)).match_empty(); - let empty_after = Group::load(self.ctrl(index)).match_empty(); + let empty = self + .load_group(index / Group::WIDTH * Group::WIDTH) + .match_empty(); // Inserting and searching in the map is performed by two key functions: // @@ -3408,28 +3739,28 @@ impl RawTableInner { // function may stumble upon an `EMPTY` byte before finding the desired element and stop // searching. // - // Thus it is necessary to check all bytes after and before the erased element. If we are in - // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes - // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as - // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there - // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble - // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well. - // - // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index` - // and given all of the above, tables smaller than the group width (self.buckets() < Group::WIDTH) - // cannot have `DELETED` bytes. + // Thus it is necessary to check all bytes in the group of the erased element. If the group is + // composed only of `FULL` and `DELETED` bytes, then we must mark our byte as `DELETED` in order + // for the `find_inner` function to go further. On the other hand, if there is at least one `EMPTY` + // slot in the group, then the `find_inner` function will still stumble upon it, so we can safely + // mark our erased byte as `EMPTY` as well. 
// - // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while - // `trailing_zeros` refers to the bytes at the beginning of a group. - let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { - DELETED - } else { + // Note that overflow tracking does not alter this picture, since overflow tracking is only checked + // in the absence of `EMPTY` control byte in the group. + let ctrl = if empty.any_bit_set() { self.growth_left += 1; EMPTY + } else { + DELETED }; + // SAFETY: the caller must uphold the safety contract for `erase` method. + let prev_ctrl = *self.ctrl(index); self.set_ctrl(index, ctrl); + self.items -= 1; + + prev_ctrl } } @@ -3538,6 +3869,7 @@ impl Clone for RawTable { trait RawTableClone { unsafe fn clone_from_spec(&mut self, source: &Self); } + impl RawTableClone for RawTable { default_fn! { #[cfg_attr(feature = "inline-more", inline)] @@ -3546,10 +3878,19 @@ impl RawTableClone for RawTable { } } } + #[cfg(feature = "nightly")] impl RawTableClone for RawTable { #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_spec(&mut self, source: &Self) { + source.table.displacement(0).copy_to_nonoverlapping( + self.table.displacement(0), + self.table.num_displacement_bytes(), + ); + source + .table + .overflow(0) + .copy_to_nonoverlapping(self.table.overflow(0), self.table.num_overflow_trackers()); source .table .ctrl(0) @@ -3571,7 +3912,15 @@ impl RawTable { /// - The control bytes are not initialized yet. #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_impl(&mut self, source: &Self) { - // Copy the control bytes unchanged. We do this in a single pass + // Copy the displacements, overflow trackers & control bytes unchanged. We do this in a single pass. + source.table.displacement(0).copy_to_nonoverlapping( + self.table.displacement(0), + self.table.num_displacement_bytes(), + ); + source + .table + .overflow(0) + .copy_to_nonoverlapping(self.table.overflow(0), self.table.num_overflow_trackers()); source .table .ctrl(0) @@ -3675,6 +4024,7 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable { } } } + #[cfg(not(feature = "nightly"))] impl Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] @@ -4316,6 +4666,7 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawIntoIter { } } } + #[cfg(not(feature = "nightly"))] impl Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] @@ -4473,13 +4824,14 @@ impl RawIterHash { } } } + impl RawIterHashInner { #[cfg_attr(feature = "inline-more", inline)] #[cfg(feature = "raw")] unsafe fn new(table: &RawTableInner, hash: u64) -> Self { let h2_hash = h2(hash); - let probe_seq = table.probe_seq(hash); - let group = Group::load(table.ctrl(probe_seq.pos)); + let probe_seq = ProbeSeq::with_hash(hash, table.bucket_mask); + let group = table.load_group(probe_seq.group); let bitmask = group.match_byte(h2_hash).into_iter(); RawIterHashInner { @@ -4519,7 +4871,7 @@ impl Iterator for RawIterHashInner { unsafe { loop { if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.bucket_mask; + let index = (self.probe_seq.group + bit) & self.bucket_mask; return Some(index); } if likely(self.group.match_empty().any_bit_set()) { @@ -4529,11 +4881,11 @@ impl Iterator for RawIterHashInner { // Can't use `RawTableInner::ctrl` here as we don't have // an actual `RawTableInner` reference to use. 
- let index = self.probe_seq.pos;
+ let index = self.probe_seq.group;
 debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
- let group_ctrl = self.ctrl.as_ptr().add(index);
+ let group_ctrl = self.ctrl.as_ptr().add(index / Group::WIDTH * Group::WIDTH);
- self.group = Group::load(group_ctrl);
+ self.group = Group::load_aligned(group_ctrl);
 self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
 }
 }
@@ -4562,6 +4914,42 @@ impl RawExtractIf<'_, T, A> {
 }
 }
+#[cfg(test)]
+mod test_probe_seq {
+ use super::*;
+
+ #[test]
+ fn move_next_prev() {
+ const BUCKET_MASK: usize = if Group::WIDTH == 16 { 255 } else { 127 };
+
+ const EXPECTED_PROBE_SEQUENCE: [usize; 5] = if Group::WIDTH == 16 {
+ [160, 176, 208, 0, 64]
+ } else {
+ [80, 88, 104, 0, 32]
+ };
+
+ let mut probe = ProbeSeq::with_hash(10 * Group::WIDTH as u64, BUCKET_MASK);
+
+ for group in EXPECTED_PROBE_SEQUENCE {
+ assert_eq!(group, probe.group);
+
+ probe.move_next(BUCKET_MASK);
+ }
+
+ for (i, group) in EXPECTED_PROBE_SEQUENCE.into_iter().enumerate() {
+ let mut rev_probe = ProbeSeq::with_displacement(group, i as u8);
+
+ assert_eq!(group, rev_probe.group);
+
+ for k in (0..i).rev() {
+ rev_probe.move_prev(BUCKET_MASK);
+
+ assert_eq!(EXPECTED_PROBE_SEQUENCE[k], rev_probe.group);
+ }
+ }
+ }
+}
+
 #[cfg(test)]
 mod test_map {
 use super::*;
@@ -4642,6 +5030,15 @@ mod test_map {
 .ctrl(0)
 .write_bytes(EMPTY, table.table.num_ctrl_bytes());
+ table
+ .table
+ .overflow(0)
+ .write_bytes(0, table.table.num_overflow_trackers());
+ table
+ .table
+ .displacement(0)
+ .write_bytes(0, table.table.num_displacement_bytes());
+
 // SAFETY: table.capacity() is guaranteed to be smaller than table.buckets()
 table.table.ctrl(0).write_bytes(0, table.capacity());
diff --git a/src/raw/neon.rs b/src/raw/neon.rs
index 44e82d57d..af0c99f36 100644
--- a/src/raw/neon.rs
+++ b/src/raw/neon.rs
@@ -40,13 +40,6 @@ impl Group {
 &ALIGNED_BYTES.bytes
 }
- /// Loads a group of bytes starting at the given address.
- #[inline]
- #[allow(clippy::cast_ptr_alignment)] // unaligned load
- pub(crate) unsafe fn load(ptr: *const u8) -> Self {
- Group(neon::vld1_u8(ptr))
- }
-
 /// Loads a group of bytes starting at the given address, which must be
 /// aligned to `mem::align_of::<Group>()`.
 #[inline]
diff --git a/src/raw/overflow.rs b/src/raw/overflow.rs
new file mode 100644
index 000000000..ea2fe45b3
--- /dev/null
+++ b/src/raw/overflow.rs
@@ -0,0 +1,241 @@
+//! Overflow tracking, for finer-grained probing.
+//!
+//! This module defines an `OverflowTracker`, selected via Cargo features.
+//!
+//! An `OverflowTracker` tracks, in some fashion, whether elements inserted into the hash-table overflowed the group
+//! paired with the `OverflowTracker`. During a look-up, the `OverflowTracker` can then indicate whether further probing
+//! is necessary or not, though on a probabilistic basis: it can indicate "no" with certainty, but only a "maybe"
+//! otherwise.
+//!
+//! Which `OverflowTracker` to choose depends on the workload.
+//!
+//! Elements to consider:
+//!
+//! - No overflow: no tracker! If insertion never overflows nor fully fills a group, then any overflow tracking is
+//! pure overhead.
+//! - No removal: no counter! If the hash-table is append-only, then counters (which allow clean-up on removal) are
+//! pure overhead.
+//! - Bloom is good! Bloom-filter based trackers are expected to perform better than pure-counter trackers.
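+//!
+//! For illustration, a sketch (not the exact code) of the probe-termination test that look-ups in
+//! `raw/mod.rs` can perform once every group is paired with a tracker; here `group`, `h2_hash` and
+//! `tracker` stand for the values obtained at the current probe position:
+//!
+//! ```ignore
+//! // Probing may stop as soon as either the group still has an EMPTY slot (nothing was ever
+//! // pushed past it), or its tracker is certain that no element with this `h2` overflowed it.
+//! let stop_probing = group.match_empty().any_bit_set() || !tracker.may_have_overflowed(h2_hash);
+//! ```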
+ +pub use imp::OverflowTracker; + +#[cfg(not(any( + feature = "overflow-tracker-counter-u8", + feature = "overflow-tracker-bloom-1-u8", + feature = "overflow-tracker-bloom-1-u16", + feature = "overflow-tracker-hybrid" +)))] +mod imp { + /// An intangible `OverflowTracker` which tracks nothing. + pub struct OverflowTracker(()); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, _h2: u8) {} + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, _h2: u8) -> bool { + true + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-counter-u8")] +mod imp { + /// A counter based `OverflowTracker`. + /// + /// The counter tracks the number of elements which overflowed, and were not yet removed. If a great number of + /// elements overflow, the counter saturates, and removal is no longer tracked. + /// + /// This strategy is used in Facebook's F14 hash-table. + pub struct OverflowTracker(u8); + + impl OverflowTracker { + /// Tracks removal in a best-effort fashion. + /// + /// If the tracker overflows, removals can no longer be tracked, and calling `remove` has no effect. + pub const TRACK_REMOVALS: bool = true; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, _h2: u8) { + self.0 = self.0.saturating_add(1); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + // The counter is saturated, an unknown number of additions may have been ignored, and thus removals can no + // longer be tracked. + if self.0 == u8::MAX { + return; + } + + self.0 -= 1; + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, _h2: u8) -> bool { + self.0 > 0 + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-bloom-1-u8")] +mod imp { + /// A bloom-filter based `OverflowTracker`. + /// + /// The filter tracks whether an element with the same "reduced" hash has ever overflowed. It cannot distinguish + /// between different elements with the same "reduced" hash, and thus cannot track removals. + /// + /// This strategy is used in Boost's `std::unordered_flat_map`. + pub struct OverflowTracker(u8); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.0 |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. 
+ #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, h2: u8) -> bool { + (self.0 & Self::mask(h2)) != 0 + } + + #[inline(always)] + fn mask(h2: u8) -> u8 { + 1u8 << (h2 & 0x7) + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-bloom-1-u16")] +mod imp { + /// A bloom-filter based `OverflowTracker`. + /// + /// The filter tracks whether an element with the same "reduced" hash has ever overflowed. It cannot distinguish + /// between different elements with the same "reduced" hash, and thus cannot track removals. + /// + /// This tracker uses twice as many bits as Boost's `std::unordered_map` in an attempt to improve accuracy. + pub struct OverflowTracker(u16); + + impl OverflowTracker { + /// Does not track removals. + /// + /// The `remove` function will unconditionally panic, it is only provided to ease compilation. + pub const TRACK_REMOVALS: bool = false; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.0 |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + unreachable!("`remove` should not be called when `TRACK_REMOVALS` is false"); + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. + #[inline(always)] + pub fn may_have_overflowed(&self, h2: u8) -> bool { + (self.0 & Self::mask(h2)) != 0 + } + + #[inline(always)] + fn mask(h2: u8) -> u16 { + 1u16 << (h2 & 0xF) + } + } +} // mod imp + +#[cfg(feature = "overflow-tracker-hybrid")] +mod imp { + /// A hybrid counter and bloom-filter based `OverflowTracker`. + /// + /// This combines both a counter and a filter. This allows tracking removals coarsely, while also tracking elements + /// in a more fine-grained fashion than with a pure counter. + pub struct OverflowTracker { + counter: u8, + filter: u8, + } + + impl OverflowTracker { + /// Tracks removal in a best-effort fashion. + /// + /// If the tracker overflows, removals can no longer be tracked, and calling `remove` has no effect. + pub const TRACK_REMOVALS: bool = true; + + /// Adds the `h2` to the tracker. + #[inline(always)] + pub fn add(&mut self, h2: u8) { + self.counter = self.counter.saturating_add(1); + self.filter |= Self::mask(h2); + } + + /// Removes the `h2` from the tracker, if supported. + #[inline(always)] + pub fn remove(&mut self, _h2: u8) { + // The counter is saturated, an unknown number of additions may have been ignored, and thus removals can no + // longer be tracked. + if self.counter == u8::MAX { + return; + } + + self.counter -= 1; + + if self.counter == 0 { + self.filter = 0; + } + } + + /// Returns whether the element of this `h2` may be further ahead in the probing sequence, or not. + /// + /// This is a probalistic response. `false` is definite, `true` is only a possibility. 
+ #[inline(always)]
+ pub fn may_have_overflowed(&self, h2: u8) -> bool {
+ (self.filter & Self::mask(h2)) != 0
+ }
+
+ #[inline(always)]
+ fn mask(h2: u8) -> u8 {
+ 1u8 << (h2 & 0x7)
+ }
+ }
+} // mod imp
diff --git a/src/raw/sse2.rs b/src/raw/sse2.rs
index 956ba5d26..cc742a585 100644
--- a/src/raw/sse2.rs
+++ b/src/raw/sse2.rs
@@ -46,13 +46,6 @@ impl Group {
 &ALIGNED_BYTES.bytes
 }
- /// Loads a group of bytes starting at the given address.
- #[inline]
- #[allow(clippy::cast_ptr_alignment)] // unaligned load
- pub(crate) unsafe fn load(ptr: *const u8) -> Self {
- Group(x86::_mm_loadu_si128(ptr.cast()))
- }
-
 /// Loads a group of bytes starting at the given address, which must be
 /// aligned to `mem::align_of::<Group>()`.
 #[inline]
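// Illustrative sketch only (not part of the patch): the behavioural contract shared by the
// `OverflowTracker` implementations above, written against the counter tracker. It assumes the
// crate is built with `--features overflow-tracker-counter-u8`, that the test module lives inside
// `src/raw/mod.rs` (so `super::overflow::OverflowTracker` resolves), and it zero-initialises the
// tracker the same way the table does (zeroed bytes); the patch itself exposes no constructor.
#[cfg(all(test, feature = "overflow-tracker-counter-u8"))]
mod overflow_tracker_contract {
    use super::overflow::OverflowTracker;

    #[test]
    fn add_then_remove_round_trip() {
        // A zeroed tracker reports a definite "no overflow" for every h2.
        let mut tracker: OverflowTracker = unsafe { core::mem::zeroed() };
        assert!(!tracker.may_have_overflowed(0x2A));

        // Recording an overflow turns the answer into "maybe" for that h2.
        tracker.add(0x2A);
        assert!(tracker.may_have_overflowed(0x2A));

        // The counter tracker supports removals, so undoing the insertion restores the definite "no".
        assert!(OverflowTracker::TRACK_REMOVALS);
        tracker.remove(0x2A);
        assert!(!tracker.may_have_overflowed(0x2A));
    }
}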