From 775a3bf6dffb00fb94fe99d9bbaa622e6c643bfb Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Thu, 24 Apr 2025 15:27:40 -0700 Subject: [PATCH 01/18] Preliminary implementation of shift_remove_index --- src/index_map.rs | 110 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 30 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index bc75236842..e54d698f98 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -302,43 +302,61 @@ where where F: FnMut(&mut K, &mut V) -> bool, { - const INIT: Option = None; - self.entries .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); if self.entries.len() < self.indices.len() { - for index in self.indices.iter_mut() { - *index = INIT; - } + self.after_removal() + } + } + + fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> + { + if index >= self.entries.len() { + return None; + } + + let bucket = self.entries.remove(index); + + self.after_removal(); + + Some((bucket.key, bucket.value)) + } - for (index, entry) in self.entries.iter().enumerate() { - let mut probe = entry.hash.desired_pos(Self::mask()); - let mut dist = 0; - - probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - - if let Some(pos) = *pos { - let entry_hash = pos.hash(); - - // robin hood: steal the spot if it's better for us - let their_dist = entry_hash.probe_distance(Self::mask(), probe); - if their_dist < dist { - Self::insert_phase_2( - &mut self.indices, - probe, - Pos::new(index, entry.hash), - ); - break; - } - } else { - *pos = Some(Pos::new(index, entry.hash)); + fn after_removal(&mut self) + { + const INIT: Option = None; + + for index in self.indices.iter_mut() { + *index = INIT; + } + + for (index, entry) in self.entries.iter().enumerate() { + let mut probe = entry.hash.desired_pos(Self::mask()); + let mut dist = 0; + + probe_loop!(probe < self.indices.len(), { + let pos = &mut self.indices[probe]; + + if let Some(pos) = *pos { + let entry_hash = pos.hash(); + + // robin hood: steal the spot if it's better for us + let their_dist = entry_hash.probe_distance(Self::mask(), probe); + if their_dist < dist { + Self::insert_phase_2( + &mut self.indices, + probe, + Pos::new(index, entry.hash), + ); break; } - dist += 1; - }); - } + } else { + *pos = Some(Pos::new(index, entry.hash)); + break; + } + dist += 1; + }); } } @@ -1231,6 +1249,38 @@ where .map(|(probe, found)| self.core.remove_found(probe, found).1) } + /// Remove the key-value pair at position `index` and return them. + /// + /// Like `Vec::remove`, the pair is removed by shifting all remaining items. This maintains + /// the remaining elements' insertion order, but is a more expensive operation + /// + /// Return `None` if `index` is not in 0..len(). + /// + /// Computes in *O*(n) time (average). + /// + /// # Examples + /// + /// ``` + /// use heapless::index_map::FnvIndexMap; + /// + /// let mut map = FnvIndexMap::<_, _, 8>::new(); + /// map.insert(3, "a").unwrap(); + /// map.insert(2, "b").unwrap(); + /// map.insert(1, "c").unwrap(); + /// let removed = map.shift_remove_index(1); + /// assert_eq!(removed, Some((2, "b"))); + /// assert_eq!(map.len(), 2); + /// + /// let mut iter = map.iter(); + /// assert_eq!(iter.next(), Some((&3, &"a"))); + /// assert_eq!(iter.next(), Some((&1, &"c"))); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn shift_remove_index(&mut self, index: usize) -> Option<(K,V)> + { + self.core.shift_remove_index(index) + } + /// Retains only the elements specified by the predicate. /// /// In other words, remove all pairs `(k, v)` for which `f(&k, &mut v)` returns `false`. From b20d35de419f8db99f48f3dbb2f6fcd42e712f2d Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Thu, 24 Apr 2025 16:14:09 -0700 Subject: [PATCH 02/18] Implement remaining shift_remove* functions --- src/index_map.rs | 131 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 2 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index e54d698f98..46a7dd70ec 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -322,7 +322,18 @@ where Some((bucket.key, bucket.value)) } + + fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) + { + let entry = self.entries.remove(found); + + self.after_removal(); /* Todo: pass probe if this starts taking an index parameter */ + + (entry.key, entry.value) + } + // Todo: Should this take in a parameter to allow it to only process the moved + // elements? fn after_removal(&mut self) { const INIT: Option = None; @@ -1251,8 +1262,9 @@ where /// Remove the key-value pair at position `index` and return them. /// - /// Like `Vec::remove`, the pair is removed by shifting all remaining items. This maintains - /// the remaining elements' insertion order, but is a more expensive operation + /// Like [`Vec::remove`], the pair is removed by shifting all + /// remaining items. This maintains the remaining elements' relative + /// insertion order, but is a more expensive operation /// /// Return `None` if `index` is not in 0..len(). /// @@ -1281,6 +1293,121 @@ where self.core.shift_remove_index(index) } + /// Remove the key-value pair equivalent to `key` and return it and + /// the index it had. + /// + /// Like [`Vec::remove`], the pair is removed by shifting all + /// remaining items. This maintains the remaining elements' relative + /// insertion order, but is a more expensive operation + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(n)** time (average). + /// # Examples + /// + /// ``` + /// use heapless::index_map::FnvIndexMap; + /// + /// let mut map = FnvIndexMap::<_, _, 8>::new(); + /// map.insert(3, "a").unwrap(); + /// map.insert(2, "b").unwrap(); + /// map.insert(1, "c").unwrap(); + /// let removed = map.shift_remove_full(&2); + /// assert_eq!(removed, Some((1, 2, "b"))); + /// assert_eq!(map.len(), 2); + /// assert_eq!(map.shift_remove_full(&2), None); + /// + /// let mut iter = map.iter(); + /// assert_eq!(iter.next(), Some((&3, &"a"))); + /// assert_eq!(iter.next(), Some((&1, &"c"))); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn shift_remove_full(&mut self, key: &Q) -> Option<(usize, K, V)> + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + self.find(key).map(|(probe, found)| { + let (k,v) = self.core.shift_remove_found(probe, found); + (found, k, v) + }) + } + + /// Remove and return the key-value pair equivalent to `key`. + /// + /// Like [`Vec::remove`], the pair is removed by shifting all + /// remaining items. This maintains the remaining elements' relative + /// insertion order, but is a more expensive operation + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(n)** time (average). + /// # Examples + /// + /// ``` + /// use heapless::index_map::FnvIndexMap; + /// + /// let mut map = FnvIndexMap::<_, _, 8>::new(); + /// map.insert(3, "a").unwrap(); + /// map.insert(2, "b").unwrap(); + /// map.insert(1, "c").unwrap(); + /// let removed = map.shift_remove_entry(&2); + /// assert_eq!(removed, Some((2, "b"))); + /// assert_eq!(map.len(), 2); + /// assert_eq!(map.shift_remove_entry(&2), None); + /// + /// let mut iter = map.iter(); + /// assert_eq!(iter.next(), Some((&3, &"a"))); + /// assert_eq!(iter.next(), Some((&1, &"c"))); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn shift_remove_entry(&mut self, key: &Q) -> Option<(K, V)> + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + self.shift_remove_full(key).map(|(_idx, k,v)| (k,v)) + } + + /// Remove the key-value pair equivalent to `key` and return + /// its value. + /// + /// Like [`Vec::remove`], the pair is removed by shifting all of the + /// elements that follow it, preserving their relative order. + /// **This perturbs the index of all of those elements!** + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(n)** time (average). + /// + /// # Examples + /// + /// ``` + /// use heapless::index_map::FnvIndexMap; + /// + /// let mut map = FnvIndexMap::<_, _, 8>::new(); + /// map.insert(3, "a").unwrap(); + /// map.insert(2, "b").unwrap(); + /// map.insert(1, "c").unwrap(); + /// let removed = map.shift_remove(&2); + /// assert_eq!(removed, Some(("b"))); + /// assert_eq!(map.len(), 2); + /// assert_eq!(map.shift_remove(&2), None); + /// + /// let mut iter = map.iter(); + /// assert_eq!(iter.next(), Some((&3, &"a"))); + /// assert_eq!(iter.next(), Some((&1, &"c"))); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn shift_remove(&mut self, key: &Q) -> Option + where + K: Borrow, + Q: ?Sized + Hash + Eq, + { + self.shift_remove_full(key).map(|(_idx, _k, v)| v) + } + + /// Retains only the elements specified by the predicate. /// /// In other words, remove all pairs `(k, v)` for which `f(&k, &mut v)` returns `false`. From 851ab3d41e2939b531c091faef41aa08d3fd18ad Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Thu, 24 Apr 2025 16:19:52 -0700 Subject: [PATCH 03/18] Placate clippy --- src/index_map.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index 46a7dd70ec..36a655ec59 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -306,7 +306,7 @@ where .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); if self.entries.len() < self.indices.len() { - self.after_removal() + self.after_removal(); } } @@ -1266,7 +1266,7 @@ where /// remaining items. This maintains the remaining elements' relative /// insertion order, but is a more expensive operation /// - /// Return `None` if `index` is not in 0..len(). + /// Return `None` if `index` is not in `0..len()`. /// /// Computes in *O*(n) time (average). /// From 45c16ae6263067aa399f2352ad33094965569c32 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Thu, 24 Apr 2025 16:23:47 -0700 Subject: [PATCH 04/18] Populate changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4691b73511..caae4cfbc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ### Added - +- Added `shift_remove`, `shift_remove_entry`, `shift_remove_full`, `shift_remove_index` to `IndexMap` - Added `bytes::Buf` and `bytes::BufMut` implementations for `Vec`. - Added `format` macro. - Added `String::from_utf16`. From a2ab5a9f66e4b115a38d0b124dd364ad08593af6 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Thu, 24 Apr 2025 16:25:09 -0700 Subject: [PATCH 05/18] cargo fmt --- src/index_map.rs | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index 36a655ec59..c57ad8835b 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -310,8 +310,7 @@ where } } - fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> - { + fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { if index >= self.entries.len() { return None; } @@ -322,9 +321,8 @@ where Some((bucket.key, bucket.value)) } - - fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) - { + + fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) { let entry = self.entries.remove(found); self.after_removal(); /* Todo: pass probe if this starts taking an index parameter */ @@ -334,8 +332,7 @@ where // Todo: Should this take in a parameter to allow it to only process the moved // elements? - fn after_removal(&mut self) - { + fn after_removal(&mut self) { const INIT: Option = None; for index in self.indices.iter_mut() { @@ -355,11 +352,7 @@ where // robin hood: steal the spot if it's better for us let their_dist = entry_hash.probe_distance(Self::mask(), probe); if their_dist < dist { - Self::insert_phase_2( - &mut self.indices, - probe, - Pos::new(index, entry.hash), - ); + Self::insert_phase_2(&mut self.indices, probe, Pos::new(index, entry.hash)); break; } } else { @@ -1288,8 +1281,7 @@ where /// assert_eq!(iter.next(), Some((&1, &"c"))); /// assert_eq!(iter.next(), None); /// ``` - pub fn shift_remove_index(&mut self, index: usize) -> Option<(K,V)> - { + pub fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { self.core.shift_remove_index(index) } @@ -1316,19 +1308,19 @@ where /// assert_eq!(removed, Some((1, 2, "b"))); /// assert_eq!(map.len(), 2); /// assert_eq!(map.shift_remove_full(&2), None); - /// + /// /// let mut iter = map.iter(); /// assert_eq!(iter.next(), Some((&3, &"a"))); /// assert_eq!(iter.next(), Some((&1, &"c"))); /// assert_eq!(iter.next(), None); - /// ``` + /// ``` pub fn shift_remove_full(&mut self, key: &Q) -> Option<(usize, K, V)> where K: Borrow, Q: ?Sized + Hash + Eq, { self.find(key).map(|(probe, found)| { - let (k,v) = self.core.shift_remove_found(probe, found); + let (k, v) = self.core.shift_remove_found(probe, found); (found, k, v) }) } @@ -1355,18 +1347,18 @@ where /// assert_eq!(removed, Some((2, "b"))); /// assert_eq!(map.len(), 2); /// assert_eq!(map.shift_remove_entry(&2), None); - /// + /// /// let mut iter = map.iter(); /// assert_eq!(iter.next(), Some((&3, &"a"))); /// assert_eq!(iter.next(), Some((&1, &"c"))); /// assert_eq!(iter.next(), None); - /// ``` + /// ``` pub fn shift_remove_entry(&mut self, key: &Q) -> Option<(K, V)> where K: Borrow, Q: ?Sized + Hash + Eq, { - self.shift_remove_full(key).map(|(_idx, k,v)| (k,v)) + self.shift_remove_full(key).map(|(_idx, k, v)| (k, v)) } /// Remove the key-value pair equivalent to `key` and return @@ -1379,7 +1371,7 @@ where /// Return `None` if `key` is not in map. /// /// Computes in **O(n)** time (average). - /// + /// /// # Examples /// /// ``` @@ -1393,12 +1385,12 @@ where /// assert_eq!(removed, Some(("b"))); /// assert_eq!(map.len(), 2); /// assert_eq!(map.shift_remove(&2), None); - /// + /// /// let mut iter = map.iter(); /// assert_eq!(iter.next(), Some((&3, &"a"))); /// assert_eq!(iter.next(), Some((&1, &"c"))); /// assert_eq!(iter.next(), None); - /// ``` + /// ``` pub fn shift_remove(&mut self, key: &Q) -> Option where K: Borrow, @@ -1407,7 +1399,6 @@ where self.shift_remove_full(key).map(|(_idx, _k, v)| v) } - /// Retains only the elements specified by the predicate. /// /// In other words, remove all pairs `(k, v)` for which `f(&k, &mut v)` returns `false`. From 176575fba9e937f32b7f5246d21c100e30863c32 Mon Sep 17 00:00:00 2001 From: prutschman Date: Fri, 25 Apr 2025 12:04:24 -0700 Subject: [PATCH 06/18] Fix punctuation and doc formatting. Co-authored-by: Markus Reiter --- CHANGELOG.md | 2 +- src/index_map.rs | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caae4cfbc2..4d787f1ac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ### Added -- Added `shift_remove`, `shift_remove_entry`, `shift_remove_full`, `shift_remove_index` to `IndexMap` +- Added `shift_remove`, `shift_remove_entry`, `shift_remove_full`, `shift_remove_index` to `IndexMap`. - Added `bytes::Buf` and `bytes::BufMut` implementations for `Vec`. - Added `format` macro. - Added `String::from_utf16`. diff --git a/src/index_map.rs b/src/index_map.rs index c57ad8835b..d40eff9031 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -1290,11 +1290,12 @@ where /// /// Like [`Vec::remove`], the pair is removed by shifting all /// remaining items. This maintains the remaining elements' relative - /// insertion order, but is a more expensive operation + /// insertion order, but is a more expensive operation. /// /// Return `None` if `key` is not in map. /// - /// Computes in **O(n)** time (average). + /// Computes in *O*(n) time (average). + /// /// # Examples /// /// ``` @@ -1329,11 +1330,12 @@ where /// /// Like [`Vec::remove`], the pair is removed by shifting all /// remaining items. This maintains the remaining elements' relative - /// insertion order, but is a more expensive operation + /// insertion order, but is a more expensive operation. /// /// Return `None` if `key` is not in map. /// - /// Computes in **O(n)** time (average). + /// Computes in *O*(n) time (average). + /// /// # Examples /// /// ``` @@ -1370,7 +1372,7 @@ where /// /// Return `None` if `key` is not in map. /// - /// Computes in **O(n)** time (average). + /// Computes in *O*(n) time (average). /// /// # Examples /// From f63d5eff04f3ac3bd35683340fabfe9caf506639 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 12:10:25 -0700 Subject: [PATCH 07/18] Only re-hash changed index slots --- src/index_map.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index d40eff9031..13c5a11c15 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -306,7 +306,7 @@ where .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); if self.entries.len() < self.indices.len() { - self.after_removal(); + self.after_removal(0); } } @@ -317,7 +317,7 @@ where let bucket = self.entries.remove(index); - self.after_removal(); + self.after_removal(index); Some((bucket.key, bucket.value)) } @@ -325,21 +325,19 @@ where fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) { let entry = self.entries.remove(found); - self.after_removal(); /* Todo: pass probe if this starts taking an index parameter */ + self.after_removal(found); (entry.key, entry.value) } - // Todo: Should this take in a parameter to allow it to only process the moved - // elements? - fn after_removal(&mut self) { + fn after_removal(&mut self, first_modified: usize) { const INIT: Option = None; for index in self.indices.iter_mut() { *index = INIT; } - for (index, entry) in self.entries.iter().enumerate() { + for (index, entry) in self.entries.iter().skip(first_modified).enumerate() { let mut probe = entry.hash.desired_pos(Self::mask()); let mut dist = 0; From 97eb1ddbc2fdf31de91fbd7dde250805107a0ae2 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 13:22:17 -0700 Subject: [PATCH 08/18] Add failing test for bogus after_removal logic --- src/index_map.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/index_map.rs b/src/index_map.rs index 13c5a11c15..0d8f793640 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -2018,6 +2018,20 @@ mod tests { assert_eq!(MAP_SLOTS - 1, src.len()); } + #[test] + fn shift_remove_index() { + const REMOVED_KEY: usize = 7; + let mut map = almost_filled_map(); + assert_eq!(map.shift_remove_index(REMOVED_KEY-1), Some((REMOVED_KEY, REMOVED_KEY))); + // Verify all other elements can still be looked up after removing the entry + for x in 1..MAP_SLOTS-1 { + if x == REMOVED_KEY { + continue; + } + assert!(map.contains_key(&x)); + } + } + #[test] fn retain() { let mut none = almost_filled_map(); From 5238e6a640c889c75c465a54f047de4701d1c005 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 13:55:14 -0700 Subject: [PATCH 09/18] Remove bad "optimization" in `after_removal` --- src/index_map.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index 0d8f793640..1f27955532 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -306,7 +306,7 @@ where .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); if self.entries.len() < self.indices.len() { - self.after_removal(0); + self.after_removal(); } } @@ -317,7 +317,7 @@ where let bucket = self.entries.remove(index); - self.after_removal(index); + self.after_removal(); Some((bucket.key, bucket.value)) } @@ -325,19 +325,19 @@ where fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) { let entry = self.entries.remove(found); - self.after_removal(found); + self.after_removal(); (entry.key, entry.value) } - fn after_removal(&mut self, first_modified: usize) { + fn after_removal(&mut self) { const INIT: Option = None; for index in self.indices.iter_mut() { *index = INIT; } - for (index, entry) in self.entries.iter().skip(first_modified).enumerate() { + for (index, entry) in self.entries.iter().enumerate() { let mut probe = entry.hash.desired_pos(Self::mask()); let mut dist = 0; @@ -2022,9 +2022,12 @@ mod tests { fn shift_remove_index() { const REMOVED_KEY: usize = 7; let mut map = almost_filled_map(); - assert_eq!(map.shift_remove_index(REMOVED_KEY-1), Some((REMOVED_KEY, REMOVED_KEY))); + assert_eq!( + map.shift_remove_index(REMOVED_KEY - 1), + Some((REMOVED_KEY, REMOVED_KEY)) + ); // Verify all other elements can still be looked up after removing the entry - for x in 1..MAP_SLOTS-1 { + for x in 1..MAP_SLOTS - 1 { if x == REMOVED_KEY { continue; } From 4c49505201ab051099e46dc3fb89ac52c1f7d4c4 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 13:58:30 -0700 Subject: [PATCH 10/18] Tweak comments --- src/index_map.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index 1f27955532..c05ea3dfdc 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -1251,13 +1251,13 @@ where .map(|(probe, found)| self.core.remove_found(probe, found).1) } - /// Remove the key-value pair at position `index` and return them. + /// Removes the key-value pair at position `index` and returns it. /// /// Like [`Vec::remove`], the pair is removed by shifting all /// remaining items. This maintains the remaining elements' relative /// insertion order, but is a more expensive operation /// - /// Return `None` if `index` is not in `0..len()`. + /// Returns `None` if `index` is not in `0..len()`. /// /// Computes in *O*(n) time (average). /// @@ -1283,14 +1283,14 @@ where self.core.shift_remove_index(index) } - /// Remove the key-value pair equivalent to `key` and return it and + /// Removes the key-value pair equivalent to `key` and returns it and /// the index it had. /// /// Like [`Vec::remove`], the pair is removed by shifting all /// remaining items. This maintains the remaining elements' relative /// insertion order, but is a more expensive operation. /// - /// Return `None` if `key` is not in map. + /// Returns `None` if `key` is not in map. /// /// Computes in *O*(n) time (average). /// @@ -1366,9 +1366,8 @@ where /// /// Like [`Vec::remove`], the pair is removed by shifting all of the /// elements that follow it, preserving their relative order. - /// **This perturbs the index of all of those elements!** /// - /// Return `None` if `key` is not in map. + /// Returns `None` if `key` is not in map. /// /// Computes in *O*(n) time (average). /// From 8b6b7146ed7bea3ff9c08b271f6104d296208f2c Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 14:00:18 -0700 Subject: [PATCH 11/18] Tweak comments --- src/index_map.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index c05ea3dfdc..c791195ba1 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -1324,13 +1324,13 @@ where }) } - /// Remove and return the key-value pair equivalent to `key`. + /// Removes and returns the key-value pair equivalent to `key`. /// /// Like [`Vec::remove`], the pair is removed by shifting all /// remaining items. This maintains the remaining elements' relative /// insertion order, but is a more expensive operation. /// - /// Return `None` if `key` is not in map. + /// Returns `None` if `key` is not in map. /// /// Computes in *O*(n) time (average). /// From f21b0897cb55c122aae2a225f7d02b7e20e3ee01 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 15:09:35 -0700 Subject: [PATCH 12/18] Pass 2 at better re-hash logic shift_remove_found is now the core operation. It follows the same logic as remove_found, but adds a step to re-write the shifted index positions. shift_remove_index is now implemented in IndexMap, by calling shift_remove_found. --- src/index_map.rs | 101 +++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 42 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index c791195ba1..648fad6a1c 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -306,60 +306,72 @@ where .retain_mut(|entry| keep(&mut entry.key, &mut entry.value)); if self.entries.len() < self.indices.len() { - self.after_removal(); - } - } + const INIT: Option = None; - fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { - if index >= self.entries.len() { - return None; + for index in self.indices.iter_mut() { + *index = INIT; + } + + for (index, entry) in self.entries.iter().enumerate() { + let mut probe = entry.hash.desired_pos(Self::mask()); + let mut dist = 0; + + probe_loop!(probe < self.indices.len(), { + let pos = &mut self.indices[probe]; + + if let Some(pos) = *pos { + let entry_hash = pos.hash(); + + // robin hood: steal the spot if it's better for us + let their_dist = entry_hash.probe_distance(Self::mask(), probe); + if their_dist < dist { + Self::insert_phase_2(&mut self.indices, probe, Pos::new(index, entry.hash)); + break; + } + } else { + *pos = Some(Pos::new(index, entry.hash)); + break; + } + dist += 1; + }); + } } - - let bucket = self.entries.remove(index); - - self.after_removal(); - - Some((bucket.key, bucket.value)) } - - fn shift_remove_found(&mut self, _probe: usize, found: usize) -> (K, V) { + + fn shift_remove_found(&mut self, probe: usize, found: usize) -> (K, V) { + self.indices[probe] = None; let entry = self.entries.remove(found); - self.after_removal(); - - (entry.key, entry.value) - } - - fn after_removal(&mut self) { - const INIT: Option = None; - - for index in self.indices.iter_mut() { - *index = INIT; - } - - for (index, entry) in self.entries.iter().enumerate() { + // correct index that points to the entry that had to swap places + if let Some(entry) = self.entries.get(found) { + // was not last element + // examine new element in `found` and find it in indices let mut probe = entry.hash.desired_pos(Self::mask()); - let mut dist = 0; probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - - if let Some(pos) = *pos { - let entry_hash = pos.hash(); - - // robin hood: steal the spot if it's better for us - let their_dist = entry_hash.probe_distance(Self::mask(), probe); - if their_dist < dist { - Self::insert_phase_2(&mut self.indices, probe, Pos::new(index, entry.hash)); + if let Some(pos) = self.indices[probe] { + if pos.index() >= self.entries.len() { + // found it + self.indices[probe] = Some(Pos::new(found, entry.hash)); break; } - } else { - *pos = Some(Pos::new(index, entry.hash)); - break; } - dist += 1; }); } + + // Re-write the indexes for all elements that moved + for index in self.indices.iter_mut() { + if let Some(pos) = index { + if pos.index() > found { + *index = Some(Pos::new(pos.index()-1, pos.hash())); + } + } + } + + self.backward_shift_after_removal(probe); + + (entry.key, entry.value) + } fn backward_shift_after_removal(&mut self, probe_at_remove: usize) { @@ -1280,7 +1292,12 @@ where /// assert_eq!(iter.next(), None); /// ``` pub fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { - self.core.shift_remove_index(index) + if index > self.len() { + return None; + } + self.find(&self.core.entries[index].key).map(|(probe, found)| { + self.core.shift_remove_found(probe, found) + }) } /// Removes the key-value pair equivalent to `key` and returns it and From 13067042c8a3a66b0fcb55bfe2201bdb1c8758d4 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 15:11:00 -0700 Subject: [PATCH 13/18] cargo fmt --- src/index_map.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index 648fad6a1c..c277fac100 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -311,21 +311,25 @@ where for index in self.indices.iter_mut() { *index = INIT; } - + for (index, entry) in self.entries.iter().enumerate() { let mut probe = entry.hash.desired_pos(Self::mask()); let mut dist = 0; - + probe_loop!(probe < self.indices.len(), { let pos = &mut self.indices[probe]; - + if let Some(pos) = *pos { let entry_hash = pos.hash(); - + // robin hood: steal the spot if it's better for us let their_dist = entry_hash.probe_distance(Self::mask(), probe); if their_dist < dist { - Self::insert_phase_2(&mut self.indices, probe, Pos::new(index, entry.hash)); + Self::insert_phase_2( + &mut self.indices, + probe, + Pos::new(index, entry.hash), + ); break; } } else { @@ -337,7 +341,7 @@ where } } } - + fn shift_remove_found(&mut self, probe: usize, found: usize) -> (K, V) { self.indices[probe] = None; let entry = self.entries.remove(found); @@ -363,7 +367,7 @@ where for index in self.indices.iter_mut() { if let Some(pos) = index { if pos.index() > found { - *index = Some(Pos::new(pos.index()-1, pos.hash())); + *index = Some(Pos::new(pos.index() - 1, pos.hash())); } } } @@ -371,7 +375,6 @@ where self.backward_shift_after_removal(probe); (entry.key, entry.value) - } fn backward_shift_after_removal(&mut self, probe_at_remove: usize) { @@ -1295,9 +1298,8 @@ where if index > self.len() { return None; } - self.find(&self.core.entries[index].key).map(|(probe, found)| { - self.core.shift_remove_found(probe, found) - }) + self.find(&self.core.entries[index].key) + .map(|(probe, found)| self.core.shift_remove_found(probe, found)) } /// Removes the key-value pair equivalent to `key` and returns it and From 2107222bbf8b5e639b800bf5a90ddc05081f5efa Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Fri, 25 Apr 2025 15:24:59 -0700 Subject: [PATCH 14/18] Fix and add test for off-by-one error in shift_remove_index. --- src/index_map.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/index_map.rs b/src/index_map.rs index c277fac100..4c5d997331 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -1288,6 +1288,8 @@ where /// let removed = map.shift_remove_index(1); /// assert_eq!(removed, Some((2, "b"))); /// assert_eq!(map.len(), 2); + /// assert_eq!(map.shift_remove_index(2), None); + /// assert_eq!(map.shift_remove_index(3), None); /// /// let mut iter = map.iter(); /// assert_eq!(iter.next(), Some((&3, &"a"))); @@ -1295,7 +1297,7 @@ where /// assert_eq!(iter.next(), None); /// ``` pub fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> { - if index > self.len() { + if index >= self.len() { return None; } self.find(&self.core.entries[index].key) From b25ee91118ad9a3275f3baa07cc2d52f868aac72 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Sat, 26 Apr 2025 10:30:26 -0700 Subject: [PATCH 15/18] Check internal consistency of IndexMap in tests. --- src/index_map.rs | 53 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/src/index_map.rs b/src/index_map.rs index 4c5d997331..e028bd621b 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -65,7 +65,7 @@ use crate::Vec; /// ``` pub type FnvIndexMap = IndexMap, N>; -#[derive(Clone, Copy, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] struct HashValue(u16); impl HashValue { @@ -116,6 +116,15 @@ impl Pos { } } +impl fmt::Debug for Pos { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Pos") + .field("index", &self.index()) + .field("hash", &self.hash().0) + .finish() + } +} + enum Insert { Success(Inserted), Full((K, V)), @@ -1469,6 +1478,42 @@ where let h = hash_with(key, &self.build_hasher); self.core.find(h, key) } + + #[cfg(test)] + fn assert_internally_consistent(&mut self) { + // For every entry + for (index, bucket) in self.core.entries.iter().enumerate() { + let found = self.find(&bucket.key); + // We must be able to find it + assert!(found.is_some(), "Entry couldn't be found"); + let (probe, found_index) = found.unwrap(); + // In the right place + assert_eq!(found_index, index, "Found a different entry"); + // For the right reason + assert_eq!( + self.core.indices.get(probe), + Some(&Some(Pos::new(index, bucket.hash))), + "Found hash mismatch" + ); + } + + // For every non-empty index + let mut used_index_count = 0; + for pos in self.core.indices.iter().filter_map(|p| p.as_ref()) { + let entry = self.core.entries.get(pos.index()); + // It must point to an entry + assert!(entry.is_some(), "Dangling reference"); + // With a matching hash + assert_eq!(pos.hash(), entry.unwrap().hash, "Index hash mismatch"); + used_index_count += 1; + } + + assert_eq!( + self.core.entries.len(), + used_index_count, + "Entry/index count mismatch" + ) + } } impl ops::Index<&Q> for IndexMap @@ -2042,6 +2087,7 @@ mod tests { fn shift_remove_index() { const REMOVED_KEY: usize = 7; let mut map = almost_filled_map(); + map.assert_internally_consistent(); assert_eq!( map.shift_remove_index(REMOVED_KEY - 1), Some((REMOVED_KEY, REMOVED_KEY)) @@ -2053,6 +2099,7 @@ mod tests { } assert!(map.contains_key(&x)); } + map.assert_internally_consistent(); } #[test] @@ -2060,10 +2107,12 @@ mod tests { let mut none = almost_filled_map(); none.retain(|_, _| false); assert!(none.is_empty()); + none.assert_internally_consistent(); let mut all = almost_filled_map(); all.retain(|_, _| true); assert_eq!(all.len(), MAP_SLOTS - 1); + all.assert_internally_consistent(); let mut even = almost_filled_map(); even.retain(|_, &mut v| v % 2 == 0); @@ -2071,6 +2120,7 @@ mod tests { for &v in even.values() { assert_eq!(v % 2, 0); } + even.assert_internally_consistent(); let mut odd = almost_filled_map(); odd.retain(|_, &mut v| v % 2 != 0); @@ -2080,6 +2130,7 @@ mod tests { } assert_eq!(odd.insert(2, 2), Ok(None)); assert_eq!(odd.len(), (MAP_SLOTS / 2) + 1); + odd.assert_internally_consistent(); } #[test] From c9f68279ef814f5b2ab2362d551a0c399ac5cc8f Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Sat, 26 Apr 2025 10:30:57 -0700 Subject: [PATCH 16/18] Removal logic passes tests --- src/index_map.rs | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/index_map.rs b/src/index_map.rs index e028bd621b..5df2a719b7 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -355,23 +355,6 @@ where self.indices[probe] = None; let entry = self.entries.remove(found); - // correct index that points to the entry that had to swap places - if let Some(entry) = self.entries.get(found) { - // was not last element - // examine new element in `found` and find it in indices - let mut probe = entry.hash.desired_pos(Self::mask()); - - probe_loop!(probe < self.indices.len(), { - if let Some(pos) = self.indices[probe] { - if pos.index() >= self.entries.len() { - // found it - self.indices[probe] = Some(Pos::new(found, entry.hash)); - break; - } - } - }); - } - // Re-write the indexes for all elements that moved for index in self.indices.iter_mut() { if let Some(pos) = index { @@ -2083,6 +2066,27 @@ mod tests { assert_eq!(MAP_SLOTS - 1, src.len()); } + #[test] + fn shift_remove_full() { + const REMOVED_KEY: usize = 7; + let mut map = almost_filled_map(); + map.assert_internally_consistent(); + assert_eq!( + map.shift_remove_full(&REMOVED_KEY), + Some((REMOVED_KEY - 1, REMOVED_KEY, REMOVED_KEY)) + ); + // Verify all other elements can still be looked up after removing the entry + for x in 1..MAP_SLOTS - 1 { + if x == REMOVED_KEY { + continue; + } + assert!(map.contains_key(&x)); + } + // Make sure we can't any entry for the one we removed + assert_eq!(map.get(&REMOVED_KEY), None); + map.assert_internally_consistent(); + } + #[test] fn shift_remove_index() { const REMOVED_KEY: usize = 7; @@ -2099,6 +2103,8 @@ mod tests { } assert!(map.contains_key(&x)); } + // Make sure we can't find any entry for the one we removed + assert_eq!(map.get(&REMOVED_KEY), None); map.assert_internally_consistent(); } From 25db2746a833d1aa1cf652a0739a47f290d63986 Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Sat, 26 Apr 2025 10:36:06 -0700 Subject: [PATCH 17/18] Clippy --- src/index_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index_map.rs b/src/index_map.rs index 5df2a719b7..94a450c6b9 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -1495,7 +1495,7 @@ where self.core.entries.len(), used_index_count, "Entry/index count mismatch" - ) + ); } } From 337ea625c8ab1fd1d9ca40e0299576be4acd924e Mon Sep 17 00:00:00 2001 From: Phil Rutschman Date: Sat, 26 Apr 2025 10:38:30 -0700 Subject: [PATCH 18/18] fix comment --- src/index_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index_map.rs b/src/index_map.rs index 94a450c6b9..07854e422a 100644 --- a/src/index_map.rs +++ b/src/index_map.rs @@ -2082,7 +2082,7 @@ mod tests { } assert!(map.contains_key(&x)); } - // Make sure we can't any entry for the one we removed + // Make sure we can't find any entry for the one we removed assert_eq!(map.get(&REMOVED_KEY), None); map.assert_internally_consistent(); }