Skip to content

Commit ee00971

Browse files
committed
Auto merge of #565 - clarfonthey:tag-type, r=Amanieu
Add Tag(u8) newtype in an attempt to stop using byte-pointers for everything. The longer-term goal is to make it so that all the pointers passed around inside the implementation are either `Tag` pointers or `T` pointers, so that we know whether we're using them for indexing into the control or the buckets. Then, `u8` pointers mean that we're referring to a raw allocation, rather than the control bytes. However, the current code isn't really built for this, and the result is a lot of pointer casts everywhere. I didn't want to just replace `u8` with `Tag` everywhere, since there are some cases where we use `u8` to really mean bytes, and that would be counter to the original purpose. One short-term gain, however, is that the constant tags and the various methods on them can now be real associated constants and methods, instead of just standalone functions and constants that have to be imported separately. ---- This change also bumps MSRV to 1.65.0. I could bump it higher, but only 1.65 was needed, so I decided to go with the smallest bump required.
2 parents e057e87 + 4de01fe commit ee00971

File tree

5 files changed

+231
-214
lines changed

5 files changed

+231
-214
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ keywords = ["hash", "no_std", "hashmap", "swisstable"]
1010
categories = ["data-structures", "no-std"]
1111
exclude = [".github", "/ci/*"]
1212
edition = "2021"
13-
rust-version = "1.63.0"
13+
rust-version = "1.65.0"
1414

1515
[dependencies]
1616
# For the default hasher

src/raw/generic.rs

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::bitmask::BitMask;
2-
use super::EMPTY;
2+
use super::Tag;
33
use core::{mem, ptr};
44

55
// Use the native word size as the group size. Using a 64-bit group size on
@@ -24,18 +24,18 @@ cfg_if! {
2424
pub(crate) type BitMaskWord = GroupWord;
2525
pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
2626
pub(crate) const BITMASK_STRIDE: usize = 8;
27-
// We only care about the highest bit of each byte for the mask.
27+
// We only care about the highest bit of each tag for the mask.
2828
#[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]
29-
pub(crate) const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord;
29+
pub(crate) const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord;
3030
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
3131

32-
/// Helper function to replicate a byte across a `GroupWord`.
32+
/// Helper function to replicate a tag across a `GroupWord`.
3333
#[inline]
34-
fn repeat(byte: u8) -> GroupWord {
35-
GroupWord::from_ne_bytes([byte; Group::WIDTH])
34+
fn repeat(tag: Tag) -> GroupWord {
35+
GroupWord::from_ne_bytes([tag.0; Group::WIDTH])
3636
}
3737

38-
/// Abstraction over a group of control bytes which can be scanned in
38+
/// Abstraction over a group of control tags which can be scanned in
3939
/// parallel.
4040
///
4141
/// This implementation uses a word-sized integer.
@@ -51,94 +51,94 @@ impl Group {
5151
/// Number of bytes in the group.
5252
pub(crate) const WIDTH: usize = mem::size_of::<Self>();
5353

54-
/// Returns a full group of empty bytes, suitable for use as the initial
54+
/// Returns a full group of empty tags, suitable for use as the initial
5555
/// value for an empty hash table.
5656
///
5757
/// This is guaranteed to be aligned to the group size.
5858
#[inline]
59-
pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] {
59+
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
6060
#[repr(C)]
61-
struct AlignedBytes {
61+
struct AlignedTags {
6262
_align: [Group; 0],
63-
bytes: [u8; Group::WIDTH],
63+
tags: [Tag; Group::WIDTH],
6464
}
65-
const ALIGNED_BYTES: AlignedBytes = AlignedBytes {
65+
const ALIGNED_TAGS: AlignedTags = AlignedTags {
6666
_align: [],
67-
bytes: [EMPTY; Group::WIDTH],
67+
tags: [Tag::EMPTY; Group::WIDTH],
6868
};
69-
&ALIGNED_BYTES.bytes
69+
&ALIGNED_TAGS.tags
7070
}
7171

72-
/// Loads a group of bytes starting at the given address.
72+
/// Loads a group of tags starting at the given address.
7373
#[inline]
7474
#[allow(clippy::cast_ptr_alignment)] // unaligned load
75-
pub(crate) unsafe fn load(ptr: *const u8) -> Self {
75+
pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
7676
Group(ptr::read_unaligned(ptr.cast()))
7777
}
7878

79-
/// Loads a group of bytes starting at the given address, which must be
79+
/// Loads a group of tags starting at the given address, which must be
8080
/// aligned to `mem::align_of::<Group>()`.
8181
#[inline]
8282
#[allow(clippy::cast_ptr_alignment)]
83-
pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self {
83+
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
8484
// FIXME: use align_offset once it stabilizes
8585
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
8686
Group(ptr::read(ptr.cast()))
8787
}
8888

89-
/// Stores the group of bytes to the given address, which must be
89+
/// Stores the group of tags to the given address, which must be
9090
/// aligned to `mem::align_of::<Group>()`.
9191
#[inline]
9292
#[allow(clippy::cast_ptr_alignment)]
93-
pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) {
93+
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
9494
// FIXME: use align_offset once it stabilizes
9595
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
9696
ptr::write(ptr.cast(), self.0);
9797
}
9898

99-
/// Returns a `BitMask` indicating all bytes in the group which *may*
99+
/// Returns a `BitMask` indicating all tags in the group which *may*
100100
/// have the given value.
101101
///
102102
/// This function may return a false positive in certain cases where
103-
/// the byte in the group differs from the searched value only in its
103+
/// the tag in the group differs from the searched value only in its
104104
/// lowest bit. This is fine because:
105105
/// - This never happens for `EMPTY` and `DELETED`, only full entries.
106106
/// - The check for key equality will catch these.
107107
/// - This only happens if there is at least 1 true match.
108108
/// - The chance of this happening is very low (< 1% chance per byte).
109109
#[inline]
110-
pub(crate) fn match_byte(self, byte: u8) -> BitMask {
110+
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
111111
// This algorithm is derived from
112112
// https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
113-
let cmp = self.0 ^ repeat(byte);
114-
BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le())
113+
let cmp = self.0 ^ repeat(tag);
114+
BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le())
115115
}
116116

117-
/// Returns a `BitMask` indicating all bytes in the group which are
117+
/// Returns a `BitMask` indicating all tags in the group which are
118118
/// `EMPTY`.
119119
#[inline]
120120
pub(crate) fn match_empty(self) -> BitMask {
121-
// If the high bit is set, then the byte must be either:
121+
// If the high bit is set, then the tag must be either:
122122
// 1111_1111 (EMPTY) or 1000_0000 (DELETED).
123123
// So we can just check if the top two bits are 1 by ANDing them.
124-
BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le())
124+
BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le())
125125
}
126126

127-
/// Returns a `BitMask` indicating all bytes in the group which are
127+
/// Returns a `BitMask` indicating all tags in the group which are
128128
/// `EMPTY` or `DELETED`.
129129
#[inline]
130130
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
131-
// A byte is EMPTY or DELETED iff the high bit is set
132-
BitMask((self.0 & repeat(0x80)).to_le())
131+
// A tag is EMPTY or DELETED iff the high bit is set
132+
BitMask((self.0 & repeat(Tag::DELETED)).to_le())
133133
}
134134

135-
/// Returns a `BitMask` indicating all bytes in the group which are full.
135+
/// Returns a `BitMask` indicating all tags in the group which are full.
136136
#[inline]
137137
pub(crate) fn match_full(self) -> BitMask {
138138
self.match_empty_or_deleted().invert()
139139
}
140140

141-
/// Performs the following transformation on all bytes in the group:
141+
/// Performs the following transformation on all tags in the group:
142142
/// - `EMPTY => EMPTY`
143143
/// - `DELETED => EMPTY`
144144
/// - `FULL => DELETED`
@@ -151,7 +151,7 @@ impl Group {
151151
// let full = 1000_0000 (true) or 0000_0000 (false)
152152
// !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry)
153153
// !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry)
154-
let full = !self.0 & repeat(0x80);
154+
let full = !self.0 & repeat(Tag::DELETED);
155155
Group(!full + (full >> 7))
156156
}
157157
}

0 commit comments

Comments
 (0)