Skip to content

Commit e3c528f

Browse files
committed
Add the first 31 bytes to the hash of long strings
Signed-off-by: Michael Mior <mmior@mail.rit.edu>
1 parent ef23a63 commit e3c528f

File tree

2 files changed

+19
-18
lines changed

2 files changed

+19
-18
lines changed

src/core/json/include/sourcemeta/core/json_hash.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ template <typename T> struct PropertyHashJSON {
4949
-> hash_type {
5050
hash_type result;
5151
assert(!value.empty());
52-
assert(value.size() <= 31);
5352
// Copy starting at byte 2
5453
std::memcpy(reinterpret_cast<char *>(&result) + 1, value.data(), size);
5554
return result;
@@ -126,18 +125,20 @@ template <typename T> struct PropertyHashJSON {
126125
// This case is specifically designed to be constant with regards to
127126
// string length, and to exploit the fact that most JSON objects don't
128127
// have a lot of entries, so hash collision is not as common
129-
return {1 +
130-
(size + static_cast<typename hash_type::type>(value.front()) +
128+
auto hash = this->perfect(value, 31);
129+
hash.a |=
130+
1 + (size + static_cast<typename hash_type::type>(value.front()) +
131131
static_cast<typename hash_type::type>(value.back())) %
132132
// Make sure the property hash can never exceed 8 bits
133-
255};
133+
255;
134+
return hash;
134135
}
135136
}
136137

137138
inline auto is_perfect(const hash_type &hash) const noexcept -> bool {
138139
// A hash is perfect only when the low 8 bits of `hash.a` are zero;
139140
// the non-perfect case always ORs a value in 1..255 into those bits
140-
return hash.a > 255;
141+
return (hash.a & 255) == 0;
141142
}
142143
};
143144

test/json/json_hash_test.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ TEST(JSON_key_hash, hash_empty) {
77
hasher;
88
const sourcemeta::core::JSON::String value{""};
99
const auto hash{hasher(value)};
10-
EXPECT_FALSE(hasher.is_perfect(hash));
10+
EXPECT_TRUE(hasher.is_perfect(hash));
1111
#if defined(__SIZEOF_INT128__)
1212
EXPECT_EQ(hash.a,
1313
(__uint128_t{0x0000000000000000} << 64) | 0x0000000000000000);
@@ -619,15 +619,15 @@ TEST(JSON_key_hash, hash_fooooooooooooooooooooooooooooooo) {
619619
EXPECT_FALSE(hasher.is_perfect(hash));
620620
#if defined(__SIZEOF_INT128__)
621621
EXPECT_EQ(hash.a,
622-
(__uint128_t{0x0000000000000000} << 64) | 0x00000000000000f6);
622+
(__uint128_t{0x6f6f6f6f6f6f6f6f} << 64) | 0x6f6f6f6f6f6f66f6);
623623
EXPECT_EQ(hash.b,
624-
(__uint128_t{0x0000000000000000} << 64) | 0x0000000000000000);
624+
(__uint128_t{0x6f6f6f6f6f6f6f6f} << 64) | 0x6f6f6f6f6f6f6f6f);
625625
#else
626626
// Low byte: 1 + (0x20 (length) + 0x66 (f) + 0x6f (o)) % 255 = 0xf6
627-
EXPECT_EQ(hash.a, 0x00000000000000f6);
628-
EXPECT_EQ(hash.b, 0x0000000000000000);
629-
EXPECT_EQ(hash.c, 0x0000000000000000);
630-
EXPECT_EQ(hash.d, 0x0000000000000000);
627+
EXPECT_EQ(hash.a, 0x6f6f6f6f6f6f66f6);
628+
EXPECT_EQ(hash.b, 0x6f6f6f6f6f6f6f6f);
629+
EXPECT_EQ(hash.c, 0x6f6f6f6f6f6f6f6f);
630+
EXPECT_EQ(hash.d, 0x6f6f6f6f6f6f6f6f);
631631
#endif
632632
}
633633

@@ -643,14 +643,14 @@ TEST(JSON_key_hash, hash_no_collision) {
643643
EXPECT_FALSE(hasher.is_perfect(hash));
644644
#if defined(__SIZEOF_INT128__)
645645
EXPECT_EQ(hash.a,
646-
(__uint128_t{0x0000000000000000} << 64) | 0x0000000000000003);
646+
(__uint128_t{0x6161616161616161} << 64) | 0x6161616161617A03);
647647
EXPECT_EQ(hash.b,
648-
(__uint128_t{0x0000000000000000} << 64) | 0x0000000000000000);
648+
(__uint128_t{0x6161616161616161} << 64) | 0x6161616161616161);
649649
#else
650650
// Low byte: 1 + (0x10C (length) + 0x7A (z) + 0x7A (z)) % 255 = 0x03
651-
EXPECT_EQ(hash.a, 0x0000000000000003);
652-
EXPECT_EQ(hash.b, 0x0000000000000000);
653-
EXPECT_EQ(hash.c, 0x0000000000000000);
654-
EXPECT_EQ(hash.d, 0x0000000000000000);
651+
EXPECT_EQ(hash.a, 0x6161616161617A03);
652+
EXPECT_EQ(hash.b, 0x6161616161616161);
653+
EXPECT_EQ(hash.c, 0x6161616161616161);
654+
EXPECT_EQ(hash.d, 0x6161616161616161);
655655
#endif
656656
}

0 commit comments

Comments
 (0)