Skip to content

Commit 3efa461

Browse files
xbcnn and arbiny authored
[libcxx] Avoid computing the hash in __hash_table::find() if the table is empty. (#126837)
If the hash table is empty (with or without buckets), find() can return early. Computing the hash of the key is then unnecessary, and skipping it is worthwhile because hashing can be expensive for some key types, such as long strings. This is a small optimization, but it is useful in cases like a checklist (unordered_set/map) that is empty most of the time.

```
For std::unordered_set<*>, `--benchmark_filter=find`

1. With the opt:
---------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------------------------------
std::unordered_set<int>::find(key) (existent)/0 0.118 ns 0.118 ns 5939922720
std::unordered_set<int>::find(key) (existent)/32 52.1 ns 52.1 ns 13287232
std::unordered_set<int>::find(key) (existent)/1024 51.1 ns 51.1 ns 13449472
std::unordered_set<int>::find(key) (existent)/8192 53.1 ns 53.1 ns 13420864
std::unordered_set<int>::find(key) (non-existent)/0 14.7 ns 14.7 ns 47725472
std::unordered_set<int>::find(key) (non-existent)/32 44.1 ns 44.1 ns 15478144
std::unordered_set<int>::find(key) (non-existent)/1024 41.2 ns 41.2 ns 1508246
std::unordered_set<int>::find(key) (non-existent)/8192 49.5 ns 49.5 ns 15233600
std::unordered_set<std::string>::find(key) (existent)/0 0.136 ns 0.136 ns 5157977920
std::unordered_set<std::string>::find(key) (existent)/32 739 ns 739 ns 1023744
std::unordered_set<std::string>::find(key) (existent)/1024 836 ns 836 ns 840448
std::unordered_set<std::string>::find(key) (existent)/8192 768 ns 768 ns 1085664
std::unordered_set<std::string>::find(key) (non-existent)/0 14.6 ns 14.6 ns 47844160
std::unordered_set<std::string>::find(key) (non-existent)/32 608 ns 608 ns 1106496
std::unordered_set<std::string>::find(key) (non-existent)/1024 646 ns 646 ns 986272
std::unordered_set<std::string>::find(key) (non-existent)/8192 669 ns 669 ns 1047584

2. Without the opt:
---------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------------------------------
std::unordered_set<int>::find(key) (existent)/0 0.135 ns 0.135 ns 5188502304
std::unordered_set<int>::find(key) (existent)/32 54.4 ns 54.4 ns 12954144
std::unordered_set<int>::find(key) (existent)/1024 57.7 ns 57.7 ns 13107008
std::unordered_set<int>::find(key) (existent)/8192 50.7 ns 50.7 ns 12953312
std::unordered_set<int>::find(key) (non-existent)/0 16.1 ns 16.1 ns 43460192
std::unordered_set<int>::find(key) (non-existent)/32 45.8 ns 45.8 ns 17139584
std::unordered_set<int>::find(key) (non-existent)/1024 44.6 ns 44.6 ns 16538048
std::unordered_set<int>::find(key) (non-existent)/8192 41.5 ns 41.5 ns 12850816
std::unordered_set<std::string>::find(key) (existent)/0 0.133 ns 0.133 ns 5214104992
std::unordered_set<std::string>::find(key) (existent)/32 731 ns 731 ns 1000576
std::unordered_set<std::string>::find(key) (existent)/1024 716 ns 716 ns 1131584
std::unordered_set<std::string>::find(key) (existent)/8192 745 ns 745 ns 909632
std::unordered_set<std::string>::find(key) (non-existent)/0 600 ns 600 ns 1089792
std::unordered_set<std::string>::find(key) (non-existent)/32 645 ns 645 ns 979232
std::unordered_set<std::string>::find(key) (non-existent)/1024 675 ns 675 ns 962240
std::unordered_set<std::string>::find(key) (non-existent)/8192 711 ns 711 ns 1054880
```

The improvement shows up when find() is called with non-existent `std::string` keys (random size 1~1024) on an empty container; the first line below is with the optimization, the second without it:

```
std::unordered_set<std::string>::find(key) (non-existent)/0 14.6 ns 14.6 ns 47844160
std::unordered_set<std::string>::find(key) (non-existent)/0 600 ns 600 ns 1089792
```

---------

Co-authored-by: yangxiaobing <yangxiaobing@jwzg.com>
1 parent 3e37045 commit 3efa461

File tree

3 files changed

+11
-9
lines changed

3 files changed

+11
-9
lines changed

libcxx/include/__hash_table

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,9 +1831,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
18311831
template <class _Key>
18321832
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
18331833
__hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) {
1834-
size_t __hash = hash_function()(__k);
18351834
size_type __bc = bucket_count();
1836-
if (__bc != 0) {
1835+
if (__bc != 0 && size() != 0) {
1836+
size_t __hash = hash_function()(__k);
18371837
size_t __chash = std::__constrain_hash(__hash, __bc);
18381838
__next_pointer __nd = __bucket_list_[__chash];
18391839
if (__nd != nullptr) {
@@ -1852,9 +1852,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
18521852
template <class _Key>
18531853
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator
18541854
__hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const {
1855-
size_t __hash = hash_function()(__k);
18561855
size_type __bc = bucket_count();
1857-
if (__bc != 0) {
1856+
if (__bc != 0 && size() != 0) {
1857+
size_t __hash = hash_function()(__k);
18581858
size_t __chash = std::__constrain_hash(__hash, __bc);
18591859
__next_pointer __nd = __bucket_list_[__chash];
18601860
if (__nd != nullptr) {

libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void associative_container_benchmarks(std::string container) {
5959
auto get_key = [](Value const& v) { return adapt_operations<Container>::key_from_value(v); };
6060

6161
auto bench = [&](std::string operation, auto f) {
62-
benchmark::RegisterBenchmark(container + "::" + operation, f)->Arg(32)->Arg(1024)->Arg(8192);
62+
benchmark::RegisterBenchmark(container + "::" + operation, f)->Arg(0)->Arg(32)->Arg(1024)->Arg(8192);
6363
};
6464

6565
static constexpr bool is_multi_key_container =
@@ -176,7 +176,7 @@ void associative_container_benchmarks(std::string container) {
176176
// Insertion
177177
/////////////////////////
178178
bench("insert(value) (already present)", [=](auto& st) {
179-
const std::size_t size = st.range(0);
179+
const std::size_t size = st.range(0) ? st.range(0) : 1;
180180
std::vector<Value> in = make_value_types(generate_unique_keys(size));
181181
Value to_insert = in[in.size() / 2]; // pick any existing value
182182
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
@@ -371,7 +371,7 @@ void associative_container_benchmarks(std::string container) {
371371
// Erasure
372372
/////////////////////////
373373
bench("erase(key) (existent)", [=](auto& st) {
374-
const std::size_t size = st.range(0);
374+
const std::size_t size = st.range(0) ? st.range(0) : 1; // avoid empty container
375375
std::vector<Value> in = make_value_types(generate_unique_keys(size));
376376
Value element = in[in.size() / 2]; // pick any element
377377
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
@@ -415,7 +415,7 @@ void associative_container_benchmarks(std::string container) {
415415
});
416416

417417
bench("erase(iterator)", [=](auto& st) {
418-
const std::size_t size = st.range(0);
418+
const std::size_t size = st.range(0) ? st.range(0) : 1; // avoid empty container
419419
std::vector<Value> in = make_value_types(generate_unique_keys(size));
420420
Value element = in[in.size() / 2]; // pick any element
421421

@@ -494,7 +494,7 @@ void associative_container_benchmarks(std::string container) {
494494
Container c(in.begin(), in.end());
495495

496496
while (st.KeepRunningBatch(BatchSize)) {
497-
for (std::size_t i = 0; i != BatchSize; ++i) {
497+
for (std::size_t i = 0; i != keys.size(); ++i) { // possible empty keys when Arg(0)
498498
auto result = func(c, keys[i]);
499499
benchmark::DoNotOptimize(c);
500500
benchmark::DoNotOptimize(result);

libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
// UNSUPPORTED: c++03, c++11, c++14, c++17
1010

11+
#include <string>
1112
#include <unordered_set>
1213
#include <utility>
1314

@@ -27,6 +28,7 @@ struct support::adapt_operations<std::unordered_set<K>> {
2728

2829
int main(int argc, char** argv) {
2930
support::associative_container_benchmarks<std::unordered_set<int>>("std::unordered_set<int>");
31+
support::associative_container_benchmarks<std::unordered_set<std::string>>("std::unordered_set<std::string>");
3032

3133
benchmark::Initialize(&argc, argv);
3234
benchmark::RunSpecifiedBenchmarks();

0 commit comments

Comments
 (0)