Skip to content

Commit 22c2191

Browse files
lipzhuguowangymadolsonzuiderkwast
authored
Optimize findBucket performance using SIMD (#2030)
### Description This PR introduces an optimization for the `findBucket` function utilizing SSE2 instructions. The `findBucket` function is widely used in the hashtable data structure, particularly in **SET** and **GET** related commands, but also for looking up a command by its name and many other things. The core optimization happens with `_mm_cmpeq_epi8()`, which performs 16 byte comparisons (of which 7 are valid) in parallel. The `_mm_movemask_epi8()` function then converts this vector of byte comparisons into a bit mask where each bit represents whether a comparison was successful. This implementation demonstrates sophisticated use of SIMD instructions to accelerate hash table lookups by performing multiple hash comparisons in parallel. ### Performance Boost The corresponding **GET** and **SET** commands can gain a **~2% - ~6%** performance improvement, especially with pipelining enabled. The test scenarios below benefit from this optimization. |Benchmark|Performance Boost| |-|-| |[memtier_benchmark-10Mkeys-string-get-10B-pipeline-100-nokeyprefix](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-10Mkeys-string-get-10B-pipeline-100-nokeyprefix.yml) |**4%**| |[memtier_benchmark-1Mkeys-string-get-10B-pipeline-50](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-string-get-10B-pipeline-50.yml) |**4%**| |[memtier_benchmark-1Mkeys-string-get-10B-pipeline-100](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-string-get-10B-pipeline-100.yml) |**4%**| |[memtier_benchmark-1Mkeys-string-get-10B-pipeline-100-nokeyprefix](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-string-get-10B-pipeline-100-nokeyprefix.yml) |**2%**| 
|[memtier_benchmark-1Mkeys-string-get-10B-pipeline-500](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-string-get-10B-pipeline-500.yml) |**6%**| |[memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-500](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-500.yml) |**4%**| |[memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-100](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-100.yml) |**3%**| |[memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-100-nokeyprefix](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-string-with-10B-values-pipeline-100-nokeyprefix.yml) |**3%**| #### Test Env - OS: CentOS Stream 9 - Platform: Intel Xeon 6980P - Server and Client in same socket #### Valkey-server configuration ``` taskset -c 0 ~/valkey/src/valkey-server ~/tmp_valkey.conf port 9001 bind * -::* daemonize no protected-mode no save ``` --------- Signed-off-by: Lipeng Zhu <lipeng.zhu@intel.com> Signed-off-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Wangyang Guo <wangyang.guo@intel.com> Co-authored-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
1 parent 785ffe6 commit 22c2191

File tree

2 files changed

+57
-12
lines changed

2 files changed

+57
-12
lines changed

src/config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,9 +382,11 @@ void setcpuaffinity(const char *cpulist);
382382
#endif
383383

384384
/* Per-function instruction-set annotations. When HAVE_X86_SIMD is true, each
 * ATTRIBUTE_TARGET_* macro expands to a `target` function attribute naming the
 * instruction set(s) listed in its string; otherwise each macro expands to
 * nothing. */
#if HAVE_X86_SIMD
385+
#define ATTRIBUTE_TARGET_SSE2 __attribute__((target("sse2")))
385386
#define ATTRIBUTE_TARGET_AVX2 __attribute__((target("avx2")))
386387
#define ATTRIBUTE_TARGET_AVX512 __attribute__((target("avx512f,avx512bw,avx512vl")))
387388
#else
389+
#define ATTRIBUTE_TARGET_SSE2
388390
#define ATTRIBUTE_TARGET_AVX2
389391
#define ATTRIBUTE_TARGET_AVX512
390392
#endif

src/hashtable.c

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@
5757
#include <stdlib.h>
5858
#include <string.h>
5959
#include <unistd.h>
60+
#if HAVE_X86_SIMD
61+
#include <immintrin.h>
62+
#endif
6063

6164
/* The default hashing function uses the SipHash implementation in siphash.c. */
6265

@@ -669,6 +672,51 @@ static int expand(hashtable *ht, size_t size, int *malloc_failed) {
669672
return resize(ht, size, malloc_failed);
670673
}
671674

675+
/* Checks if a candidate entry in a bucket matches the given key.
676+
*
677+
* This function examines a specific position in a bucket to determine if the
678+
* entry at that position matches the provided key. If a match is found, it
679+
* updates the position and table index pointers and returns 1. Otherwise,
680+
* it returns 0. */
681+
static inline int checkCandidateInBucket(hashtable *ht, bucket *b, int pos, const void *key, int table, int *pos_in_bucket, int *table_index) {
682+
/* It's a candidate. */
683+
void *entry = b->entries[pos];
684+
const void *elem_key = entryGetKey(ht, entry);
685+
if (compareKeys(ht, key, elem_key) == 0) {
686+
/* It's a match. */
687+
assert(pos_in_bucket != NULL);
688+
*pos_in_bucket = pos;
689+
if (table_index) *table_index = table;
690+
return 1;
691+
}
692+
return 0;
693+
}
694+
695+
#if HAVE_X86_SIMD
/* SSE2 search of a single bucket for 'key'.
 *
 * Loads 16 bytes starting at b->hashes, compares every byte against 'h2' in
 * one instruction and collapses the comparison into a bit mask with one bit
 * per byte. Bits outside the bucket's presence mask are discarded, and the
 * remaining candidate positions are checked, lowest position first, with
 * checkCandidateInBucket().
 *
 * Returns 1 as soon as a candidate matches (the output pointers are filled in
 * by checkCandidateInBucket()), or 0 if no candidate matches. */
ATTRIBUTE_TARGET_SSE2
static int findKeyInBucketSSE2(hashtable *ht, bucket *b, uint8_t h2, const void *key, int table, int *pos_in_bucket, int *table_index) {
    /* Each byte of 'equal' is 0xFF where the stored hash byte equals h2 and
     * 0x00 where it does not. */
    __m128i stored = _mm_loadu_si128((const __m128i *)b->hashes);
    __m128i equal = _mm_cmpeq_epi8(stored, _mm_set1_epi8(h2));

    /* One bit per position; keep only positions that are filled (presence)
     * and whose hash byte matched. */
    BUCKET_BITS_TYPE filled = b->presence & ((1 << ENTRIES_PER_BUCKET) - 1);
    BUCKET_BITS_TYPE candidates = _mm_movemask_epi8(equal);
    candidates &= filled;

    while (candidates > 0) {
        /* Index of the lowest candidate bit still set. */
        int pos = __builtin_ctz(candidates);
        if (checkCandidateInBucket(ht, b, pos, key, table, pos_in_bucket, table_index)) {
            return 1;
        }
        /* Drop the lowest set bit (the position just checked) and go on. */
        candidates &= candidates - 1;
    }
    return 0;
}
#endif
719+
672720
/* Finds an entry matching the key. If a match is found, returns a pointer to
673721
* the bucket containing the matching entry and points 'pos_in_bucket' to the
674722
* index within the bucket. Returns NULL if no matching entry was found.
@@ -693,21 +741,16 @@ static bucket *findBucket(hashtable *ht, uint64_t hash, const void *key, int *po
693741
}
694742
bucket *b = &ht->tables[table][bucket_idx];
695743
do {
744+
#if HAVE_X86_SIMD
745+
/* All x86-64 CPUs have SSE2. */
746+
if (findKeyInBucketSSE2(ht, b, h2, key, table, pos_in_bucket, table_index)) return b;
747+
#else
696748
/* Find candidate entries with presence flag set and matching h2 hash. */
697749
for (int pos = 0; pos < numBucketPositions(b); pos++) {
698-
if (isPositionFilled(b, pos) && b->hashes[pos] == h2) {
699-
/* It's a candidate. */
700-
void *entry = b->entries[pos];
701-
const void *elem_key = entryGetKey(ht, entry);
702-
if (compareKeys(ht, key, elem_key) == 0) {
703-
/* It's a match. */
704-
assert(pos_in_bucket != NULL);
705-
*pos_in_bucket = pos;
706-
if (table_index) *table_index = table;
707-
return b;
708-
}
709-
}
750+
if (isPositionFilled(b, pos) && b->hashes[pos] == h2 &&
751+
checkCandidateInBucket(ht, b, pos, key, table, pos_in_bucket, table_index)) return b;
710752
}
753+
#endif
711754
b = getChildBucket(b);
712755
} while (b != NULL);
713756
}

0 commit comments

Comments
 (0)