diff --git a/src/avx512bitpacking.c b/src/avx512bitpacking.c index ac93ae6..dceb967 100644 --- a/src/avx512bitpacking.c +++ b/src/avx512bitpacking.c @@ -7,14 +7,14 @@ static uint32_t maxbitas32int(const __m256i accumulator) { const __m256i _tmp2 = _mm256_or_si256(_mm256_srli_si256(_tmp1, 4), _tmp1); uint32_t ans1 = _mm256_extract_epi32(_tmp2, 0); uint32_t ans2 = _mm256_extract_epi32(_tmp2, 4); - uint32_t ans = ans1 > ans2 ? ans1 : ans2; + uint32_t ans = ans1 | ans2; return ans; } static uint32_t avx512maxbitas32int(const __m512i accumulator) { uint32_t ans1 = maxbitas32int(_mm512_castsi512_si256(accumulator)); uint32_t ans2 = maxbitas32int(_mm512_extracti64x4_epi64(accumulator, 1)); - uint32_t ans = ans1 > ans2 ? ans1 : ans2; + uint32_t ans = ans1 | ans2; return bits(ans); } diff --git a/src/avxbitpacking.c b/src/avxbitpacking.c index 0f25b94..e5eb080 100644 --- a/src/avxbitpacking.c +++ b/src/avxbitpacking.c @@ -7,7 +7,7 @@ static uint32_t maxbitas32int(const __m256i accumulator) { const __m256i _tmp2 = _mm256_or_si256(_mm256_srli_si256(_tmp1, 4), _tmp1); uint32_t ans1 = _mm256_extract_epi32(_tmp2, 0); uint32_t ans2 = _mm256_extract_epi32(_tmp2, 4); - uint32_t ans = ans1 > ans2 ? ans1 : ans2; + uint32_t ans = ans1 | ans2; return bits(ans); }