Skip to content

Commit f81a1f8

Browse files
TDecking authored and Amanieu committed
Fix incorrect reduction operations in avx512f
1 parent d5ba463 commit f81a1f8

File tree

1 file changed

+10
-31
lines changed

1 file changed

+10
-31
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 10 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -31571,7 +31571,7 @@ pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
3157131571
simd_reduce_max(simd_select_bitmask(
3157231572
k,
3157331573
a.as_i32x16(),
31574-
_mm512_undefined_epi32().as_i32x16(),
31574+
i32x16::splat(i32::MIN),
3157531575
))
3157631576
}
3157731577

@@ -31592,11 +31592,7 @@ pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
3159231592
#[target_feature(enable = "avx512f")]
3159331593
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3159431594
pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
31595-
simd_reduce_max(simd_select_bitmask(
31596-
k,
31597-
a.as_i64x8(),
31598-
_mm512_set1_epi64(0).as_i64x8(),
31599-
))
31595+
simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN)))
3160031596
}
3160131597

3160231598
/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
@@ -31619,7 +31615,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
3161931615
simd_reduce_max(simd_select_bitmask(
3162031616
k,
3162131617
a.as_u32x16(),
31622-
_mm512_undefined_epi32().as_u32x16(),
31618+
_mm512_setzero_si512().as_u32x16(),
3162331619
))
3162431620
}
3162531621

@@ -31643,7 +31639,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
3164331639
simd_reduce_max(simd_select_bitmask(
3164431640
k,
3164531641
a.as_u64x8(),
31646-
_mm512_set1_epi64(0).as_u64x8(),
31642+
_mm512_setzero_si512().as_u64x8(),
3164731643
))
3164831644
}
3164931645

@@ -31718,7 +31714,7 @@ pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
3171831714
simd_reduce_min(simd_select_bitmask(
3171931715
k,
3172031716
a.as_i32x16(),
31721-
_mm512_undefined_epi32().as_i32x16(),
31717+
i32x16::splat(i32::MAX),
3172231718
))
3172331719
}
3172431720

@@ -31739,11 +31735,7 @@ pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
3173931735
#[target_feature(enable = "avx512f")]
3174031736
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3174131737
pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
31742-
simd_reduce_min(simd_select_bitmask(
31743-
k,
31744-
a.as_i64x8(),
31745-
_mm512_set1_epi64(0).as_i64x8(),
31746-
))
31738+
simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX)))
3174731739
}
3174831740

3174931741
/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
@@ -31766,7 +31758,7 @@ pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
3176631758
simd_reduce_min(simd_select_bitmask(
3176731759
k,
3176831760
a.as_u32x16(),
31769-
_mm512_undefined_epi32().as_u32x16(),
31761+
u32x16::splat(u32::MAX),
3177031762
))
3177131763
}
3177231764

@@ -31787,11 +31779,7 @@ pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
3178731779
#[target_feature(enable = "avx512f")]
3178831780
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3178931781
pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
31790-
simd_reduce_min(simd_select_bitmask(
31791-
k,
31792-
a.as_u64x8(),
31793-
_mm512_set1_epi64(0).as_u64x8(),
31794-
))
31782+
simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX)))
3179531783
}
3179631784

3179731785
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
@@ -31862,11 +31850,7 @@ pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
3186231850
#[target_feature(enable = "avx512f")]
3186331851
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3186431852
pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
31865-
simd_reduce_and(simd_select_bitmask(
31866-
k,
31867-
a.as_i32x16(),
31868-
_mm512_set1_epi32(0xFF).as_i32x16(),
31869-
))
31853+
simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1)))
3187031854
}
3187131855

3187231856
/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
@@ -31886,12 +31870,7 @@ pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
3188631870
#[target_feature(enable = "avx512f")]
3188731871
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3188831872
pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
31889-
simd_reduce_and(simd_select_bitmask(
31890-
k,
31891-
a.as_i64x8(),
31892-
_mm512_set1_epi64(1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7)
31893-
.as_i64x8(),
31894-
))
31873+
simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1)))
3189531874
}
3189631875

3189731876
/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.

0 commit comments

Comments
 (0)