Skip to content

Commit 0947572

Browse files
satiscugcat authored and Amanieu committed
Changed the implementation of bsrli to match bslli in avx2 intrinsics
1 parent 1c5b2a6 commit 0947572

File tree

1 file changed

+46
-132
lines changed
  • library/stdarch/crates/core_arch/src/x86

1 file changed

+46
-132
lines changed

library/stdarch/crates/core_arch/src/x86/avx2.rs

Lines changed: 46 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -2961,140 +2961,54 @@ pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
29612961
#[stable(feature = "simd_x86", since = "1.27.0")]
29622962
pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
29632963
static_assert_uimm_bits!(IMM8, 8);
2964+
const fn mask(shift: i32, i: u32) -> u32 {
2965+
let shift = shift as u32 & 0xff;
2966+
if shift > 15 || (15 - (i % 16)) < shift {
2967+
0
2968+
} else {
2969+
32 + (i + shift)
2970+
}
2971+
}
29642972
unsafe {
29652973
let a = a.as_i8x32();
2966-
let zero = i8x32::ZERO;
2967-
let r: i8x32 = match IMM8 {
2968-
0 => simd_shuffle!(
2969-
a,
2970-
zero,
2971-
[
2972-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
2973-
22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2974-
],
2975-
),
2976-
1 => simd_shuffle!(
2977-
a,
2978-
zero,
2979-
[
2980-
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22,
2981-
23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
2982-
],
2983-
),
2984-
2 => simd_shuffle!(
2985-
a,
2986-
zero,
2987-
[
2988-
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23,
2989-
24, 25, 26, 27, 28, 29, 30, 31, 32, 32,
2990-
],
2991-
),
2992-
3 => simd_shuffle!(
2993-
a,
2994-
zero,
2995-
[
2996-
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23,
2997-
24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
2998-
],
2999-
),
3000-
4 => simd_shuffle!(
3001-
a,
3002-
zero,
3003-
[
3004-
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24,
3005-
25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
3006-
],
3007-
),
3008-
5 => simd_shuffle!(
3009-
a,
3010-
zero,
3011-
[
3012-
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25,
3013-
26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
3014-
],
3015-
),
3016-
6 => simd_shuffle!(
3017-
a,
3018-
zero,
3019-
[
3020-
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26,
3021-
27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
3022-
],
3023-
),
3024-
7 => simd_shuffle!(
3025-
a,
3026-
zero,
3027-
[
3028-
7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26,
3029-
27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
3030-
],
3031-
),
3032-
8 => simd_shuffle!(
3033-
a,
3034-
zero,
3035-
[
3036-
8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27,
3037-
28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
3038-
],
3039-
),
3040-
9 => simd_shuffle!(
3041-
a,
3042-
zero,
3043-
[
3044-
9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28,
3045-
29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3046-
],
3047-
),
3048-
10 => simd_shuffle!(
3049-
a,
3050-
zero,
3051-
[
3052-
10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29,
3053-
30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3054-
],
3055-
),
3056-
11 => simd_shuffle!(
3057-
a,
3058-
zero,
3059-
[
3060-
11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30,
3061-
31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3062-
],
3063-
),
3064-
12 => simd_shuffle!(
3065-
a,
3066-
zero,
3067-
[
3068-
12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31,
3069-
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3070-
],
3071-
),
3072-
13 => simd_shuffle!(
3073-
a,
3074-
zero,
3075-
[
3076-
13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32,
3077-
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3078-
],
3079-
),
3080-
14 => simd_shuffle!(
3081-
a,
3082-
zero,
3083-
[
3084-
14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32,
3085-
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3086-
],
3087-
),
3088-
15 => simd_shuffle!(
3089-
a,
3090-
zero,
3091-
[
3092-
15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
3093-
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
3094-
],
3095-
),
3096-
_ => zero,
3097-
};
2974+
let r: i8x32 = simd_shuffle!(
2975+
i8x32::ZERO,
2976+
a,
2977+
[
2978+
mask(IMM8, 0),
2979+
mask(IMM8, 1),
2980+
mask(IMM8, 2),
2981+
mask(IMM8, 3),
2982+
mask(IMM8, 4),
2983+
mask(IMM8, 5),
2984+
mask(IMM8, 6),
2985+
mask(IMM8, 7),
2986+
mask(IMM8, 8),
2987+
mask(IMM8, 9),
2988+
mask(IMM8, 10),
2989+
mask(IMM8, 11),
2990+
mask(IMM8, 12),
2991+
mask(IMM8, 13),
2992+
mask(IMM8, 14),
2993+
mask(IMM8, 15),
2994+
mask(IMM8, 16),
2995+
mask(IMM8, 17),
2996+
mask(IMM8, 18),
2997+
mask(IMM8, 19),
2998+
mask(IMM8, 20),
2999+
mask(IMM8, 21),
3000+
mask(IMM8, 22),
3001+
mask(IMM8, 23),
3002+
mask(IMM8, 24),
3003+
mask(IMM8, 25),
3004+
mask(IMM8, 26),
3005+
mask(IMM8, 27),
3006+
mask(IMM8, 28),
3007+
mask(IMM8, 29),
3008+
mask(IMM8, 30),
3009+
mask(IMM8, 31),
3010+
],
3011+
);
30983012
transmute(r)
30993013
}
31003014
}

0 commit comments

Comments
 (0)