Skip to content

Commit edf90ff

Browse files
lqd authored and Amanieu committed
convert _mm256_shufflelo_epi16 to const generics
1 parent 3e20523 commit edf90ff

File tree

2 files changed

+29
-52
lines changed

2 files changed

+29
-52
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 27 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -2531,57 +2531,34 @@ pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
25312531
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16)
25322532
#[inline]
25332533
#[target_feature(enable = "avx2")]
2534-
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
2535-
#[rustc_args_required_const(1)]
2534+
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2535+
#[rustc_legacy_const_generics(1)]
25362536
#[stable(feature = "simd_x86", since = "1.27.0")]
2537-
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
2538-
let imm8 = (imm8 & 0xFF) as u8;
2537+
pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2538+
static_assert_imm8!(IMM8);
25392539
let a = a.as_i16x16();
2540-
macro_rules! shuffle_done {
2541-
($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
2542-
#[rustfmt::skip]
2543-
simd_shuffle16(a, a, [
2544-
0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7,
2545-
8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
2546-
])
2547-
};
2548-
}
2549-
macro_rules! shuffle_x67 {
2550-
($x01:expr, $x23:expr, $x45:expr) => {
2551-
match (imm8 >> 6) & 0b11 {
2552-
0b00 => shuffle_done!($x01, $x23, $x45, 0),
2553-
0b01 => shuffle_done!($x01, $x23, $x45, 1),
2554-
0b10 => shuffle_done!($x01, $x23, $x45, 2),
2555-
_ => shuffle_done!($x01, $x23, $x45, 3),
2556-
}
2557-
};
2558-
}
2559-
macro_rules! shuffle_x45 {
2560-
($x01:expr, $x23:expr) => {
2561-
match (imm8 >> 4) & 0b11 {
2562-
0b00 => shuffle_x67!($x01, $x23, 0),
2563-
0b01 => shuffle_x67!($x01, $x23, 1),
2564-
0b10 => shuffle_x67!($x01, $x23, 2),
2565-
_ => shuffle_x67!($x01, $x23, 3),
2566-
}
2567-
};
2568-
}
2569-
macro_rules! shuffle_x23 {
2570-
($x01:expr) => {
2571-
match (imm8 >> 2) & 0b11 {
2572-
0b00 => shuffle_x45!($x01, 0),
2573-
0b01 => shuffle_x45!($x01, 1),
2574-
0b10 => shuffle_x45!($x01, 2),
2575-
_ => shuffle_x45!($x01, 3),
2576-
}
2577-
};
2578-
}
2579-
let r: i16x16 = match imm8 & 0b11 {
2580-
0b00 => shuffle_x23!(0),
2581-
0b01 => shuffle_x23!(1),
2582-
0b10 => shuffle_x23!(2),
2583-
_ => shuffle_x23!(3),
2584-
};
2540+
let r: i16x16 = simd_shuffle16(
2541+
a,
2542+
a,
2543+
[
2544+
0 + (IMM8 as u32 & 0b11),
2545+
0 + ((IMM8 as u32 >> 2) & 0b11),
2546+
0 + ((IMM8 as u32 >> 4) & 0b11),
2547+
0 + ((IMM8 as u32 >> 6) & 0b11),
2548+
4,
2549+
5,
2550+
6,
2551+
7,
2552+
8 + (IMM8 as u32 & 0b11),
2553+
8 + ((IMM8 as u32 >> 2) & 0b11),
2554+
8 + ((IMM8 as u32 >> 4) & 0b11),
2555+
8 + ((IMM8 as u32 >> 6) & 0b11),
2556+
12,
2557+
13,
2558+
14,
2559+
15,
2560+
],
2561+
);
25852562
transmute(r)
25862563
}
25872564

@@ -4884,7 +4861,7 @@ mod tests {
48844861
44, 22, 22, 11, 0, 1, 2, 3,
48854862
88, 66, 66, 55, 4, 5, 6, 7,
48864863
);
4887-
let r = _mm256_shufflelo_epi16(a, 0b00_01_01_11);
4864+
let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
48884865
assert_eq_m256i(r, e);
48894866
}
48904867

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7233,7 +7233,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
72337233
a: __m256i,
72347234
) -> __m256i {
72357235
static_assert_imm8!(IMM8);
7236-
let shuffle = _mm256_shufflelo_epi16(a, IMM8);
7236+
let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
72377237
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
72387238
}
72397239

@@ -7246,7 +7246,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
72467246
#[rustc_legacy_const_generics(2)]
72477247
pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
72487248
static_assert_imm8!(IMM8);
7249-
let shuffle = _mm256_shufflelo_epi16(a, IMM8);
7249+
let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
72507250
let zero = _mm256_setzero_si256().as_i16x16();
72517251
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
72527252
}

0 commit comments

Comments
 (0)