Skip to content

Commit c0c416f

Browse files
lqd authored and Amanieu committed
convert _mm256_blend_epi32 to const generics
1 parent: 69a3768 · commit: c0c416f

File tree

2 files changed

+28
-64
lines changed

2 files changed

+28
-64
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 22 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -383,68 +383,32 @@ pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128
383383
transmute(r)
384384
}
385385

386-
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
386+
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
387387
///
388388
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32)
389389
#[inline]
390390
#[target_feature(enable = "avx2")]
391-
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
392-
#[rustc_args_required_const(2)]
391+
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
392+
#[rustc_legacy_const_generics(2)]
393393
#[stable(feature = "simd_x86", since = "1.27.0")]
394-
pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
395-
let imm8 = (imm8 & 0xFF) as u8;
394+
pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
395+
static_assert_imm8!(IMM8);
396396
let a = a.as_i32x8();
397397
let b = b.as_i32x8();
398-
macro_rules! blend4 {
399-
(
400-
$a:expr,
401-
$b:expr,
402-
$c:expr,
403-
$d:expr,
404-
$e:expr,
405-
$f:expr,
406-
$g:expr,
407-
$h:expr
408-
) => {
409-
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
410-
};
411-
}
412-
macro_rules! blend3 {
413-
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
414-
match (imm8 >> 6) & 0b11 {
415-
0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7),
416-
0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7),
417-
0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15),
418-
_ => blend4!($a, $b, $c, $d, $e, $f, 14, 15),
419-
}
420-
};
421-
}
422-
macro_rules! blend2 {
423-
($a:expr, $b:expr, $c:expr, $d:expr) => {
424-
match (imm8 >> 4) & 0b11 {
425-
0b00 => blend3!($a, $b, $c, $d, 4, 5),
426-
0b01 => blend3!($a, $b, $c, $d, 12, 5),
427-
0b10 => blend3!($a, $b, $c, $d, 4, 13),
428-
_ => blend3!($a, $b, $c, $d, 12, 13),
429-
}
430-
};
431-
}
432-
macro_rules! blend1 {
433-
($a:expr, $b:expr) => {
434-
match (imm8 >> 2) & 0b11 {
435-
0b00 => blend2!($a, $b, 2, 3),
436-
0b01 => blend2!($a, $b, 10, 3),
437-
0b10 => blend2!($a, $b, 2, 11),
438-
_ => blend2!($a, $b, 10, 11),
439-
}
440-
};
441-
}
442-
let r: i32x8 = match imm8 & 0b11 {
443-
0b00 => blend1!(0, 1),
444-
0b01 => blend1!(8, 1),
445-
0b10 => blend1!(0, 9),
446-
_ => blend1!(8, 9),
447-
};
398+
let r: i32x8 = simd_shuffle8(
399+
a,
400+
b,
401+
[
402+
[0, 8, 0, 8][IMM8 as usize & 0b11],
403+
[1, 1, 9, 9][IMM8 as usize & 0b11],
404+
[2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
405+
[3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
406+
[4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
407+
[5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
408+
[6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
409+
[7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
410+
],
411+
);
448412
transmute(r)
449413
}
450414

@@ -4065,15 +4029,15 @@ mod tests {
40654029
unsafe fn test_mm256_blend_epi32() {
40664030
let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
40674031
let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4068-
let r = _mm256_blend_epi32(a, b, 0x01 as i32);
4032+
let r = _mm256_blend_epi32::<0x01>(a, b);
40694033
assert_eq_m256i(r, e);
40704034

40714035
let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4072-
let r = _mm256_blend_epi32(a, b, 0x82 as i32);
4036+
let r = _mm256_blend_epi32::<0x82>(a, b);
40734037
assert_eq_m256i(r, e);
40744038

40754039
let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4076-
let r = _mm256_blend_epi32(a, b, 0x7C as i32);
4040+
let r = _mm256_blend_epi32::<0x7C>(a, b);
40774041
assert_eq_m256i(r, e);
40784042
}
40794043

crates/core_arch/src/x86/avx512gfni.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1008,7 +1008,7 @@ mod tests {
10081008
let expected_result = _mm256_gf2p8mul_epi8(left, right);
10091009
let result_masked = _mm256_maskz_gf2p8mul_epi8(mask_bytes, left, right);
10101010
let expected_masked =
1011-
_mm256_blend_epi32(_mm256_setzero_si256(), expected_result, MASK_WORDS);
1011+
_mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
10121012
assert_eq_m256i(result_masked, expected_masked);
10131013
}
10141014
}
@@ -1026,7 +1026,7 @@ mod tests {
10261026
const MASK_WORDS: i32 = 0b01_10_11_00;
10271027
let expected_result = _mm256_gf2p8mul_epi8(left, right);
10281028
let result_masked = _mm256_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
1029-
let expected_masked = _mm256_blend_epi32(left, expected_result, MASK_WORDS);
1029+
let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
10301030
assert_eq_m256i(result_masked, expected_masked);
10311031
}
10321032
}
@@ -1207,7 +1207,7 @@ mod tests {
12071207
let result_masked =
12081208
_mm256_maskz_gf2p8affine_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
12091209
let expected_masked =
1210-
_mm256_blend_epi32(_mm256_setzero_si256(), expected_result, MASK_WORDS);
1210+
_mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
12111211
assert_eq_m256i(result_masked, expected_masked);
12121212
}
12131213
}
@@ -1228,7 +1228,7 @@ mod tests {
12281228
let expected_result = _mm256_gf2p8affine_epi64_epi8(left, right, CONSTANT_BYTE);
12291229
let result_masked =
12301230
_mm256_mask_gf2p8affine_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
1231-
let expected_masked = _mm256_blend_epi32(left, expected_result, MASK_WORDS);
1231+
let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
12321232
assert_eq_m256i(result_masked, expected_masked);
12331233
}
12341234
}
@@ -1456,7 +1456,7 @@ mod tests {
14561456
let result_masked =
14571457
_mm256_maskz_gf2p8affineinv_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
14581458
let expected_masked =
1459-
_mm256_blend_epi32(_mm256_setzero_si256(), expected_result, MASK_WORDS);
1459+
_mm256_blend_epi32::<MASK_WORDS>(_mm256_setzero_si256(), expected_result);
14601460
assert_eq_m256i(result_masked, expected_masked);
14611461
}
14621462
}
@@ -1477,7 +1477,7 @@ mod tests {
14771477
let expected_result = _mm256_gf2p8affineinv_epi64_epi8(left, right, CONSTANT_BYTE);
14781478
let result_masked =
14791479
_mm256_mask_gf2p8affineinv_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
1480-
let expected_masked = _mm256_blend_epi32(left, expected_result, MASK_WORDS);
1480+
let expected_masked = _mm256_blend_epi32::<MASK_WORDS>(left, expected_result);
14811481
assert_eq_m256i(result_masked, expected_masked);
14821482
}
14831483
}

0 commit comments

Comments
 (0)