Skip to content

Commit 69a3768

Browse files
lqd authored and Amanieu committed
convert _mm_blend_epi32 to const generics
1 parent 2b6e568 commit 69a3768

File tree

2 files changed

+26
-34
lines changed

2 files changed

+26
-34
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 17 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -358,39 +358,28 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
358358
transmute(pavgb(a.as_u8x32(), b.as_u8x32()))
359359
}
360360

361-
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
361+
/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
362362
///
363363
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32)
364364
#[inline]
365365
#[target_feature(enable = "avx2")]
366-
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
367-
#[rustc_args_required_const(2)]
366+
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
367+
#[rustc_legacy_const_generics(2)]
368368
#[stable(feature = "simd_x86", since = "1.27.0")]
369-
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
370-
let imm8 = (imm8 & 0xFF) as u8;
369+
pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
370+
static_assert_imm4!(IMM4);
371371
let a = a.as_i32x4();
372372
let b = b.as_i32x4();
373-
macro_rules! blend2 {
374-
($a:expr, $b:expr, $c:expr, $d:expr) => {
375-
simd_shuffle4(a, b, [$a, $b, $c, $d])
376-
};
377-
}
378-
macro_rules! blend1 {
379-
($a:expr, $b:expr) => {
380-
match (imm8 >> 2) & 0b11 {
381-
0b00 => blend2!($a, $b, 2, 3),
382-
0b01 => blend2!($a, $b, 6, 3),
383-
0b10 => blend2!($a, $b, 2, 7),
384-
_ => blend2!($a, $b, 6, 7),
385-
}
386-
};
387-
}
388-
let r: i32x4 = match imm8 & 0b11 {
389-
0b00 => blend1!(0, 1),
390-
0b01 => blend1!(4, 1),
391-
0b10 => blend1!(0, 5),
392-
_ => blend1!(4, 5),
393-
};
373+
let r: i32x4 = simd_shuffle4(
374+
a,
375+
b,
376+
[
377+
[0, 4, 0, 4][IMM4 as usize & 0b11],
378+
[1, 1, 5, 5][IMM4 as usize & 0b11],
379+
[2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
380+
[3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
381+
],
382+
);
394383
transmute(r)
395384
}
396385

@@ -4065,10 +4054,10 @@ mod tests {
40654054
unsafe fn test_mm_blend_epi32() {
40664055
let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
40674056
let e = _mm_setr_epi32(9, 3, 3, 3);
4068-
let r = _mm_blend_epi32(a, b, 0x01 as i32);
4057+
let r = _mm_blend_epi32::<0x01>(a, b);
40694058
assert_eq_m128i(r, e);
40704059

4071-
let r = _mm_blend_epi32(b, a, 0x0E as i32);
4060+
let r = _mm_blend_epi32::<0x0E>(b, a);
40724061
assert_eq_m128i(r, e);
40734062
}
40744063

crates/core_arch/src/x86/avx512gfni.rs

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1057,7 +1057,8 @@ mod tests {
10571057
const MASK_WORDS: i32 = 0b01_10;
10581058
let expected_result = _mm_gf2p8mul_epi8(left, right);
10591059
let result_masked = _mm_maskz_gf2p8mul_epi8(mask_bytes, left, right);
1060-
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
1060+
let expected_masked =
1061+
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
10611062
assert_eq_m128i(result_masked, expected_masked);
10621063
}
10631064
}
@@ -1075,7 +1076,7 @@ mod tests {
10751076
const MASK_WORDS: i32 = 0b01_10;
10761077
let expected_result = _mm_gf2p8mul_epi8(left, right);
10771078
let result_masked = _mm_mask_gf2p8mul_epi8(left, mask_bytes, left, right);
1078-
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
1079+
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
10791080
assert_eq_m128i(result_masked, expected_masked);
10801081
}
10811082
}
@@ -1281,7 +1282,8 @@ mod tests {
12811282
let expected_result = _mm_gf2p8affine_epi64_epi8(vector, matrix, CONSTANT_BYTE);
12821283
let result_masked =
12831284
_mm_maskz_gf2p8affine_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
1284-
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
1285+
let expected_masked =
1286+
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
12851287
assert_eq_m128i(result_masked, expected_masked);
12861288
}
12871289
}
@@ -1301,7 +1303,7 @@ mod tests {
13011303
let expected_result = _mm_gf2p8affine_epi64_epi8(left, right, CONSTANT_BYTE);
13021304
let result_masked =
13031305
_mm_mask_gf2p8affine_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
1304-
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
1306+
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
13051307
assert_eq_m128i(result_masked, expected_masked);
13061308
}
13071309
}
@@ -1539,7 +1541,8 @@ mod tests {
15391541
let expected_result = _mm_gf2p8affineinv_epi64_epi8(vector, matrix, CONSTANT_BYTE);
15401542
let result_masked =
15411543
_mm_maskz_gf2p8affineinv_epi64_epi8(mask_bytes, vector, matrix, CONSTANT_BYTE);
1542-
let expected_masked = _mm_blend_epi32(_mm_setzero_si128(), expected_result, MASK_WORDS);
1544+
let expected_masked =
1545+
_mm_blend_epi32::<MASK_WORDS>(_mm_setzero_si128(), expected_result);
15431546
assert_eq_m128i(result_masked, expected_masked);
15441547
}
15451548
}
@@ -1560,7 +1563,7 @@ mod tests {
15601563
let expected_result = _mm_gf2p8affineinv_epi64_epi8(left, right, CONSTANT_BYTE);
15611564
let result_masked =
15621565
_mm_mask_gf2p8affineinv_epi64_epi8(left, mask_bytes, left, right, CONSTANT_BYTE);
1563-
let expected_masked = _mm_blend_epi32(left, expected_result, MASK_WORDS);
1566+
let expected_masked = _mm_blend_epi32::<MASK_WORDS>(left, expected_result);
15641567
assert_eq_m128i(result_masked, expected_masked);
15651568
}
15661569
}

0 commit comments

Comments
 (0)