Skip to content

Commit 72962ce

Browse files
authored
Convert _mm256_blend_{ps,pd} to const generics (#1058)
1 parent bb174c8 commit 72962ce

File tree

1 file changed

+40
-99
lines changed
  • crates/core_arch/src/x86

1 file changed

+40
-99
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 40 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -458,44 +458,21 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
458458
// Note: LLVM7 prefers single-precision blend instructions when
459459
// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
460460
// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
461-
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
462-
#[rustc_args_required_const(2)]
463-
#[stable(feature = "simd_x86", since = "1.27.0")]
464-
pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
465-
let imm8 = (imm8 & 0xFF) as u8;
466-
macro_rules! blend4 {
467-
($a:expr, $b:expr, $c:expr, $d:expr) => {
468-
simd_shuffle4(a, b, [$a, $b, $c, $d])
469-
};
470-
}
471-
macro_rules! blend3 {
472-
($a:expr, $b:expr, $c:expr) => {
473-
match imm8 & 0x8 {
474-
0 => blend4!($a, $b, $c, 3),
475-
_ => blend4!($a, $b, $c, 7),
476-
}
477-
};
478-
}
479-
macro_rules! blend2 {
480-
($a:expr, $b:expr) => {
481-
match imm8 & 0x4 {
482-
0 => blend3!($a, $b, 2),
483-
_ => blend3!($a, $b, 6),
484-
}
485-
};
486-
}
487-
macro_rules! blend1 {
488-
($a:expr) => {
489-
match imm8 & 0x2 {
490-
0 => blend2!($a, 1),
491-
_ => blend2!($a, 5),
492-
}
493-
};
494-
}
495-
match imm8 & 0x1 {
496-
0 => blend1!(0),
497-
_ => blend1!(4),
498-
}
461+
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
462+
#[rustc_legacy_const_generics(2)]
463+
#[stable(feature = "simd_x86", since = "1.27.0")]
464+
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
465+
static_assert_imm4!(IMM4);
466+
simd_shuffle4(
467+
a,
468+
b,
469+
[
470+
((IMM4 as u32 >> 0) & 1) * 4 + 0,
471+
((IMM4 as u32 >> 1) & 1) * 4 + 1,
472+
((IMM4 as u32 >> 2) & 1) * 4 + 2,
473+
((IMM4 as u32 >> 3) & 1) * 4 + 3,
474+
],
475+
)
499476
}
500477

501478
/// Blends packed single-precision (32-bit) floating-point elements from
@@ -504,61 +481,25 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
504481
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps)
505482
#[inline]
506483
#[target_feature(enable = "avx")]
507-
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
508-
#[rustc_args_required_const(2)]
509-
#[stable(feature = "simd_x86", since = "1.27.0")]
510-
pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
511-
let imm8 = (imm8 & 0xFF) as u8;
512-
macro_rules! blend4 {
513-
(
514-
$a:expr,
515-
$b:expr,
516-
$c:expr,
517-
$d:expr,
518-
$e:expr,
519-
$f:expr,
520-
$g:expr,
521-
$h:expr
522-
) => {
523-
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
524-
};
525-
}
526-
macro_rules! blend3 {
527-
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
528-
match (imm8 >> 6) & 0b11 {
529-
0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7),
530-
0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7),
531-
0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15),
532-
_ => blend4!($a, $b, $c, $d, $e, $f, 14, 15),
533-
}
534-
};
535-
}
536-
macro_rules! blend2 {
537-
($a:expr, $b:expr, $c:expr, $d:expr) => {
538-
match (imm8 >> 4) & 0b11 {
539-
0b00 => blend3!($a, $b, $c, $d, 4, 5),
540-
0b01 => blend3!($a, $b, $c, $d, 12, 5),
541-
0b10 => blend3!($a, $b, $c, $d, 4, 13),
542-
_ => blend3!($a, $b, $c, $d, 12, 13),
543-
}
544-
};
545-
}
546-
macro_rules! blend1 {
547-
($a:expr, $b:expr) => {
548-
match (imm8 >> 2) & 0b11 {
549-
0b00 => blend2!($a, $b, 2, 3),
550-
0b01 => blend2!($a, $b, 10, 3),
551-
0b10 => blend2!($a, $b, 2, 11),
552-
_ => blend2!($a, $b, 10, 11),
553-
}
554-
};
555-
}
556-
match imm8 & 0b11 {
557-
0b00 => blend1!(0, 1),
558-
0b01 => blend1!(8, 1),
559-
0b10 => blend1!(0, 9),
560-
_ => blend1!(8, 9),
561-
}
484+
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
485+
#[rustc_legacy_const_generics(2)]
486+
#[stable(feature = "simd_x86", since = "1.27.0")]
487+
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
488+
static_assert_imm8!(IMM8);
489+
simd_shuffle8(
490+
a,
491+
b,
492+
[
493+
((IMM8 as u32 >> 0) & 1) * 8 + 0,
494+
((IMM8 as u32 >> 1) & 1) * 8 + 1,
495+
((IMM8 as u32 >> 2) & 1) * 8 + 2,
496+
((IMM8 as u32 >> 3) & 1) * 8 + 3,
497+
((IMM8 as u32 >> 4) & 1) * 8 + 4,
498+
((IMM8 as u32 >> 5) & 1) * 8 + 5,
499+
((IMM8 as u32 >> 6) & 1) * 8 + 6,
500+
((IMM8 as u32 >> 7) & 1) * 8 + 7,
501+
],
502+
)
562503
}
563504

564505
/// Blends packed double-precision (64-bit) floating-point elements from
@@ -3378,23 +3319,23 @@ mod tests {
33783319
unsafe fn test_mm256_blend_pd() {
33793320
let a = _mm256_setr_pd(4., 9., 16., 25.);
33803321
let b = _mm256_setr_pd(4., 3., 2., 5.);
3381-
let r = _mm256_blend_pd(a, b, 0x0);
3322+
let r = _mm256_blend_pd::<0x0>(a, b);
33823323
assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3383-
let r = _mm256_blend_pd(a, b, 0x3);
3324+
let r = _mm256_blend_pd::<0x3>(a, b);
33843325
assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3385-
let r = _mm256_blend_pd(a, b, 0xF);
3326+
let r = _mm256_blend_pd::<0xF>(a, b);
33863327
assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
33873328
}
33883329

33893330
#[simd_test(enable = "avx")]
33903331
unsafe fn test_mm256_blend_ps() {
33913332
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
33923333
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3393-
let r = _mm256_blend_ps(a, b, 0x0);
3334+
let r = _mm256_blend_ps::<0x0>(a, b);
33943335
assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3395-
let r = _mm256_blend_ps(a, b, 0x3);
3336+
let r = _mm256_blend_ps::<0x3>(a, b);
33963337
assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3397-
let r = _mm256_blend_ps(a, b, 0xF);
3338+
let r = _mm256_blend_ps::<0xF>(a, b);
33983339
assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
33993340
}
34003341

0 commit comments

Comments
 (0)