Skip to content

Commit b7503bf

Browse files
lqd authored and Amanieu committed
convert _mm256_blend_epi16 to const generics
1 parent c0c416f commit b7503bf

File tree

1 file changed

+29
-97
lines changed

1 file changed

+29
-97
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 29 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -412,108 +412,40 @@ pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
412412
transmute(r)
413413
}
414414

415-
/// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`.
415+
/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
416416
///
417417
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16)
418418
#[inline]
419419
#[target_feature(enable = "avx2")]
420-
#[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
421-
#[rustc_args_required_const(2)]
420+
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
421+
#[rustc_legacy_const_generics(2)]
422422
#[stable(feature = "simd_x86", since = "1.27.0")]
423-
pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
424-
let imm8 = (imm8 & 0xFF) as u8;
423+
pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
424+
static_assert_imm8!(IMM8);
425425
let a = a.as_i16x16();
426426
let b = b.as_i16x16();
427-
macro_rules! blend4 {
428-
(
429-
$a:expr,
430-
$b:expr,
431-
$c:expr,
432-
$d:expr,
433-
$e:expr,
434-
$f:expr,
435-
$g:expr,
436-
$h:expr,
437-
$i:expr,
438-
$j:expr,
439-
$k:expr,
440-
$l:expr,
441-
$m:expr,
442-
$n:expr,
443-
$o:expr,
444-
$p:expr
445-
) => {
446-
simd_shuffle16(
447-
a,
448-
b,
449-
[
450-
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
451-
],
452-
)
453-
};
454-
}
455-
macro_rules! blend3 {
456-
(
457-
$a:expr,
458-
$b:expr,
459-
$c:expr,
460-
$d:expr,
461-
$e:expr,
462-
$f:expr,
463-
$a2:expr,
464-
$b2:expr,
465-
$c2:expr,
466-
$d2:expr,
467-
$e2:expr,
468-
$f2:expr
469-
) => {
470-
match (imm8 >> 6) & 0b11 {
471-
0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2, $f2, 14, 15),
472-
0b01 => {
473-
blend4!($a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2, $e2, $f2, 30, 15)
474-
}
475-
0b10 => {
476-
blend4!($a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2, $e2, $f2, 14, 31)
477-
}
478-
_ => blend4!($a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2, $e2, $f2, 30, 31),
479-
}
480-
};
481-
}
482-
macro_rules! blend2 {
483-
(
484-
$a:expr,
485-
$b:expr,
486-
$c:expr,
487-
$d:expr,
488-
$a2:expr,
489-
$b2:expr,
490-
$c2:expr,
491-
$d2:expr
492-
) => {
493-
match (imm8 >> 4) & 0b11 {
494-
0b00 => blend3!($a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13),
495-
0b01 => blend3!($a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13),
496-
0b10 => blend3!($a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29),
497-
_ => blend3!($a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29),
498-
}
499-
};
500-
}
501-
macro_rules! blend1 {
502-
($a1:expr, $b1:expr, $a2:expr, $b2:expr) => {
503-
match (imm8 >> 2) & 0b11 {
504-
0b00 => blend2!($a1, $b1, 2, 3, $a2, $b2, 10, 11),
505-
0b01 => blend2!($a1, $b1, 18, 3, $a2, $b2, 26, 11),
506-
0b10 => blend2!($a1, $b1, 2, 19, $a2, $b2, 10, 27),
507-
_ => blend2!($a1, $b1, 18, 19, $a2, $b2, 26, 27),
508-
}
509-
};
510-
}
511-
let r: i16x16 = match imm8 & 0b11 {
512-
0b00 => blend1!(0, 1, 8, 9),
513-
0b01 => blend1!(16, 1, 24, 9),
514-
0b10 => blend1!(0, 17, 8, 25),
515-
_ => blend1!(16, 17, 24, 25),
516-
};
427+
let r: i16x16 = simd_shuffle16(
428+
a,
429+
b,
430+
[
431+
[0, 16, 0, 16][IMM8 as usize & 0b11],
432+
[1, 1, 17, 17][IMM8 as usize & 0b11],
433+
[2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
434+
[3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
435+
[4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
436+
[5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
437+
[6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
438+
[7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
439+
[8, 24, 8, 24][IMM8 as usize & 0b11],
440+
[9, 9, 25, 25][IMM8 as usize & 0b11],
441+
[10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
442+
[11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
443+
[12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
444+
[13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
445+
[14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
446+
[15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
447+
],
448+
);
517449
transmute(r)
518450
}
519451

@@ -4045,10 +3977,10 @@ mod tests {
40453977
unsafe fn test_mm256_blend_epi16() {
40463978
let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
40473979
let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4048-
let r = _mm256_blend_epi16(a, b, 0x01 as i32);
3980+
let r = _mm256_blend_epi16::<0x01>(a, b);
40493981
assert_eq_m256i(r, e);
40503982

4051-
let r = _mm256_blend_epi16(b, a, 0xFE as i32);
3983+
let r = _mm256_blend_epi16::<0xFE>(b, a);
40523984
assert_eq_m256i(r, e);
40533985
}
40543986

0 commit comments

Comments
 (0)