@@ -412,108 +412,40 @@ pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
412
412
transmute ( r)
413
413
}
414
414
415
- /// Blends packed 16-bit integers from `a` and `b` using control mask `imm8 `.
415
+ /// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
416
416
///
417
417
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16)
418
418
#[ inline]
419
419
#[ target_feature( enable = "avx2" ) ]
420
- #[ cfg_attr( test, assert_instr( vpblendw, imm8 = 9 ) ) ]
421
- #[ rustc_args_required_const ( 2 ) ]
420
+ #[ cfg_attr( test, assert_instr( vpblendw, IMM8 = 9 ) ) ]
421
+ #[ rustc_legacy_const_generics ( 2 ) ]
422
422
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
423
- pub unsafe fn _mm256_blend_epi16 ( a : __m256i , b : __m256i , imm8 : i32 ) -> __m256i {
424
- let imm8 = ( imm8 & 0xFF ) as u8 ;
423
+ pub unsafe fn _mm256_blend_epi16 < const IMM8 : i32 > ( a : __m256i , b : __m256i ) -> __m256i {
424
+ static_assert_imm8 ! ( IMM8 ) ; // compile-time check of IMM8; the macro is defined outside this hunk, so the exact accepted range is not shown here
425
425
let a = a. as_i16x16 ( ) ;
426
426
let b = b. as_i16x16 ( ) ;
427
- macro_rules! blend4 {
428
- (
429
- $a: expr,
430
- $b: expr,
431
- $c: expr,
432
- $d: expr,
433
- $e: expr,
434
- $f: expr,
435
- $g: expr,
436
- $h: expr,
437
- $i: expr,
438
- $j: expr,
439
- $k: expr,
440
- $l: expr,
441
- $m: expr,
442
- $n: expr,
443
- $o: expr,
444
- $p: expr
445
- ) => {
446
- simd_shuffle16(
447
- a,
448
- b,
449
- [
450
- $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
451
- ] ,
452
- )
453
- } ;
454
- }
455
- macro_rules! blend3 {
456
- (
457
- $a: expr,
458
- $b: expr,
459
- $c: expr,
460
- $d: expr,
461
- $e: expr,
462
- $f: expr,
463
- $a2: expr,
464
- $b2: expr,
465
- $c2: expr,
466
- $d2: expr,
467
- $e2: expr,
468
- $f2: expr
469
- ) => {
470
- match ( imm8 >> 6 ) & 0b11 {
471
- 0b00 => blend4!( $a, $b, $c, $d, $e, $f, 6 , 7 , $a2, $b2, $c2, $d2, $e2, $f2, 14 , 15 ) ,
472
- 0b01 => {
473
- blend4!( $a, $b, $c, $d, $e, $f, 22 , 7 , $a2, $b2, $c2, $d2, $e2, $f2, 30 , 15 )
474
- }
475
- 0b10 => {
476
- blend4!( $a, $b, $c, $d, $e, $f, 6 , 23 , $a2, $b2, $c2, $d2, $e2, $f2, 14 , 31 )
477
- }
478
- _ => blend4!( $a, $b, $c, $d, $e, $f, 22 , 23 , $a2, $b2, $c2, $d2, $e2, $f2, 30 , 31 ) ,
479
- }
480
- } ;
481
- }
482
- macro_rules! blend2 {
483
- (
484
- $a: expr,
485
- $b: expr,
486
- $c: expr,
487
- $d: expr,
488
- $a2: expr,
489
- $b2: expr,
490
- $c2: expr,
491
- $d2: expr
492
- ) => {
493
- match ( imm8 >> 4 ) & 0b11 {
494
- 0b00 => blend3!( $a, $b, $c, $d, 4 , 5 , $a2, $b2, $c2, $d2, 12 , 13 ) ,
495
- 0b01 => blend3!( $a, $b, $c, $d, 20 , 5 , $a2, $b2, $c2, $d2, 28 , 13 ) ,
496
- 0b10 => blend3!( $a, $b, $c, $d, 4 , 21 , $a2, $b2, $c2, $d2, 12 , 29 ) ,
497
- _ => blend3!( $a, $b, $c, $d, 20 , 21 , $a2, $b2, $c2, $d2, 28 , 29 ) ,
498
- }
499
- } ;
500
- }
501
- macro_rules! blend1 {
502
- ( $a1: expr, $b1: expr, $a2: expr, $b2: expr) => {
503
- match ( imm8 >> 2 ) & 0b11 {
504
- 0b00 => blend2!( $a1, $b1, 2 , 3 , $a2, $b2, 10 , 11 ) ,
505
- 0b01 => blend2!( $a1, $b1, 18 , 3 , $a2, $b2, 26 , 11 ) ,
506
- 0b10 => blend2!( $a1, $b1, 2 , 19 , $a2, $b2, 10 , 27 ) ,
507
- _ => blend2!( $a1, $b1, 18 , 19 , $a2, $b2, 26 , 27 ) ,
508
- }
509
- } ;
510
- }
511
- let r: i16x16 = match imm8 & 0b11 {
512
- 0b00 => blend1 ! ( 0 , 1 , 8 , 9 ) ,
513
- 0b01 => blend1 ! ( 16 , 1 , 24 , 9 ) ,
514
- 0b10 => blend1 ! ( 0 , 17 , 8 , 25 ) ,
515
- _ => blend1 ! ( 16 , 17 , 24 , 25 ) ,
516
- } ;
427
+ let r: i16x16 = simd_shuffle16 ( // 16 shuffle indices, one per output element, each picked from a 4-entry table by a 2-bit field of IMM8
428
+ a,
429
+ b,
430
+ [ // indices 0-15 presumably address `a` and 16-31 address `b` (consistent with test_mm256_blend_epi16) - confirm against simd_shuffle16
431
+ [ 0 , 16 , 0 , 16 ] [ IMM8 as usize & 0b11 ] , // IMM8 bits 1:0 cover elements 0 and 1: bit 0 switches element 0 to index 16
432
+ [ 1 , 1 , 17 , 17 ] [ IMM8 as usize & 0b11 ] , // ...and bit 1 switches element 1 to index 17
433
+ [ 2 , 18 , 2 , 18 ] [ ( IMM8 as usize >> 2 ) & 0b11 ] , // IMM8 bits 3:2 cover elements 2 and 3
434
+ [ 3 , 3 , 19 , 19 ] [ ( IMM8 as usize >> 2 ) & 0b11 ] ,
435
+ [ 4 , 20 , 4 , 20 ] [ ( IMM8 as usize >> 4 ) & 0b11 ] , // IMM8 bits 5:4 cover elements 4 and 5
436
+ [ 5 , 5 , 21 , 21 ] [ ( IMM8 as usize >> 4 ) & 0b11 ] ,
437
+ [ 6 , 22 , 6 , 22 ] [ ( IMM8 as usize >> 6 ) & 0b11 ] , // IMM8 bits 7:6 cover elements 6 and 7
438
+ [ 7 , 7 , 23 , 23 ] [ ( IMM8 as usize >> 6 ) & 0b11 ] ,
439
+ [ 8 , 24 , 8 , 24 ] [ IMM8 as usize & 0b11 ] , // elements 8-15 reuse the same bit fields as elements 0-7, with every index offset by 8
440
+ [ 9 , 9 , 25 , 25 ] [ IMM8 as usize & 0b11 ] ,
441
+ [ 10 , 26 , 10 , 26 ] [ ( IMM8 as usize >> 2 ) & 0b11 ] ,
442
+ [ 11 , 11 , 27 , 27 ] [ ( IMM8 as usize >> 2 ) & 0b11 ] ,
443
+ [ 12 , 28 , 12 , 28 ] [ ( IMM8 as usize >> 4 ) & 0b11 ] ,
444
+ [ 13 , 13 , 29 , 29 ] [ ( IMM8 as usize >> 4 ) & 0b11 ] ,
445
+ [ 14 , 30 , 14 , 30 ] [ ( IMM8 as usize >> 6 ) & 0b11 ] ,
446
+ [ 15 , 15 , 31 , 31 ] [ ( IMM8 as usize >> 6 ) & 0b11 ] ,
447
+ ] ,
448
+ ) ;
517
449
transmute ( r)
518
450
}
519
451
@@ -4045,10 +3977,10 @@ mod tests {
4045
3977
unsafe fn test_mm256_blend_epi16 ( ) {
4046
3978
let ( a, b) = ( _mm256_set1_epi16 ( 3 ) , _mm256_set1_epi16 ( 9 ) ) ;
4047
3979
let e = _mm256_setr_epi16 ( 9 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 9 , 3 , 3 , 3 , 3 , 3 , 3 , 3 ) ; // expected: 9 (from `b`) at elements 0 and 8, 3 (from `a`) elsewhere
4048
- let r = _mm256_blend_epi16 ( a, b, 0x01 as i32 ) ;
3980
+ let r = _mm256_blend_epi16 :: < 0x01 > ( a, b) ; // only bit 0 set: elements 0 and 8 take the second operand's value
4049
3981
assert_eq_m256i ( r, e) ;
4050
3982
4051
- let r = _mm256_blend_epi16 ( b, a, 0xFE as i32 ) ;
3983
+ let r = _mm256_blend_epi16 :: < 0xFE > ( b, a) ; // operands swapped and the 8-bit mask complemented, so the same vector `e` is expected
4052
3984
assert_eq_m256i ( r, e) ;
4053
3985
}
4054
3986
0 commit comments