@@ -458,44 +458,21 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
458
458
// Note: LLVM7 prefers single-precision blend instructions when
459
459
// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
460
460
// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
461
- #[ cfg_attr( test, assert_instr( vblendps, imm8 = 9 ) ) ]
462
- #[ rustc_args_required_const( 2 ) ]
463
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
464
- pub unsafe fn _mm256_blend_pd ( a : __m256d , b : __m256d , imm8 : i32 ) -> __m256d {
465
- let imm8 = ( imm8 & 0xFF ) as u8 ;
466
- macro_rules! blend4 {
467
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
468
- simd_shuffle4( a, b, [ $a, $b, $c, $d] )
469
- } ;
470
- }
471
- macro_rules! blend3 {
472
- ( $a: expr, $b: expr, $c: expr) => {
473
- match imm8 & 0x8 {
474
- 0 => blend4!( $a, $b, $c, 3 ) ,
475
- _ => blend4!( $a, $b, $c, 7 ) ,
476
- }
477
- } ;
478
- }
479
- macro_rules! blend2 {
480
- ( $a: expr, $b: expr) => {
481
- match imm8 & 0x4 {
482
- 0 => blend3!( $a, $b, 2 ) ,
483
- _ => blend3!( $a, $b, 6 ) ,
484
- }
485
- } ;
486
- }
487
- macro_rules! blend1 {
488
- ( $a: expr) => {
489
- match imm8 & 0x2 {
490
- 0 => blend2!( $a, 1 ) ,
491
- _ => blend2!( $a, 5 ) ,
492
- }
493
- } ;
494
- }
495
- match imm8 & 0x1 {
496
- 0 => blend1 ! ( 0 ) ,
497
- _ => blend1 ! ( 4 ) ,
498
- }
461
+ #[ cfg_attr( test, assert_instr( vblendps, IMM4 = 9 ) ) ]
462
+ #[ rustc_legacy_const_generics( 2 ) ]
463
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
464
+ pub unsafe fn _mm256_blend_pd < const IMM4 : i32 > ( a : __m256d , b : __m256d ) -> __m256d {
465
+ static_assert_imm4 ! ( IMM4 ) ;
466
+ simd_shuffle4 (
467
+ a,
468
+ b,
469
+ [
470
+ ( ( IMM4 as u32 >> 0 ) & 1 ) * 4 + 0 ,
471
+ ( ( IMM4 as u32 >> 1 ) & 1 ) * 4 + 1 ,
472
+ ( ( IMM4 as u32 >> 2 ) & 1 ) * 4 + 2 ,
473
+ ( ( IMM4 as u32 >> 3 ) & 1 ) * 4 + 3 ,
474
+ ] ,
475
+ )
499
476
}
500
477
501
478
/// Blends packed single-precision (32-bit) floating-point elements from
@@ -504,61 +481,25 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
504
481
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps)
505
482
#[ inline]
506
483
#[ target_feature( enable = "avx" ) ]
507
- #[ cfg_attr( test, assert_instr( vblendps, imm8 = 9 ) ) ]
508
- #[ rustc_args_required_const( 2 ) ]
509
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
510
- pub unsafe fn _mm256_blend_ps ( a : __m256 , b : __m256 , imm8 : i32 ) -> __m256 {
511
- let imm8 = ( imm8 & 0xFF ) as u8 ;
512
- macro_rules! blend4 {
513
- (
514
- $a: expr,
515
- $b: expr,
516
- $c: expr,
517
- $d: expr,
518
- $e: expr,
519
- $f: expr,
520
- $g: expr,
521
- $h: expr
522
- ) => {
523
- simd_shuffle8( a, b, [ $a, $b, $c, $d, $e, $f, $g, $h] )
524
- } ;
525
- }
526
- macro_rules! blend3 {
527
- ( $a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr) => {
528
- match ( imm8 >> 6 ) & 0b11 {
529
- 0b00 => blend4!( $a, $b, $c, $d, $e, $f, 6 , 7 ) ,
530
- 0b01 => blend4!( $a, $b, $c, $d, $e, $f, 14 , 7 ) ,
531
- 0b10 => blend4!( $a, $b, $c, $d, $e, $f, 6 , 15 ) ,
532
- _ => blend4!( $a, $b, $c, $d, $e, $f, 14 , 15 ) ,
533
- }
534
- } ;
535
- }
536
- macro_rules! blend2 {
537
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
538
- match ( imm8 >> 4 ) & 0b11 {
539
- 0b00 => blend3!( $a, $b, $c, $d, 4 , 5 ) ,
540
- 0b01 => blend3!( $a, $b, $c, $d, 12 , 5 ) ,
541
- 0b10 => blend3!( $a, $b, $c, $d, 4 , 13 ) ,
542
- _ => blend3!( $a, $b, $c, $d, 12 , 13 ) ,
543
- }
544
- } ;
545
- }
546
- macro_rules! blend1 {
547
- ( $a: expr, $b: expr) => {
548
- match ( imm8 >> 2 ) & 0b11 {
549
- 0b00 => blend2!( $a, $b, 2 , 3 ) ,
550
- 0b01 => blend2!( $a, $b, 10 , 3 ) ,
551
- 0b10 => blend2!( $a, $b, 2 , 11 ) ,
552
- _ => blend2!( $a, $b, 10 , 11 ) ,
553
- }
554
- } ;
555
- }
556
- match imm8 & 0b11 {
557
- 0b00 => blend1 ! ( 0 , 1 ) ,
558
- 0b01 => blend1 ! ( 8 , 1 ) ,
559
- 0b10 => blend1 ! ( 0 , 9 ) ,
560
- _ => blend1 ! ( 8 , 9 ) ,
561
- }
484
+ #[ cfg_attr( test, assert_instr( vblendps, IMM8 = 9 ) ) ]
485
+ #[ rustc_legacy_const_generics( 2 ) ]
486
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
487
+ pub unsafe fn _mm256_blend_ps < const IMM8 : i32 > ( a : __m256 , b : __m256 ) -> __m256 {
488
+ static_assert_imm8 ! ( IMM8 ) ;
489
+ simd_shuffle8 (
490
+ a,
491
+ b,
492
+ [
493
+ ( ( IMM8 as u32 >> 0 ) & 1 ) * 8 + 0 ,
494
+ ( ( IMM8 as u32 >> 1 ) & 1 ) * 8 + 1 ,
495
+ ( ( IMM8 as u32 >> 2 ) & 1 ) * 8 + 2 ,
496
+ ( ( IMM8 as u32 >> 3 ) & 1 ) * 8 + 3 ,
497
+ ( ( IMM8 as u32 >> 4 ) & 1 ) * 8 + 4 ,
498
+ ( ( IMM8 as u32 >> 5 ) & 1 ) * 8 + 5 ,
499
+ ( ( IMM8 as u32 >> 6 ) & 1 ) * 8 + 6 ,
500
+ ( ( IMM8 as u32 >> 7 ) & 1 ) * 8 + 7 ,
501
+ ] ,
502
+ )
562
503
}
563
504
564
505
/// Blends packed double-precision (64-bit) floating-point elements from
@@ -3378,23 +3319,23 @@ mod tests {
3378
3319
unsafe fn test_mm256_blend_pd ( ) {
3379
3320
let a = _mm256_setr_pd ( 4. , 9. , 16. , 25. ) ;
3380
3321
let b = _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ;
3381
- let r = _mm256_blend_pd ( a, b, 0x0 ) ;
3322
+ let r = _mm256_blend_pd :: < 0x0 > ( a, b) ;
3382
3323
assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 9. , 16. , 25. ) ) ;
3383
- let r = _mm256_blend_pd ( a, b, 0x3 ) ;
3324
+ let r = _mm256_blend_pd :: < 0x3 > ( a, b) ;
3384
3325
assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 3. , 16. , 25. ) ) ;
3385
- let r = _mm256_blend_pd ( a, b, 0xF ) ;
3326
+ let r = _mm256_blend_pd :: < 0xF > ( a, b) ;
3386
3327
assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ) ;
3387
3328
}
3388
3329
3389
3330
#[ simd_test( enable = "avx" ) ]
3390
3331
unsafe fn test_mm256_blend_ps ( ) {
3391
3332
let a = _mm256_setr_ps ( 1. , 4. , 5. , 8. , 9. , 12. , 13. , 16. ) ;
3392
3333
let b = _mm256_setr_ps ( 2. , 3. , 6. , 7. , 10. , 11. , 14. , 15. ) ;
3393
- let r = _mm256_blend_ps ( a, b, 0x0 ) ;
3334
+ let r = _mm256_blend_ps :: < 0x0 > ( a, b) ;
3394
3335
assert_eq_m256 ( r, _mm256_setr_ps ( 1. , 4. , 5. , 8. , 9. , 12. , 13. , 16. ) ) ;
3395
- let r = _mm256_blend_ps ( a, b, 0x3 ) ;
3336
+ let r = _mm256_blend_ps :: < 0x3 > ( a, b) ;
3396
3337
assert_eq_m256 ( r, _mm256_setr_ps ( 2. , 3. , 5. , 8. , 9. , 12. , 13. , 16. ) ) ;
3397
- let r = _mm256_blend_ps ( a, b, 0xF ) ;
3338
+ let r = _mm256_blend_ps :: < 0xF > ( a, b) ;
3398
3339
assert_eq_m256 ( r, _mm256_setr_ps ( 2. , 3. , 6. , 7. , 9. , 12. , 13. , 16. ) ) ;
3399
3340
}
3400
3341
0 commit comments