19
19
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
20
21
21
use crate :: core_arch:: x86:: * ;
22
- use crate :: intrinsics:: simd:: simd_fma;
22
+ use crate :: intrinsics:: simd:: { simd_fma, simd_insert, simd_neg} ;
23
+ use crate :: intrinsics:: { fmaf32, fmaf64} ;
23
24
24
25
#[ cfg( test) ]
25
26
use stdarch_test:: assert_instr;
@@ -86,7 +87,7 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
86
87
simd_insert ! (
87
88
a,
88
89
0 ,
89
- _mm_cvtsd_f64( a) . mul_add ( _mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
90
+ fmaf64 ( _mm_cvtsd_f64( a) , _mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
90
91
)
91
92
}
92
93
@@ -104,7 +105,7 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
104
105
simd_insert ! (
105
106
a,
106
107
0 ,
107
- _mm_cvtss_f32( a) . mul_add ( _mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
108
+ fmaf32 ( _mm_cvtss_f32( a) , _mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
108
109
)
109
110
}
110
111
@@ -222,7 +223,7 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
222
223
simd_insert ! (
223
224
a,
224
225
0 ,
225
- _mm_cvtsd_f64( a) . mul_add ( _mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
226
+ fmaf64 ( _mm_cvtsd_f64( a) , _mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
226
227
)
227
228
}
228
229
@@ -240,7 +241,7 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
240
241
simd_insert ! (
241
242
a,
242
243
0 ,
243
- _mm_cvtss_f32( a) . mul_add ( _mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
244
+ fmaf32 ( _mm_cvtss_f32( a) , _mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
244
245
)
245
246
}
246
247
@@ -358,7 +359,7 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
358
359
simd_insert ! (
359
360
a,
360
361
0 ,
361
- _mm_cvtsd_f64( a) . mul_add ( -_mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
362
+ fmaf64 ( _mm_cvtsd_f64( a) , -_mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
362
363
)
363
364
}
364
365
@@ -376,7 +377,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
376
377
simd_insert ! (
377
378
a,
378
379
0 ,
379
- _mm_cvtss_f32( a) . mul_add ( -_mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
380
+ fmaf32 ( _mm_cvtss_f32( a) , -_mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
380
381
)
381
382
}
382
383
@@ -447,7 +448,7 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
447
448
simd_insert ! (
448
449
a,
449
450
0 ,
450
- _mm_cvtsd_f64( a) . mul_add ( -_mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
451
+ fmaf64 ( _mm_cvtsd_f64( a) , -_mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
451
452
)
452
453
}
453
454
@@ -466,7 +467,7 @@ pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
466
467
simd_insert ! (
467
468
a,
468
469
0 ,
469
- _mm_cvtss_f32( a) . mul_add ( -_mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
470
+ fmaf32 ( _mm_cvtss_f32( a) , -_mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
470
471
)
471
472
}
472
473
0 commit comments