@@ -194,7 +194,8 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
194
194
#[ cfg_attr( test, assert_instr( minps) ) ]
195
195
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
196
196
pub unsafe fn _mm_min_ps ( a : __m128 , b : __m128 ) -> __m128 {
197
- simd_fmin ( a, b)
197
+ // See the `test_mm_min_ps` test for why this can't be implemented using `simd_fmin`.
198
+ minps ( a, b)
198
199
}
199
200
200
201
/// Compares the first single-precision (32-bit) floating-point element of `a`
@@ -219,7 +220,8 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
219
220
#[ cfg_attr( test, assert_instr( maxps) ) ]
220
221
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
221
222
pub unsafe fn _mm_max_ps ( a : __m128 , b : __m128 ) -> __m128 {
222
- simd_fmax ( a, b)
223
+ // See the `test_mm_min_ps` test for why this can't be implemented using `simd_fmax`.
224
+ maxps ( a, b)
223
225
}
224
226
225
227
/// Bitwise AND of packed single-precision (32-bit) floating-point elements.
@@ -1915,8 +1917,12 @@ extern "C" {
1915
1917
fn rsqrtps ( a : __m128 ) -> __m128 ;
1916
1918
#[ link_name = "llvm.x86.sse.min.ss" ]
1917
1919
fn minss ( a : __m128 , b : __m128 ) -> __m128 ;
1920
+ #[ link_name = "llvm.x86.sse.min.ps" ]
1921
+ fn minps ( a : __m128 , b : __m128 ) -> __m128 ;
1918
1922
#[ link_name = "llvm.x86.sse.max.ss" ]
1919
1923
fn maxss ( a : __m128 , b : __m128 ) -> __m128 ;
1924
+ #[ link_name = "llvm.x86.sse.max.ps" ]
1925
+ fn maxps ( a : __m128 , b : __m128 ) -> __m128 ;
1920
1926
#[ link_name = "llvm.x86.sse.movmsk.ps" ]
1921
1927
fn movmskps ( a : __m128 ) -> i32 ;
1922
1928
#[ link_name = "llvm.x86.sse.cmp.ps" ]
@@ -2614,6 +2620,21 @@ mod tests {
2614
2620
let b = _mm_setr_ps ( -100.0 , 20.0 , 0.0 , -5.0 ) ;
2615
2621
let r = _mm_min_ps ( a, b) ;
2616
2622
assert_eq_m128 ( r, _mm_setr_ps ( -100.0 , 5.0 , 0.0 , -10.0 ) ) ;
2623
+
2624
+ // `_mm_min_ps` can **not** be implemented using the `simd_fmin` Rust intrinsic. `simd_fmin`
2625
+ // is lowered by the LLVM codegen backend to the `llvm.minnum.v*` LLVM intrinsic. This intrinsic
2626
+ // doesn't specify how -0.0 is handled. Unfortunately it happens to behave differently from
2627
+ // the `minps` x86 instruction on x86. With the `llvm.minnum.v*` LLVM intrinsic, `r1` would
2628
+ // equal `a` and `r2` would equal `b`.
2629
+ let a = _mm_setr_ps ( -0.0 , 0.0 , 0.0 , 0.0 ) ;
2630
+ let b = _mm_setr_ps ( 0.0 , 0.0 , 0.0 , 0.0 ) ;
2631
+ let r1: [ u8 ; 16 ] = transmute ( _mm_min_ps ( a, b) ) ;
2632
+ let r2: [ u8 ; 16 ] = transmute ( _mm_min_ps ( b, a) ) ;
2633
+ let a: [ u8 ; 16 ] = transmute ( a) ;
2634
+ let b: [ u8 ; 16 ] = transmute ( b) ;
2635
+ assert_eq ! ( r1, b) ;
2636
+ assert_eq ! ( r2, a) ;
2637
+ assert_ne ! ( a, b) ; // sanity check that -0.0 is actually present
2617
2638
}
2618
2639
2619
2640
#[ simd_test( enable = "sse" ) ]
0 commit comments