@@ -682,6 +682,21 @@ define i32 @signbits_cmpss(float %0, float %1) {
682
682
ret i32 %4
683
683
}
684
684
685
+ define i32 @signbits_cmpss_int (<4 x float > %0 , <4 x float > %1 ) { ; Codegen test: scalar-float compare intrinsic -> bitcast to i32 lanes -> extract lane 0 -> ashr by 31.
686
+ ; CHECK-LABEL: signbits_cmpss_int:
687
+ ; CHECK: # %bb.0:
688
+ ; CHECK-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0
689
+ ; CHECK-NEXT: vextractps $0, %xmm0, %eax
690
+ ; CHECK-NEXT: sarl $31, %eax
691
+ ; CHECK-NEXT: ret{{[l|q]}}
692
+ %3 = tail call <4 x float > @llvm.x86.sse.cmp.ss (<4 x float > %0 , <4 x float > %1 , i8 0 ) ; predicate immediate 0; the expected output spells this compare as vcmpeqss
693
+ %4 = bitcast <4 x float > %3 to <4 x i32 > ; reinterpret the compare result as four i32 lanes (no value conversion)
694
+ %5 = extractelement <4 x i32 > %4 , i32 0 ; lane 0 only, matching the expected vextractps $0
695
+ %6 = ashr i32 %5 , 31 ; arithmetic shift replicates bit 31 across the whole i32; the expected output still contains sarl $31. NOTE(review): presumably this test tracks whether sign-bit analysis can fold the shift through the intrinsic - confirm against the commit description
696
+ ret i32 %6
697
+ }
698
+ declare <4 x float > @llvm.x86.sse.cmp.ss (<4 x float >, <4 x float >, i8 immarg) ; intrinsic called by signbits_cmpss_int; its i8 predicate operand is marked immarg
699
+
685
700
define i64 @signbits_cmpsd (double %0 , double %1 ) {
686
701
; X86-LABEL: signbits_cmpsd:
687
702
; X86: # %bb.0:
@@ -705,6 +720,29 @@ define i64 @signbits_cmpsd(double %0, double %1) {
705
720
ret i64 %4
706
721
}
707
722
723
+ define i64 @signbits_cmpsd_int (<2 x double > %0 , <2 x double > %1 ) { ; Codegen test: scalar-double compare intrinsic -> bitcast to i64 lanes -> extract lane 0 -> ashr by 63; checked separately for the 32-bit and 64-bit runs.
724
+ ; X86-LABEL: signbits_cmpsd_int:
725
+ ; X86: # %bb.0:
726
+ ; X86-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
727
+ ; X86-NEXT: vextractps $1, %xmm0, %eax
728
+ ; X86-NEXT: sarl $31, %eax
729
+ ; X86-NEXT: movl %eax, %edx
730
+ ; X86-NEXT: retl
731
+ ;
732
+ ; X64-LABEL: signbits_cmpsd_int:
733
+ ; X64: # %bb.0:
734
+ ; X64-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
735
+ ; X64-NEXT: vmovq %xmm0, %rax
736
+ ; X64-NEXT: sarq $63, %rax
737
+ ; X64-NEXT: retq
738
+ %3 = tail call <2 x double > @llvm.x86.sse2.cmp.sd (<2 x double > %0 , <2 x double > %1 , i8 0 ) ; predicate immediate 0; both expected outputs spell this compare as vcmpeqsd
739
+ %4 = bitcast <2 x double > %3 to <2 x i64 > ; reinterpret the compare result as two i64 lanes (no value conversion)
740
+ %5 = extractelement <2 x i64 > %4 , i32 0 ; lane 0 only; the 64-bit expectation moves it out with vmovq, the 32-bit one extracts 32-bit element 1 with vextractps
741
+ %6 = ashr i64 %5 , 63 ; arithmetic shift replicates bit 63 across the whole i64; expected output keeps the shift (sarq $63, or sarl $31 plus a copy into %edx on 32-bit). NOTE(review): %edx is presumably the high half of the i64 return value - confirm against the 32-bit calling convention
742
+ ret i64 %6
743
+ }
744
+ declare <2 x double > @llvm.x86.sse2.cmp.sd (<2 x double >, <2 x double >, i8 immarg) ; intrinsic called by signbits_cmpsd_int; its i8 predicate operand is marked immarg
745
+
708
746
; Make sure we can preserve sign bit information into the second basic block
709
747
; so we can avoid having to shift bit 0 into bit 7 for each element due to
710
748
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
0 commit comments