@@ -937,6 +937,77 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
ret <16 x i32> %d
}

+ ; PR109790
+ define <16 x i16> @zext_mulhuw_v16i16_negative_constant(<16 x i16> %a) {
+ ; SSE-LABEL: zext_mulhuw_v16i16_negative_constant:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+ ; SSE-NEXT: pand %xmm2, %xmm1
+ ; SSE-NEXT: pand %xmm2, %xmm0
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536]
+ ; SSE-NEXT: pmulhw %xmm2, %xmm0
+ ; SSE-NEXT: pmulhw %xmm2, %xmm1
+ ; SSE-NEXT: retq
+ ;
+ ; AVX-LABEL: zext_mulhuw_v16i16_negative_constant:
+ ; AVX: # %bb.0:
+ ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
+ ; AVX-NEXT: retq
+ %k = and <16 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+ %x = zext nneg <16 x i16> %k to <16 x i32>
+ %m = mul nsw <16 x i32> %x, <i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000>
+ %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %t = trunc nuw <16 x i32> %s to <16 x i16>
+ ret <16 x i16> %t
+ }
+
+ ; PR109790
+ define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) {
+ ; SSE-LABEL: zext_mulhuw_v16i16_positive_constant:
+ ; SSE: # %bb.0:
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+ ; SSE-NEXT: pand %xmm2, %xmm1
+ ; SSE-NEXT: pand %xmm2, %xmm0
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1000,1000,1000,1000,1000,1000,1000,1000]
+ ; SSE-NEXT: pmulhw %xmm2, %xmm0
+ ; SSE-NEXT: pmulhw %xmm2, %xmm1
+ ; SSE-NEXT: retq
+ ;
+ ; AVX2-LABEL: zext_mulhuw_v16i16_positive_constant:
+ ; AVX2: # %bb.0:
+ ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+ ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+ ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
+ ; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
+ ; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
+ ; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0
+ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512F-LABEL: zext_mulhuw_v16i16_positive_constant:
+ ; AVX512F: # %bb.0:
+ ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]
+ ; AVX512F-NEXT: retq
+ ;
+ ; AVX512BW-LABEL: zext_mulhuw_v16i16_positive_constant:
+ ; AVX512BW: # %bb.0:
+ ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+ ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+ ; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
+ ; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+ ; AVX512BW-NEXT: retq
+ %k = and <16 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+ %x = zext nneg <16 x i16> %k to <16 x i32>
+ %m = mul nuw nsw <16 x i32> %x, <i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000>
+ %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %t = trunc nuw nsw <16 x i32> %s to <16 x i16>
+ ret <16 x i16> %t
+ }
+
define <16 x i32> @mulhsw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: mulhsw_v16i16_lshr:
; SSE2: # %bb.0:
@@ -2056,3 +2127,4 @@ define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
+