@@ -950,16 +950,21 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1)
950
950
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
951
951
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
952
952
; CHECK-NEXT: call void @llvm.donothing()
953
+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
954
+ ; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 1
955
+ ; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP9]]
956
+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
957
+ ; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 1
958
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
953
959
; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
954
960
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
955
961
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
956
- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
957
- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
958
- ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
959
- ; CHECK: 7:
962
+ ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
963
+ ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
964
+ ; CHECK: 12:
960
965
; CHECK-NEXT: call void @__msan_warning_noreturn()
961
966
; CHECK-NEXT: unreachable
962
- ; CHECK: 8 :
967
+ ; CHECK: 13 :
963
968
; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
964
969
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
965
970
; CHECK-NEXT: ret <2 x double> [[RES1]]
@@ -975,16 +980,27 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
975
980
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
976
981
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
977
982
; CHECK-NEXT: call void @llvm.donothing()
983
+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
984
+ ; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP3]], 3
985
+ ; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP15]]
986
+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
987
+ ; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 3
988
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
989
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
990
+ ; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 3
991
+ ; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
992
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
993
+ ; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
994
+ ; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
978
995
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
979
996
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
980
997
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
981
- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
982
- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
983
- ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
984
- ; CHECK: 7:
998
+ ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
999
+ ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
1000
+ ; CHECK: 18:
985
1001
; CHECK-NEXT: call void @__msan_warning_noreturn()
986
1002
; CHECK-NEXT: unreachable
987
- ; CHECK: 8 :
1003
+ ; CHECK: 19 :
988
1004
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
989
1005
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
990
1006
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -1014,16 +1030,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #
1014
1030
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1015
1031
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1016
1032
; CHECK-NEXT: call void @llvm.donothing()
1033
+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
1034
+ ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP3]], 3
1035
+ ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP15]]
1036
+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1
1037
+ ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 3
1038
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
1039
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2
1040
+ ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3
1041
+ ; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
1042
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3
1043
+ ; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
1044
+ ; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
1017
1045
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
1018
1046
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
1019
1047
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
1020
- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1021
- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
1022
- ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
1023
- ; CHECK: 7:
1048
+ ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP14]], 0
1049
+ ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
1050
+ ; CHECK: 18:
1024
1051
; CHECK-NEXT: call void @__msan_warning_noreturn()
1025
1052
; CHECK-NEXT: unreachable
1026
- ; CHECK: 8 :
1053
+ ; CHECK: 19 :
1027
1054
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
1028
1055
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
1029
1056
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1047,16 +1074,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
1047
1074
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
1048
1075
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1049
1076
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
1077
+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
1078
+ ; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3
1079
+ ; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP8]], [[TMP9]]
1080
+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1
1081
+ ; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 3
1082
+ ; CHECK-NEXT: [[TMP13:%.*]] = or i32 [[TMP11]], [[TMP12]]
1083
+ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2
1084
+ ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3
1085
+ ; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], [[TMP15]]
1086
+ ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3
1087
+ ; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3
1088
+ ; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]]
1050
1089
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
1051
1090
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
1052
1091
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
1053
- ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
1054
- ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
1055
- ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
1056
- ; CHECK: 12:
1092
+ ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP19]], 0
1093
+ ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
1094
+ ; CHECK: 23:
1057
1095
; CHECK-NEXT: call void @__msan_warning_noreturn()
1058
1096
; CHECK-NEXT: unreachable
1059
- ; CHECK: 13 :
1097
+ ; CHECK: 24 :
1060
1098
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
1061
1099
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
1062
1100
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1073,16 +1111,39 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a
1073
1111
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1074
1112
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1075
1113
; CHECK-NEXT: call void @llvm.donothing()
1114
+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
1115
+ ; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP3]], 7
1116
+ ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP27]]
1117
+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
1118
+ ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 7
1119
+ ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
1120
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
1121
+ ; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 7
1122
+ ; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
1123
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
1124
+ ; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
1125
+ ; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
1126
+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
1127
+ ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7
1128
+ ; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
1129
+ ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
1130
+ ; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
1131
+ ; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
1132
+ ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
1133
+ ; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
1134
+ ; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
1135
+ ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
1136
+ ; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7
1137
+ ; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
1076
1138
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
1077
1139
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
1078
1140
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
1079
- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1080
- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
1081
- ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
1082
- ; CHECK: 7:
1141
+ ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP26]], 0
1142
+ ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
1143
+ ; CHECK: 30:
1083
1144
; CHECK-NEXT: call void @__msan_warning_noreturn()
1084
1145
; CHECK-NEXT: unreachable
1085
- ; CHECK: 8 :
1146
+ ; CHECK: 31 :
1086
1147
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
1087
1148
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
1088
1149
; CHECK-NEXT: ret <8 x float> [[RES1]]
0 commit comments