@@ -833,13 +833,11 @@ define i32 @smax_i32_same(<4 x i32> %a, <4 x i32> %b) {
833
833
define float @nested_fadd_f32 (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
834
834
; CHECK-SD-LABEL: nested_fadd_f32:
835
835
; CHECK-SD: // %bb.0:
836
- ; CHECK-SD-NEXT: faddp v1.4s, v1.4s, v1.4s
836
+ ; CHECK-SD-NEXT: fadd v0.4s, v0.4s, v1.4s
837
+ ; CHECK-SD-NEXT: fadd s2, s2, s3
837
838
; CHECK-SD-NEXT: faddp v0.4s, v0.4s, v0.4s
838
- ; CHECK-SD-NEXT: faddp s1, v1.2s
839
839
; CHECK-SD-NEXT: faddp s0, v0.2s
840
- ; CHECK-SD-NEXT: fadd s1, s1, s3
841
840
; CHECK-SD-NEXT: fadd s0, s0, s2
842
- ; CHECK-SD-NEXT: fadd s0, s0, s1
843
841
; CHECK-SD-NEXT: ret
844
842
;
845
843
; CHECK-GI-LABEL: nested_fadd_f32:
@@ -905,15 +903,12 @@ define float @nested_fadd_f32_slow(<4 x float> %a, <4 x float> %b, float %c, flo
905
903
define float @nested_mul_f32 (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
906
904
; CHECK-SD-LABEL: nested_mul_f32:
907
905
; CHECK-SD: // %bb.0:
908
- ; CHECK-SD-NEXT: ext v4.16b, v1.16b, v1.16b, #8
909
- ; CHECK-SD-NEXT: ext v5.16b, v0.16b, v0.16b, #8
910
- ; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v4.2s
911
- ; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v5.2s
912
- ; CHECK-SD-NEXT: fmul s1, s1, v1.s[1]
906
+ ; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
907
+ ; CHECK-SD-NEXT: fmul s2, s2, s3
908
+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
909
+ ; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
913
910
; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
914
- ; CHECK-SD-NEXT: fmul s1, s1, s3
915
911
; CHECK-SD-NEXT: fmul s0, s0, s2
916
- ; CHECK-SD-NEXT: fmul s0, s0, s1
917
912
; CHECK-SD-NEXT: ret
918
913
;
919
914
; CHECK-GI-LABEL: nested_mul_f32:
@@ -941,12 +936,10 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d)
941
936
define i32 @nested_add_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
942
937
; CHECK-SD-LABEL: nested_add_i32:
943
938
; CHECK-SD: // %bb.0:
944
- ; CHECK-SD-NEXT: addv s1, v1.4s
939
+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
940
+ ; CHECK-SD-NEXT: add w8, w0, w1
945
941
; CHECK-SD-NEXT: addv s0, v0.4s
946
- ; CHECK-SD-NEXT: fmov w8, s1
947
942
; CHECK-SD-NEXT: fmov w9, s0
948
- ; CHECK-SD-NEXT: add w9, w9, w0
949
- ; CHECK-SD-NEXT: add w8, w8, w1
950
943
; CHECK-SD-NEXT: add w0, w9, w8
951
944
; CHECK-SD-NEXT: ret
952
945
;
@@ -971,12 +964,10 @@ define i32 @nested_add_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
971
964
define i32 @nested_add_c1_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
972
965
; CHECK-SD-LABEL: nested_add_c1_i32:
973
966
; CHECK-SD: // %bb.0:
974
- ; CHECK-SD-NEXT: addv s1, v1.4s
967
+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
968
+ ; CHECK-SD-NEXT: add w8, w0, w1
975
969
; CHECK-SD-NEXT: addv s0, v0.4s
976
- ; CHECK-SD-NEXT: fmov w8, s1
977
970
; CHECK-SD-NEXT: fmov w9, s0
978
- ; CHECK-SD-NEXT: add w9, w0, w9
979
- ; CHECK-SD-NEXT: add w8, w8, w1
980
971
; CHECK-SD-NEXT: add w0, w9, w8
981
972
; CHECK-SD-NEXT: ret
982
973
;
@@ -1001,12 +992,10 @@ define i32 @nested_add_c1_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1001
992
define i32 @nested_add_c2_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1002
993
; CHECK-SD-LABEL: nested_add_c2_i32:
1003
994
; CHECK-SD: // %bb.0:
1004
- ; CHECK-SD-NEXT: addv s1, v1.4s
995
+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
996
+ ; CHECK-SD-NEXT: add w8, w0, w1
1005
997
; CHECK-SD-NEXT: addv s0, v0.4s
1006
- ; CHECK-SD-NEXT: fmov w8, s1
1007
998
; CHECK-SD-NEXT: fmov w9, s0
1008
- ; CHECK-SD-NEXT: add w9, w9, w0
1009
- ; CHECK-SD-NEXT: add w8, w1, w8
1010
999
; CHECK-SD-NEXT: add w0, w9, w8
1011
1000
; CHECK-SD-NEXT: ret
1012
1001
;
@@ -1065,19 +1054,14 @@ define i32 @nested_add_manyreduct_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
1065
1054
define i32 @nested_mul_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1066
1055
; CHECK-SD-LABEL: nested_mul_i32:
1067
1056
; CHECK-SD: // %bb.0:
1068
- ; CHECK-SD-NEXT: ext v3.16b, v0.16b , v0.16b, #8
1069
- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1070
- ; CHECK-SD-NEXT: mul v0.2s , v0.2s, v3.2s
1071
- ; CHECK-SD-NEXT: mul v1 .2s, v1 .2s, v2 .2s
1072
- ; CHECK-SD-NEXT: mov w8 , v0.s[1]
1057
+ ; CHECK-SD-NEXT: mul v0.4s , v0.4s, v1.4s
1058
+ ; CHECK-SD-NEXT: mul w8, w0, w1
1059
+ ; CHECK-SD-NEXT: ext v1.16b , v0.16b, v0.16b, #8
1060
+ ; CHECK-SD-NEXT: mul v0 .2s, v0 .2s, v1 .2s
1061
+ ; CHECK-SD-NEXT: mov w9 , v0.s[1]
1073
1062
; CHECK-SD-NEXT: fmov w10, s0
1074
- ; CHECK-SD-NEXT: mov w9, v1.s[1]
1075
- ; CHECK-SD-NEXT: mul w8, w10, w8
1076
- ; CHECK-SD-NEXT: fmov w10, s1
1077
1063
; CHECK-SD-NEXT: mul w9, w10, w9
1078
- ; CHECK-SD-NEXT: mul w8, w8, w0
1079
- ; CHECK-SD-NEXT: mul w9, w9, w1
1080
- ; CHECK-SD-NEXT: mul w0, w8, w9
1064
+ ; CHECK-SD-NEXT: mul w0, w9, w8
1081
1065
; CHECK-SD-NEXT: ret
1082
1066
;
1083
1067
; CHECK-GI-LABEL: nested_mul_i32:
@@ -1107,19 +1091,14 @@ define i32 @nested_mul_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1107
1091
define i32 @nested_and_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1108
1092
; CHECK-SD-LABEL: nested_and_i32:
1109
1093
; CHECK-SD: // %bb.0:
1110
- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1111
- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1112
- ; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
1113
- ; CHECK-SD-NEXT: and v0.8b, v0.8b, v3.8b
1114
- ; CHECK-SD-NEXT: fmov x8, d1
1094
+ ; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
1095
+ ; CHECK-SD-NEXT: and w8, w0, w1
1096
+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1097
+ ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
1115
1098
; CHECK-SD-NEXT: fmov x9, d0
1116
1099
; CHECK-SD-NEXT: lsr x10, x9, #32
1117
- ; CHECK-SD-NEXT: lsr x11, x8, #32
1118
- ; CHECK-SD-NEXT: and w9, w9, w0
1119
- ; CHECK-SD-NEXT: and w8, w8, w1
1120
- ; CHECK-SD-NEXT: and w9, w9, w10
1121
- ; CHECK-SD-NEXT: and w8, w8, w11
1122
- ; CHECK-SD-NEXT: and w0, w9, w8
1100
+ ; CHECK-SD-NEXT: and w8, w9, w8
1101
+ ; CHECK-SD-NEXT: and w0, w8, w10
1123
1102
; CHECK-SD-NEXT: ret
1124
1103
;
1125
1104
; CHECK-GI-LABEL: nested_and_i32:
@@ -1149,19 +1128,14 @@ define i32 @nested_and_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1149
1128
define i32 @nested_or_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1150
1129
; CHECK-SD-LABEL: nested_or_i32:
1151
1130
; CHECK-SD: // %bb.0:
1152
- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1153
- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1154
- ; CHECK-SD-NEXT: orr v1.8b, v1.8b, v2.8b
1155
- ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v3.8b
1156
- ; CHECK-SD-NEXT: fmov x8, d1
1131
+ ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
1132
+ ; CHECK-SD-NEXT: orr w8, w0, w1
1133
+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1134
+ ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
1157
1135
; CHECK-SD-NEXT: fmov x9, d0
1158
1136
; CHECK-SD-NEXT: lsr x10, x9, #32
1159
- ; CHECK-SD-NEXT: lsr x11, x8, #32
1160
- ; CHECK-SD-NEXT: orr w9, w9, w0
1161
- ; CHECK-SD-NEXT: orr w8, w8, w1
1162
- ; CHECK-SD-NEXT: orr w9, w9, w10
1163
- ; CHECK-SD-NEXT: orr w8, w8, w11
1164
- ; CHECK-SD-NEXT: orr w0, w9, w8
1137
+ ; CHECK-SD-NEXT: orr w8, w9, w8
1138
+ ; CHECK-SD-NEXT: orr w0, w8, w10
1165
1139
; CHECK-SD-NEXT: ret
1166
1140
;
1167
1141
; CHECK-GI-LABEL: nested_or_i32:
@@ -1191,19 +1165,14 @@ define i32 @nested_or_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1191
1165
define i32 @nested_xor_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1192
1166
; CHECK-SD-LABEL: nested_xor_i32:
1193
1167
; CHECK-SD: // %bb.0:
1194
- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1195
- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1196
- ; CHECK-SD-NEXT: eor v1.8b, v1.8b, v2.8b
1197
- ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v3.8b
1198
- ; CHECK-SD-NEXT: fmov x8, d1
1168
+ ; CHECK-SD-NEXT: eor v0.16b, v0.16b, v1.16b
1169
+ ; CHECK-SD-NEXT: eor w8, w0, w1
1170
+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1171
+ ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
1199
1172
; CHECK-SD-NEXT: fmov x9, d0
1200
1173
; CHECK-SD-NEXT: lsr x10, x9, #32
1201
- ; CHECK-SD-NEXT: lsr x11, x8, #32
1202
- ; CHECK-SD-NEXT: eor w9, w9, w0
1203
- ; CHECK-SD-NEXT: eor w8, w8, w1
1204
- ; CHECK-SD-NEXT: eor w9, w9, w10
1205
- ; CHECK-SD-NEXT: eor w8, w8, w11
1206
- ; CHECK-SD-NEXT: eor w0, w9, w8
1174
+ ; CHECK-SD-NEXT: eor w8, w9, w8
1175
+ ; CHECK-SD-NEXT: eor w0, w8, w10
1207
1176
; CHECK-SD-NEXT: ret
1208
1177
;
1209
1178
; CHECK-GI-LABEL: nested_xor_i32:
@@ -1233,14 +1202,11 @@ define i32 @nested_xor_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1233
1202
define i32 @nested_smin_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1234
1203
; CHECK-SD-LABEL: nested_smin_i32:
1235
1204
; CHECK-SD: // %bb.0:
1205
+ ; CHECK-SD-NEXT: smin v0.4s, v0.4s, v1.4s
1206
+ ; CHECK-SD-NEXT: cmp w0, w1
1207
+ ; CHECK-SD-NEXT: csel w8, w0, w1, lt
1236
1208
; CHECK-SD-NEXT: sminv s0, v0.4s
1237
- ; CHECK-SD-NEXT: sminv s1, v1.4s
1238
1209
; CHECK-SD-NEXT: fmov w9, s0
1239
- ; CHECK-SD-NEXT: fmov w8, s1
1240
- ; CHECK-SD-NEXT: cmp w9, w0
1241
- ; CHECK-SD-NEXT: csel w9, w9, w0, lt
1242
- ; CHECK-SD-NEXT: cmp w8, w1
1243
- ; CHECK-SD-NEXT: csel w8, w8, w1, lt
1244
1210
; CHECK-SD-NEXT: cmp w9, w8
1245
1211
; CHECK-SD-NEXT: csel w0, w9, w8, lt
1246
1212
; CHECK-SD-NEXT: ret
@@ -1269,14 +1235,11 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1269
1235
define i32 @nested_smax_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1270
1236
; CHECK-SD-LABEL: nested_smax_i32:
1271
1237
; CHECK-SD: // %bb.0:
1238
+ ; CHECK-SD-NEXT: smax v0.4s, v0.4s, v1.4s
1239
+ ; CHECK-SD-NEXT: cmp w0, w1
1240
+ ; CHECK-SD-NEXT: csel w8, w0, w1, gt
1272
1241
; CHECK-SD-NEXT: smaxv s0, v0.4s
1273
- ; CHECK-SD-NEXT: smaxv s1, v1.4s
1274
1242
; CHECK-SD-NEXT: fmov w9, s0
1275
- ; CHECK-SD-NEXT: fmov w8, s1
1276
- ; CHECK-SD-NEXT: cmp w9, w0
1277
- ; CHECK-SD-NEXT: csel w9, w9, w0, gt
1278
- ; CHECK-SD-NEXT: cmp w8, w1
1279
- ; CHECK-SD-NEXT: csel w8, w8, w1, gt
1280
1243
; CHECK-SD-NEXT: cmp w9, w8
1281
1244
; CHECK-SD-NEXT: csel w0, w9, w8, gt
1282
1245
; CHECK-SD-NEXT: ret
@@ -1305,14 +1268,11 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1305
1268
define i32 @nested_umin_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1306
1269
; CHECK-SD-LABEL: nested_umin_i32:
1307
1270
; CHECK-SD: // %bb.0:
1271
+ ; CHECK-SD-NEXT: umin v0.4s, v0.4s, v1.4s
1272
+ ; CHECK-SD-NEXT: cmp w0, w1
1273
+ ; CHECK-SD-NEXT: csel w8, w0, w1, lo
1308
1274
; CHECK-SD-NEXT: uminv s0, v0.4s
1309
- ; CHECK-SD-NEXT: uminv s1, v1.4s
1310
1275
; CHECK-SD-NEXT: fmov w9, s0
1311
- ; CHECK-SD-NEXT: fmov w8, s1
1312
- ; CHECK-SD-NEXT: cmp w9, w0
1313
- ; CHECK-SD-NEXT: csel w9, w9, w0, lo
1314
- ; CHECK-SD-NEXT: cmp w8, w1
1315
- ; CHECK-SD-NEXT: csel w8, w8, w1, lo
1316
1276
; CHECK-SD-NEXT: cmp w9, w8
1317
1277
; CHECK-SD-NEXT: csel w0, w9, w8, lo
1318
1278
; CHECK-SD-NEXT: ret
@@ -1341,14 +1301,11 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1341
1301
define i32 @nested_umax_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1342
1302
; CHECK-SD-LABEL: nested_umax_i32:
1343
1303
; CHECK-SD: // %bb.0:
1304
+ ; CHECK-SD-NEXT: umax v0.4s, v0.4s, v1.4s
1305
+ ; CHECK-SD-NEXT: cmp w0, w1
1306
+ ; CHECK-SD-NEXT: csel w8, w0, w1, hi
1344
1307
; CHECK-SD-NEXT: umaxv s0, v0.4s
1345
- ; CHECK-SD-NEXT: umaxv s1, v1.4s
1346
1308
; CHECK-SD-NEXT: fmov w9, s0
1347
- ; CHECK-SD-NEXT: fmov w8, s1
1348
- ; CHECK-SD-NEXT: cmp w9, w0
1349
- ; CHECK-SD-NEXT: csel w9, w9, w0, hi
1350
- ; CHECK-SD-NEXT: cmp w8, w1
1351
- ; CHECK-SD-NEXT: csel w8, w8, w1, hi
1352
1309
; CHECK-SD-NEXT: cmp w9, w8
1353
1310
; CHECK-SD-NEXT: csel w0, w9, w8, hi
1354
1311
; CHECK-SD-NEXT: ret
@@ -1377,11 +1334,10 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1377
1334
define float @nested_fmin_float (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
1378
1335
; CHECK-SD-LABEL: nested_fmin_float:
1379
1336
; CHECK-SD: // %bb.0:
1380
- ; CHECK-SD-NEXT: fminnmv s1, v1.4s
1337
+ ; CHECK-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s
1338
+ ; CHECK-SD-NEXT: fminnm s2, s2, s3
1381
1339
; CHECK-SD-NEXT: fminnmv s0, v0.4s
1382
- ; CHECK-SD-NEXT: fminnm s1, s1, s3
1383
1340
; CHECK-SD-NEXT: fminnm s0, s0, s2
1384
- ; CHECK-SD-NEXT: fminnm s0, s0, s1
1385
1341
; CHECK-SD-NEXT: ret
1386
1342
;
1387
1343
; CHECK-GI-LABEL: nested_fmin_float:
@@ -1403,11 +1359,10 @@ define float @nested_fmin_float(<4 x float> %a, <4 x float> %b, float %c, float
1403
1359
define float @nested_fmax_float (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
1404
1360
; CHECK-SD-LABEL: nested_fmax_float:
1405
1361
; CHECK-SD: // %bb.0:
1406
- ; CHECK-SD-NEXT: fmaxnmv s1, v1.4s
1362
+ ; CHECK-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
1363
+ ; CHECK-SD-NEXT: fmaxnm s2, s2, s3
1407
1364
; CHECK-SD-NEXT: fmaxnmv s0, v0.4s
1408
- ; CHECK-SD-NEXT: fmaxnm s1, s1, s3
1409
1365
; CHECK-SD-NEXT: fmaxnm s0, s0, s2
1410
- ; CHECK-SD-NEXT: fmaxnm s0, s0, s1
1411
1366
; CHECK-SD-NEXT: ret
1412
1367
;
1413
1368
; CHECK-GI-LABEL: nested_fmax_float:
0 commit comments