Skip to content

Commit 645c0d5

Browse files
authored
[AArch64][GlobalISel] Ensure we have an insert-subreg v4i32 GPR pattern (#142724)
This is the GISel equivalent of scalar_to_vector, making sure that when we insert into undef we use an fmov, which avoids the artificial dependency on the previous value of the destination register. This adds v2i32 and v2i64 patterns too for similar reasons.
1 parent 73a4c36 commit 645c0d5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3873
-3782
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7269,6 +7269,13 @@ def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
72697269
(SUBREG_TO_REG (i32 0),
72707270
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
72717271

7272+
def : Pat<(v2i32 (vec_ins_or_scal_vec GPR32:$Rn)),
7273+
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
7274+
def : Pat<(v4i32 (vec_ins_or_scal_vec GPR32:$Rn)),
7275+
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
7276+
def : Pat<(v2i64 (vec_ins_or_scal_vec GPR64:$Rn)),
7277+
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GPR64:$Rn, dsub)>;
7278+
72727279
def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
72737280
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
72747281
def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
@@ -7279,16 +7286,6 @@ def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
72797286
def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
72807287
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
72817288

7282-
def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
7283-
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
7284-
(i32 FPR32:$Rn), ssub))>;
7285-
def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
7286-
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
7287-
(i32 FPR32:$Rn), ssub))>;
7288-
def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
7289-
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
7290-
(i64 FPR64:$Rn), dsub))>;
7291-
72927289
def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
72937290
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
72947291
def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i32 @bar() {
1111
; CHECK-NEXT: movi.2d v0, #0000000000000000
1212
; CHECK-NEXT: umov.b w8, v0[0]
1313
; CHECK-NEXT: umov.b w9, v0[1]
14-
; CHECK-NEXT: mov.s v1[0], w8
14+
; CHECK-NEXT: fmov s1, w8
1515
; CHECK-NEXT: umov.b w8, v0[2]
1616
; CHECK-NEXT: mov.s v1[1], w9
1717
; CHECK-NEXT: umov.b w9, v0[3]

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,12 @@ body: |
5757
; SELECT-NEXT: %r:gpr32 = COPY $w0
5858
; SELECT-NEXT: %q:gpr32 = COPY $w1
5959
; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
60+
; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
6061
; SELECT-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
61-
; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.dsub
62-
; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 0, %r
62+
; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[INSERT_SUBREG]], %subreg.dsub
63+
; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
6364
; SELECT-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
64-
; SELECT-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
65-
; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
66-
; SELECT-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
67-
; SELECT-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr1]].dsub
68-
; SELECT-NEXT: $d0 = COPY [[COPY1]]
65+
; SELECT-NEXT: $d0 = COPY [[COPY]]
6966
; SELECT-NEXT: RET_ReallyLR implicit $d0
7067
%r:_(s32) = COPY $w0
7168
%q:_(s32) = COPY $w1

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7676
; CHECK-GI-NEXT: bic w9, w9, w8
7777
; CHECK-GI-NEXT: and w8, w8, w10
7878
; CHECK-GI-NEXT: orr w8, w9, w8
79-
; CHECK-GI-NEXT: mov v0.s[0], w8
80-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
79+
; CHECK-GI-NEXT: fmov s0, w8
8180
; CHECK-GI-NEXT: ret
8281
%neg = xor <1 x i32> %C, <i32 -1>
8382
%and = and <1 x i32> %neg, %B

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7676
; CHECK-GI-NEXT: and w9, w8, w9
7777
; CHECK-GI-NEXT: bic w8, w10, w8
7878
; CHECK-GI-NEXT: orr w8, w9, w8
79-
; CHECK-GI-NEXT: mov v0.s[0], w8
80-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
79+
; CHECK-GI-NEXT: fmov s0, w8
8180
; CHECK-GI-NEXT: ret
8281
%and = and <1 x i32> %C, %B
8382
%neg = xor <1 x i32> %C, <i32 -1>

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
204204
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
205205
; CHECK-GI-NEXT: ldrh w9, [x2], #16
206206
; CHECK-GI-NEXT: subs x8, x8, #8
207-
; CHECK-GI-NEXT: mov v2.s[0], w9
207+
; CHECK-GI-NEXT: fmov s2, w9
208208
; CHECK-GI-NEXT: mov w9, w0
209209
; CHECK-GI-NEXT: add w0, w0, #8
210210
; CHECK-GI-NEXT: lsl x9, x9, #2

llvm/test/CodeGen/AArch64/aarch64-smull.ll

Lines changed: 56 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2282,14 +2282,14 @@ define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) {
22822282
; CHECK-GI: // %bb.0:
22832283
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
22842284
; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #32
2285-
; CHECK-GI-NEXT: fmov x8, d0
2286-
; CHECK-GI-NEXT: fmov x9, d1
2287-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2288-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2285+
; CHECK-GI-NEXT: fmov x10, d0
2286+
; CHECK-GI-NEXT: fmov x11, d1
2287+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2288+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2289+
; CHECK-GI-NEXT: mul x10, x10, x11
22892290
; CHECK-GI-NEXT: mul x8, x8, x9
2290-
; CHECK-GI-NEXT: mul x9, x10, x11
2291-
; CHECK-GI-NEXT: mov v0.d[0], x8
2292-
; CHECK-GI-NEXT: mov v0.d[1], x9
2291+
; CHECK-GI-NEXT: fmov d0, x10
2292+
; CHECK-GI-NEXT: mov v0.d[1], x8
22932293
; CHECK-GI-NEXT: ret
22942294
%x = ashr <2 x i64> %a, <i64 32, i64 32>
22952295
%y = ashr <2 x i64> %b, <i64 32, i64 32>
@@ -2317,14 +2317,14 @@ define <2 x i64> @asr_const(<2 x i64> %a, <2 x i64> %b) {
23172317
; CHECK-GI-NEXT: adrp x8, .LCPI81_0
23182318
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
23192319
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI81_0]
2320-
; CHECK-GI-NEXT: fmov x8, d0
2321-
; CHECK-GI-NEXT: fmov x9, d1
2322-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2323-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2320+
; CHECK-GI-NEXT: fmov x10, d0
2321+
; CHECK-GI-NEXT: fmov x11, d1
2322+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2323+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2324+
; CHECK-GI-NEXT: mul x10, x10, x11
23242325
; CHECK-GI-NEXT: mul x8, x8, x9
2325-
; CHECK-GI-NEXT: mul x9, x10, x11
2326-
; CHECK-GI-NEXT: mov v0.d[0], x8
2327-
; CHECK-GI-NEXT: mov v0.d[1], x9
2326+
; CHECK-GI-NEXT: fmov d0, x10
2327+
; CHECK-GI-NEXT: mov v0.d[1], x8
23282328
; CHECK-GI-NEXT: ret
23292329
%x = ashr <2 x i64> %a, <i64 32, i64 32>
23302330
%z = mul nsw <2 x i64> %x, <i64 31, i64 31>
@@ -2799,14 +2799,14 @@ define <2 x i64> @sdistribute_v2i32(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>
27992799
; CHECK-GI: // %bb.0: // %entry
28002800
; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
28012801
; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v1.2s
2802-
; CHECK-GI-NEXT: fmov x8, d0
2803-
; CHECK-GI-NEXT: fmov x9, d2
2804-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2805-
; CHECK-GI-NEXT: mov x11, v2.d[1]
2802+
; CHECK-GI-NEXT: fmov x10, d0
2803+
; CHECK-GI-NEXT: fmov x11, d2
2804+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2805+
; CHECK-GI-NEXT: mov x9, v2.d[1]
2806+
; CHECK-GI-NEXT: mul x10, x10, x11
28062807
; CHECK-GI-NEXT: mul x8, x8, x9
2807-
; CHECK-GI-NEXT: mul x9, x10, x11
2808-
; CHECK-GI-NEXT: mov v0.d[0], x8
2809-
; CHECK-GI-NEXT: mov v0.d[1], x9
2808+
; CHECK-GI-NEXT: fmov d0, x10
2809+
; CHECK-GI-NEXT: mov v0.d[1], x8
28102810
; CHECK-GI-NEXT: ret
28112811
entry:
28122812
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2838,14 +2838,14 @@ define <2 x i64> @sdistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
28382838
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
28392839
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI101_0]
28402840
; CHECK-GI-NEXT: saddw v0.2d, v2.2d, v0.2s
2841-
; CHECK-GI-NEXT: fmov x9, d1
2842-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2843-
; CHECK-GI-NEXT: fmov x8, d0
2844-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2841+
; CHECK-GI-NEXT: fmov x11, d1
2842+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2843+
; CHECK-GI-NEXT: fmov x10, d0
2844+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2845+
; CHECK-GI-NEXT: mul x10, x10, x11
28452846
; CHECK-GI-NEXT: mul x8, x8, x9
2846-
; CHECK-GI-NEXT: mul x9, x10, x11
2847-
; CHECK-GI-NEXT: mov v0.d[0], x8
2848-
; CHECK-GI-NEXT: mov v0.d[1], x9
2847+
; CHECK-GI-NEXT: fmov d0, x10
2848+
; CHECK-GI-NEXT: mov v0.d[1], x8
28492849
; CHECK-GI-NEXT: ret
28502850
entry:
28512851
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2875,14 +2875,14 @@ define <2 x i64> @sdistribute_const2_v2i32(<2 x i32> %src1, <2 x i32> %src2) {
28752875
; CHECK-GI-NEXT: adrp x8, .LCPI102_0
28762876
; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v1.2s
28772877
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI102_0]
2878-
; CHECK-GI-NEXT: fmov x8, d0
2879-
; CHECK-GI-NEXT: fmov x9, d1
2880-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2881-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2878+
; CHECK-GI-NEXT: fmov x10, d0
2879+
; CHECK-GI-NEXT: fmov x11, d1
2880+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2881+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2882+
; CHECK-GI-NEXT: mul x10, x10, x11
28822883
; CHECK-GI-NEXT: mul x8, x8, x9
2883-
; CHECK-GI-NEXT: mul x9, x10, x11
2884-
; CHECK-GI-NEXT: mov v0.d[0], x8
2885-
; CHECK-GI-NEXT: mov v0.d[1], x9
2884+
; CHECK-GI-NEXT: fmov d0, x10
2885+
; CHECK-GI-NEXT: mov v0.d[1], x8
28862886
; CHECK-GI-NEXT: ret
28872887
entry:
28882888
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2909,14 +2909,14 @@ define <2 x i64> @udistribute_v2i32(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>
29092909
; CHECK-GI: // %bb.0: // %entry
29102910
; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
29112911
; CHECK-GI-NEXT: uaddl v0.2d, v0.2s, v1.2s
2912-
; CHECK-GI-NEXT: fmov x8, d0
2913-
; CHECK-GI-NEXT: fmov x9, d2
2914-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2915-
; CHECK-GI-NEXT: mov x11, v2.d[1]
2912+
; CHECK-GI-NEXT: fmov x10, d0
2913+
; CHECK-GI-NEXT: fmov x11, d2
2914+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2915+
; CHECK-GI-NEXT: mov x9, v2.d[1]
2916+
; CHECK-GI-NEXT: mul x10, x10, x11
29162917
; CHECK-GI-NEXT: mul x8, x8, x9
2917-
; CHECK-GI-NEXT: mul x9, x10, x11
2918-
; CHECK-GI-NEXT: mov v0.d[0], x8
2919-
; CHECK-GI-NEXT: mov v0.d[1], x9
2918+
; CHECK-GI-NEXT: fmov d0, x10
2919+
; CHECK-GI-NEXT: mov v0.d[1], x8
29202920
; CHECK-GI-NEXT: ret
29212921
entry:
29222922
%4 = zext <2 x i32> %src1 to <2 x i64>
@@ -2948,14 +2948,14 @@ define <2 x i64> @udistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
29482948
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
29492949
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI104_0]
29502950
; CHECK-GI-NEXT: uaddw v0.2d, v2.2d, v0.2s
2951-
; CHECK-GI-NEXT: fmov x9, d1
2952-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2953-
; CHECK-GI-NEXT: fmov x8, d0
2954-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2951+
; CHECK-GI-NEXT: fmov x11, d1
2952+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2953+
; CHECK-GI-NEXT: fmov x10, d0
2954+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2955+
; CHECK-GI-NEXT: mul x10, x10, x11
29552956
; CHECK-GI-NEXT: mul x8, x8, x9
2956-
; CHECK-GI-NEXT: mul x9, x10, x11
2957-
; CHECK-GI-NEXT: mov v0.d[0], x8
2958-
; CHECK-GI-NEXT: mov v0.d[1], x9
2957+
; CHECK-GI-NEXT: fmov d0, x10
2958+
; CHECK-GI-NEXT: mov v0.d[1], x8
29592959
; CHECK-GI-NEXT: ret
29602960
entry:
29612961
%4 = zext <2 x i32> %src1 to <2 x i64>
@@ -2985,14 +2985,14 @@ define <2 x i64> @udistribute_const2_v2i32(<2 x i32> %src1, <2 x i32> %src2) {
29852985
; CHECK-GI-NEXT: adrp x8, .LCPI105_0
29862986
; CHECK-GI-NEXT: uaddl v0.2d, v0.2s, v1.2s
29872987
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI105_0]
2988-
; CHECK-GI-NEXT: fmov x8, d0
2989-
; CHECK-GI-NEXT: fmov x9, d1
2990-
; CHECK-GI-NEXT: mov x10, v0.d[1]
2991-
; CHECK-GI-NEXT: mov x11, v1.d[1]
2988+
; CHECK-GI-NEXT: fmov x10, d0
2989+
; CHECK-GI-NEXT: fmov x11, d1
2990+
; CHECK-GI-NEXT: mov x8, v0.d[1]
2991+
; CHECK-GI-NEXT: mov x9, v1.d[1]
2992+
; CHECK-GI-NEXT: mul x10, x10, x11
29922993
; CHECK-GI-NEXT: mul x8, x8, x9
2993-
; CHECK-GI-NEXT: mul x9, x10, x11
2994-
; CHECK-GI-NEXT: mov v0.d[0], x8
2995-
; CHECK-GI-NEXT: mov v0.d[1], x9
2994+
; CHECK-GI-NEXT: fmov d0, x10
2995+
; CHECK-GI-NEXT: mov v0.d[1], x8
29962996
; CHECK-GI-NEXT: ret
29972997
entry:
29982998
%4 = zext <2 x i32> %src1 to <2 x i64>

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
247247
; CHECK-GI-NEXT: fmov w9, s0
248248
; CHECK-GI-NEXT: cmp w8, #0
249249
; CHECK-GI-NEXT: cneg w8, w9, le
250-
; CHECK-GI-NEXT: mov v0.s[0], w8
251-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
250+
; CHECK-GI-NEXT: fmov s0, w8
252251
; CHECK-GI-NEXT: ret
253252
entry:
254253
%res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 18 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -334,40 +334,25 @@ entry:
334334
}
335335

336336
define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
337-
; CHECK-SD-LABEL: f:
338-
; CHECK-SD: // %bb.0:
339-
; CHECK-SD-NEXT: fmov s0, w0
340-
; CHECK-SD-NEXT: mov.s v0[1], w1
341-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
342-
; CHECK-SD-NEXT: ret
343-
;
344-
; CHECK-GI-LABEL: f:
345-
; CHECK-GI: // %bb.0:
346-
; CHECK-GI-NEXT: mov.s v0[0], w0
347-
; CHECK-GI-NEXT: mov.s v0[1], w1
348-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
349-
; CHECK-GI-NEXT: ret
337+
; CHECK-LABEL: f:
338+
; CHECK: // %bb.0:
339+
; CHECK-NEXT: fmov s0, w0
340+
; CHECK-NEXT: mov.s v0[1], w1
341+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
342+
; CHECK-NEXT: ret
350343
%vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
351344
%vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
352345
ret <2 x i32> %vecinit1
353346
}
354347

355348
define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
356-
; CHECK-SD-LABEL: g:
357-
; CHECK-SD: // %bb.0:
358-
; CHECK-SD-NEXT: fmov s0, w0
359-
; CHECK-SD-NEXT: mov.s v0[1], w1
360-
; CHECK-SD-NEXT: mov.s v0[2], w1
361-
; CHECK-SD-NEXT: mov.s v0[3], w0
362-
; CHECK-SD-NEXT: ret
363-
;
364-
; CHECK-GI-LABEL: g:
365-
; CHECK-GI: // %bb.0:
366-
; CHECK-GI-NEXT: mov.s v0[0], w0
367-
; CHECK-GI-NEXT: mov.s v0[1], w1
368-
; CHECK-GI-NEXT: mov.s v0[2], w1
369-
; CHECK-GI-NEXT: mov.s v0[3], w0
370-
; CHECK-GI-NEXT: ret
349+
; CHECK-LABEL: g:
350+
; CHECK: // %bb.0:
351+
; CHECK-NEXT: fmov s0, w0
352+
; CHECK-NEXT: mov.s v0[1], w1
353+
; CHECK-NEXT: mov.s v0[2], w1
354+
; CHECK-NEXT: mov.s v0[3], w0
355+
; CHECK-NEXT: ret
371356
%vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
372357
%vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
373358
%vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
@@ -376,17 +361,11 @@ define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
376361
}
377362

378363
define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
379-
; CHECK-SD-LABEL: h:
380-
; CHECK-SD: // %bb.0:
381-
; CHECK-SD-NEXT: fmov d0, x0
382-
; CHECK-SD-NEXT: mov.d v0[1], x1
383-
; CHECK-SD-NEXT: ret
384-
;
385-
; CHECK-GI-LABEL: h:
386-
; CHECK-GI: // %bb.0:
387-
; CHECK-GI-NEXT: mov.d v0[0], x0
388-
; CHECK-GI-NEXT: mov.d v0[1], x1
389-
; CHECK-GI-NEXT: ret
364+
; CHECK-LABEL: h:
365+
; CHECK: // %bb.0:
366+
; CHECK-NEXT: fmov d0, x0
367+
; CHECK-NEXT: mov.d v0[1], x1
368+
; CHECK-NEXT: ret
390369
%vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
391370
%vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
392371
ret <2 x i64> %vecinit1

0 commit comments

Comments
 (0)