Skip to content

Commit ec46232

Browse files
committed
[DAGCombiner] Fold ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
This seems like an obvious fold, which leads to a few improvements.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D118920
1 parent c962038 commit ec46232

File tree

6 files changed

+123
-30
lines changed

6 files changed

+123
-30
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21109,6 +21109,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2110921109
}
2111021110
}
2111121111

21112+
// ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
21113+
if (V.getOpcode() == ISD::SPLAT_VECTOR)
21114+
if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
21115+
return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
21116+
2111221117
// Try to move vector bitcast after extract_subv by scaling extraction index:
2111321118
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
2111421119
if (V.getOpcode() == ISD::BITCAST &&

llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,44 @@ entry:
424424
ret <4 x i32> %out
425425
}
426426

427+
;
428+
; Extract fixed-width vector from a scalable vector splat.
429+
;
430+
431+
define <2 x float> @extract_v2f32_nxv4f32_splat(float %f) {
432+
; CHECK-LABEL: extract_v2f32_nxv4f32_splat:
433+
; CHECK: // %bb.0:
434+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
435+
; CHECK-NEXT: dup v0.2s, v0.s[0]
436+
; CHECK-NEXT: ret
437+
%ins = insertelement <vscale x 4 x float> poison, float %f, i32 0
438+
%splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
439+
%ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
440+
ret <2 x float> %ext
441+
}
442+
443+
define <2 x float> @extract_v2f32_nxv4f32_splat_const() {
444+
; CHECK-LABEL: extract_v2f32_nxv4f32_splat_const:
445+
; CHECK: // %bb.0:
446+
; CHECK-NEXT: fmov v0.2s, #1.00000000
447+
; CHECK-NEXT: ret
448+
%ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0
449+
%splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
450+
%ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
451+
ret <2 x float> %ext
452+
}
453+
454+
define <4 x i32> @extract_v4i32_nxv8i32_splat_const() {
455+
; CHECK-LABEL: extract_v4i32_nxv8i32_splat_const:
456+
; CHECK: // %bb.0:
457+
; CHECK-NEXT: movi v0.4s, #1
458+
; CHECK-NEXT: ret
459+
%ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
460+
%splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
461+
%ext = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0)
462+
ret <4 x i32> %ext
463+
}
464+
427465
attributes #0 = { vscale_range(2,2) }
428466
attributes #1 = { vscale_range(8,8) }
429467

@@ -442,3 +480,5 @@ declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i
442480
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)
443481

444482
declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)
483+
declare <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float>, i64)
484+
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32>, i64)

llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,3 +1014,63 @@ define <vscale x 4 x bfloat> @extract_nxv4bf16_nxv16bf16_12(<vscale x 16 x bfloa
10141014

10151015
declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv16bf16(<vscale x 16 x bfloat>, i64)
10161016

1017+
1018+
;
1019+
; Extract from a splat
1020+
;
1021+
define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat(float %f) {
1022+
; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat:
1023+
; CHECK: // %bb.0:
1024+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
1025+
; CHECK-NEXT: mov z0.s, s0
1026+
; CHECK-NEXT: ret
1027+
%ins = insertelement <vscale x 4 x float> poison, float %f, i32 0
1028+
%splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1029+
%ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
1030+
ret <vscale x 2 x float> %ext
1031+
}
1032+
1033+
define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat_const() {
1034+
; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat_const:
1035+
; CHECK: // %bb.0:
1036+
; CHECK-NEXT: fmov z0.s, #1.00000000
1037+
; CHECK-NEXT: ret
1038+
%ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0
1039+
%splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
1040+
%ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0)
1041+
ret <vscale x 2 x float> %ext
1042+
}
1043+
1044+
define <vscale x 4 x i32> @extract_nxv4i32_nxv8i32_splat_const() {
1045+
; CHECK-LABEL: extract_nxv4i32_nxv8i32_splat_const:
1046+
; CHECK: // %bb.0:
1047+
; CHECK-NEXT: mov z0.s, #1 // =0x1
1048+
; CHECK-NEXT: ret
1049+
%ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
1050+
%splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1051+
%ext = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0)
1052+
ret <vscale x 4 x i32> %ext
1053+
}
1054+
1055+
define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_ones() {
1056+
; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_ones:
1057+
; CHECK: // %bb.0:
1058+
; CHECK-NEXT: ptrue p0.d
1059+
; CHECK-NEXT: ret
1060+
%ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
1061+
%splat = shufflevector <vscale x 16 x i1> %ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1062+
%ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %splat, i64 0)
1063+
ret <vscale x 2 x i1> %ext
1064+
}
1065+
1066+
define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_zero() {
1067+
; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_zero:
1068+
; CHECK: // %bb.0:
1069+
; CHECK-NEXT: pfalse p0.b
1070+
; CHECK-NEXT: ret
1071+
%ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> zeroinitializer, i64 0)
1072+
ret <vscale x 2 x i1> %ext
1073+
}
1074+
1075+
declare <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float>, i64)
1076+
declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64)

llvm/test/CodeGen/AArch64/sve-insert-vector.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -554,10 +554,7 @@ define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_zero(<vscale x 4 x i1> %sv
554554
; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_zero:
555555
; CHECK: // %bb.0:
556556
; CHECK-NEXT: pfalse p1.b
557-
; CHECK-NEXT: punpklo p2.h, p1.b
558-
; CHECK-NEXT: punpkhi p1.h, p1.b
559-
; CHECK-NEXT: punpkhi p2.h, p2.b
560-
; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h
557+
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
561558
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
562559
; CHECK-NEXT: ret
563560
%v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %sv, i64 0)

llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,10 @@ define void @store_nxv6f32(<vscale x 6 x float>* %out) {
134134
; CHECK-LABEL: store_nxv6f32:
135135
; CHECK: // %bb.0:
136136
; CHECK-NEXT: fmov z0.s, #1.00000000
137-
; CHECK-NEXT: ptrue p0.s
138-
; CHECK-NEXT: uunpklo z1.d, z0.s
139-
; CHECK-NEXT: ptrue p1.d
140-
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
141-
; CHECK-NEXT: st1w { z1.d }, p1, [x0, #2, mul vl]
137+
; CHECK-NEXT: ptrue p0.d
138+
; CHECK-NEXT: ptrue p1.s
139+
; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl]
140+
; CHECK-NEXT: st1w { z0.s }, p1, [x0]
142141
; CHECK-NEXT: ret
143142
%ins = insertelement <vscale x 6 x float> undef, float 1.0, i32 0
144143
%splat = shufflevector <vscale x 6 x float> %ins, <vscale x 6 x float> undef, <vscale x 6 x i32> zeroinitializer
@@ -150,11 +149,10 @@ define void @store_nxv12f16(<vscale x 12 x half>* %out) {
150149
; CHECK-LABEL: store_nxv12f16:
151150
; CHECK: // %bb.0:
152151
; CHECK-NEXT: fmov z0.h, #1.00000000
153-
; CHECK-NEXT: ptrue p0.h
154-
; CHECK-NEXT: uunpklo z1.s, z0.h
155-
; CHECK-NEXT: ptrue p1.s
156-
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
157-
; CHECK-NEXT: st1h { z1.s }, p1, [x0, #2, mul vl]
152+
; CHECK-NEXT: ptrue p0.s
153+
; CHECK-NEXT: ptrue p1.h
154+
; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl]
155+
; CHECK-NEXT: st1h { z0.h }, p1, [x0]
158156
; CHECK-NEXT: ret
159157
%ins = insertelement <vscale x 12 x half> undef, half 1.0, i32 0
160158
%splat = shufflevector <vscale x 12 x half> %ins, <vscale x 12 x half> undef, <vscale x 12 x i32> zeroinitializer

llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,33 +1572,26 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x
15721572
ret <vscale x 32 x i32> %v
15731573
}
15741574

1575-
; FIXME: We don't catch this as unmasked.
1576-
15771575
define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
15781576
; CHECK-LABEL: vadd_vi_nxv32i32_unmasked:
15791577
; CHECK: # %bb.0:
1580-
; CHECK-NEXT: li a2, 0
15811578
; CHECK-NEXT: csrr a1, vlenb
1582-
; CHECK-NEXT: srli a4, a1, 2
1583-
; CHECK-NEXT: vsetvli a3, zero, e8, m4, ta, mu
1584-
; CHECK-NEXT: vmset.m v24
1585-
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
15861579
; CHECK-NEXT: slli a1, a1, 1
1587-
; CHECK-NEXT: sub a3, a0, a1
1588-
; CHECK-NEXT: vslidedown.vx v0, v24, a4
1589-
; CHECK-NEXT: bltu a0, a3, .LBB119_2
1580+
; CHECK-NEXT: mv a2, a0
1581+
; CHECK-NEXT: bltu a0, a1, .LBB119_2
15901582
; CHECK-NEXT: # %bb.1:
1591-
; CHECK-NEXT: mv a2, a3
1583+
; CHECK-NEXT: mv a2, a1
15921584
; CHECK-NEXT: .LBB119_2:
1585+
; CHECK-NEXT: li a3, 0
15931586
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
1594-
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
1587+
; CHECK-NEXT: sub a1, a0, a1
1588+
; CHECK-NEXT: vadd.vi v8, v8, -1
15951589
; CHECK-NEXT: bltu a0, a1, .LBB119_4
15961590
; CHECK-NEXT: # %bb.3:
1597-
; CHECK-NEXT: mv a0, a1
1591+
; CHECK-NEXT: mv a3, a1
15981592
; CHECK-NEXT: .LBB119_4:
1599-
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
1600-
; CHECK-NEXT: vmv1r.v v0, v24
1601-
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
1593+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
1594+
; CHECK-NEXT: vadd.vi v16, v16, -1
16021595
; CHECK-NEXT: ret
16031596
%elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
16041597
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer

0 commit comments

Comments
 (0)