Skip to content

Commit 220a002

Browse files
preames and topperc authored
[SDAG] Prefer scalar for prefix of vector GEP expansion (#146719)
When generating SDAG for a getelementptr with a vector result, we were previously generating splats for each scalar operand. This essentially has the effect of aggressively vectorizing the sequence, and leaving it to later combines to scalarize if profitable. Instead, we can keep the accumulating address as a scalar for as long as the prefix of operands allows, before lazily converting to vector on the first vector operand. This both better fits hardware, which frequently has a scalar base on the scatter/gather instructions, and reduces the addressing cost even when the hardware does not, as otherwise we end up with a scalar-to-vector domain crossing for each scalar operand. Note that constant splat offsets are treated as scalar for the above, and only variable offsets can force a conversion to vector. --------- Co-authored-by: Craig Topper <craig.topper@sifive.com>
1 parent 44bed1a commit 220a002

File tree

3 files changed

+52
-86
lines changed

3 files changed

+52
-86
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4336,19 +4336,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
43364336
auto &TLI = DAG.getTargetLoweringInfo();
43374337
GEPNoWrapFlags NW = cast<GEPOperator>(I).getNoWrapFlags();
43384338

4339-
// Normalize Vector GEP - all scalar operands should be converted to the
4340-
// splat vector.
4339+
// For a vector GEP, keep the prefix scalar as long as possible, then
4340+
// convert any scalars encountered after the first vector operand to vectors.
43414341
bool IsVectorGEP = I.getType()->isVectorTy();
43424342
ElementCount VectorElementCount =
43434343
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
43444344
: ElementCount::getFixed(0);
43454345

4346-
if (IsVectorGEP && !N.getValueType().isVector()) {
4347-
LLVMContext &Context = *DAG.getContext();
4348-
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
4349-
N = DAG.getSplat(VT, dl, N);
4350-
}
4351-
43524346
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
43534347
GTI != E; ++GTI) {
43544348
const Value *Idx = GTI.getOperand();
@@ -4396,7 +4390,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
43964390
APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
43974391
LLVMContext &Context = *DAG.getContext();
43984392
SDValue OffsVal;
4399-
if (IsVectorGEP)
4393+
if (N.getValueType().isVector())
44004394
OffsVal = DAG.getConstant(
44014395
Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
44024396
else
@@ -4418,10 +4412,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
44184412
// N = N + Idx * ElementMul;
44194413
SDValue IdxN = getValue(Idx);
44204414

4421-
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
4422-
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
4423-
VectorElementCount);
4424-
IdxN = DAG.getSplat(VT, dl, IdxN);
4415+
if (IdxN.getValueType().isVector() != N.getValueType().isVector()) {
4416+
if (N.getValueType().isVector()) {
4417+
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
4418+
VectorElementCount);
4419+
IdxN = DAG.getSplat(VT, dl, IdxN);
4420+
} else {
4421+
EVT VT =
4422+
EVT::getVectorVT(*Context, N.getValueType(), VectorElementCount);
4423+
N = DAG.getSplat(VT, dl, N);
4424+
}
44254425
}
44264426

44274427
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -4442,7 +4442,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
44424442
SDValue VScale = DAG.getNode(
44434443
ISD::VSCALE, dl, VScaleTy,
44444444
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
4445-
if (IsVectorGEP)
4445+
if (N.getValueType().isVector())
44464446
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
44474447
IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale,
44484448
ScaleFlags);
@@ -4475,6 +4475,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
44754475
}
44764476
}
44774477

4478+
if (IsVectorGEP && !N.getValueType().isVector()) {
4479+
EVT VT = EVT::getVectorVT(*Context, N.getValueType(), VectorElementCount);
4480+
N = DAG.getSplat(VT, dl, N);
4481+
}
4482+
44784483
MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
44794484
MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
44804485
if (IsVectorGEP) {

llvm/test/CodeGen/AArch64/ptradd.ll

Lines changed: 24 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -285,19 +285,11 @@ entry:
285285
}
286286

287287
define <1 x ptr> @vector_gep_v1i64_c10(ptr %b) {
288-
; CHECK-SD-LABEL: vector_gep_v1i64_c10:
289-
; CHECK-SD: // %bb.0: // %entry
290-
; CHECK-SD-NEXT: mov w8, #10 // =0xa
291-
; CHECK-SD-NEXT: fmov d0, x0
292-
; CHECK-SD-NEXT: fmov d1, x8
293-
; CHECK-SD-NEXT: add d0, d0, d1
294-
; CHECK-SD-NEXT: ret
295-
;
296-
; CHECK-GI-LABEL: vector_gep_v1i64_c10:
297-
; CHECK-GI: // %bb.0: // %entry
298-
; CHECK-GI-NEXT: add x8, x0, #10
299-
; CHECK-GI-NEXT: fmov d0, x8
300-
; CHECK-GI-NEXT: ret
288+
; CHECK-LABEL: vector_gep_v1i64_c10:
289+
; CHECK: // %bb.0: // %entry
290+
; CHECK-NEXT: add x8, x0, #10
291+
; CHECK-NEXT: fmov d0, x8
292+
; CHECK-NEXT: ret
301293
entry:
302294
%g = getelementptr i8, ptr %b, <1 x i64> <i64 10>
303295
ret <1 x ptr> %g
@@ -306,10 +298,8 @@ entry:
306298
define <2 x ptr> @vector_gep_v2i64_c10(ptr %b) {
307299
; CHECK-SD-LABEL: vector_gep_v2i64_c10:
308300
; CHECK-SD: // %bb.0: // %entry
309-
; CHECK-SD-NEXT: mov w8, #10 // =0xa
310-
; CHECK-SD-NEXT: dup v0.2d, x0
311-
; CHECK-SD-NEXT: dup v1.2d, x8
312-
; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
301+
; CHECK-SD-NEXT: add x8, x0, #10
302+
; CHECK-SD-NEXT: dup v0.2d, x8
313303
; CHECK-SD-NEXT: ret
314304
;
315305
; CHECK-GI-LABEL: vector_gep_v2i64_c10:
@@ -327,15 +317,10 @@ entry:
327317
define <3 x ptr> @vector_gep_v3i64_c10(ptr %b) {
328318
; CHECK-SD-LABEL: vector_gep_v3i64_c10:
329319
; CHECK-SD: // %bb.0: // %entry
330-
; CHECK-SD-NEXT: mov w8, #10 // =0xa
331-
; CHECK-SD-NEXT: dup v0.2d, x0
332-
; CHECK-SD-NEXT: fmov d3, x0
333-
; CHECK-SD-NEXT: dup v2.2d, x8
334-
; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
335-
; CHECK-SD-NEXT: add d2, d3, d2
336-
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
337-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
338-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
320+
; CHECK-SD-NEXT: add x8, x0, #10
321+
; CHECK-SD-NEXT: fmov d0, x8
322+
; CHECK-SD-NEXT: fmov d1, d0
323+
; CHECK-SD-NEXT: fmov d2, d0
339324
; CHECK-SD-NEXT: ret
340325
;
341326
; CHECK-GI-LABEL: vector_gep_v3i64_c10:
@@ -356,10 +341,8 @@ entry:
356341
define <4 x ptr> @vector_gep_v4i64_c10(ptr %b) {
357342
; CHECK-SD-LABEL: vector_gep_v4i64_c10:
358343
; CHECK-SD: // %bb.0: // %entry
359-
; CHECK-SD-NEXT: mov w8, #10 // =0xa
360-
; CHECK-SD-NEXT: dup v0.2d, x0
361-
; CHECK-SD-NEXT: dup v1.2d, x8
362-
; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
344+
; CHECK-SD-NEXT: add x8, x0, #10
345+
; CHECK-SD-NEXT: dup v0.2d, x8
363346
; CHECK-SD-NEXT: mov v1.16b, v0.16b
364347
; CHECK-SD-NEXT: ret
365348
;
@@ -377,19 +360,11 @@ entry:
377360
}
378361

379362
define <1 x ptr> @vector_gep_v1i64_cm10(ptr %b) {
380-
; CHECK-SD-LABEL: vector_gep_v1i64_cm10:
381-
; CHECK-SD: // %bb.0: // %entry
382-
; CHECK-SD-NEXT: mov x8, #-10 // =0xfffffffffffffff6
383-
; CHECK-SD-NEXT: fmov d1, x0
384-
; CHECK-SD-NEXT: fmov d0, x8
385-
; CHECK-SD-NEXT: add d0, d1, d0
386-
; CHECK-SD-NEXT: ret
387-
;
388-
; CHECK-GI-LABEL: vector_gep_v1i64_cm10:
389-
; CHECK-GI: // %bb.0: // %entry
390-
; CHECK-GI-NEXT: sub x8, x0, #10
391-
; CHECK-GI-NEXT: fmov d0, x8
392-
; CHECK-GI-NEXT: ret
363+
; CHECK-LABEL: vector_gep_v1i64_cm10:
364+
; CHECK: // %bb.0: // %entry
365+
; CHECK-NEXT: sub x8, x0, #10
366+
; CHECK-NEXT: fmov d0, x8
367+
; CHECK-NEXT: ret
393368
entry:
394369
%g = getelementptr i8, ptr %b, <1 x i64> <i64 -10>
395370
ret <1 x ptr> %g
@@ -398,10 +373,8 @@ entry:
398373
define <2 x ptr> @vector_gep_v2i64_cm10(ptr %b) {
399374
; CHECK-SD-LABEL: vector_gep_v2i64_cm10:
400375
; CHECK-SD: // %bb.0: // %entry
401-
; CHECK-SD-NEXT: mov x8, #-10 // =0xfffffffffffffff6
402-
; CHECK-SD-NEXT: dup v1.2d, x0
376+
; CHECK-SD-NEXT: sub x8, x0, #10
403377
; CHECK-SD-NEXT: dup v0.2d, x8
404-
; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
405378
; CHECK-SD-NEXT: ret
406379
;
407380
; CHECK-GI-LABEL: vector_gep_v2i64_cm10:
@@ -419,15 +392,10 @@ entry:
419392
define <3 x ptr> @vector_gep_v3i64_cm10(ptr %b) {
420393
; CHECK-SD-LABEL: vector_gep_v3i64_cm10:
421394
; CHECK-SD: // %bb.0: // %entry
422-
; CHECK-SD-NEXT: mov x8, #-10 // =0xfffffffffffffff6
423-
; CHECK-SD-NEXT: dup v0.2d, x0
424-
; CHECK-SD-NEXT: fmov d3, x0
425-
; CHECK-SD-NEXT: dup v2.2d, x8
426-
; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
427-
; CHECK-SD-NEXT: add d2, d3, d2
428-
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
429-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
430-
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
395+
; CHECK-SD-NEXT: sub x8, x0, #10
396+
; CHECK-SD-NEXT: fmov d0, x8
397+
; CHECK-SD-NEXT: fmov d1, d0
398+
; CHECK-SD-NEXT: fmov d2, d0
431399
; CHECK-SD-NEXT: ret
432400
;
433401
; CHECK-GI-LABEL: vector_gep_v3i64_cm10:
@@ -448,10 +416,8 @@ entry:
448416
define <4 x ptr> @vector_gep_v4i64_cm10(ptr %b) {
449417
; CHECK-SD-LABEL: vector_gep_v4i64_cm10:
450418
; CHECK-SD: // %bb.0: // %entry
451-
; CHECK-SD-NEXT: mov x8, #-10 // =0xfffffffffffffff6
452-
; CHECK-SD-NEXT: dup v1.2d, x0
419+
; CHECK-SD-NEXT: sub x8, x0, #10
453420
; CHECK-SD-NEXT: dup v0.2d, x8
454-
; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
455421
; CHECK-SD-NEXT: mov v1.16b, v0.16b
456422
; CHECK-SD-NEXT: ret
457423
;

llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2377,26 +2377,21 @@ define <vscale x 1 x i8> @mgather_baseidx_zext_nxv1i1_nxv1i8(ptr %base, <vscale
23772377
define <4 x i32> @scalar_prefix(ptr %base, i32 signext %index, <4 x i32> %vecidx) {
23782378
; RV32-LABEL: scalar_prefix:
23792379
; RV32: # %bb.0:
2380+
; RV32-NEXT: slli a1, a1, 10
2381+
; RV32-NEXT: add a0, a0, a1
23802382
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2381-
; RV32-NEXT: vmv.v.x v9, a1
2382-
; RV32-NEXT: vsll.vi v9, v9, 10
2383-
; RV32-NEXT: vadd.vx v9, v9, a0
23842383
; RV32-NEXT: vsll.vi v8, v8, 2
2385-
; RV32-NEXT: vadd.vv v8, v9, v8
2386-
; RV32-NEXT: vluxei32.v v8, (zero), v8
2384+
; RV32-NEXT: vluxei32.v v8, (a0), v8
23872385
; RV32-NEXT: ret
23882386
;
23892387
; RV64-LABEL: scalar_prefix:
23902388
; RV64: # %bb.0:
2391-
; RV64-NEXT: li a2, 1024
2392-
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2393-
; RV64-NEXT: vmv.v.x v10, a0
2394-
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2395-
; RV64-NEXT: vmv.v.x v9, a2
2396-
; RV64-NEXT: vwmaccsu.vx v10, a1, v9
2397-
; RV64-NEXT: li a0, 4
2398-
; RV64-NEXT: vwmaccus.vx v10, a0, v8
2399-
; RV64-NEXT: vluxei64.v v8, (zero), v10
2389+
; RV64-NEXT: li a2, 4
2390+
; RV64-NEXT: slli a1, a1, 10
2391+
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2392+
; RV64-NEXT: vwmulsu.vx v10, v8, a2
2393+
; RV64-NEXT: add a0, a0, a1
2394+
; RV64-NEXT: vluxei64.v v8, (a0), v10
24002395
; RV64-NEXT: ret
24012396
%gep = getelementptr [256 x i32], ptr %base, i32 %index, <4 x i32> %vecidx
24022397
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)

0 commit comments

Comments
 (0)