Skip to content

Commit 04b51f8

Browse files
committed
[NVPTX] Promote extract_vector_elt nodes on v2f32 vectors to an unpacking mov.b64
Also update the test cases.
1 parent edbbc50 commit 04b51f8

File tree

4 files changed

+2093
-13
lines changed

4 files changed

+2093
-13
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -465,10 +465,14 @@ bool NVPTXDAGToDAGISel::tryUNPACK_VECTOR(SDNode *N) {
465465
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
466466
SDValue Vector = N->getOperand(0);
467467

468-
// We only care about 16x2 as it's the only real vector type we
469-
// need to deal with.
468+
// We only care about the packed vector types handled below: v2x16 and v2f32.
470469
MVT VT = Vector.getSimpleValueType();
471-
if (!Isv2x16VT(VT))
470+
unsigned NewOpcode;
471+
if (Isv2x16VT(VT))
472+
NewOpcode = NVPTX::I32toV2I16;
473+
else if (VT == MVT::v2f32)
474+
NewOpcode = NVPTX::I64toV2F32;
475+
else
472476
return false;
473477
// Find and record all uses of this vector that extract element 0 or 1.
474478
SmallVector<SDNode *, 4> E0, E1;
@@ -488,16 +492,19 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
488492
}
489493
}
490494

491-
// There's no point scattering f16x2 if we only ever access one
495+
// There's no point scattering f16x2 or f32x2 if we only ever access one
492496
// element of it.
493497
if (E0.empty() || E1.empty())
494498
return false;
495499

496-
// Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
497-
// into f16,f16 SplitF16x2(V)
500+
// Merge:
501+
// (f16 extractelt(V, 0), f16 extractelt(V,1))
502+
// -> f16,f16 SplitF16x2(V)
503+
// (f32 extractelt(V, 0), f32 extractelt(V,1))
504+
// -> f32,f32 SplitF32x2(V)
498505
MVT EltVT = VT.getVectorElementType();
499506
SDNode *ScatterOp =
500-
CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
507+
CurDAG->getMachineNode(NewOpcode, SDLoc(N), EltVT, EltVT, Vector);
501508
for (auto *Node : E0)
502509
ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
503510
for (auto *Node : E1)

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5565,10 +5565,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
55655565
IsPTXVectorType(VectorVT.getSimpleVT()))
55665566
return SDValue(); // Native vector loads already combine nicely w/
55675567
// extract_vector_elt.
5568-
// Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already
5568+
// Don't mess with singletons or v2*16, v2f32, v4i8 and v8i8 types; we already
55695569
// handle them OK.
55705570
if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
5571-
VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
5571+
VectorVT == MVT::v2f32 || VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
55725572
return SDValue();
55735573

55745574
// Don't mess with undef values as sra may be simplified to 0, not undef.

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2824,6 +2824,9 @@ let hasSideEffects = false in {
28242824
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
28252825
(ins Int64Regs:$s),
28262826
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
2827+
def I64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2828+
(ins Int64Regs:$s),
2829+
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
28272830
def I128toV2I64: NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2),
28282831
(ins Int128Regs:$s),
28292832
"mov.b128 \t{{$d1, $d2}}, $s;", []>;

0 commit comments

Comments
 (0)