Skip to content

Commit 44ef248

Browse files
committed
break packed f32 into two f32 regs, not i32 regs
This enables better code simplification.
1 parent 555e5fd commit 44ef248

File tree

5 files changed

+225
-345
lines changed

5 files changed

+225
-345
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -190,8 +190,9 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
190190
SelectI128toV2I64(N);
191191
return;
192192
}
193-
if (N->getOperand(1).getValueType() == MVT::i64 && N->getNumValues() == 3) {
194-
SelectI64ToV2I32(N);
193+
if (N->getOperand(1).getValueType() == MVT::i64) {
194+
// {f32,f32} = mov i64
195+
SelectI64ToV2F32(N);
195196
return;
196197
}
197198
break;
@@ -2769,13 +2770,15 @@ void NVPTXDAGToDAGISel::SelectI128toV2I64(SDNode *N) {
27692770
ReplaceNode(N, Mov);
27702771
}
27712772

2772-
void NVPTXDAGToDAGISel::SelectI64ToV2I32(SDNode *N) {
2773+
void NVPTXDAGToDAGISel::SelectI64ToV2F32(SDNode *N) {
27732774
SDValue Ch = N->getOperand(0);
27742775
SDValue Src = N->getOperand(1);
2776+
assert(N->getValueType(0) == MVT::f32 && N->getValueType(1) == MVT::f32 &&
2777+
"expected {f32,f32} = CopyFromReg i64");
27752778
SDLoc DL(N);
27762779

2777-
SDNode *Mov = CurDAG->getMachineNode(NVPTX::I64toV2I32, DL,
2778-
{MVT::i32, MVT::i32, Ch.getValueType()},
2780+
SDNode *Mov = CurDAG->getMachineNode(NVPTX::I64toV2F32, DL,
2781+
{MVT::f32, MVT::f32, Ch.getValueType()},
27792782
{Src, Ch});
27802783
ReplaceNode(N, Mov);
27812784
}

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
9191
bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
9292
void SelectV2I64toI128(SDNode *N);
9393
void SelectI128toV2I64(SDNode *N);
94-
void SelectI64ToV2I32(SDNode *N);
94+
void SelectI64ToV2F32(SDNode *N);
9595
void SelectCpAsyncBulkG2S(SDNode *N);
9696
void SelectCpAsyncBulkS2G(SDNode *N);
9797
void SelectCpAsyncBulkPrefetchL2(SDNode *N);

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5638,20 +5638,22 @@ static void ReplaceF32x2Op(SDNode *N, SelectionDAG &DAG,
56385638

56395639
SDValue Chain = DAG.getEntryNode();
56405640

5641-
// break i64 result into two i32 registers for later instructions that may
5642-
// access element #0 or #1. otherwise, this code will be eliminated
5641+
// break packed result into two f32 registers for later instructions that may
5642+
// access element #0 or #1
56435643
SDValue NewValue = DAG.getNode(Opcode, DL, MVT::i64, NewOps);
56445644
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
56455645
Register DestReg = RegInfo.createVirtualRegister(
56465646
DAG.getTargetLoweringInfo().getRegClassFor(MVT::i64));
56475647
SDValue RegCopy = DAG.getCopyToReg(Chain, DL, DestReg, NewValue);
56485648
SDValue Explode = DAG.getNode(ISD::CopyFromReg, DL,
5649-
{MVT::i32, MVT::i32, Chain.getValueType()},
5649+
{MVT::f32, MVT::f32, Chain.getValueType()},
56505650
{RegCopy, DAG.getRegister(DestReg, MVT::i64)});
56515651
// cast i64 result of new op back to <2 x float>
56525652
Results.push_back(DAG.getBitcast(
5653-
OldResultTy, DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5654-
{Explode.getValue(0), Explode.getValue(1)})));
5653+
OldResultTy,
5654+
DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5655+
{DAG.getBitcast(MVT::i32, Explode.getValue(0)),
5656+
DAG.getBitcast(MVT::i32, Explode.getValue(1))})));
56555657
}
56565658

56575659
void NVPTXTargetLowering::ReplaceNodeResults(

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3387,6 +3387,9 @@ let hasSideEffects = false in {
33873387
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
33883388
(ins Int64Regs:$s),
33893389
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
3390+
def I64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
3391+
(ins Int64Regs:$s),
3392+
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
33903393
def I128toV2I64: NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2),
33913394
(ins Int128Regs:$s),
33923395
"mov.b128 \t{{$d1, $d2}}, $s;", []>;

0 commit comments

Comments
 (0)