Skip to content

Commit 9d4a12d

Browse files
committed
Handle fdiv and other instructions where v2f32 is illegal
This requires us to lower EXTRACT_VECTOR_ELT for v2f32 as well.
1 parent d062156 commit 9d4a12d

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
965965
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
966966
setOperationAction(Op, MVT::f16, Promote);
967967
setOperationAction(Op, MVT::f32, Legal);
968+
setOperationAction(Op, MVT::v2f32, Expand);
968969
setOperationAction(Op, MVT::f64, Legal);
969970
setOperationAction(Op, MVT::v2f16, Expand);
970971
setOperationAction(Op, MVT::v2bf16, Expand);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2912,6 +2912,14 @@ let hasSideEffects = false in {
29122912
(ins Int64Regs:$s),
29132913
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
29142914
[]>;
2915+
def I64toF32H : NVPTXInst<(outs Float32Regs:$high),
2916+
(ins Int64Regs:$s),
2917+
"{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
2918+
[]>;
2919+
def I64toF32L : NVPTXInst<(outs Float32Regs:$low),
2920+
(ins Int64Regs:$s),
2921+
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
2922+
[]>;
29152923

29162924
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
29172925
// unused high/low part.
@@ -2954,6 +2962,12 @@ foreach vt = [v2f16, v2bf16, v2i16] in {
29542962
def : Pat<(extractelt vt:$src, 0), (I32toI16L $src)>;
29552963
def : Pat<(extractelt vt:$src, 1), (I32toI16H $src)>;
29562964
}
2965+
2966+
def : Pat<(extractelt v2f32:$src, 0),
2967+
(I64toF32L $src)>;
2968+
def : Pat<(extractelt v2f32:$src, 1),
2969+
(I64toF32H $src)>;
2970+
29572971
def : Pat<(v2f16 (build_vector f16:$a, f16:$b)),
29582972
(V2I16toI32 $a, $b)>;
29592973
def : Pat<(v2bf16 (build_vector bf16:$a, bf16:$b)),

0 commit comments

Comments
 (0)