@@ -1582,49 +1582,34 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1582
1582
Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1583
1583
1584
1584
// packed f32 ops (sm_100+)
1585
- class F32x2Op2<string OpcStr, Predicate Pred>
1585
+
1586
+ def fadd32x2_nvptx : SDNode<"NVPTXISD::FADD_F32X2", SDTIntBinOp>; // target node for packed add; two operands, matched as i64 by F32x2Op2
1587
+ def fsub32x2_nvptx : SDNode<"NVPTXISD::FSUB_F32X2", SDTIntBinOp>; // target node for packed sub; same binary type profile
1588
+ def fmul32x2_nvptx : SDNode<"NVPTXISD::FMUL_F32X2", SDTIntBinOp>; // target node for packed mul; same binary type profile
1589
+ def fma32x2_nvptx : SDNode<"NVPTXISD::FMA_F32X2", SDTIntTernaryOp>; // target node for packed fma; three operands, matched as i64 by F32x2Op3
1590
+
1591
+ class F32x2Op2<string OpcStr, SDNode Op, Predicate Pred> // Two-operand packed f32x2 instruction: OpcStr is the mnemonic prefix, Op the node to select from, Pred an extra predicate.
1586
1592
: NVPTXInst<(outs Int64Regs:$res), // single result in an Int64Regs register
1587
1593
(ins Int64Regs:$a, Int64Regs:$b), // two Int64Regs source operands
1588
- OpcStr # ".f32x2 \t$res, $a, $b;", []>,
1594
+ OpcStr # ".f32x2 \t$res, $a, $b;", // asm string: ".f32x2" is appended directly to OpcStr, so OpcStr must not end in whitespace
1595
+ [(set i64:$res, (Op i64:$a, i64:$b))]>, // selection pattern: Op applied to two i64 values producing an i64
1589
1596
Requires<[hasF32x2Instructions, Pred]>; // requires hasF32x2Instructions in addition to the caller-supplied Pred
1590
- class F32x2Op3<string OpcStr, Predicate Pred>
1597
+ class F32x2Op3<string OpcStr, SDNode Op, Predicate Pred> // Three-operand packed f32x2 instruction: OpcStr is the mnemonic prefix, Op the node to select from, Pred an extra predicate.
1591
1598
: NVPTXInst<(outs Int64Regs:$res), // single result in an Int64Regs register
1592
1599
(ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), // three Int64Regs source operands
1593
- OpcStr # ".f32x2 \t$res, $a, $b, $c;", []>,
1600
+ OpcStr # ".f32x2 \t$res, $a, $b, $c;", // asm string: ".f32x2" is appended directly to OpcStr, so OpcStr must not end in whitespace
1601
+ [(set i64:$res, (Op i64:$a, i64:$b, i64:$c))]>, // selection pattern: Op applied to three i64 values producing an i64
1594
1602
Requires<[hasF32x2Instructions, Pred]>; // requires hasF32x2Instructions in addition to the caller-supplied Pred
1595
1603
1596
- def fadd32x2_nvptx : SDNode<"NVPTXISD::FADD_F32X2 ", SDTIntBinOp >;
1597
- def fsub32x2_nvptx : SDNode<"NVPTXISD::FSUB_F32X2 ", SDTIntBinOp >;
1598
- def fmul32x2_nvptx : SDNode<"NVPTXISD::FMUL_F32X2 ", SDTIntBinOp >;
1599
- def fma32x2_nvptx : SDNode<"NVPTXISD::FMA_F32X2 ", SDTIntTernaryOp >;
1604
+ def FADD32x2 : F32x2Op2<"add.rn", fadd32x2_nvptx, doNoF32FTZ>; // non-FTZ variants; OpcStr is joined directly with ".f32x2", so no trailing space -> "add.rn.f32x2"
1605
+ def FSUB32x2 : F32x2Op2<"sub.rn", fsub32x2_nvptx, doNoF32FTZ>; // "sub.rn.f32x2", selected from fsub32x2_nvptx
1606
+ def FMUL32x2 : F32x2Op2<"mul.rn", fmul32x2_nvptx, doNoF32FTZ>; // "mul.rn.f32x2", selected from fmul32x2_nvptx
1607
+ def FMA32x2 : F32x2Op3<"fma.rn", fma32x2_nvptx, doNoF32FTZ>; // "fma.rn.f32x2", three-operand form selected from fma32x2_nvptx
1600
1608
1601
- def FADD32x2 : F32x2Op2<"add.rn", doNoF32FTZ>;
1602
- def FSUB32x2 : F32x2Op2<"sub.rn", doNoF32FTZ>;
1603
- def FMUL32x2 : F32x2Op2<"mul.rn", doNoF32FTZ>;
1604
- def FMA32x2 : F32x2Op3<"fma.rn", doNoF32FTZ>;
1605
-
1606
- def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1607
- (FADD32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1608
- def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1609
- (FSUB32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1610
- def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1611
- (FMUL32x2 $a, $b)>, Requires<[doNoF32FTZ]>;
1612
- def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1613
- (FMA32x2 $a, $b, $c)>, Requires<[doNoF32FTZ]>;
1614
-
1615
- def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", doF32FTZ>;
1616
- def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", doF32FTZ>;
1617
- def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", doF32FTZ>;
1618
- def FMA32x2_ftz : F32x2Op3<"fma.rn.ftz", doF32FTZ>;
1619
-
1620
- def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1621
- (FADD32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1622
- def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1623
- (FSUB32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1624
- def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1625
- (FMUL32x2_ftz $a, $b)>, Requires<[doF32FTZ]>;
1626
- def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1627
- (FMA32x2_ftz $a, $b, $c)>, Requires<[doF32FTZ]>;
1609
+ def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", fadd32x2_nvptx, doF32FTZ>; // .ftz variants, guarded by doF32FTZ; same node as FADD32x2, asm "add.rn.ftz.f32x2"
1610
+ def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", fsub32x2_nvptx, doF32FTZ>; // asm "sub.rn.ftz.f32x2"
1611
+ def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", fmul32x2_nvptx, doF32FTZ>; // asm "mul.rn.ftz.f32x2"
1612
+ def FMA32x2_ftz : F32x2Op3<"fma.rn.ftz", fma32x2_nvptx, doF32FTZ>; // asm "fma.rn.ftz.f32x2", three-operand form
1628
1613
1629
1614
//
1630
1615
// BFIND
0 commit comments