@@ -1581,27 +1581,50 @@ def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1581
1581
// f64 add emitted as "add.rp.f64" with Float64Regs for every register-class
// argument, associated with the int_nvvm_add_rp_d intrinsic.
// NOTE(review): F_MATH_2 is defined outside this view; the argument order
// (asm, dst class, src0 class, src1 class, intrinsic) is inferred -- confirm.
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1582
1582
Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1583
1583
1584
- // F32x2 ops (sm_100+)
1585
-
1586
- def FADD_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1587
- (ins Int64Regs:$a, Int64Regs:$b),
1588
- "add.rn.f32x2 \t$res, $a, $b;", []>,
1589
- Requires<[hasF32x2Instructions]>;
1590
-
1591
- def FSUB_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1592
- (ins Int64Regs:$a, Int64Regs:$b),
1593
- "sub.rn.f32x2 \t$res, $a, $b;", []>,
1594
- Requires<[hasF32x2Instructions]>;
1595
-
1596
- def FMUL_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1597
- (ins Int64Regs:$a, Int64Regs:$b),
1598
- "mul.rn.f32x2 \t$res, $a, $b;", []>,
1599
- Requires<[hasF32x2Instructions]>;
1600
-
1601
- def FMA_F32X2 : NVPTXInst<(outs Int64Regs:$res),
1602
- (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
1603
- "fma.rn.f32x2 \t$res, $a, $b;", []>,
1604
- Requires<[hasF32x2Instructions]>;
1584
+ // packed f32 ops (sm_100+)
1585
+ // Template for two-source packed-f32 instructions.
+ //   OpcStr - mnemonic plus modifiers; ".f32x2" and the operand list are
+ //            appended to it to form the asm string (e.g. "add.rn").
+ //   Pred   - extra predicate required in addition to hasF32x2Instructions.
+ // Result and both sources are Int64Regs (presumably the two f32 lanes are
+ // packed into one 64-bit register -- confirm). The pattern list is empty, so
+ // instruction selection relies on standalone Pat records.
+ class F32x2Op2<string OpcStr, Predicate Pred>
1586
+ : NVPTXInst<(outs Int64Regs:$res),
1587
+ (ins Int64Regs:$a, Int64Regs:$b),
1588
+ OpcStr # ".f32x2 \t$res, $a, $b;", []>,
1589
+ Requires<[hasF32x2Instructions, Pred]>;
1590
+ // Template for three-source packed-f32 instructions.
+ //   OpcStr - mnemonic plus modifiers; ".f32x2" and the operand list are
+ //            appended to it to form the asm string (e.g. "fma.rn").
+ //   Pred   - extra predicate required in addition to hasF32x2Instructions.
+ // Result and all three sources are Int64Regs, and all three sources ($a, $b,
+ // $c) appear in the emitted asm string. The pattern list is empty, so
+ // instruction selection relies on standalone Pat records.
+ class F32x2Op3<string OpcStr, Predicate Pred>
1591
+ : NVPTXInst<(outs Int64Regs:$res),
1592
+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
1593
+ OpcStr # ".f32x2 \t$res, $a, $b, $c;", []>,
1594
+ Requires<[hasF32x2Instructions, Pred]>;
1595
+
1596
+ // TableGen names for the NVPTXISD packed-f32 DAG opcodes. They are declared
+ // with the integer binary/ternary type profiles; the patterns in this file
+ // match their operands as i64 (presumably because the packed pair is carried
+ // as a single 64-bit integer value -- confirm against the lowering code).
+ def fadd32x2_nvptx : SDNode<"NVPTXISD::FADD_F32X2", SDTIntBinOp>;
1597
+ def fsub32x2_nvptx : SDNode<"NVPTXISD::FSUB_F32X2", SDTIntBinOp>;
1598
+ def fmul32x2_nvptx : SDNode<"NVPTXISD::FMUL_F32X2", SDTIntBinOp>;
1599
+ def fma32x2_nvptx : SDNode<"NVPTXISD::FMA_F32X2", SDTIntTernaryOp>;
1600
+
1601
+ // Non-FTZ instruction records: asm mnemonics carry ".rn" without ".ftz" and
+ // each record additionally requires doNoF32FTZ.
+ def FADD32x2 : F32x2Op2<"add.rn", doNoF32FTZ>;
1602
+ def FSUB32x2 : F32x2Op2<"sub.rn", doNoF32FTZ>;
1603
+ def FMUL32x2 : F32x2Op2<"mul.rn", doNoF32FTZ>;
1604
+ def FMA32x2 : F32x2Op3<"fma.rn", doNoF32FTZ>;
1605
+
1606
+ // Selection patterns mapping the packed-f32 DAG nodes (i64 operands) onto
+ // the non-FTZ instruction records.
+ // A standalone Pat takes its predicates only from its own Requires<> list;
+ // the Requires<> on the instruction record is not inherited. The hardware
+ // predicate hasF32x2Instructions is therefore repeated here so these
+ // patterns can never match on a target without f32x2 support.
+ def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1607
+ (FADD32x2 $a, $b)>, Requires<[hasF32x2Instructions, doNoF32FTZ]>;
1608
+ def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1609
+ (FSUB32x2 $a, $b)>, Requires<[hasF32x2Instructions, doNoF32FTZ]>;
1610
+ def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1611
+ (FMUL32x2 $a, $b)>, Requires<[hasF32x2Instructions, doNoF32FTZ]>;
1612
+ def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1613
+ (FMA32x2 $a, $b, $c)>, Requires<[hasF32x2Instructions, doNoF32FTZ]>;
1614
+
1615
+ // FTZ instruction records: same operations with ".ftz" appended to the
+ // mnemonic; each record additionally requires doF32FTZ.
+ def FADD32x2_ftz : F32x2Op2<"add.rn.ftz", doF32FTZ>;
1616
+ def FSUB32x2_ftz : F32x2Op2<"sub.rn.ftz", doF32FTZ>;
1617
+ def FMUL32x2_ftz : F32x2Op2<"mul.rn.ftz", doF32FTZ>;
1618
+ def FMA32x2_ftz : F32x2Op3<"fma.rn.ftz", doF32FTZ>;
1619
+
1620
+ // Selection patterns mapping the packed-f32 DAG nodes (i64 operands) onto
+ // the FTZ instruction records.
+ // A standalone Pat takes its predicates only from its own Requires<> list;
+ // the Requires<> on the instruction record is not inherited. The hardware
+ // predicate hasF32x2Instructions is therefore repeated here so these
+ // patterns can never match on a target without f32x2 support.
+ def : Pat<(fadd32x2_nvptx i64:$a, i64:$b),
1621
+ (FADD32x2_ftz $a, $b)>, Requires<[hasF32x2Instructions, doF32FTZ]>;
1622
+ def : Pat<(fsub32x2_nvptx i64:$a, i64:$b),
1623
+ (FSUB32x2_ftz $a, $b)>, Requires<[hasF32x2Instructions, doF32FTZ]>;
1624
+ def : Pat<(fmul32x2_nvptx i64:$a, i64:$b),
1625
+ (FMUL32x2_ftz $a, $b)>, Requires<[hasF32x2Instructions, doF32FTZ]>;
1626
+ def : Pat<(fma32x2_nvptx i64:$a, i64:$b, i64:$c),
1627
+ (FMA32x2_ftz $a, $b, $c)>, Requires<[hasF32x2Instructions, doF32FTZ]>;
1605
1628
1606
1629
//
1607
1630
// BFIND
0 commit comments