Skip to content

Commit 1eb027b

Browse files
committed
support fadd, fsub, fmul, fma and load on v2f32
1 parent e46f256 commit 1eb027b

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,7 @@ def F64RT : RegTyInfo<f64, Float64Regs, f64imm, fpimm>;
238238
def F16RT : RegTyInfo<f16, Int16Regs, f16imm, fpimm, supports_imm = 0>;
239239
def BF16RT : RegTyInfo<bf16, Int16Regs, bf16imm, fpimm, supports_imm = 0>;
240240

241+
// Register-type info for v2f32: values of this type are assigned to Int64Regs.
// The immediate-operand fields are left unset (?) and supports_imm = 0, the
// same shape as the F16X2RT / BF16X2RT entries below.
def F32X2RT : RegTyInfo<v2f32, Int64Regs, ?, ?, supports_imm = 0>;
241242
def F16X2RT : RegTyInfo<v2f16, Int32Regs, ?, ?, supports_imm = 0>;
242243
def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
243244

@@ -408,7 +409,18 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
408409
(ins Float32Regs:$a, f32imm:$b),
409410
op_str # ".f32 \t$dst, $a, $b;",
410411
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
411-
412+
// v2f32 register-register form with the ".ftz" modifier: both inputs and the
// result are v2f32 values held in Int64Regs. The pattern is guarded by both
// doF32FTZ and hasF32x2Instructions.
def f32x2rr_ftz :
413+
NVPTXInst<(outs Int64Regs:$dst),
414+
(ins Int64Regs:$a, Int64Regs:$b),
415+
op_str # ".ftz.f32x2 \t$dst, $a, $b;",
416+
[(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
417+
Requires<[doF32FTZ, hasF32x2Instructions]>;
418+
// v2f32 register-register form without ".ftz": same operands and pattern as
// f32x2rr_ftz, guarded only by hasF32x2Instructions.
def f32x2rr :
419+
NVPTXInst<(outs Int64Regs:$dst),
420+
(ins Int64Regs:$a, Int64Regs:$b),
421+
op_str # ".f32x2 \t$dst, $a, $b;",
422+
[(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
423+
Requires<[hasF32x2Instructions]>;
412424
def f16rr_ftz :
413425
NVPTXInst<(outs Int16Regs:$dst),
414426
(ins Int16Regs:$a, Int16Regs:$b),
@@ -440,7 +452,6 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
440452
op_str # ".bf16 \t$dst, $a, $b;",
441453
[(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>,
442454
Requires<[hasBF16Math]>;
443-
444455
def bf16x2rr :
445456
NVPTXInst<(outs Int32Regs:$dst),
446457
(ins Int32Regs:$a, Int32Regs:$b),
@@ -1363,6 +1374,8 @@ defm BFMA16 : FMA<"fma.rn.bf16", BF16RT, [hasBF16Math]>;
13631374
defm BFMA16x2 : FMA<"fma.rn.bf16x2", BF16X2RT, [hasBF16Math]>;
13641375
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", F32RT, [doF32FTZ]>;
13651376
defm FMA32 : FMA<"fma.rn.f32", F32RT>;
1377+
// Packed v2f32 FMA, flush-to-zero variant. Guarded by hasF32x2Instructions in
// addition to doF32FTZ, matching the predicate list on the f32x2rr_ftz
// add/sub/mul pattern in F3, so the packed form is only selected when that
// predicate holds.
defm FMA32x2_ftz : FMA<"fma.rn.ftz.f32x2", F32X2RT, [doF32FTZ, hasF32x2Instructions]>;
1378+
// Packed v2f32 FMA. Guarded by hasF32x2Instructions, matching the predicate on
// the f32x2rr add/sub/mul pattern in F3; without it this pattern would be
// unconditional for every v2f32 fma.
defm FMA32x2 : FMA<"fma.rn.f32x2", F32X2RT, [hasF32x2Instructions]>;
13661379
defm FMA64 : FMA<"fma.rn.f64", F64RT>;
13671380

13681381
// sin/cos

0 commit comments

Comments
 (0)