@@ -235,6 +235,7 @@ def F64RT : RegTyInfo<f64, Float64Regs, f64imm, fpimm>;
235
235
def F16RT : RegTyInfo<f16, Int16Regs, f16imm, fpimm, supports_imm = 0>;
236
236
def BF16RT : RegTyInfo<bf16, Int16Regs, bf16imm, fpimm, supports_imm = 0>;
237
237
238
+ // RegTyInfo for the packed v2f32 type: values use Int64Regs. The two
+ // immediate-related arguments are left unset (?) and supports_imm = 0, the
+ // same shape as the F16X2RT / BF16X2RT entries that follow.
+ def F32X2RT : RegTyInfo<v2f32, Int64Regs, ?, ?, supports_imm = 0>;
238
239
def F16X2RT : RegTyInfo<v2f16, Int32Regs, ?, ?, supports_imm = 0>;
239
240
def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
240
241
@@ -446,7 +447,18 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
446
447
(ins Float32Regs:$a, f32imm:$b),
447
448
op_str # ".f32 \t$dst, $a, $b;",
448
449
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
449
-
450
+ // Packed v2f32 register-register form emitting the ".ftz.f32x2" variant of
+ // op_str. Both operands and the result are Int64Regs. Guarded by both
+ // doF32FTZ and hasF32x2Instructions.
+ def f32x2rr_ftz :
451
+ NVPTXInst<(outs Int64Regs:$dst),
452
+ (ins Int64Regs:$a, Int64Regs:$b),
453
+ op_str # ".ftz.f32x2 \t$dst, $a, $b;",
454
+ [(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
455
+ Requires<[doF32FTZ, hasF32x2Instructions]>;
456
+ // Packed v2f32 register-register form emitting the plain ".f32x2" variant of
+ // op_str (no .ftz modifier). Matches the same (op_pat v2f32, v2f32) pattern
+ // as f32x2rr_ftz but is guarded by hasF32x2Instructions only.
+ def f32x2rr :
457
+ NVPTXInst<(outs Int64Regs:$dst),
458
+ (ins Int64Regs:$a, Int64Regs:$b),
459
+ op_str # ".f32x2 \t$dst, $a, $b;",
460
+ [(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>,
461
+ Requires<[hasF32x2Instructions]>;
450
462
def f16rr_ftz :
451
463
NVPTXInst<(outs Int16Regs:$dst),
452
464
(ins Int16Regs:$a, Int16Regs:$b),
@@ -478,7 +490,6 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
478
490
op_str # ".bf16 \t$dst, $a, $b;",
479
491
[(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>,
480
492
Requires<[hasBF16Math]>;
481
-
482
493
def bf16x2rr :
483
494
NVPTXInst<(outs Int32Regs:$dst),
484
495
(ins Int32Regs:$a, Int32Regs:$b),
@@ -1416,6 +1427,8 @@ defm BFMA16 : FMA<"fma.rn.bf16", BF16RT, [hasBF16Math]>;
1416
1427
defm BFMA16x2 : FMA<"fma.rn.bf16x2", BF16X2RT, [hasBF16Math]>;
1417
1428
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", F32RT, [doF32FTZ]>;
1418
1429
defm FMA32 : FMA<"fma.rn.f32", F32RT>;
1430
+ // Packed f32x2 FMA, in .ftz and plain variants. Both are gated on
+ // hasF32x2Instructions, matching the f32x2rr / f32x2rr_ftz definitions in F3,
+ // so fma.rn{.ftz}.f32x2 is never selected on targets without packed f32x2
+ // support.
+ defm FMA32x2_ftz : FMA<"fma.rn.ftz.f32x2", F32X2RT, [doF32FTZ, hasF32x2Instructions]>;
1431
+ defm FMA32x2 : FMA<"fma.rn.f32x2", F32X2RT, [hasF32x2Instructions]>;
1419
1432
defm FMA64 : FMA<"fma.rn.f64", F64RT>;
1420
1433
1421
1434
// sin/cos
0 commit comments