Skip to content

Commit 8c49ab0

Browse files
committed
[NVPTX] Add add.cc/addc.cc/sub.cc/subc.cc for i64
PTX supports those instructions for i64 starting from 4.3. The patch also marks corresponding DAG nodes legal for both i32 and i64. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D124698
1 parent 938ed8a commit 8c49ab0

File tree

4 files changed

+76
-37
lines changed

4 files changed

+76
-37
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,17 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
487487
setOperationAction(ISD::CTLZ, Ty, Legal);
488488
}
489489

490+
setOperationAction(ISD::ADDC, MVT::i32, Legal);
491+
setOperationAction(ISD::ADDE, MVT::i32, Legal);
492+
setOperationAction(ISD::SUBC, MVT::i32, Legal);
493+
setOperationAction(ISD::SUBE, MVT::i32, Legal);
494+
if (STI.getPTXVersion() >= 43) {
495+
setOperationAction(ISD::ADDC, MVT::i64, Legal);
496+
setOperationAction(ISD::ADDE, MVT::i64, Legal);
497+
setOperationAction(ISD::SUBC, MVT::i64, Legal);
498+
setOperationAction(ISD::SUBE, MVT::i64, Legal);
499+
}
500+
490501
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
491502
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
492503
setOperationAction(ISD::CTTZ, MVT::i64, Expand);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def True : Predicate<"true">;
146146

147147
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
148148
def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">;
149+
def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">;
149150
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
150151
def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
151152
def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
@@ -204,17 +205,29 @@ multiclass I3<string OpcStr, SDNode OpNode> {
204205
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
205206
}
206207

207-
// Template for instructions which take 3 int32 args. The instructions are
208+
// Template for instructions which take 3 int args. The instructions are
208209
// named "<OpcStr>.s32" or "<OpcStr>.s64" (e.g. "addc.cc.s32").
209-
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
210-
def i32rr :
211-
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
212-
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
213-
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
214-
def i32ri :
215-
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
216-
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
217-
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
210+
multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
211+
let hasSideEffects = 1 in {
212+
def i32rr :
213+
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
214+
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
215+
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
216+
def i32ri :
217+
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
218+
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
219+
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
220+
def i64rr :
221+
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
222+
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
223+
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
224+
Requires<[hasPTX43]>;
225+
def i64ri :
226+
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
227+
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
228+
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
229+
Requires<[hasPTX43]>;
230+
}
218231
}
219232

220233
// Template for instructions which take three fp64 or fp32 args. The
@@ -584,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
584597
defm ADD : I3<"add.s", add>;
585598
defm SUB : I3<"sub.s", sub>;
586599

587-
// int32 addition and subtraction with carry-out.
588-
// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?).
589-
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
590-
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
600+
// int32 and int64 addition and subtraction with carry-out.
601+
defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
602+
defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
591603

592-
// int32 addition and subtraction with carry-in and carry-out.
593-
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
594-
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
604+
// int32 and int64 addition and subtraction with carry-in and carry-out.
605+
defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
606+
defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
595607

596608
defm MULT : I3<"mul.lo.s", mul>;
597609

llvm/test/CodeGen/NVPTX/add-128bit.ll

Lines changed: 0 additions & 20 deletions
This file was deleted.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY
2+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
4+
5+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
6+
7+
; COMMON-LABEL: test_add
8+
define i128 @test_add(i128 %a, i128 %b) {
9+
; NOCARRY: add.s64
10+
; NOCARRY-NEXT: setp.lt.u64
11+
; NOCARRY-NEXT: setp.lt.u64
12+
; NOCARRY-NEXT: selp.u64
13+
; NOCARRY-NEXT: selp.b64
14+
; NOCARRY-NEXT: add.s64
15+
16+
; CARRY: add.cc.s64
17+
; CARRY-NEXT: addc.cc.s64
18+
19+
%1 = add i128 %a, %b
20+
ret i128 %1
21+
}
22+
23+
; COMMON-LABEL: test_sub
24+
define i128 @test_sub(i128 %a, i128 %b) {
25+
; NOCARRY: sub.s64
26+
; NOCARRY-NEXT: setp.lt.u64
27+
; NOCARRY-NEXT: selp.s64
28+
; NOCARRY-NEXT: add.s64
29+
; NOCARRY-NEXT: sub.s64
30+
31+
; CARRY: sub.cc.s64
32+
; CARRY-NEXT: subc.cc.s64
33+
34+
%1 = sub i128 %a, %b
35+
ret i128 %1
36+
}

0 commit comments

Comments
 (0)