Skip to content

Commit 0f1b16d

Browse files
[NVPTX] Add syncscope support for cmpxchg (#140812)
This MR adds support for cmpxchg instructions with syncscope.
- Adds a new definition for atomic 3-operand instructions, with constant operands for sem, scope and addsp.
- Lowers cmpxchg SDNodes, populating sem, scope and addsp using SDNodeXForms.
- Handles syncscope correctly for emulation loops in AtomicExpand, in bracketInstructionWithFences.
- Modifies emitLeadingFence and emitTrailingFence to accept SyncScope as a parameter. Modifies the implementations of these in other backends, with the parameter being ignored.
- Adds tests for a _slice_ of all possible combinations of the cmpxchg instruction (with modifications to cmpxchg.py).

---------

Co-authored-by: gonzalobg <65027571+gonzalobg@users.noreply.github.com>
1 parent 4bd0e9e commit 0f1b16d

18 files changed

+3107
-13597
lines changed

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
268268
llvm_unreachable("Empty Modifier");
269269
}
270270

271-
void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
272-
raw_ostream &O, StringRef Modifier) {
271+
void NVPTXInstPrinter::printAtomicCode(const MCInst *MI, int OpNum,
272+
raw_ostream &O, StringRef Modifier) {
273273
const MCOperand &MO = MI->getOperand(OpNum);
274274
int Imm = (int)MO.getImm();
275275
if (Modifier == "sem") {
@@ -286,6 +286,12 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
286286
case NVPTX::Ordering::Release:
287287
O << ".release";
288288
return;
289+
case NVPTX::Ordering::AcquireRelease:
290+
O << ".acq_rel";
291+
return;
292+
case NVPTX::Ordering::SequentiallyConsistent:
293+
O << ".seq_cst";
294+
return;
289295
case NVPTX::Ordering::Volatile:
290296
O << ".volatile";
291297
return;
@@ -294,14 +300,14 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
294300
return;
295301
default:
296302
report_fatal_error(formatv(
297-
"NVPTX LdStCode Printer does not support \"{}\" sem modifier. "
298-
"Loads/Stores cannot be AcquireRelease or SequentiallyConsistent.",
303+
"NVPTX AtomicCode Printer does not support \"{}\" sem modifier. ",
299304
OrderingToString(Ordering)));
300305
}
301306
} else if (Modifier == "scope") {
302307
auto S = NVPTX::Scope(Imm);
303308
switch (S) {
304309
case NVPTX::Scope::Thread:
310+
case NVPTX::Scope::DefaultDevice:
305311
return;
306312
case NVPTX::Scope::System:
307313
O << ".sys";
@@ -316,9 +322,9 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
316322
O << ".gpu";
317323
return;
318324
}
319-
report_fatal_error(
320-
formatv("NVPTX LdStCode Printer does not support \"{}\" sco modifier.",
321-
ScopeToString(S)));
325+
report_fatal_error(formatv(
326+
"NVPTX AtomicCode Printer does not support \"{}\" scope modifier.",
327+
ScopeToString(S)));
322328
} else if (Modifier == "addsp") {
323329
auto A = NVPTX::AddressSpace(Imm);
324330
switch (A) {
@@ -334,7 +340,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
334340
return;
335341
}
336342
report_fatal_error(formatv(
337-
"NVPTX LdStCode Printer does not support \"{}\" addsp modifier.",
343+
"NVPTX AtomicCode Printer does not support \"{}\" addsp modifier.",
338344
AddressSpaceToString(A)));
339345
} else if (Modifier == "sign") {
340346
switch (Imm) {

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
4040
StringRef Modifier = {});
4141
void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
4242
StringRef Modifier = {});
43-
void printLdStCode(const MCInst *MI, int OpNum, raw_ostream &O,
44-
StringRef Modifier = {});
43+
void printAtomicCode(const MCInst *MI, int OpNum, raw_ostream &O,
44+
StringRef Modifier = {});
4545
void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
4646
StringRef Modifier = {});
4747
void printMemOperand(const MCInst *MI, int OpNum, raw_ostream &O,

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ enum Scope : ScopeUnderlyingType {
174174
Cluster = 2,
175175
Device = 3,
176176
System = 4,
177-
LASTSCOPE = System
177+
DefaultDevice = 5, // For SM < 70: denotes PTX op implicit/default .gpu scope
178+
LASTSCOPE = DefaultDevice
178179
};
179180

180181
using AddressSpaceUnderlyingType = unsigned int;

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
494494
return true;
495495
}
496496

497-
static std::optional<unsigned> convertAS(unsigned AS) {
497+
static std::optional<NVPTX::AddressSpace> convertAS(unsigned AS) {
498498
switch (AS) {
499499
case llvm::ADDRESS_SPACE_LOCAL:
500500
return NVPTX::AddressSpace::Local;
@@ -515,11 +515,42 @@ static std::optional<unsigned> convertAS(unsigned AS) {
515515
}
516516
}
517517

518-
static unsigned int getCodeAddrSpace(const MemSDNode *N) {
518+
NVPTX::AddressSpace NVPTXDAGToDAGISel::getAddrSpace(const MemSDNode *N) {
519519
return convertAS(N->getMemOperand()->getAddrSpace())
520520
.value_or(NVPTX::AddressSpace::Generic);
521521
}
522522

523+
// Translates the merged LLVM atomic ordering of memory node N into the
// NVPTX::Ordering value used for the instruction's "sem" operand.
// Returns NVPTX::Ordering::NotAtomic whenever the subtarget reports no
// memory-ordering support, regardless of the ordering carried by N.
NVPTX::Ordering NVPTXDAGToDAGISel::getMemOrder(const MemSDNode *N) const {
524+
// No "sem" orderings for SM/PTX versions which do not support memory ordering
525+
if (!Subtarget->hasMemoryOrdering())
526+
return NVPTX::Ordering::NotAtomic;
527+
auto Ordering = N->getMergedOrdering();
528+
switch (Ordering) {
529+
case AtomicOrdering::NotAtomic:
530+
return NVPTX::Ordering::NotAtomic;
531+
// Unordered and Monotonic both map to NVPTX Relaxed.
case AtomicOrdering::Unordered:
532+
case AtomicOrdering::Monotonic:
533+
return NVPTX::Ordering::Relaxed;
534+
case AtomicOrdering::Acquire:
535+
return NVPTX::Ordering::Acquire;
536+
case AtomicOrdering::Release:
537+
return NVPTX::Ordering::Release;
538+
case AtomicOrdering::AcquireRelease:
539+
return NVPTX::Ordering::AcquireRelease;
540+
case AtomicOrdering::SequentiallyConsistent:
541+
return NVPTX::Ordering::SequentiallyConsistent;
542+
}
543+
// Reached only if none of the cases above returned.
llvm_unreachable("Invalid atomic ordering");
544+
}
545+
546+
// Returns the NVPTX::Scope to use for the atomic memory node N.
// When the subtarget reports no scoped-atomics support, the result is
// NVPTX::Scope::DefaultDevice; otherwise the node's sync scope ID is looked
// up in Scopes.
// NOTE(review): Scopes is defined outside this view; presumably it maps
// LLVM SyncScope::IDs to NVPTX::Scope values — confirm.
NVPTX::Scope NVPTXDAGToDAGISel::getAtomicScope(const MemSDNode *N) const {
547+
// No "scope" modifier for SM/PTX versions which do not support scoped atomics
548+
// Functionally, these atomics are at device scope
549+
if (!Subtarget->hasAtomScope())
550+
return NVPTX::Scope::DefaultDevice;
551+
return Scopes[N->getSyncScopeID()];
552+
}
553+
523554
namespace {
524555

525556
struct OperationOrderings {
@@ -532,7 +563,7 @@ struct OperationOrderings {
532563
static OperationOrderings
533564
getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
534565
AtomicOrdering Ordering = N->getSuccessOrdering();
535-
auto CodeAddrSpace = getCodeAddrSpace(N);
566+
auto CodeAddrSpace = NVPTXDAGToDAGISel::getAddrSpace(N);
536567

537568
bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
538569
bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
@@ -756,7 +787,7 @@ NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
756787
}
757788

758789
static bool canLowerToLDG(const MemSDNode &N, const NVPTXSubtarget &Subtarget,
759-
unsigned CodeAddrSpace) {
790+
NVPTX::AddressSpace CodeAddrSpace) {
760791
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
761792
// space.
762793
return Subtarget.hasLDG() && CodeAddrSpace == NVPTX::AddressSpace::Global &&
@@ -788,6 +819,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
788819
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu
789820
: NVPTX::INT_MEMBAR_GL;
790821
case NVPTX::Scope::Thread:
822+
case NVPTX::Scope::DefaultDevice:
791823
report_fatal_error(
792824
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
793825
ScopeToString(S)));
@@ -807,6 +839,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
807839
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_gpu
808840
: NVPTX::INT_MEMBAR_GL;
809841
case NVPTX::Scope::Thread:
842+
case NVPTX::Scope::DefaultDevice:
810843
report_fatal_error(
811844
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
812845
ScopeToString(S)));
@@ -826,6 +859,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
826859
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
827860
: NVPTX::INT_MEMBAR_GL;
828861
case NVPTX::Scope::Thread:
862+
case NVPTX::Scope::DefaultDevice:
829863
report_fatal_error(
830864
formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
831865
ScopeToString(S)));
@@ -846,6 +880,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
846880
return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
847881
: NVPTX::INT_MEMBAR_GL;
848882
case NVPTX::Scope::Thread:
883+
case NVPTX::Scope::DefaultDevice:
849884
report_fatal_error(formatv("Unsupported scope \"{}\" for seq_cst fence.",
850885
ScopeToString(S)));
851886
}
@@ -1025,7 +1060,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
10251060
const MVT LoadedVT = LoadedEVT.getSimpleVT();
10261061

10271062
// Address Space Setting
1028-
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
1063+
const auto CodeAddrSpace = getAddrSpace(LD);
10291064
if (canLowerToLDG(*LD, *Subtarget, CodeAddrSpace))
10301065
return tryLDG(LD);
10311066

@@ -1097,7 +1132,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
10971132
const MVT MemVT = MemEVT.getSimpleVT();
10981133

10991134
// Address Space Setting
1100-
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
1135+
const auto CodeAddrSpace = getAddrSpace(LD);
11011136
if (canLowerToLDG(*LD, *Subtarget, CodeAddrSpace))
11021137
return tryLDG(LD);
11031138

@@ -1313,7 +1348,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
13131348
return false;
13141349

13151350
// Address Space Setting
1316-
const unsigned CodeAddrSpace = getCodeAddrSpace(ST);
1351+
const auto CodeAddrSpace = getAddrSpace(ST);
13171352

13181353
SDLoc DL(ST);
13191354
SDValue Chain = ST->getChain();
@@ -1363,7 +1398,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
13631398
assert(StoreVT.isSimple() && "Store value is not simple");
13641399

13651400
// Address Space Setting
1366-
const unsigned CodeAddrSpace = getCodeAddrSpace(ST);
1401+
const auto CodeAddrSpace = getAddrSpace(ST);
13671402
if (CodeAddrSpace == NVPTX::AddressSpace::Const) {
13681403
report_fatal_error("Cannot store to pointer that points to constant "
13691404
"memory space");

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
100100
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
101101
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
102102
}
103+
NVPTX::Ordering getMemOrder(const MemSDNode *N) const;
104+
NVPTX::Scope getAtomicScope(const MemSDNode *N) const;
103105

104106
bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
105107
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
@@ -114,6 +116,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
114116
std::pair<NVPTX::Ordering, NVPTX::Scope>
115117
insertMemoryInstructionFence(SDLoc DL, SDValue &Chain, MemSDNode *N);
116118
NVPTX::Scope getOperationScope(MemSDNode *N, NVPTX::Ordering O) const;
119+
120+
public:
121+
static NVPTX::AddressSpace getAddrSpace(const MemSDNode *N);
117122
};
118123

119124
class NVPTXDAGToDAGISelLegacy : public SelectionDAGISelLegacy {

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6315,10 +6315,12 @@ Instruction *NVPTXTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
63156315

63166316
// Specialize for cmpxchg
63176317
// Emit a fence.sc leading fence for cmpxchg seq_cst which are not emulated
6318+
SyncScope::ID SSID = cast<AtomicCmpXchgInst>(Inst)->getSyncScopeID();
63186319
if (isReleaseOrStronger(Ord))
6319-
return Ord == AtomicOrdering::SequentiallyConsistent
6320-
? Builder.CreateFence(AtomicOrdering::SequentiallyConsistent)
6321-
: Builder.CreateFence(AtomicOrdering::Release);
6320+
return Builder.CreateFence(Ord == AtomicOrdering::SequentiallyConsistent
6321+
? Ord
6322+
: AtomicOrdering::Release,
6323+
SSID);
63226324

63236325
return nullptr;
63246326
}
@@ -6330,15 +6332,15 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
63306332
if (!isa<AtomicCmpXchgInst>(Inst))
63316333
return TargetLoweringBase::emitTrailingFence(Builder, Inst, Ord);
63326334

6335+
auto *CI = cast<AtomicCmpXchgInst>(Inst);
63336336
auto CASWidth =
6334-
cast<IntegerType>(
6335-
dyn_cast<AtomicCmpXchgInst>(Inst)->getCompareOperand()->getType())
6336-
->getBitWidth();
6337+
cast<IntegerType>(CI->getCompareOperand()->getType())->getBitWidth();
6338+
SyncScope::ID SSID = CI->getSyncScopeID();
63376339
// Do not emit a trailing fence for cmpxchg seq_cst which are not emulated
63386340
if (isAcquireOrStronger(Ord) &&
63396341
(Ord != AtomicOrdering::SequentiallyConsistent ||
63406342
CASWidth < STI.getMinCmpXchgSizeInBits()))
6341-
return Builder.CreateFence(AtomicOrdering::Acquire);
6343+
return Builder.CreateFence(AtomicOrdering::Acquire, SSID);
63426344

63436345
return nullptr;
63446346
}

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,8 +1608,8 @@ def ADDR : Operand<pAny> {
16081608
let MIOperandInfo = (ops ADDR_base, i32imm);
16091609
}
16101610

1611-
def LdStCode : Operand<i32> {
1612-
let PrintMethod = "printLdStCode";
1611+
def AtomicCode : Operand<i32> {
1612+
let PrintMethod = "printAtomicCode";
16131613
}
16141614

16151615
def MmaCode : Operand<i32> {
@@ -1962,7 +1962,7 @@ defm ProxyRegB64 : ProxyRegInst<"b64", B64>;
19621962
class LD<NVPTXRegClass regclass>
19631963
: NVPTXInst<
19641964
(outs regclass:$dst),
1965-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
1965+
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
19661966
i32imm:$fromWidth, ADDR:$addr),
19671967
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
19681968
"\t$dst, [$addr];", []>;
@@ -1978,7 +1978,7 @@ class ST<DAGOperand O>
19781978
: NVPTXInst<
19791979
(outs),
19801980
(ins O:$src,
1981-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$toWidth,
1981+
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
19821982
ADDR:$addr),
19831983
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
19841984
" \t[$addr], $src;", []>;
@@ -1996,21 +1996,21 @@ let mayStore=1, hasSideEffects=0 in {
19961996
multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
19971997
def _v2 : NVPTXInst<
19981998
(outs regclass:$dst1, regclass:$dst2),
1999-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2000-
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
1999+
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
2000+
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
20012001
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
20022002
"\t{{$dst1, $dst2}}, [$addr];", []>;
20032003
def _v4 : NVPTXInst<
20042004
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2005-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
2006-
LdStCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
2005+
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
2006+
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
20072007
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
20082008
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
20092009
if support_v8 then
20102010
def _v8 : NVPTXInst<
20112011
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
20122012
regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
2013-
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Sign,
2013+
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
20142014
i32imm:$fromWidth, ADDR:$addr),
20152015
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
20162016
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
@@ -2027,14 +2027,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
20272027
def _v2 : NVPTXInst<
20282028
(outs),
20292029
(ins O:$src1, O:$src2,
2030-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
2030+
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
20312031
ADDR:$addr),
20322032
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
20332033
"\t[$addr], {{$src1, $src2}};", []>;
20342034
def _v4 : NVPTXInst<
20352035
(outs),
20362036
(ins O:$src1, O:$src2, O:$src3, O:$src4,
2037-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
2037+
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
20382038
ADDR:$addr),
20392039
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
20402040
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
@@ -2043,7 +2043,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
20432043
(outs),
20442044
(ins O:$src1, O:$src2, O:$src3, O:$src4,
20452045
O:$src5, O:$src6, O:$src7, O:$src8,
2046-
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, i32imm:$fromWidth,
2046+
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
20472047
ADDR:$addr),
20482048
"st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth "
20492049
"\t[$addr], "

0 commit comments

Comments
 (0)