Skip to content

Commit 8679826

Browse files
frasercrmckjsji
authored and committed
[NVPTX] Restore ISel for atomic ld/st with semantics
These were dropped during the WW14 pulldown. The code has been restored more or less intact, rather than being reworked to be more idiomatic with the surrounding code; that cleanup can be done later. Some CHECK lines had to be amended, swapping the order of various syntax components, because of how the upstream multiclasses now order them. To minimize downstream changes, it is simpler to adjust our tests than to change the asm syntax.
1 parent fb9979a commit 8679826

File tree

2 files changed

+711
-542
lines changed

2 files changed

+711
-542
lines changed

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2288,6 +2288,176 @@ defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
22882288
// Instantiate ATOM2_bitwise_impl (defined outside this excerpt) for the "or"
// and "xor" operations under the INT_PTX_SATOM_OR / INT_PTX_SATOM_XOR prefixes.
defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
22892289
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
22902290

2291+
// Base instruction class producing a single result.
//   AsmStr   - assembly string of the instruction.
//   regT     - value type of $result in the selection pattern.
//   regclass - register class of the $result output operand.
//   Preds    - predicates the instruction is gated on (via Requires).
//   ins      - input operand list of the instruction.
//   Operands - dag whose value is assigned to $result in the selection pattern.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
2292+
dag ins, dag Operands>
2293+
: NVPTXInst<(outs regclass:$result), ins,
2294+
AsmStr,
2295+
[(set regT:$result, Operands)]>,
2296+
Requires<Preds>;
2297+
2298+
// Defines two anonymous ATOM23_impl instructions that match intrinsic `Intr`
// applied to a single $src operand: one with $src in Int32Regs (i32) and one
// with $src in Int64Regs (i64). Both share AsmStr, regT, regclass and Preds,
// and both are defined with AddedComplexity = 1.
multiclass ATOM_LdP_impl<string AsmStr, Intrinsic Intr,
2299+
ValueType regT, NVPTXRegClass regclass,
2300+
list<Predicate> Preds> {
2301+
let AddedComplexity = 1 in {
2302+
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2303+
(ins Int32Regs:$src),
2304+
(Intr (i32 Int32Regs:$src))>;
2305+
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2306+
(ins Int64Regs:$src),
2307+
(Intr (i64 Int64Regs:$src))>;
2308+
}
2309+
}
2310+
2311+
// Builds the assembly string and looks up the intrinsic for one load variant,
// then forwards both to ATOM_LdP_impl.
//
// Assembly string: "ld.<SemStr>[.<ScopeStr>][.<SpaceStr>].<TypeStr> \t$result, [$src];"
//   - ".<ScopeStr>" is omitted when SemStr is "volatile".
//   - ".<SpaceStr>" is omitted when SpaceStr is "gen".
//
// Intrinsic record name: "int_nvvm_ld_<SpaceStr>_<IntTypeStr>[_<SemStr>][_<ScopeStr>]"
//   - "_<SemStr>" is omitted when SemStr is "relaxed".
//   - "_<ScopeStr>" is omitted when ScopeStr is "gpu" or empty.
// The name is resolved with !cast<Intrinsic>, so a record of that name must
// exist for each instantiation.
multiclass ATOM_LdN_impl<string IntTypeStr, string TypeStr,
2312+
string ScopeStr, string SpaceStr, string SemStr,
2313+
ValueType regT, NVPTXRegClass regclass,
2314+
list<Predicate> Preds> {
2315+
defm : ATOM_LdP_impl<"ld." # SemStr
2316+
# !if(!eq(SemStr, "volatile"), "", "." # ScopeStr)
2317+
# !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2318+
# "." # TypeStr
2319+
# " \t$result, [$src];",
2320+
!cast<Intrinsic>(
2321+
"int_nvvm_ld_" # SpaceStr # "_" # IntTypeStr
2322+
# !if(!eq(SemStr, "relaxed"), "", "_" # SemStr)
2323+
# !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)),
2324+
regT, regclass, Preds>;
2325+
}
2326+
2327+
// Instantiates ATOM_LdN_impl once per SpaceStr value: "gen", "global" and
// "shared". All other parameters are passed through unchanged.
multiclass ATOM_LdN_spaces_impl<string IntTypeStr, string TypeStr,
2328+
string ScopeStr, string SemStr, ValueType regT,
2329+
NVPTXRegClass regclass, list<Predicate> Preds> {
2330+
defm _gen : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "gen", SemStr,
2331+
regT, regclass, Preds>;
2332+
defm _global : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "global", SemStr,
2333+
regT, regclass, Preds>;
2334+
defm _shared : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "shared", SemStr,
2335+
regT, regclass, Preds>;
2336+
}
2337+
2338+
// Constructs variants for different semantic orders.
// For loads these are "relaxed" and "acquire"; each is expanded over the
// address spaces by ATOM_LdN_spaces_impl. Both variants append
// hasAtomSemantics to the caller-supplied Preds.
2339+
multiclass ATOM_LdA_impl<string IntTypeStr, string TypeStr,
2340+
string ScopeStr, ValueType regT, NVPTXRegClass regclass,
2341+
list<Predicate> Preds> {
2342+
defm _relaxed_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2343+
regT, regclass, !listconcat(Preds,[hasAtomSemantics])>;
2344+
defm _acquire_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "acquire",
2345+
regT, regclass, !listconcat(Preds,[hasAtomSemantics])>;
2346+
}
2347+
2348+
// Constructs variants for different scopes of atomic op.
// The "volatile" variant is instantiated directly with an empty ScopeStr and
// the unmodified Preds (it does not go through ATOM_LdA_impl, so
// hasAtomSemantics is not added). The relaxed/acquire variants are then
// instantiated for the "gpu" (empty name suffix), "cta" and "sys" scopes.
2349+
multiclass ATOM_LdS_impl<string IntTypeStr, string TypeStr,
2350+
ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds> {
2351+
defm _volatile_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, "", "volatile",
2352+
regT, regclass, Preds>;
2353+
defm "" : ATOM_LdA_impl<IntTypeStr, TypeStr, "gpu",
2354+
regT, regclass, Preds>;
2355+
defm _cta : ATOM_LdA_impl<IntTypeStr, TypeStr, "cta",
2356+
regT, regclass, Preds>;
2357+
defm _sys : ATOM_LdA_impl<IntTypeStr, TypeStr, "sys",
2358+
regT, regclass, Preds>;
2359+
}
2360+
2361+
// ld
// Instantiates ATOM_LdS_impl for each supported value type. The arguments
// are: intrinsic type tag ("i" or "f"), asm type string, result value type,
// result register class, and an empty extra-predicate list.
// NOTE(review): the `_u64` entry passes "s64" as the asm type string; this
// looks deliberate (it mirrors `_s32`/"s32"), but confirm against the tests.
2362+
multiclass ATOM_ld_impl {
2363+
defm _s32 : ATOM_LdS_impl<"i", "s32", i32, Int32Regs, []>;
2364+
defm _u64 : ATOM_LdS_impl<"i", "s64", i64, Int64Regs, []>;
2365+
defm _f32 : ATOM_LdS_impl<"f", "f32", f32, Float32Regs, []>;
2366+
defm _f64 : ATOM_LdS_impl<"f", "f64", f64, Float64Regs, []>;
2367+
}
2368+
2369+
// Instantiate all load variants from ATOM_ld_impl under the INT_PTX_LD prefix.
defm INT_PTX_LD : ATOM_ld_impl;
2370+
2371+
// Defines four anonymous store instructions (no outputs) that match intrinsic
// `Intr` applied to ($src, $b). $src is taken from Int32Regs (i32) or
// Int64Regs (i64); $b is either a register in `regclass` (typed `regT`) or an
// immediate operand `ImmType` matched by the `Imm` node with type `ImmTy`.
// All four share AsmStr and Preds.
multiclass ATOM_StP_impl<string AsmStr, Intrinsic Intr,
2372+
ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2373+
SDNode Imm, ValueType ImmTy,
2374+
list<Predicate> Preds> {
2375+
// Register-value forms; these are defined with AddedComplexity = 1.
let AddedComplexity = 1 in {
2376+
def : NVPTXInst<(outs), (ins Int32Regs:$src, regclass:$b),
2377+
AsmStr,
2378+
[(Intr (i32 Int32Regs:$src), (regT regclass:$b))]>,
2379+
Requires<Preds>;
2380+
def : NVPTXInst<(outs), (ins Int64Regs:$src, regclass:$b),
2381+
AsmStr,
2382+
[(Intr (i64 Int64Regs:$src), (regT regclass:$b))]>,
2383+
Requires<Preds>;
2384+
}
2385+
// Immediate-value forms; these are defined outside the AddedComplexity block.
def : NVPTXInst<(outs), (ins Int32Regs:$src, ImmType:$b),
2386+
AsmStr,
2387+
[(Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))]>,
2388+
Requires<Preds>;
2389+
def : NVPTXInst<(outs), (ins Int64Regs:$src, ImmType:$b),
2390+
AsmStr,
2391+
[(Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))]>,
2392+
Requires<Preds>;
2393+
}
2394+
2395+
// Builds the assembly string and looks up the intrinsic for one store variant,
// then forwards both to ATOM_StP_impl.
//
// Assembly string: "st.<SemStr>[.<ScopeStr>][.<SpaceStr>].<TypeStr> \t[$src], $b;"
//   - ".<ScopeStr>" is omitted when SemStr is "volatile".
//   - ".<SpaceStr>" is omitted when SpaceStr is "gen".
//
// Intrinsic record name: "int_nvvm_st_<SpaceStr>_<IntTypeStr>[_<SemStr>][_<ScopeStr>]"
//   - "_<SemStr>" is omitted when SemStr is "relaxed".
//   - "_<ScopeStr>" is omitted when ScopeStr is "gpu" or empty.
// The name is resolved with !cast<Intrinsic>, so a record of that name must
// exist for each instantiation.
multiclass ATOM_StN_impl<string IntTypeStr, string TypeStr,
2396+
string ScopeStr, string SpaceStr, string SemStr,
2397+
ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2398+
ValueType ImmTy, list<Predicate> Preds> {
2399+
defm : ATOM_StP_impl<"st." # SemStr
2400+
# !if(!eq(SemStr, "volatile"), "", "." # ScopeStr)
2401+
# !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2402+
# "." # TypeStr
2403+
# " \t[$src], $b;",
2404+
!cast<Intrinsic>(
2405+
"int_nvvm_st_" # SpaceStr # "_" # IntTypeStr
2406+
# !if(!eq(SemStr, "relaxed"), "", "_" # SemStr)
2407+
# !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)),
2408+
regT, regclass, ImmType, Imm, ImmTy, Preds>;
2409+
}
2410+
2411+
// Instantiates ATOM_StN_impl once per SpaceStr value: "gen", "global" and
// "shared". All other parameters are passed through unchanged.
multiclass ATOM_StN_spaces_impl<string IntTypeStr, string TypeStr,
2412+
string ScopeStr, string SemStr, ValueType regT,
2413+
NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2414+
ValueType ImmTy, list<Predicate> Preds> {
2415+
defm _gen : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "gen", SemStr,
2416+
regT, regclass, ImmType, Imm, ImmTy, Preds>;
2417+
defm _global : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "global", SemStr,
2418+
regT, regclass, ImmType, Imm, ImmTy, Preds>;
2419+
defm _shared : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "shared", SemStr,
2420+
regT, regclass, ImmType, Imm, ImmTy, Preds>;
2421+
}
2422+
2423+
// Constructs store variants for different semantic orders: "relaxed" and
// "release". Each is expanded over the address spaces by
// ATOM_StN_spaces_impl, and both append hasAtomSemantics to the
// caller-supplied Preds.
multiclass ATOM_StA_impl<string IntTypeStr, string TypeStr,
2424+
string ScopeStr, ValueType regT, NVPTXRegClass regclass,
2425+
Operand ImmType, SDNode Imm, ValueType ImmTy,
2426+
list<Predicate> Preds> {
2427+
defm _relaxed_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2428+
regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics])>;
2429+
defm _release_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "release",
2430+
regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics])>;
2431+
}
2432+
2433+
// Constructs store variants for different scopes. The "volatile" variant is
// instantiated directly with an empty ScopeStr and the unmodified Preds (it
// does not go through ATOM_StA_impl, so hasAtomSemantics is not added). The
// relaxed/release variants are then instantiated for the "gpu" (empty name
// suffix), "cta" and "sys" scopes.
multiclass ATOM_StS_impl<string IntTypeStr, string TypeStr,
2434+
ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2435+
ValueType ImmTy, list<Predicate> Preds> {
2436+
defm _volatile_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, "", "volatile",
2437+
regT, regclass, ImmType, Imm, ImmTy, Preds>;
2438+
defm "" : ATOM_StA_impl<IntTypeStr, TypeStr, "gpu",
2439+
regT, regclass, ImmType, Imm, ImmTy,
2440+
Preds>;
2441+
defm _cta : ATOM_StA_impl<IntTypeStr, TypeStr, "cta",
2442+
regT, regclass, ImmType, Imm, ImmTy,
2443+
Preds>;
2444+
defm _sys : ATOM_StA_impl<IntTypeStr, TypeStr, "sys",
2445+
regT, regclass, ImmType, Imm, ImmTy,
2446+
Preds>;
2447+
}
2448+
2449+
// st
// Instantiates ATOM_StS_impl for each supported value type. The arguments
// are: intrinsic type tag ("i" or "f"), asm type string, stored value type,
// register class, immediate operand, immediate matcher node (imm for
// integers, fpimm for floats), immediate value type, and an empty
// extra-predicate list.
// NOTE(review): the `_u64` entry passes "s64" as the asm type string; this
// looks deliberate (it mirrors `_s32`/"s32"), but confirm against the tests.
2450+
multiclass ATOM_st_impl {
2451+
defm _s32 : ATOM_StS_impl<"i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2452+
defm _u64 : ATOM_StS_impl<"i", "s64", i64, Int64Regs, i64imm, imm, i64, []>;
2453+
defm _f32 : ATOM_StS_impl<"f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2454+
[]>;
2455+
defm _f64 : ATOM_StS_impl<"f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2456+
[]>;
2457+
}
2458+
2459+
// Instantiate all store variants from ATOM_st_impl under the INT_PTX_ST prefix.
defm INT_PTX_ST : ATOM_st_impl;
2460+
22912461
//-----------------------------------
22922462
// Support for ldu on sm_20 or later
22932463
//-----------------------------------

0 commit comments

Comments
 (0)