@@ -2288,6 +2288,176 @@ defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
2288
2288
// Instantiate ATOM2_bitwise_impl once for "or" and once for "xor"; the
// generated records are prefixed INT_PTX_SATOM_OR / INT_PTX_SATOM_XOR.
// NOTE(review): ATOM2_bitwise_impl is defined outside this excerpt, so what it
// expands to cannot be confirmed from here.
defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
2289
2289
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2290
2290
2291
// ATOM23_impl - base class for a single-result instruction whose selection
// pattern is supplied by the caller.
//
// Template arguments:
//   AsmStr   - assembly string handed to NVPTXInst.
//   regT     - value type given to $result inside the selection pattern.
//   regclass - register class of the single output operand $result.
//   Preds    - predicates forwarded to Requires<>.
//   ins      - input operand dag handed to NVPTXInst.
//   Operands - dag whose value is assigned to $result in the pattern, i.e. the
//              pattern is exactly [(set regT:$result, Operands)].
+ class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
2292
+ dag ins, dag Operands>
2293
+ : NVPTXInst<(outs regclass:$result), ins,
2294
+ AsmStr,
2295
+ [(set regT:$result, Operands)]>,
2296
+ Requires<Preds>;
2297
+
2298
// ATOM_LdP_impl - emits two anonymous ATOM23_impl records for one load
// intrinsic: one taking $src in Int32Regs (typed i32) and one taking $src in
// Int64Regs (typed i64). In both, the pattern is Intr applied to $src alone and
// the result lands in regclass:$result.
//
// Template arguments:
//   AsmStr   - assembly string shared by both records.
//   Intr     - intrinsic matched by the selection pattern.
//   regT     - value type of the result.
//   regclass - register class of the result.
//   Preds    - predicates forwarded to ATOM23_impl (and from there to Requires<>).
//
// NOTE(review): $src is presumably the address operand in its 32-bit and 64-bit
// pointer forms (the caller in this file prints it as "[$src]") - confirm.
+ multiclass ATOM_LdP_impl<string AsmStr, Intrinsic Intr,
2299
+ ValueType regT, NVPTXRegClass regclass,
2300
+ list<Predicate> Preds> {
// Both records below are defined with AddedComplexity raised to 1.
2301
+ let AddedComplexity = 1 in {
2302
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2303
+ (ins Int32Regs:$src),
2304
+ (Intr (i32 Int32Regs:$src))>;
2305
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2306
+ (ins Int64Regs:$src),
2307
+ (Intr (i64 Int64Regs:$src))>;
2308
+ }
2309
+ }
2310
+
2311
// ATOM_LdN_impl - builds the assembly string and the intrinsic record name for
// one (type, scope, space, semantics) combination and forwards them to
// ATOM_LdP_impl.
//
// Assembly string, concatenated in this order:
//   "ld." SemStr
//   "." ScopeStr        - omitted when SemStr is "volatile"
//   "." SpaceStr        - omitted when SpaceStr is "gen"
//   "." TypeStr
//   " \t$result, [$src];"
//
// Intrinsic record name, resolved with !cast<Intrinsic>:
//   "int_nvvm_ld_" SpaceStr "_" IntTypeStr
//   "_" SemStr          - omitted when SemStr is "relaxed"
//   "_" ScopeStr        - omitted when ScopeStr is "gpu" or ""
//
// Example: IntTypeStr="i", TypeStr="s32", ScopeStr="cta", SpaceStr="global",
// SemStr="acquire" gives the asm "ld.acquire.cta.global.s32 \t$result, [$src];"
// and the intrinsic record int_nvvm_ld_global_i_acquire_cta.
//
// regT, regclass and Preds are passed through unchanged.
// NOTE(review): the intrinsic records named here are not defined in this
// excerpt; they are expected to exist elsewhere for the !cast to resolve.
+ multiclass ATOM_LdN_impl<string IntTypeStr, string TypeStr,
2312
+ string ScopeStr, string SpaceStr, string SemStr,
2313
+ ValueType regT, NVPTXRegClass regclass,
2314
+ list<Predicate> Preds> {
2315
+ defm : ATOM_LdP_impl<"ld." # SemStr
2316
+ # !if(!eq(SemStr, "volatile"), "", "." # ScopeStr)
2317
+ # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2318
+ # "." # TypeStr
2319
+ # " \t$result, [$src];",
2320
+ !cast<Intrinsic>(
2321
+ "int_nvvm_ld_" # SpaceStr # "_" # IntTypeStr
2322
+ # !if(!eq(SemStr, "relaxed"), "", "_" # SemStr)
2323
+ # !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)),
2324
+ regT, regclass, Preds>;
2325
+ }
2326
+
2327
// ATOM_LdN_spaces_impl - instantiates ATOM_LdN_impl three times, once per
// SpaceStr value "gen", "global" and "shared", under the name suffixes _gen,
// _global and _shared. All other template arguments are forwarded unchanged.
+ multiclass ATOM_LdN_spaces_impl<string IntTypeStr, string TypeStr,
2328
+ string ScopeStr, string SemStr, ValueType regT,
2329
+ NVPTXRegClass regclass, list<Predicate> Preds> {
2330
+ defm _gen : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "gen", SemStr,
2331
+ regT, regclass, Preds>;
2332
+ defm _global : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "global", SemStr,
2333
+ regT, regclass, Preds>;
2334
+ defm _shared : ATOM_LdN_impl<IntTypeStr, TypeStr, ScopeStr, "shared", SemStr,
2335
+ regT, regclass, Preds>;
2336
+ }
2337
+
2338
// ATOM_LdA_impl - for one scope, instantiates ATOM_LdN_spaces_impl with SemStr
// "relaxed" (suffix _relaxed_) and SemStr "acquire" (suffix _acquire_).
// Both instantiations append hasAtomSemantics to the caller's Preds list.
// NOTE(review): hasAtomSemantics is defined outside this excerpt; the target
// features it requires cannot be confirmed from here.
+ // Constructs variants for different semantic orders.
2339
+ multiclass ATOM_LdA_impl<string IntTypeStr, string TypeStr,
2340
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass,
2341
+ list<Predicate> Preds> {
2342
+ defm _relaxed_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2343
+ regT, regclass, !listconcat(Preds,[hasAtomSemantics])>;
2344
+ defm _acquire_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "acquire",
2345
+ regT, regclass, !listconcat(Preds,[hasAtomSemantics])>;
2346
+ }
2347
+
2348
// ATOM_LdS_impl - for one data type, produces:
//   _volatile_ : ATOM_LdN_spaces_impl with SemStr "volatile" and an empty
//                ScopeStr; Preds is passed as-is, so hasAtomSemantics is NOT
//                added on this path.
//   ""         : ATOM_LdA_impl with ScopeStr "gpu" (no name suffix).
//   _cta       : ATOM_LdA_impl with ScopeStr "cta".
//   _sys       : ATOM_LdA_impl with ScopeStr "sys".
+ // Constructs variants for different scopes of atomic op.
2349
+ multiclass ATOM_LdS_impl<string IntTypeStr, string TypeStr,
2350
+ ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds> {
2351
+ defm _volatile_ : ATOM_LdN_spaces_impl<IntTypeStr, TypeStr, "", "volatile",
2352
+ regT, regclass, Preds>;
2353
+ defm "" : ATOM_LdA_impl<IntTypeStr, TypeStr, "gpu",
2354
+ regT, regclass, Preds>;
2355
+ defm _cta : ATOM_LdA_impl<IntTypeStr, TypeStr, "cta",
2356
+ regT, regclass, Preds>;
2357
+ defm _sys : ATOM_LdA_impl<IntTypeStr, TypeStr, "sys",
2358
+ regT, regclass, Preds>;
2359
+ }
2360
+
2361
// ATOM_ld_impl - top-level load multiclass. Instantiates ATOM_LdS_impl for four
// data types, with no extra predicates:
//   suffix  IntTypeStr  TypeStr  value type  register class
//   _s32    "i"         "s32"    i32         Int32Regs
//   _u64    "i"         "s64"    i64         Int64Regs
//   _f32    "f"         "f32"    f32         Float32Regs
//   _f64    "f"         "f64"    f64         Float64Regs
// NOTE(review): the _u64 suffix is paired with TypeStr "s64", so the emitted
// asm type is ".s64" - confirm the name/type mismatch is intentional.
+ // ld
2362
+ multiclass ATOM_ld_impl {
2363
+ defm _s32 : ATOM_LdS_impl<"i", "s32", i32, Int32Regs, []>;
2364
+ defm _u64 : ATOM_LdS_impl<"i", "s64", i64, Int64Regs, []>;
2365
+ defm _f32 : ATOM_LdS_impl<"f", "f32", f32, Float32Regs, []>;
2366
+ defm _f64 : ATOM_LdS_impl<"f", "f64", f64, Float64Regs, []>;
2367
+ }
2368
+
2369
// Expand every load variant produced by ATOM_ld_impl under the INT_PTX_LD
// name prefix.
+ defm INT_PTX_LD : ATOM_ld_impl;
2370
+
2371
// ATOM_StP_impl - emits four anonymous NVPTXInst records, none with outputs,
// for one store intrinsic. Each pattern is Intr applied to ($src, $b):
//   1. $src in Int32Regs (i32), $b in regclass (regT)
//   2. $src in Int64Regs (i64), $b in regclass (regT)
//   3. $src in Int32Regs (i32), $b an ImmType operand matched as (ImmTy Imm:$b)
//   4. $src in Int64Regs (i64), $b an ImmType operand matched as (ImmTy Imm:$b)
//
// Template arguments:
//   AsmStr   - assembly string shared by all four records.
//   Intr     - intrinsic matched by the selection patterns.
//   regT     - value type of the register form of $b.
//   regclass - register class of the register form of $b.
//   ImmType  - operand type of the immediate form of $b.
//   Imm      - SDNode used to match the immediate in the pattern.
//   ImmTy    - value type of the immediate in the pattern.
//   Preds    - predicates forwarded to Requires<>.
//
// NOTE(review): $src is presumably the address and $b the stored value (the
// caller in this file prints them as "[$src], $b") - confirm.
+ multiclass ATOM_StP_impl<string AsmStr, Intrinsic Intr,
2372
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType,
2373
+ SDNode Imm, ValueType ImmTy,
2374
+ list<Predicate> Preds> {
// Only the two register-value records are defined with AddedComplexity = 1;
// the two immediate-value records further down keep the default.
// NOTE(review): confirm this asymmetry yields the intended selection priority.
2375
+ let AddedComplexity = 1 in {
2376
+ def : NVPTXInst<(outs), (ins Int32Regs:$src, regclass:$b),
2377
+ AsmStr,
2378
+ [(Intr (i32 Int32Regs:$src), (regT regclass:$b))]>,
2379
+ Requires<Preds>;
2380
+ def : NVPTXInst<(outs), (ins Int64Regs:$src, regclass:$b),
2381
+ AsmStr,
2382
+ [(Intr (i64 Int64Regs:$src), (regT regclass:$b))]>,
2383
+ Requires<Preds>;
2384
+ }
2385
+ def : NVPTXInst<(outs), (ins Int32Regs:$src, ImmType:$b),
2386
+ AsmStr,
2387
+ [(Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))]>,
2388
+ Requires<Preds>;
2389
+ def : NVPTXInst<(outs), (ins Int64Regs:$src, ImmType:$b),
2390
+ AsmStr,
2391
+ [(Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))]>,
2392
+ Requires<Preds>;
2393
+ }
2394
+
2395
// ATOM_StN_impl - builds the assembly string and the intrinsic record name for
// one (type, scope, space, semantics) combination and forwards them to
// ATOM_StP_impl.
//
// Assembly string, concatenated in this order:
//   "st." SemStr
//   "." ScopeStr        - omitted when SemStr is "volatile"
//   "." SpaceStr        - omitted when SpaceStr is "gen"
//   "." TypeStr
//   " \t[$src], $b;"
//
// Intrinsic record name, resolved with !cast<Intrinsic>:
//   "int_nvvm_st_" SpaceStr "_" IntTypeStr
//   "_" SemStr          - omitted when SemStr is "relaxed"
//   "_" ScopeStr        - omitted when ScopeStr is "gpu" or ""
//
// Example: IntTypeStr="f", TypeStr="f32", ScopeStr="sys", SpaceStr="gen",
// SemStr="release" gives the asm "st.release.sys.f32 \t[$src], $b;" and the
// intrinsic record int_nvvm_st_gen_f_release_sys.
//
// regT, regclass, ImmType, Imm, ImmTy and Preds are passed through unchanged.
// NOTE(review): the intrinsic records named here are not defined in this
// excerpt; they are expected to exist elsewhere for the !cast to resolve.
+ multiclass ATOM_StN_impl<string IntTypeStr, string TypeStr,
2396
+ string ScopeStr, string SpaceStr, string SemStr,
2397
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2398
+ ValueType ImmTy, list<Predicate> Preds> {
2399
+ defm : ATOM_StP_impl<"st." # SemStr
2400
+ # !if(!eq(SemStr, "volatile"), "", "." # ScopeStr)
2401
+ # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2402
+ # "." # TypeStr
2403
+ # " \t[$src], $b;",
2404
+ !cast<Intrinsic>(
2405
+ "int_nvvm_st_" # SpaceStr # "_" # IntTypeStr
2406
+ # !if(!eq(SemStr, "relaxed"), "", "_" # SemStr)
2407
+ # !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)),
2408
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
2409
+ }
2410
+
2411
// ATOM_StN_spaces_impl - instantiates ATOM_StN_impl three times, once per
// SpaceStr value "gen", "global" and "shared", under the name suffixes _gen,
// _global and _shared. All other template arguments are forwarded unchanged.
+ multiclass ATOM_StN_spaces_impl<string IntTypeStr, string TypeStr,
2412
+ string ScopeStr, string SemStr, ValueType regT,
2413
+ NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2414
+ ValueType ImmTy, list<Predicate> Preds> {
2415
+ defm _gen : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "gen", SemStr,
2416
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
2417
+ defm _global : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "global", SemStr,
2418
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
2419
+ defm _shared : ATOM_StN_impl<IntTypeStr, TypeStr, ScopeStr, "shared", SemStr,
2420
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
2421
+ }
2422
+
2423
// ATOM_StA_impl - for one scope, instantiates ATOM_StN_spaces_impl with SemStr
// "relaxed" (suffix _relaxed_) and SemStr "release" (suffix _release_).
// Both instantiations append hasAtomSemantics to the caller's Preds list.
// NOTE(review): hasAtomSemantics is defined outside this excerpt; the target
// features it requires cannot be confirmed from here.
+ multiclass ATOM_StA_impl<string IntTypeStr, string TypeStr,
2424
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass,
2425
+ Operand ImmType, SDNode Imm, ValueType ImmTy,
2426
+ list<Predicate> Preds> {
2427
+ defm _relaxed_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "relaxed",
2428
+ regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics])>;
2429
+ defm _release_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, ScopeStr, "release",
2430
+ regT, regclass, ImmType, Imm, ImmTy, !listconcat(Preds,[hasAtomSemantics])>;
2431
+ }
2432
+
2433
// ATOM_StS_impl - for one data type, produces:
//   _volatile_ : ATOM_StN_spaces_impl with SemStr "volatile" and an empty
//                ScopeStr; Preds is passed as-is, so hasAtomSemantics is NOT
//                added on this path.
//   ""         : ATOM_StA_impl with ScopeStr "gpu" (no name suffix).
//   _cta       : ATOM_StA_impl with ScopeStr "cta".
//   _sys       : ATOM_StA_impl with ScopeStr "sys".
+ multiclass ATOM_StS_impl<string IntTypeStr, string TypeStr,
2434
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
2435
+ ValueType ImmTy, list<Predicate> Preds> {
2436
+ defm _volatile_ : ATOM_StN_spaces_impl<IntTypeStr, TypeStr, "", "volatile",
2437
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
2438
+ defm "" : ATOM_StA_impl<IntTypeStr, TypeStr, "gpu",
2439
+ regT, regclass, ImmType, Imm, ImmTy,
2440
+ Preds>;
2441
+ defm _cta : ATOM_StA_impl<IntTypeStr, TypeStr, "cta",
2442
+ regT, regclass, ImmType, Imm, ImmTy,
2443
+ Preds>;
2444
+ defm _sys : ATOM_StA_impl<IntTypeStr, TypeStr, "sys",
2445
+ regT, regclass, ImmType, Imm, ImmTy,
2446
+ Preds>;
2447
+ }
2448
+
2449
// ATOM_st_impl - top-level store multiclass. Instantiates ATOM_StS_impl for
// four data types, with no extra predicates:
//   suffix  IntTypeStr  TypeStr  type  register class  imm operand  imm node
//   _s32    "i"         "s32"    i32   Int32Regs       i32imm       imm
//   _u64    "i"         "s64"    i64   Int64Regs       i64imm       imm
//   _f32    "f"         "f32"    f32   Float32Regs     f32imm       fpimm
//   _f64    "f"         "f64"    f64   Float64Regs     f64imm       fpimm
// NOTE(review): the _u64 suffix is paired with TypeStr "s64", so the emitted
// asm type is ".s64" - confirm the name/type mismatch is intentional.
+ // st
2450
+ multiclass ATOM_st_impl {
2451
+ defm _s32 : ATOM_StS_impl<"i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
2452
+ defm _u64 : ATOM_StS_impl<"i", "s64", i64, Int64Regs, i64imm, imm, i64, []>;
2453
+ defm _f32 : ATOM_StS_impl<"f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
2454
+ []>;
2455
+ defm _f64 : ATOM_StS_impl<"f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
2456
+ []>;
2457
+ }
2458
+
2459
// Expand every store variant produced by ATOM_st_impl under the INT_PTX_ST
// name prefix.
+ defm INT_PTX_ST : ATOM_st_impl;
2460
+
2291
2461
//-----------------------------------
2292
2462
// Support for ldu on sm_20 or later
2293
2463
//-----------------------------------
0 commit comments