Skip to content

Commit 549e118

Browse files
author
Kai Luo
committed
[PowerPC] Support 16-byte lock free atomics on pwr8 and up
Make 16-byte atomic types 16-byte aligned on PPC64, consistent with GCC. Also enable inlining of 16-byte atomics on non-AIX PPC64 targets. Reviewed By: hubert.reinterpretcast. Differential Revision: https://reviews.llvm.org/D122377
1 parent 9fdd258 commit 549e118

File tree

8 files changed

+1222
-22
lines changed

8 files changed

+1222
-22
lines changed

clang/lib/Basic/Targets/PPC.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
8181
IsISA3_0 = true;
8282
} else if (Feature == "+isa-v31-instructions") {
8383
IsISA3_1 = true;
84+
} else if (Feature == "+quadword-atomics") {
85+
HasQuadwordAtomics = true;
8486
}
8587
// TODO: Finish this list and add an assert that we've handled them
8688
// all.
@@ -550,6 +552,12 @@ bool PPCTargetInfo::initFeatureMap(
550552
Features["isa-v30-instructions"] =
551553
llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
552554

555+
Features["quadword-atomics"] =
556+
getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
557+
.Case("pwr9", true)
558+
.Case("pwr8", true)
559+
.Default(false);
560+
553561
// Power10 includes all the same features as Power9 plus any features specific
554562
// to the Power10 core.
555563
if (CPU == "pwr10" || CPU == "power10") {
@@ -660,6 +668,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
660668
.Case("isa-v207-instructions", IsISA2_07)
661669
.Case("isa-v30-instructions", IsISA3_0)
662670
.Case("isa-v31-instructions", IsISA3_1)
671+
.Case("quadword-atomics", HasQuadwordAtomics)
663672
.Default(false);
664673
}
665674

clang/lib/Basic/Targets/PPC.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
7878
bool IsISA2_07 = false;
7979
bool IsISA3_0 = false;
8080
bool IsISA3_1 = false;
81+
bool HasQuadwordAtomics = false;
8182

8283
protected:
8384
std::string ABI;
@@ -439,8 +440,18 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
439440
DataLayout += "-S128-v256:256:256-v512:512:512";
440441
resetDataLayout(DataLayout);
441442

442-
// PPC64 supports atomics up to 8 bytes.
443-
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
443+
// Newer PPC64 instruction sets support atomics up to 16 bytes.
444+
MaxAtomicPromoteWidth = 128;
445+
// Baseline PPC64 supports inlining atomics up to 8 bytes.
446+
MaxAtomicInlineWidth = 64;
447+
}
448+
449+
// Raises MaxAtomicInlineWidth to 128 bits when the target OS is not AIX and
// the "quadword-atomics" feature is reported by hasFeature(); otherwise the
// existing value is left untouched.
void setMaxAtomicWidth() override {
450+
// For power8 and up, backend is able to inline 16-byte atomic lock free
451+
// code.
452+
// TODO: We should allow AIX to inline quadword atomics in the future.
453+
if (!getTriple().isOSAIX() && hasFeature("quadword-atomics"))
454+
MaxAtomicInlineWidth = 128;
444455
}
445456

446457
BuiltinVaListKind getBuiltinVaListKind() const override {
Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,45 @@
1-
// RUN: %clang_cc1 -verify -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
1+
// RUN: %clang_cc1 -Werror -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
22
// RUN: FileCheck %s --check-prefixes=PPC,PPC32
3-
// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
4-
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
5-
// RUN: %clang_cc1 -verify -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
3+
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
64
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
5+
// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s \
6+
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,PPC64
7+
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
8+
// RUN: FileCheck %s --check-prefixes=PPC,AIX64
9+
// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s \
10+
// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,AIX64
711

812
// PPC: @c = global i8 0, align 1{{$}}
9-
_Atomic(char) c; // expected-no-diagnostics
13+
_Atomic(char) c;
1014

1115
// PPC: @s = global i16 0, align 2{{$}}
12-
_Atomic(short) s; // expected-no-diagnostics
16+
_Atomic(short) s;
1317

1418
// PPC: @i = global i32 0, align 4{{$}}
15-
_Atomic(int) i; // expected-no-diagnostics
19+
_Atomic(int) i;
1620

1721
// PPC32: @l = global i32 0, align 4{{$}}
1822
// PPC64: @l = global i64 0, align 8{{$}}
19-
_Atomic(long) l; // expected-no-diagnostics
23+
// AIX64: @l = global i64 0, align 8{{$}}
24+
_Atomic(long) l;
2025

2126
// PPC: @ll = global i64 0, align 8{{$}}
22-
_Atomic(long long) ll; // expected-no-diagnostics
27+
_Atomic(long long) ll;
2328

2429
typedef struct {
2530
char x[8];
2631
} O;
2732

2833
// PPC32: @o = global %struct.O zeroinitializer, align 1{{$}}
2934
// PPC64: @o = global %struct.O zeroinitializer, align 8{{$}}
30-
_Atomic(O) o; // expected-no-diagnostics
35+
// AIX64: @o = global %struct.O zeroinitializer, align 8{{$}}
36+
_Atomic(O) o;
3137

3238
typedef struct {
3339
char x[16];
3440
} Q;
3541

36-
// PPC: @q = global %struct.Q zeroinitializer, align 1{{$}}
37-
_Atomic(Q) q; // expected-no-diagnostics
42+
// PPC32: @q = global %struct.Q zeroinitializer, align 1{{$}}
43+
// PPC64: @q = global %struct.Q zeroinitializer, align 16{{$}}
44+
// AIX64: @q = global %struct.Q zeroinitializer, align 16{{$}}
45+
_Atomic(Q) q;
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
2+
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-PWR8
3+
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
4+
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
5+
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
6+
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
7+
8+
typedef struct {
9+
char x[16];
10+
} Q;
11+
12+
typedef _Atomic(Q) AtomicQ;
13+
14+
typedef __int128_t int128_t;
15+
16+
// PPC64-PWR8-LABEL: @test_load(
17+
// PPC64-PWR8: [[TMP3:%.*]] = load atomic i128, i128* [[TMP1:%.*]] acquire, align 16
18+
//
19+
// PPC64-LABEL: @test_load(
20+
// PPC64: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP3:%.*]], i8* noundef [[TMP4:%.*]], i32 noundef signext 2)
21+
//
22+
Q test_load(AtomicQ *ptr) {
23+
// expected-no-diagnostics
24+
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
25+
}
26+
27+
// PPC64-PWR8-LABEL: @test_store(
28+
// PPC64-PWR8: store atomic i128 [[TMP6:%.*]], i128* [[TMP4:%.*]] release, align 16
29+
//
30+
// PPC64-LABEL: @test_store(
31+
// PPC64: call void @__atomic_store(i64 noundef 16, i8* noundef [[TMP6:%.*]], i8* noundef [[TMP7:%.*]], i32 noundef signext 3)
32+
//
33+
void test_store(Q val, AtomicQ *ptr) {
34+
// expected-no-diagnostics
35+
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
36+
}
37+
38+
// PPC64-PWR8-LABEL: @test_add(
39+
// PPC64-PWR8: [[TMP3:%.*]] = atomicrmw add i128* [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
40+
//
41+
// PPC64-LABEL: @test_add(
42+
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(i8* noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
43+
//
44+
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
45+
// expected-no-diagnostics
46+
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
47+
}
48+
49+
// PPC64-PWR8-LABEL: @test_xchg(
50+
// PPC64-PWR8: [[TMP8:%.*]] = atomicrmw xchg i128* [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
51+
//
52+
// PPC64-LABEL: @test_xchg(
53+
// PPC64: call void @__atomic_exchange(i64 noundef 16, i8* noundef [[TMP7:%.*]], i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i32 noundef signext 5)
54+
//
55+
Q test_xchg(AtomicQ *ptr, Q new) {
56+
// expected-no-diagnostics
57+
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
58+
}
59+
60+
// PPC64-PWR8-LABEL: @test_cmpxchg(
61+
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
62+
//
63+
// PPC64-LABEL: @test_cmpxchg(
64+
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
65+
//
66+
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
67+
// expected-no-diagnostics
68+
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
69+
}
70+
71+
// PPC64-PWR8-LABEL: @test_cmpxchg_weak(
72+
// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg weak i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
73+
//
74+
// PPC64-LABEL: @test_cmpxchg_weak(
75+
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
76+
//
77+
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
78+
// expected-no-diagnostics
79+
return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
80+
}
81+
82+
// PPC64-PWR8-LABEL: @is_lock_free(
83+
// PPC64-PWR8: ret i32 1
84+
//
85+
// PPC64-LABEL: @is_lock_free(
86+
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, i8* noundef null)
87+
//
88+
int is_lock_free() {
89+
AtomicQ q;
90+
// expected-no-diagnostics
91+
return __c11_atomic_is_lock_free(sizeof(q));
92+
}

clang/test/Sema/atomic-ops.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// RUN: -target-cpu pwr7
1010
// RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding \
1111
// RUN: -fsyntax-only -triple=powerpc64le-linux-gnu -std=c11 \
12-
// RUN: -target-cpu pwr8
12+
// RUN: -target-cpu pwr8 -DPPC64_PWR8
1313

1414
// Basic parsing/Sema tests for __c11_atomic_*
1515

@@ -47,23 +47,35 @@ _Static_assert(__c11_atomic_is_lock_free(2), "");
4747
_Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
4848
_Static_assert(__c11_atomic_is_lock_free(4), "");
4949
_Static_assert(__c11_atomic_is_lock_free(8), "");
50+
#ifndef PPC64_PWR8
5051
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
52+
#else
53+
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-no-error
54+
#endif
5155
_Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
5256

5357
_Static_assert(__atomic_is_lock_free(1, 0), "");
5458
_Static_assert(__atomic_is_lock_free(2, 0), "");
5559
_Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
5660
_Static_assert(__atomic_is_lock_free(4, 0), "");
5761
_Static_assert(__atomic_is_lock_free(8, 0), "");
62+
#ifndef PPC64_PWR8
5863
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
64+
#else
65+
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-no-error
66+
#endif
5967
_Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
6068

6169
_Static_assert(atomic_is_lock_free((atomic_char*)0), "");
6270
_Static_assert(atomic_is_lock_free((atomic_short*)0), "");
6371
_Static_assert(atomic_is_lock_free((atomic_int*)0), "");
6472
_Static_assert(atomic_is_lock_free((atomic_long*)0), "");
73+
#ifndef PPC64_PWR8
6574
// noi128-error@+1 {{__int128 is not supported on this target}}
6675
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-error {{not an integral constant expression}}
76+
#else
77+
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-no-error
78+
#endif
6779
_Static_assert(atomic_is_lock_free(0 + (atomic_char*)0), "");
6880

6981
char i8;
@@ -88,7 +100,11 @@ _Static_assert(__atomic_always_lock_free(2, 0), "");
88100
_Static_assert(!__atomic_always_lock_free(3, 0), "");
89101
_Static_assert(__atomic_always_lock_free(4, 0), "");
90102
_Static_assert(__atomic_always_lock_free(8, 0), "");
103+
#ifndef PPC64_PWR8
91104
_Static_assert(!__atomic_always_lock_free(16, 0), "");
105+
#else
106+
_Static_assert(__atomic_always_lock_free(16, 0), "");
107+
#endif
92108
_Static_assert(!__atomic_always_lock_free(17, 0), "");
93109

94110
_Static_assert(__atomic_always_lock_free(1, incomplete), "");

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,7 +1321,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
13211321
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
13221322
}
13231323

1324-
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
1324+
if (shouldInlineQuadwordAtomics()) {
13251325
setMaxAtomicSizeInBitsSupported(128);
13261326
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
13271327
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -18053,18 +18053,26 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
1805318053
}
1805418054
}
1805518055

18056+
// Returns true when all of the following hold: the subtarget is PPC64, the
// subtarget has quadword atomics, and either the target OS is not AIX or
// EnableQuadwordAtomics is set (so on AIX the flag acts as an explicit opt-in).
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18057+
// TODO: 16-byte atomic type support for AIX is in progress; we should be able
18058+
// to inline 16-byte atomic ops on AIX too in the future.
18059+
return Subtarget.isPPC64() &&
18060+
(EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
18061+
Subtarget.hasQuadwordAtomics();
18062+
}
18063+
1805618064
TargetLowering::AtomicExpansionKind
1805718065
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1805818066
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18059-
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
18067+
if (shouldInlineQuadwordAtomics() && Size == 128)
1806018068
return AtomicExpansionKind::MaskedIntrinsic;
1806118069
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
1806218070
}
1806318071

1806418072
TargetLowering::AtomicExpansionKind
1806518073
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1806618074
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18067-
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
18075+
if (shouldInlineQuadwordAtomics() && Size == 128)
1806818076
return AtomicExpansionKind::MaskedIntrinsic;
1806918077
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
1807018078
}
@@ -18094,8 +18102,7 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
1809418102
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
1809518103
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
1809618104
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18097-
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
18098-
"Only support quadword now");
18105+
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
1809918106
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
1810018107
Type *ValTy = Incr->getType();
1810118108
assert(ValTy->getPrimitiveSizeInBits() == 128);
@@ -18119,8 +18126,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
1811918126
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1812018127
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1812118128
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18122-
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
18123-
"Only support quadword now");
18129+
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
1812418130
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
1812518131
Type *ValTy = CmpVal->getType();
1812618132
assert(ValTy->getPrimitiveSizeInBits() == 128);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,8 @@ namespace llvm {
910910
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
911911
AtomicOrdering Ord) const override;
912912

913+
bool shouldInlineQuadwordAtomics() const;
914+
913915
TargetLowering::AtomicExpansionKind
914916
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
915917

0 commit comments

Comments
 (0)