Skip to content

Commit 2878182

Browse files
authored
merge main into amd-staging (llvm#707)
2 parents 29f226f + a438f4b commit 2878182

File tree

62 files changed

+1572
-400
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1572
-400
lines changed

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,8 @@ bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm,
10631063
return false;
10641064
}
10651065

1066-
if (!Ptr.isRoot() || Ptr.isOnePastEnd() || Ptr.isArrayElement()) {
1066+
if (!Ptr.isRoot() || Ptr.isOnePastEnd() ||
1067+
(Ptr.isArrayElement() && Ptr.getIndex() != 0)) {
10671068
const SourceInfo &Loc = S.Current->getSource(OpPC);
10681069
S.FFDiag(Loc, diag::note_constexpr_delete_subobject)
10691070
<< Ptr.toDiagnosticString(S.getASTContext()) << Ptr.isOnePastEnd();

clang/lib/AST/ByteCode/Interp.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2915,13 +2915,17 @@ inline bool AllocN(InterpState &S, CodePtr OpPC, PrimType T, const Expr *Source,
29152915
S.Stk.push<Pointer>(0, nullptr);
29162916
return true;
29172917
}
2918+
assert(NumElements.isPositive());
29182919

29192920
DynamicAllocator &Allocator = S.getAllocator();
29202921
Block *B =
29212922
Allocator.allocate(Source, T, static_cast<size_t>(NumElements),
29222923
S.Ctx.getEvalID(), DynamicAllocator::Form::Array);
29232924
assert(B);
2924-
S.Stk.push<Pointer>(B);
2925+
if (NumElements.isZero())
2926+
S.Stk.push<Pointer>(B);
2927+
else
2928+
S.Stk.push<Pointer>(Pointer(B).atIndex(0));
29252929
return true;
29262930
}
29272931

@@ -2941,13 +2945,18 @@ inline bool AllocCN(InterpState &S, CodePtr OpPC, const Descriptor *ElementDesc,
29412945
S.Stk.push<Pointer>(0, ElementDesc);
29422946
return true;
29432947
}
2948+
assert(NumElements.isPositive());
29442949

29452950
DynamicAllocator &Allocator = S.getAllocator();
29462951
Block *B =
29472952
Allocator.allocate(ElementDesc, static_cast<size_t>(NumElements),
29482953
S.Ctx.getEvalID(), DynamicAllocator::Form::Array);
29492954
assert(B);
2950-
S.Stk.push<Pointer>(B);
2955+
if (NumElements.isZero())
2956+
S.Stk.push<Pointer>(B);
2957+
else
2958+
S.Stk.push<Pointer>(Pointer(B).atIndex(0));
2959+
29512960
return true;
29522961
}
29532962

clang/lib/CodeGen/CGCall.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5640,22 +5640,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
56405640
if (!CallArgs.getCleanupsToDeactivate().empty())
56415641
deactivateArgCleanupsBeforeCall(*this, CallArgs);
56425642

5643-
// Assert that the arguments we computed match up. The IR verifier
5644-
// will catch this, but this is a common enough source of problems
5645-
// during IRGen changes that it's way better for debugging to catch
5646-
// it ourselves here.
5647-
#ifndef NDEBUG
5648-
assert(IRCallArgs.size() == IRFuncTy->getNumParams() || IRFuncTy->isVarArg());
5649-
for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
5650-
// Inalloca argument can have different type.
5651-
if (IRFunctionArgs.hasInallocaArg() &&
5652-
i == IRFunctionArgs.getInallocaArgNo())
5653-
continue;
5654-
if (i < IRFuncTy->getNumParams())
5655-
assert(IRCallArgs[i]->getType() == IRFuncTy->getParamType(i));
5656-
}
5657-
#endif
5658-
56595643
// Update the largest vector width if any arguments have vector types.
56605644
for (unsigned i = 0; i < IRCallArgs.size(); ++i)
56615645
LargestVectorWidth = std::max(LargestVectorWidth,

clang/lib/Headers/amdgpuintrin.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,62 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
162162
((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width));
163163
}
164164

165+
// Returns a bitmask marking all lanes that have the same value of __x.
166+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
167+
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
168+
uint32_t __match_mask = 0;
169+
170+
bool __done = 0;
171+
while (__gpu_ballot(__lane_mask, !__done)) {
172+
if (!__done) {
173+
uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
174+
if (__first == __x) {
175+
__match_mask = __gpu_lane_mask();
176+
__done = 1;
177+
}
178+
}
179+
}
180+
__gpu_sync_lane(__lane_mask);
181+
return __match_mask;
182+
}
183+
184+
// Returns a bitmask marking all lanes that have the same value of __x.
185+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
186+
__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
187+
uint64_t __match_mask = 0;
188+
189+
bool __done = 0;
190+
while (__gpu_ballot(__lane_mask, __done)) {
191+
if (!__done) {
192+
uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
193+
if (__first == __x) {
194+
__match_mask = __gpu_lane_mask();
195+
__done = 1;
196+
}
197+
}
198+
}
199+
__gpu_sync_lane(__lane_mask);
200+
return __match_mask;
201+
}
202+
203+
// Returns the current lane mask if every lane contains __x.
204+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
205+
__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
206+
uint32_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
207+
uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
208+
__gpu_sync_lane(__lane_mask);
209+
return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
210+
}
211+
212+
// Returns the current lane mask if every lane contains __x.
213+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
214+
__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
215+
uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
216+
uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
217+
__gpu_sync_lane(__lane_mask);
218+
return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
219+
}
220+
165221
// Returns true if the flat pointer points to AMDGPU 'shared' memory.
166222
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
167223
return __builtin_amdgcn_is_shared((void [[clang::address_space(0)]] *)((

clang/lib/Headers/nvptxintrin.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#error "This file is intended for NVPTX targets or offloading to NVPTX"
1414
#endif
1515

16+
#ifndef __CUDA_ARCH__
17+
#define __CUDA_ARCH__ 0
18+
#endif
19+
1620
#include <stdint.h>
1721

1822
#if !defined(__cplusplus)
@@ -168,6 +172,76 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
168172
((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
169173
}
170174

175+
// Returns a bitmask marking all lanes that have the same value of __x.
176+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
177+
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
178+
// Newer targets can use the dedicated CUDA support.
179+
if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
180+
return __nvvm_match_any_sync_i32(__lane_mask, __x);
181+
182+
uint32_t __match_mask = 0;
183+
bool __done = 0;
184+
while (__gpu_ballot(__lane_mask, !__done)) {
185+
if (!__done) {
186+
uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
187+
if (__first == __x) {
188+
__match_mask = __gpu_lane_mask();
189+
__done = 1;
190+
}
191+
}
192+
}
193+
return __match_mask;
194+
}
195+
196+
// Returns a bitmask marking all lanes that have the same value of __x.
197+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
198+
__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
199+
// Newer targets can use the dedicated CUDA support.
200+
if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
201+
return __nvvm_match_any_sync_i64(__lane_mask, __x);
202+
203+
uint64_t __match_mask = 0;
204+
205+
bool __done = 0;
206+
while (__gpu_ballot(__lane_mask, __done)) {
207+
if (!__done) {
208+
uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
209+
if (__first == __x) {
210+
__match_mask = __gpu_lane_mask();
211+
__done = 1;
212+
}
213+
}
214+
}
215+
__gpu_sync_lane(__lane_mask);
216+
return __match_mask;
217+
}
218+
219+
// Returns the current lane mask if every lane contains __x.
220+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
221+
__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
222+
// Newer targets can use the dedicated CUDA support.
223+
int predicate;
224+
if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
225+
return __nvvm_match_all_sync_i32p(__lane_mask, __x, &predicate);
226+
227+
uint32_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
228+
uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
229+
return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
230+
}
231+
232+
// Returns the current lane mask if every lane contains __x.
233+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
234+
__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
235+
// Newer targets can use the dedicated CUDA support.
236+
int predicate;
237+
if (__CUDA_ARCH__ >= 700 || __nvvm_reflect("__CUDA_ARCH") >= 700)
238+
return __nvvm_match_all_sync_i64p(__lane_mask, __x, &predicate);
239+
240+
uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
241+
uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first);
242+
return __ballot == __gpu_lane_mask() ? __gpu_lane_mask() : 0ull;
243+
}
244+
171245
// Returns true if the flat pointer points to CUDA 'shared' memory.
172246
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
173247
return __nvvm_isspacep_shared(ptr);

clang/test/AST/ByteCode/new-delete.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,20 @@ namespace NonConstexprArrayCtor {
922922
// both-note {{in call to}}
923923
}
924924

925+
/// Allocates an array of the derived type B with new[], converts the
/// resulting pointer to a pointer to its base class A, and releases the array
/// with delete[] through the original pointer. The static_assert requires the
/// whole sequence to be accepted in a constant expression.
namespace ArrayBaseCast {
926+
struct A {};
927+
struct B : A {};
928+
constexpr bool test() {
929+
B *b = new B[2];
930+
931+
// Derived-to-base conversion of the array pointer; 'a' is not used afterwards.
A* a = b;
932+
933+
delete[] b;
934+
return true;
935+
}
936+
static_assert(test());
937+
}
938+
925939
#else
926940
/// Make sure we reject this prior to C++20
927941
constexpr int a() { // both-error {{never produces a constant expression}}

libc/include/fenv.h.def

Lines changed: 0 additions & 17 deletions
This file was deleted.

libc/include/fenv.yaml

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,32 @@
11
header: fenv.h
2-
header_template: fenv.h.def
3-
macros: []
2+
standards:
3+
- stdc
4+
macros:
5+
- macro_name: FE_ALL_EXCEPT
6+
macro_header: fenv-macros.h
7+
- macro_name: FE_DIVBYZERO
8+
macro_header: fenv-macros.h
9+
- macro_name: FE_INEXACT
10+
macro_header: fenv-macros.h
11+
- macro_name: FE_INVALID
12+
macro_header: fenv-macros.h
13+
- macro_name: FE_OVERFLOW
14+
macro_header: fenv-macros.h
15+
- macro_name: FE_UNDERFLOW
16+
macro_header: fenv-macros.h
17+
- macro_name: FE_DOWNWARD
18+
macro_header: fenv-macros.h
19+
- macro_name: FE_TONEAREST
20+
macro_header: fenv-macros.h
21+
- macro_name: FE_TOWARDZERO
22+
macro_header: fenv-macros.h
23+
- macro_name: FE_UPWARD
24+
macro_header: fenv-macros.h
25+
- macro_name: FE_DFL_ENV
26+
macro_header: fenv-macros.h
427
types:
528
- type_name: fenv_t
629
- type_name: fexcept_t
7-
enums: []
8-
objects: []
930
functions:
1031
- name: feclearexcept
1132
standards:
@@ -15,14 +36,14 @@ functions:
1536
- type: int
1637
- name: fedisableexcept
1738
standards:
18-
- GNUExtensions
39+
- gnu
1940
return_type: int
2041
arguments:
2142
- type: int
2243
guard: null
2344
- name: feenableexcept
2445
standards:
25-
- GNUExtensions
46+
- gnu
2647
return_type: int
2748
arguments:
2849
- type: int
@@ -35,7 +56,7 @@ functions:
3556
- type: fenv_t *
3657
- name: fegetexcept
3758
standards:
38-
- GNUExtensions
59+
- gnu
3960
return_type: int
4061
arguments: []
4162
- name: fegetexceptflag

libc/src/__support/GPU/utils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,14 @@ LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x,
9292
return __gpu_shuffle_idx_u32(lane_mask, idx, x, width);
9393
}
9494

95+
// Returns the result of __gpu_match_any_u32 for the given lane mask and 32-bit
// value.
LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) {
  const uint64_t matching_lanes = __gpu_match_any_u32(lane_mask, x);
  return matching_lanes;
}
98+
99+
// Returns the result of __gpu_match_all_u32 for the given lane mask and 32-bit
// value.
LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) {
  const uint64_t agreeing_lanes = __gpu_match_all_u32(lane_mask, x);
  return agreeing_lanes;
}
102+
95103
[[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); }
96104

97105
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {

libc/test/integration/src/__support/GPU/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,12 @@ add_integration_test(
1818
LOADER_ARGS
1919
--threads 64
2020
)
21+
22+
add_integration_test(
23+
match_test
24+
SUITE libc-support-gpu-tests
25+
SRCS
26+
match.cpp
27+
LOADER_ARGS
28+
--threads 64
29+
)

0 commit comments

Comments
 (0)