Skip to content

Commit 6955b58

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 44eef3d + e448888 commit 6955b58

File tree

22 files changed

+1779
-202
lines changed

22 files changed

+1779
-202
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3438,10 +3438,6 @@ def err_attribute_argument_is_zero : Error<
34383438
def warn_attribute_argument_n_negative : Warning<
34393439
"%0 attribute parameter %1 is negative and will be ignored">,
34403440
InGroup<CudaCompat>;
3441-
def warn_reqd_sub_group_attribute_cuda_n_32
3442-
: Warning<"attribute argument %0 is invalid and will be ignored; CUDA "
3443-
"requires sub_group size 32">,
3444-
InGroup<CudaCompat>;
34453441
def err_property_function_in_objc_container : Error<
34463442
"use of Objective-C property in function nested in Objective-C "
34473443
"container not supported, move function outside its container">;
@@ -3540,6 +3536,10 @@ def warn_dllimport_dropped_from_inline_function : Warning<
35403536
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
35413537
" only on a function directly called from a SYCL kernel function; attribute ignored">,
35423538
InGroup<IgnoredAttributes>;
3539+
def warn_reqd_sub_group_attribute_n
3540+
: Warning<"attribute argument %0 is invalid and will be ignored; %1 "
3541+
"requires sub_group size %2">,
3542+
InGroup<IgnoredAttributes>;
35433543
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
35443544
" exception specification; attribute ignored">,
35453545
InGroup<IgnoredAttributes>;

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4020,9 +4020,25 @@ void Sema::AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI,
40204020
<< CI << /*positive*/ 0;
40214021
return;
40224022
}
4023-
if (Context.getTargetInfo().getTriple().isNVPTX() && ArgVal != 32) {
4024-
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_cuda_n_32)
4025-
<< ArgVal.getSExtValue();
4023+
auto &TI = Context.getTargetInfo();
4024+
if (TI.getTriple().isNVPTX() && ArgVal != 32)
4025+
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
4026+
<< ArgVal.getSExtValue() << TI.getTriple().getArchName() << 32;
4027+
if (TI.getTriple().isAMDGPU()) {
4028+
const auto HasWaveFrontSize64 =
4029+
TI.getTargetOpts().FeatureMap["wavefrontsize64"];
4030+
const auto HasWaveFrontSize32 =
4031+
TI.getTargetOpts().FeatureMap["wavefrontsize32"];
4032+
4033+
// CDNA supports only 64 wave front size, for those GPUs allow subgroup
4034+
// size of 64. Some GPUs support both 32 and 64, for those (and the rest)
4035+
// only allow 32. Warn on incompatible sizes.
4036+
const auto SupportedWaveFrontSize =
4037+
HasWaveFrontSize64 && !HasWaveFrontSize32 ? 64 : 32;
4038+
if (ArgVal != SupportedWaveFrontSize)
4039+
Diag(E->getExprLoc(), diag::warn_reqd_sub_group_attribute_n)
4040+
<< ArgVal.getSExtValue() << TI.getTriple().getArchName()
4041+
<< SupportedWaveFrontSize;
40264042
}
40274043

40284044
// Check to see if there's a duplicate attribute with different values
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 32, warn (and ignore the attribute) if the
4+
// size is not 32.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {}); // expected-warning {{attribute argument 64 is invalid and will be ignored; amdgcn requires sub_group size 32}}
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {});
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 32}}
21+
});
22+
23+
return 0;
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx90a -internal-isystem %S/Inputs -std=c++2b -verify %s
2+
3+
// Sub-group size is optimized for 64, warn (and ignore the attribute) if the
4+
// size is not 64.
5+
#include "sycl.hpp"
6+
7+
int main() {
8+
9+
sycl::queue Q;
10+
11+
Q.submit([&](sycl::handler &h) {
12+
h.single_task<class valid_kernel>([=] [[sycl::reqd_sub_group_size(64)]] {});
13+
});
14+
15+
Q.submit([&](sycl::handler &h) {
16+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(32)]] {}); // expected-warning {{attribute argument 32 is invalid and will be ignored; amdgcn requires sub_group size 64}}
17+
});
18+
19+
Q.submit([&](sycl::handler &h) {
20+
h.single_task<class invalid_kernel_2>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; amdgcn requires sub_group size 64}}
21+
});
22+
23+
return 0;
24+
}

clang/test/SemaSYCL/reqd-sub-group-size-cuda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ int main() {
99
sycl::queue Q;
1010

1111
Q.submit([&](sycl::handler &h) {
12-
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; CUDA requires sub_group size 32}}
12+
h.single_task<class invalid_kernel>([=] [[sycl::reqd_sub_group_size(8)]] {}); // expected-warning {{attribute argument 8 is invalid and will be ignored; nvptx requires sub_group size 32}}
1313
});
1414

1515
Q.submit([&](sycl::handler &h) {

libclc/amdgcn-amdhsa/libspirv/workitem/get_local_size.cl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,19 @@
2323
CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
2424
#endif
2525

26+
// Mimic `EmitAMDGPUWorkGroupSize` in `clang/lib/CodeGen/CGBuiltin.cpp`.
27+
2628
_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_x() {
27-
CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
28-
return ptr[1] & 0xffffu;
29+
CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr();
30+
return ptr[2];
2931
}
3032

3133
_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_y() {
32-
CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
33-
return ptr[1] >> 16;
34+
CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr();
35+
return ptr[3];
3436
}
3537

3638
_CLC_DEF _CLC_OVERLOAD size_t __spirv_WorkgroupSize_z() {
37-
CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
38-
return ptr[2] & 0xffffu;
39+
CONST_AS ushort * ptr = (CONST_AS ushort *) __dispatch_ptr();
40+
return ptr[4];
3941
}

sycl-fusion/jit-compiler/lib/KernelFusion.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,6 @@ FusionResult KernelFusion::fuseKernels(
100100
"Fusion output target format not supported by this build");
101101
}
102102

103-
if (TargetFormat != BinaryFormat::SPIRV &&
104-
TargetFormat != BinaryFormat::PTX && IsHeterogeneousList) {
105-
return FusionResult{
106-
"Heterogeneous ND ranges not supported for this target"};
107-
}
108-
109103
bool CachingEnabled = ConfigHelper::get<option::JITEnableCaching>();
110104
CacheKeyT CacheKey{KernelsToFuse,
111105
Identities,

sycl-fusion/passes/kernel-fusion/Builtins.cpp

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,9 @@ static raw_ostream &operator<<(raw_ostream &Os, const NDRange &ND) {
3636
<< ND.getLocalSize();
3737
}
3838

39-
/// Will generate a unique function name so that it can be reused in further
40-
/// stages.
41-
static std::string getFunctionName(BuiltinKind K, const NDRange &SrcNDRange,
42-
const NDRange &FusedNDRange) {
39+
std::string Remapper::getFunctionName(BuiltinKind K, const NDRange &SrcNDRange,
40+
const NDRange &FusedNDRange,
41+
uint32_t Idx) {
4342
std::string Res;
4443
raw_string_ostream S{Res};
4544
S << "__" <<
@@ -63,6 +62,8 @@ static std::string getFunctionName(BuiltinKind K, const NDRange &SrcNDRange,
6362
llvm_unreachable("Unhandled kind");
6463
}()
6564
<< "_remapper_" << SrcNDRange << "_" << FusedNDRange;
65+
if (Idx != (uint32_t)-1)
66+
S << "_" << static_cast<char>('x' + Idx);
6667
return S.str();
6768
}
6869

@@ -339,13 +340,8 @@ jit_compiler::Remapper::remapBuiltins(Function *F, const NDRange &SrcNDRange,
339340
// If the builtin should not be remapped, return the original function.
340341
return F;
341342

342-
// Remap given builtin.
343-
const auto Name = getFunctionName(K, SrcNDRange, FusedNDRange);
344-
auto *M = F->getParent();
345-
assert(!M->getFunction(Name) && "Function name should be unique");
346-
347343
return Cached = TargetInfo.createRemapperFunction(
348-
*this, K, F->getName(), Name, M, SrcNDRange, FusedNDRange);
344+
*this, K, F, F->getParent(), SrcNDRange, FusedNDRange);
349345
}
350346
if (TargetInfo.isSafeToNotRemapBuiltin(F)) {
351347
// No need to remap.
@@ -375,20 +371,9 @@ jit_compiler::Remapper::remapBuiltins(Function *F, const NDRange &SrcNDRange,
375371

376372
// Set Cached to support recursive functions.
377373
Cached = Clone;
378-
for (auto &I : instructions(Clone)) {
379-
if (auto *Call = dyn_cast<CallBase>(&I)) {
380-
// Recursive call
381-
auto *OldF = Call->getCalledFunction();
382-
auto ErrOrNewF = remapBuiltins(OldF, SrcNDRange, FusedNDRange);
383-
if (auto Err = ErrOrNewF.takeError()) {
384-
return std::move(Err);
385-
}
386-
// Override called function.
387-
auto *NewF = *ErrOrNewF;
388-
Call->setCalledFunction(NewF);
389-
Call->setCallingConv(NewF->getCallingConv());
390-
Call->setAttributes(NewF->getAttributes());
391-
}
374+
if (auto Err = TargetInfo.scanForBuiltinsToRemap(Clone, *this, SrcNDRange,
375+
FusedNDRange)) {
376+
return Err;
392377
}
393378
return Clone;
394379
}

sycl-fusion/passes/kernel-fusion/Builtins.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ class Remapper {
4747
explicit Remapper(const llvm::TargetFusionInfo &TargetInfo)
4848
: TargetInfo(TargetInfo) {}
4949

50+
///
51+
/// Generate a unique function name for a remapper function.
52+
static std::string getFunctionName(BuiltinKind K, const NDRange &SrcNDRange,
53+
const NDRange &FusedNDRange,
54+
uint32_t Idx = -1);
55+
5056
///
5157
/// Recursively remap index space getters builtins.
5258
llvm::Expected<llvm::Function *> remapBuiltins(llvm::Function *F,

0 commit comments

Comments
 (0)