Skip to content

Commit 0196b82

Browse files
committed
Merge remote-tracking branch 'intel_llvm/sycl' into llvmspirv_pulldown
2 parents 8a15d8d + 2085978 commit 0196b82

File tree

64 files changed

+3133
-243
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+3133
-243
lines changed

buildbot/configure.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def do_configure(args):
3232
libclc_targets_to_build = ''
3333
libclc_gen_remangled_variants = 'OFF'
3434
sycl_build_pi_cuda = 'OFF'
35-
sycl_build_pi_esimd_emulator = 'OFF'
35+
sycl_build_pi_esimd_emulator = 'ON'
3636
sycl_build_pi_hip = 'OFF'
3737
sycl_build_pi_hip_platform = 'AMD'
3838
sycl_clang_extra_flags = ''
@@ -50,8 +50,8 @@ def do_configure(args):
5050
if args.arm:
5151
llvm_targets_to_build = 'ARM;AArch64'
5252

53-
if args.enable_esimd_cpu_emulation:
54-
sycl_build_pi_esimd_emulator = 'ON'
53+
if args.disable_esimd_emulator:
54+
sycl_build_pi_esimd_emulator = 'OFF'
5555

5656
if args.cuda or args.hip:
5757
llvm_enable_projects += ';libclc'
@@ -213,7 +213,7 @@ def main():
213213
parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend")
214214
parser.add_argument("--hip-amd-arch", type=str, help="Sets AMD gpu architecture for llvm lit tests, this is only needed for the HIP backend and AMD platform")
215215
parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86")
216-
parser.add_argument("--enable-esimd-cpu-emulation", action='store_true', help="build with ESIMD_CPU emulation support")
216+
parser.add_argument("--disable-esimd-emulator", action='store_true', help="exclude ESIMD_EMULATOR support")
217217
parser.add_argument("--no-assertions", action='store_true', help="build without assertions")
218218
parser.add_argument("--docs", action='store_true', help="build Doxygen documentation")
219219
parser.add_argument("--no-werror", action='store_true', help="Don't treat warnings as errors")

buildbot/dependency.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ ocl_fpga_emu_ver=2021.13.11.0.23
2525
ocl_fpga_emu_ver_win=2021.13.11.0.23
2626
fpga_ver=20211014_000004
2727
fpga_ver_win=20211014_000004
28+
# https://downloadmirror.intel.com/691496/igfx_win_101.1191.zip
2829
ocloc_ver_win=101.1191
2930

3031
[DRIVER VERSIONS]

clang/include/clang/Basic/Attr.td

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,15 +1280,17 @@ def SYCLRegisterNum : InheritableAttr {
12801280
let Documentation = [SYCLRegisterNumDocs];
12811281
}
12821282

1283-
// Used by FE to mark ESIMD kernel pointer parameters which correspond to the
1283+
// Used by FE to mark SYCL kernel pointer parameters which correspond to the
12841284
// original lambda's captured accessors. FE turns the attribute to some metadata
1285-
// required by the ESIMD Back-End.
1286-
// Not supposed to be used directly in the source - SYCL device compiler FE
1287-
// automatically adds it for ESIMD kernels, hence undocumented.
1288-
def SYCLSimdAccessorPtr : InheritableAttr {
1289-
// No spelling, as this attribute can't be created in the source code.
1285+
// required by the device back-end.
1286+
// This attribute does not require custom semantic handling
1287+
// hence we set the SemaHandler field to 0.
1288+
// The attribute is not for public consumption, and is an implicitly-created attribute
1289+
// that has no visible spelling, hence undocumented.
1290+
def SYCLAccessorPtr : Attr {
1291+
// This attribute has no spellings as it is only ever created implicitly.
12901292
let Spellings = [];
1291-
let Subjects = SubjectList<[ParmVar]>;
1293+
let SemaHandler = 0;
12921294
let Documentation = [Undocumented];
12931295
}
12941296

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,6 +2054,92 @@ TARGET_BUILTIN(__nvvm_atom_acq_rel_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20542054
TARGET_BUILTIN(__nvvm_atom_acq_rel_cta_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20552055
TARGET_BUILTIN(__nvvm_atom_acq_rel_sys_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70)
20562056

2057+
#pragma push_macro("LD_VOLATILE_BUILTIN_TYPES")
2058+
#define LD_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \
2059+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_i, "iiD*", "n") \
2060+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_l, "LiLiD*", "n") \
2061+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n") \
2062+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_f, "ffD*", "n") \
2063+
BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_d, "ddD*", "n")
2064+
2065+
#pragma push_macro("LD_BUILTIN_TYPES")
2066+
#define LD_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \
2067+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i, "iiD*", "n", SM_70) \
2068+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l, "LiLiD*", "n", SM_70) \
2069+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n", SM_70) \
2070+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f, "ffD*", "n", SM_70) \
2071+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d, "ddD*", "n", SM_70)
2072+
2073+
#pragma push_macro("LD_BUILTIN_AS_TYPES")
2074+
#define LD_BUILTIN_AS_TYPES(ORDER, SCOPE) \
2075+
LD_BUILTIN_TYPES(ORDER, SCOPE, _gen) \
2076+
LD_BUILTIN_TYPES(ORDER, SCOPE, _global) \
2077+
LD_BUILTIN_TYPES(ORDER, SCOPE, _shared)
2078+
2079+
#pragma push_macro("LD_BUILTIN_SCOPES_AS_TYPES")
2080+
#define LD_BUILTIN_SCOPES_AS_TYPES(ORDER) \
2081+
LD_BUILTIN_AS_TYPES(ORDER, ) \
2082+
LD_BUILTIN_AS_TYPES(ORDER, _cta) \
2083+
LD_BUILTIN_AS_TYPES(ORDER, _sys)
2084+
2085+
LD_BUILTIN_SCOPES_AS_TYPES()
2086+
LD_BUILTIN_SCOPES_AS_TYPES(_acquire)
2087+
LD_VOLATILE_BUILTIN_TYPES(_gen)
2088+
LD_VOLATILE_BUILTIN_TYPES(_global)
2089+
LD_VOLATILE_BUILTIN_TYPES(_shared)
2090+
2091+
#undef LD_VOLATILE_BUILTIN_TYPES
2092+
#pragma pop_macro("LD_VOLATILE_BUILTIN_TYPES")
2093+
#undef LD_BUILTIN_TYPES
2094+
#pragma pop_macro("LD_BUILTIN_TYPES")
2095+
#undef LD_BUILTIN_AS_TYPES
2096+
#pragma pop_macro("LD_BUILTIN_AS_TYPES")
2097+
#undef LD_BUILTIN_SCOPES_AS_TYPES
2098+
#pragma pop_macro("LD_BUILTIN_SCOPES_AS_TYPES")
2099+
2100+
#pragma push_macro("ST_VOLATILE_BUILTIN_TYPES")
2101+
#define ST_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \
2102+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_i, "viD*i", "n") \
2103+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_l, "vLiD*Li", "n") \
2104+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n") \
2105+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_f, "vfD*f", "n") \
2106+
BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_d, "vdD*d", "n")
2107+
2108+
#pragma push_macro("ST_BUILTIN_TYPES")
2109+
#define ST_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \
2110+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i, "viD*i", "n", SM_70) \
2111+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l, "vLiD*Li", "n", SM_70) \
2112+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n", SM_70) \
2113+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f, "vfD*f", "n", SM_70) \
2114+
TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d, "vdD*d", "n", SM_70)
2115+
2116+
#pragma push_macro("ST_BUILTIN_AS_TYPES")
2117+
#define ST_BUILTIN_AS_TYPES(ORDER, SCOPE) \
2118+
ST_BUILTIN_TYPES(ORDER, SCOPE, _gen) \
2119+
ST_BUILTIN_TYPES(ORDER, SCOPE, _global) \
2120+
ST_BUILTIN_TYPES(ORDER, SCOPE, _shared)
2121+
2122+
#pragma push_macro("ST_BUILTIN_SCOPES_AS_TYPES")
2123+
#define ST_BUILTIN_SCOPES_AS_TYPES(ORDER) \
2124+
ST_BUILTIN_AS_TYPES(ORDER, ) \
2125+
ST_BUILTIN_AS_TYPES(ORDER, _cta) \
2126+
ST_BUILTIN_AS_TYPES(ORDER, _sys)
2127+
2128+
ST_BUILTIN_SCOPES_AS_TYPES()
2129+
ST_BUILTIN_SCOPES_AS_TYPES(_release)
2130+
ST_VOLATILE_BUILTIN_TYPES(_gen)
2131+
ST_VOLATILE_BUILTIN_TYPES(_global)
2132+
ST_VOLATILE_BUILTIN_TYPES(_shared)
2133+
2134+
#undef ST_VOLATILE_BUILTIN_TYPES
2135+
#pragma pop_macro("ST_VOLATILE_BUILTIN_TYPES")
2136+
#undef ST_BUILTIN_TYPES
2137+
#pragma pop_macro("ST_BUILTIN_TYPES")
2138+
#undef ST_BUILTIN_AS_TYPES
2139+
#pragma pop_macro("ST_BUILTIN_AS_TYPES")
2140+
#undef ST_BUILTIN_SCOPES_AS_TYPES
2141+
#pragma pop_macro("ST_BUILTIN_SCOPES_AS_TYPES")
2142+
20572143
// Compiler Error Warn
20582144
BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
20592145
BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")

clang/include/clang/Basic/Version.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
#ifndef LLVM_CLANG_BASIC_VERSION_H
1616
#define LLVM_CLANG_BASIC_VERSION_H
1717

18+
#include "clang/Basic/LangOptions.h"
1819
#include "clang/Basic/Version.inc"
20+
#include "llvm/ADT/SmallVector.h"
1921
#include "llvm/ADT/StringRef.h"
2022

2123
namespace clang {
@@ -56,6 +58,11 @@ namespace clang {
5658
/// for use in the CPP __VERSION__ macro, which includes the clang version
5759
/// number, the repository version, and the vendor tag.
5860
std::string getClangFullCPPVersion();
61+
62+
/// Retrieves the name/value pairs, selected by the SYCL standard version, that define
63+
/// the CL_SYCL_LANGUAGE_VERSION and SYCL_LANGUAGE_VERSION macros.
64+
llvm::SmallVector<std::pair<llvm::StringRef, llvm::StringRef>, 2>
65+
getSYCLVersionMacros(const LangOptions &LangOpts);
5966
}
6067

6168
#endif // LLVM_CLANG_BASIC_VERSION_H

clang/lib/Basic/Version.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,13 @@ std::string getClangFullCPPVersion() {
123123
return buf;
124124
}
125125

126+
llvm::SmallVector<std::pair<llvm::StringRef, llvm::StringRef>, 2>
127+
getSYCLVersionMacros(const LangOptions &LangOpts) {
128+
if (LangOpts.getSYCLVersion() == LangOptions::SYCL_2017)
129+
return {{"CL_SYCL_LANGUAGE_VERSION", "121"},
130+
{"SYCL_LANGUAGE_VERSION", "201707"}};
131+
if (LangOpts.getSYCLVersion() == LangOptions::SYCL_2020)
132+
return {{"SYCL_LANGUAGE_VERSION", "202001"}};
133+
llvm_unreachable("SYCL standard should be set");
134+
}
126135
} // end namespace clang

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,9 +1045,8 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager(
10451045
// -fsycl-instrument-device-code option was passed. This option can be
10461046
// used only with spir triple.
10471047
if (CodeGenOpts.SPIRITTAnnotations) {
1048-
if (!llvm::Triple(TheModule->getTargetTriple()).isSPIR())
1049-
llvm::report_fatal_error(
1050-
"ITT annotations can only by added to a module with spir target");
1048+
assert(llvm::Triple(TheModule->getTargetTriple()).isSPIR() &&
1049+
"ITT annotations can only by added to a module with spir target");
10511050
PerModulePasses.add(createSPIRITTAnnotationsLegacyPass());
10521051
}
10531052

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17503,6 +17503,21 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
1750317503
Ptr->getType()}),
1750417504
{Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
1750517505
};
17506+
auto MakeScopedLd = [&](unsigned IntrinsicID) {
17507+
Value *Ptr = EmitScalarExpr(E->getArg(0));
17508+
return Builder.CreateCall(
17509+
CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
17510+
Ptr->getType()}),
17511+
{Ptr});
17512+
};
17513+
auto MakeScopedSt = [&](unsigned IntrinsicID) {
17514+
Value *Ptr = EmitScalarExpr(E->getArg(0));
17515+
return Builder.CreateCall(
17516+
CGM.getIntrinsic(
17517+
IntrinsicID,
17518+
{Ptr->getType(), Ptr->getType()->getPointerElementType()}),
17519+
{Ptr, EmitScalarExpr(E->getArg(1))});
17520+
};
1750617521
auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
1750717522
Value *Ptr = EmitScalarExpr(E->getArg(0));
1750817523
return Builder.CreateCall(
@@ -17518,6 +17533,85 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
1751817533
{Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
1751917534
};
1752017535
switch (BuiltinID) {
17536+
17537+
#define LD_VOLATILE_CASES(ADDR_SPACE) \
17538+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_i: \
17539+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_l: \
17540+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_ll: \
17541+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i_volatile); \
17542+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_f: \
17543+
case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_d: \
17544+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f_volatile);
17545+
17546+
#define LD_CASES(ORDER, SCOPE, ADDR_SPACE) \
17547+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i: \
17548+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l: \
17549+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll: \
17550+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i##ORDER##SCOPE); \
17551+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f: \
17552+
case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d: \
17553+
return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f##ORDER##SCOPE);
17554+
17555+
#define LD_CASES_AS(ORDER, SCOPE) \
17556+
LD_CASES(ORDER, SCOPE, _gen) \
17557+
LD_CASES(ORDER, SCOPE, _global) \
17558+
LD_CASES(ORDER, SCOPE, _shared)
17559+
17560+
#define LD_CASES_AS_SCOPES(ORDER) \
17561+
LD_CASES_AS(ORDER, ) \
17562+
LD_CASES_AS(ORDER, _cta) \
17563+
LD_CASES_AS(ORDER, _sys)
17564+
17565+
LD_CASES_AS_SCOPES()
17566+
LD_CASES_AS_SCOPES(_acquire)
17567+
LD_VOLATILE_CASES(_gen)
17568+
LD_VOLATILE_CASES(_global)
17569+
LD_VOLATILE_CASES(_shared)
17570+
17571+
#undef LD_VOLATILE_CASES
17572+
#undef LD_CASES
17573+
#undef LD_CASES_AS
17574+
#undef LD_CASES_AS_SCOPES
17575+
17576+
#define ST_VOLATILE_CASES(ADDR_SPACE) \
17577+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_i: \
17578+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_l: \
17579+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_ll: \
17580+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i_volatile); \
17581+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_f: \
17582+
case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_d: \
17583+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f_volatile);
17584+
17585+
#define ST_CASES(ORDER, SCOPE, ADDR_SPACE) \
17586+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i: \
17587+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l: \
17588+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll: \
17589+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i##ORDER##SCOPE); \
17590+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f: \
17591+
case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d: \
17592+
return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f##ORDER##SCOPE);
17593+
17594+
#define ST_CASES_AS(ORDER, SCOPE) \
17595+
ST_CASES(ORDER, SCOPE, _gen) \
17596+
ST_CASES(ORDER, SCOPE, _global) \
17597+
ST_CASES(ORDER, SCOPE, _shared)
17598+
17599+
#define ST_CASES_AS_SCOPES(ORDER) \
17600+
ST_CASES_AS(ORDER, ) \
17601+
ST_CASES_AS(ORDER, _cta) \
17602+
ST_CASES_AS(ORDER, _sys)
17603+
17604+
ST_CASES_AS_SCOPES()
17605+
ST_CASES_AS_SCOPES(_release)
17606+
ST_VOLATILE_CASES(_gen)
17607+
ST_VOLATILE_CASES(_global)
17608+
ST_VOLATILE_CASES(_shared)
17609+
17610+
#undef ST_VOLATILE_CASES
17611+
#undef ST_CASES
17612+
#undef ST_CASES_AS
17613+
#undef ST_CASES_AS_SCOPES
17614+
1752117615
case NVPTX::BI__nvvm_atom_add_gen_i:
1752217616
case NVPTX::BI__nvvm_atom_add_gen_l:
1752317617
case NVPTX::BI__nvvm_atom_add_gen_ll:

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,10 +1733,16 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
17331733
// MDNode for the intel_buffer_location attribute.
17341734
SmallVector<llvm::Metadata *, 8> argSYCLBufferLocationAttr;
17351735

1736+
// MDNode for listing SYCL kernel pointer arguments originating from
1737+
// accessors.
1738+
SmallVector<llvm::Metadata *, 8> argSYCLKernelRuntimeAligned;
1739+
17361740
// MDNode for listing ESIMD kernel pointer arguments originating from
1737-
// accessors
1741+
// accessors.
17381742
SmallVector<llvm::Metadata *, 8> argESIMDAccPtrs;
17391743

1744+
bool isKernelArgAnAccessor = false;
1745+
17401746
if (FD && CGF)
17411747
for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
17421748
const ParmVarDecl *parm = FD->getParamDecl(i);
@@ -1840,17 +1846,38 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
18401846
SYCLBufferLocationAttr->getLocationID()))
18411847
: llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(-1)));
18421848

1849+
// If a kernel pointer argument comes from an accessor, we generate
1850+
// new metadata (kernel_arg_runtime_aligned) on the kernel to indicate
1851+
// that this pointer has runtime allocated alignment. The value of any
1852+
// "kernel_arg_runtime_aligned" metadata element is 'true' for any kernel
1853+
// argument that corresponds to the base pointer of an accessor and
1854+
// 'false' otherwise.
1855+
if (parm->hasAttr<SYCLAccessorPtrAttr>()) {
1856+
isKernelArgAnAccessor = true;
1857+
argSYCLKernelRuntimeAligned.push_back(
1858+
llvm::ConstantAsMetadata::get(CGF->Builder.getTrue()));
1859+
} else {
1860+
argSYCLKernelRuntimeAligned.push_back(
1861+
llvm::ConstantAsMetadata::get(CGF->Builder.getFalse()));
1862+
}
1863+
18431864
if (FD->hasAttr<SYCLSimdAttr>())
18441865
argESIMDAccPtrs.push_back(llvm::ConstantAsMetadata::get(
1845-
CGF->Builder.getInt1(parm->hasAttr<SYCLSimdAccessorPtrAttr>())));
1866+
CGF->Builder.getInt1(parm->hasAttr<SYCLAccessorPtrAttr>())));
18461867
}
18471868

18481869
bool IsEsimdFunction = FD && FD->hasAttr<SYCLSimdAttr>();
18491870

1850-
if (LangOpts.SYCLIsDevice && !IsEsimdFunction)
1871+
if (LangOpts.SYCLIsDevice && !IsEsimdFunction) {
18511872
Fn->setMetadata("kernel_arg_buffer_location",
18521873
llvm::MDNode::get(VMContext, argSYCLBufferLocationAttr));
1853-
else {
1874+
// Generate this metadata only if at least one kernel argument is an
1875+
// accessor.
1876+
if (isKernelArgAnAccessor)
1877+
Fn->setMetadata(
1878+
"kernel_arg_runtime_aligned",
1879+
llvm::MDNode::get(VMContext, argSYCLKernelRuntimeAligned));
1880+
} else {
18541881
Fn->setMetadata("kernel_arg_addr_space",
18551882
llvm::MDNode::get(VMContext, addressQuals));
18561883
Fn->setMetadata("kernel_arg_access_qual",

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ CudaInstallationDetector::CudaInstallationDetector(
136136
Candidates.emplace_back(
137137
Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
138138
} else if (HostTriple.isOSWindows()) {
139+
// CUDA_PATH is set by the installer, prefer it over other versions that
140+
// might be present on the system.
141+
if (const char *CudaPathEnvVar = ::getenv("CUDA_PATH"))
142+
Candidates.emplace_back(CudaPathEnvVar);
143+
139144
for (const char *Ver : Versions)
140145
Candidates.emplace_back(
141146
D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +

0 commit comments

Comments
 (0)