Skip to content

Commit 4239392

Browse files
committed
Merge from 'main' to 'sycl-web' (8 commits)
CONFLICT (content): Merge conflict in clang/lib/CodeGen/CGCUDANV.cpp
2 parents b13fc31 + f49e2b0 commit 4239392

File tree

18 files changed

+543
-794
lines changed

18 files changed

+543
-794
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,8 +1970,6 @@ def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone,
19701970
}
19711971

19721972
let TargetGuard = "sve2p1" in {
1973-
def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>;
1974-
19751973
def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
19761974
def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
19771975
def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2071,8 +2069,6 @@ let TargetGuard = "sve2p1" in {
20712069
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>;
20722070
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
20732071

2074-
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
2075-
20762072
defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
20772073
}
20782074

@@ -2081,6 +2077,9 @@ let TargetGuard = "sve2p1|sme2" in {
20812077
def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
20822078

20832079
def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
2080+
2081+
def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [IsStreamingCompatible], []>;
2082+
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
20842083
}
20852084

20862085
let TargetGuard = "sve2p1,b16b16" in {

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 38 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
3939
private:
4040
llvm::IntegerType *IntTy, *SizeTy;
4141
llvm::Type *VoidTy;
42-
llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
42+
llvm::PointerType *PtrTy;
4343

4444
/// Convenience reference to LLVM Context
4545
llvm::LLVMContext &Context;
@@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
232232
VoidTy = CGM.VoidTy;
233233
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
234234
Zeros[1] = Zeros[0];
235-
236-
CharPtrTy = CGM.UnqualPtrTy;
237-
VoidPtrTy = CGM.UnqualPtrTy;
238-
VoidPtrPtrTy = CGM.UnqualPtrTy;
235+
PtrTy = CGM.UnqualPtrTy;
239236
}
240237

241238
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
242239
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
243-
llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
240+
llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
244241
return CGM.CreateRuntimeFunction(
245242
llvm::FunctionType::get(IntTy, Params, false),
246243
addPrefixToName("SetupArgument"));
@@ -250,26 +247,26 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
250247
if (CGM.getLangOpts().HIP) {
251248
// hipError_t hipLaunchByPtr(char *);
252249
return CGM.CreateRuntimeFunction(
253-
llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
250+
llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
254251
}
255252
// cudaError_t cudaLaunch(char *);
256-
return CGM.CreateRuntimeFunction(
257-
llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
253+
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, PtrTy, false),
254+
"cudaLaunch");
258255
}
259256

260257
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
261-
return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
258+
return llvm::FunctionType::get(VoidTy, PtrTy, false);
262259
}
263260

264261
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
265-
return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
262+
return llvm::FunctionType::get(VoidTy, PtrTy, false);
266263
}
267264

268265
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
269266
auto *CallbackFnTy = getCallbackFnTy();
270267
auto *RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
271-
llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
272-
VoidPtrTy, CallbackFnTy->getPointerTo()};
268+
llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), PtrTy,
269+
PtrTy, CallbackFnTy->getPointerTo()};
273270
return llvm::FunctionType::get(VoidTy, Params, false);
274271
}
275272

@@ -332,15 +329,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
332329
// args, allocate a single pointer so we still have a valid pointer to the
333330
// argument array that we can pass to runtime, even if it will be unused.
334331
Address KernelArgs = CGF.CreateTempAlloca(
335-
VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
332+
PtrTy, CharUnits::fromQuantity(16), "kernel_args",
336333
llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
337334
// Store pointers to the arguments in the locally allocated kernel_args array.
338335
for (unsigned i = 0; i < Args.size(); ++i) {
339336
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
340-
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
337+
llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
341338
CGF.Builder.CreateDefaultAlignedStore(
342339
VoidVarPtr,
343-
CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
340+
CGF.Builder.CreateConstGEP1_32(PtrTy, KernelArgs.getPointer(), i));
344341
}
345342

346343
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
@@ -388,8 +385,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
388385
CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
389386
Address ShmemSize =
390387
CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
391-
Address Stream =
392-
CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
388+
Address Stream = CGF.CreateTempAlloca(PtrTy, CGM.getPointerAlign(), "stream");
393389
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
394390
llvm::FunctionType::get(IntTy,
395391
{/*gridDim=*/GridDim.getType(),
@@ -404,8 +400,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
404400
ShmemSize.getPointer(), Stream.getPointer()});
405401

406402
// Emit the call to the kernel launch function (cudaLaunchKernelFD).
407-
llvm::Value *Kernel = CGF.Builder.CreatePointerCast(
408-
KernelHandles[CGF.CurFn->getName()], VoidPtrTy);
403+
llvm::Value *Kernel =
404+
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
409405
CallArgList LaunchKernelArgs;
410406
LaunchKernelArgs.add(RValue::get(Kernel),
411407
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -445,7 +441,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
445441
Offset = Offset.alignTo(TInfo.Align);
446442
llvm::Value *Args[] = {
447443
CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
448-
VoidPtrTy),
444+
PtrTy),
449445
llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
450446
llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
451447
};
@@ -460,8 +456,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
460456

461457
// Emit the call to cudaLaunch
462458
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
463-
llvm::Value *Arg = CGF.Builder.CreatePointerCast(
464-
KernelHandles[CGF.CurFn->getName()], CharPtrTy);
459+
llvm::Value *Arg =
460+
CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
465461
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
466462
CGF.EmitBranch(EndBlock);
467463

@@ -539,8 +535,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
539535
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
540536
// int, uint3*, uint3*, dim3*, dim3*, int*)
541537
llvm::Type *RegisterFuncParams[] = {
542-
VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
543-
VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
538+
PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
539+
PtrTy, PtrTy, PtrTy, PtrTy, IntTy->getPointerTo()};
544540
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
545541
llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
546542
addUnderscoredPrefixToName("RegisterFunction"));
@@ -552,7 +548,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
552548
for (auto &&I : EmittedKernels) {
553549
llvm::Constant *KernelName =
554550
makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
555-
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
551+
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
556552
llvm::Value *Args[] = {
557553
&GpuBinaryHandlePtr,
558554
KernelHandles[I.Kernel->getName()],
@@ -575,33 +571,29 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
575571

576572
// void __cudaRegisterVar(void **, char *, char *, const char *,
577573
// int, int, int, int)
578-
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
579-
CharPtrTy, IntTy, VarSizeTy,
580-
IntTy, IntTy};
574+
llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
575+
IntTy, VarSizeTy, IntTy, IntTy};
581576
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
582577
llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
583578
addUnderscoredPrefixToName("RegisterVar"));
584579
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
585580
// size_t, unsigned)
586-
llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
587-
CharPtrTy, VarSizeTy, IntTy};
581+
llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
582+
PtrTy, VarSizeTy, IntTy};
588583
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
589584
llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
590585
addUnderscoredPrefixToName("RegisterManagedVar"));
591586
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
592587
// const void **, const char *, int, int);
593588
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
594589
llvm::FunctionType::get(
595-
VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
596-
false),
590+
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
597591
addUnderscoredPrefixToName("RegisterSurface"));
598592
// void __cudaRegisterTexture(void **, const struct textureReference *,
599593
// const void **, const char *, int, int, int)
600594
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
601595
llvm::FunctionType::get(
602-
VoidTy,
603-
{VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
604-
false),
596+
VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
605597
addUnderscoredPrefixToName("RegisterTexture"));
606598
for (auto &&Info : DeviceVars) {
607599
llvm::GlobalVariable *Var = Info.Var;
@@ -712,11 +704,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
712704

713705
// void ** __{cuda|hip}RegisterFatBinary(void *);
714706
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
715-
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
707+
llvm::FunctionType::get(PtrTy, PtrTy, false),
716708
addUnderscoredPrefixToName("RegisterFatBinary"));
717709
// struct { int magic, int version, void * gpu_binary, void * dont_care };
718710
llvm::StructType *FatbinWrapperTy =
719-
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
711+
llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
720712

721713
// Register GPU binary with the CUDA runtime, store returned handle in a
722714
// global variable and save a reference in GpuBinaryHandle to be cleaned up
@@ -812,7 +804,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
812804
// Data.
813805
Values.add(FatBinStr);
814806
// Unused in fatbin v1.
815-
Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
807+
Values.add(llvm::ConstantPointerNull::get(PtrTy));
816808
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
817809
addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
818810
/*constant*/ true);
@@ -835,9 +827,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
835827
// The name, size, and initialization pattern of this variable are part
836828
// of the HIP ABI.
837829
GpuBinaryHandle = new llvm::GlobalVariable(
838-
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
839-
Linkage,
840-
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
830+
TheModule, PtrTy, /*isConstant=*/false, Linkage,
831+
/*Initializer=*/llvm::ConstantPointerNull::get(PtrTy),
841832
"__hip_gpubin_handle");
842833
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
843834
GpuBinaryHandle->setComdat(
@@ -847,7 +838,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
847838
if (Linkage != llvm::GlobalValue::InternalLinkage)
848839
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
849840
Address GpuBinaryAddr(
850-
GpuBinaryHandle, VoidPtrPtrTy,
841+
GpuBinaryHandle, PtrTy,
851842
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
852843
{
853844
auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
@@ -879,8 +870,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
879870
llvm::CallInst *RegisterFatbinCall =
880871
CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
881872
GpuBinaryHandle = new llvm::GlobalVariable(
882-
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
883-
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
873+
TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
874+
llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
884875
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
885876
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
886877
CGM.getPointerAlign());
@@ -894,7 +885,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
894885
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
895886
// void __cudaRegisterFatBinaryEnd(void **);
896887
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
897-
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
888+
llvm::FunctionType::get(VoidTy, PtrTy, false),
898889
"__cudaRegisterFatBinaryEnd");
899890
CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
900891
}
@@ -966,7 +957,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
966957

967958
// void __cudaUnregisterFatBinary(void ** handle);
968959
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
969-
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
960+
llvm::FunctionType::get(VoidTy, PtrTy, false),
970961
addUnderscoredPrefixToName("UnregisterFatBinary"));
971962

972963
llvm::Function *ModuleDtorFunc = llvm::Function::Create(

0 commit comments

Comments
 (0)