@@ -39,7 +39,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
39
39
private:
40
40
llvm::IntegerType *IntTy, *SizeTy;
41
41
llvm::Type *VoidTy;
42
- llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy ;
42
+ llvm::PointerType *PtrTy ;
43
43
44
44
/// Convenience reference to the LLVM context.
45
45
llvm::LLVMContext &Context;
@@ -232,15 +232,12 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
232
232
VoidTy = CGM.VoidTy ;
233
233
Zeros[0 ] = llvm::ConstantInt::get (SizeTy, 0 );
234
234
Zeros[1 ] = Zeros[0 ];
235
-
236
- CharPtrTy = CGM.UnqualPtrTy ;
237
- VoidPtrTy = CGM.UnqualPtrTy ;
238
- VoidPtrPtrTy = CGM.UnqualPtrTy ;
235
+ PtrTy = CGM.UnqualPtrTy ;
239
236
}
240
237
241
238
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn () const {
242
239
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
243
- llvm::Type *Params[] = {VoidPtrTy , SizeTy, SizeTy};
240
+ llvm::Type *Params[] = {PtrTy , SizeTy, SizeTy};
244
241
return CGM.CreateRuntimeFunction (
245
242
llvm::FunctionType::get (IntTy, Params, false ),
246
243
addPrefixToName (" SetupArgument" ));
@@ -250,26 +247,26 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
250
247
if (CGM.getLangOpts ().HIP ) {
251
248
// hipError_t hipLaunchByPtr(char *);
252
249
return CGM.CreateRuntimeFunction (
253
- llvm::FunctionType::get (IntTy, CharPtrTy , false ), " hipLaunchByPtr" );
250
+ llvm::FunctionType::get (IntTy, PtrTy , false ), " hipLaunchByPtr" );
254
251
}
255
252
// cudaError_t cudaLaunch(char *);
256
- return CGM.CreateRuntimeFunction (
257
- llvm::FunctionType::get (IntTy, CharPtrTy, false ), " cudaLaunch" );
253
+ return CGM.CreateRuntimeFunction (llvm::FunctionType::get (IntTy, PtrTy, false ),
254
+ " cudaLaunch" );
258
255
}
259
256
260
257
/// Build the LLVM function type `void (ptr)` (non-variadic) used for the
/// register-globals function.
///
/// The single parameter is the opaque pointer type held in PtrTy.
///
/// \returns the uniqued llvm::FunctionType for that signature.
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
  llvm::Type *HandleArgTy = PtrTy;
  return llvm::FunctionType::get(VoidTy, HandleArgTy, /*isVarArg=*/false);
}
263
260
264
261
/// Build the LLVM function type `void (ptr)` (non-variadic) used for the
/// callback function.
///
/// The single parameter is the opaque pointer type held in PtrTy.
///
/// \returns the uniqued llvm::FunctionType for that signature.
llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
  llvm::Type *CallbackArgTy = PtrTy;
  return llvm::FunctionType::get(VoidTy, CallbackArgTy, /*isVarArg=*/false);
}
267
264
268
265
/// Build the LLVM function type of the linked-binary registration function.
///
/// The resulting signature is `void (ptr, ptr, ptr, ptr)`, non-variadic:
///   1. pointer to a function of type getRegisterGlobalsFnTy(),
///   2. an opaque pointer (PtrTy),
///   3. an opaque pointer (PtrTy),
///   4. pointer to a function of type getCallbackFnTy().
///
/// LLVM pointers are opaque, so a pointer-to-function carries no pointee
/// type. The function-pointer parameters are therefore spelled as a plain
/// address-space-0 pointer, which is exactly what the former
/// `FnTy->getPointerTo()` calls produced, without building function types
/// whose only use was to be discarded.
///
/// \returns the uniqued llvm::FunctionType for that signature.
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
  // Address space 0, matching the default argument of Type::getPointerTo().
  llvm::PointerType *FnPtrTy = llvm::PointerType::getUnqual(Context);
  llvm::Type *Params[] = {FnPtrTy, PtrTy, PtrTy, FnPtrTy};
  return llvm::FunctionType::get(VoidTy, Params, /*isVarArg=*/false);
}
275
272
@@ -332,15 +329,15 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
332
329
// args, allocate a single pointer so we still have a valid pointer to the
333
330
// argument array that we can pass to runtime, even if it will be unused.
334
331
Address KernelArgs = CGF.CreateTempAlloca (
335
- VoidPtrTy , CharUnits::fromQuantity (16 ), " kernel_args" ,
332
+ PtrTy , CharUnits::fromQuantity (16 ), " kernel_args" ,
336
333
llvm::ConstantInt::get (SizeTy, std::max<size_t >(1 , Args.size ())));
337
334
// Store pointers to the arguments in a locally allocated launch_args.
338
335
for (unsigned i = 0 ; i < Args.size (); ++i) {
339
336
llvm::Value* VarPtr = CGF.GetAddrOfLocalVar (Args[i]).getPointer ();
340
- llvm::Value *VoidVarPtr = CGF.Builder .CreatePointerCast (VarPtr, VoidPtrTy );
337
+ llvm::Value *VoidVarPtr = CGF.Builder .CreatePointerCast (VarPtr, PtrTy );
341
338
CGF.Builder .CreateDefaultAlignedStore (
342
339
VoidVarPtr,
343
- CGF.Builder .CreateConstGEP1_32 (VoidPtrTy , KernelArgs.getPointer (), i));
340
+ CGF.Builder .CreateConstGEP1_32 (PtrTy , KernelArgs.getPointer (), i));
344
341
}
345
342
346
343
llvm::BasicBlock *EndBlock = CGF.createBasicBlock (" setup.end" );
@@ -388,8 +385,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
388
385
CGF.CreateMemTemp (Dim3Ty, CharUnits::fromQuantity (8 ), " block_dim" );
389
386
Address ShmemSize =
390
387
CGF.CreateTempAlloca (SizeTy, CGM.getSizeAlign (), " shmem_size" );
391
- Address Stream =
392
- CGF.CreateTempAlloca (VoidPtrTy, CGM.getPointerAlign (), " stream" );
388
+ Address Stream = CGF.CreateTempAlloca (PtrTy, CGM.getPointerAlign (), " stream" );
393
389
llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction (
394
390
llvm::FunctionType::get (IntTy,
395
391
{/* gridDim=*/ GridDim.getType (),
@@ -404,8 +400,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
404
400
ShmemSize.getPointer (), Stream.getPointer ()});
405
401
406
402
// Emit the call to cudaLaunch
407
- llvm::Value *Kernel = CGF. Builder . CreatePointerCast (
408
- KernelHandles[CGF.CurFn ->getName ()], VoidPtrTy );
403
+ llvm::Value *Kernel =
404
+ CGF. Builder . CreatePointerCast ( KernelHandles[CGF.CurFn ->getName ()], PtrTy );
409
405
CallArgList LaunchKernelArgs;
410
406
LaunchKernelArgs.add (RValue::get (Kernel),
411
407
cudaLaunchKernelFD->getParamDecl (0 )->getType ());
@@ -445,7 +441,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
445
441
Offset = Offset.alignTo (TInfo.Align );
446
442
llvm::Value *Args[] = {
447
443
CGF.Builder .CreatePointerCast (CGF.GetAddrOfLocalVar (A).getPointer (),
448
- VoidPtrTy ),
444
+ PtrTy ),
449
445
llvm::ConstantInt::get (SizeTy, TInfo.Width .getQuantity ()),
450
446
llvm::ConstantInt::get (SizeTy, Offset.getQuantity ()),
451
447
};
@@ -460,8 +456,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
460
456
461
457
// Emit the call to cudaLaunch
462
458
llvm::FunctionCallee cudaLaunchFn = getLaunchFn ();
463
- llvm::Value *Arg = CGF. Builder . CreatePointerCast (
464
- KernelHandles[CGF.CurFn ->getName ()], CharPtrTy );
459
+ llvm::Value *Arg =
460
+ CGF. Builder . CreatePointerCast ( KernelHandles[CGF.CurFn ->getName ()], PtrTy );
465
461
CGF.EmitRuntimeCallOrInvoke (cudaLaunchFn, Arg);
466
462
CGF.EmitBranch (EndBlock);
467
463
@@ -539,8 +535,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
539
535
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
540
536
// int, uint3*, uint3*, dim3*, dim3*, int*)
541
537
llvm::Type *RegisterFuncParams[] = {
542
- VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy , IntTy,
543
- VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy , IntTy->getPointerTo ()};
538
+ PtrTy, PtrTy, PtrTy, PtrTy , IntTy,
539
+ PtrTy, PtrTy, PtrTy, PtrTy , IntTy->getPointerTo ()};
544
540
llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction (
545
541
llvm::FunctionType::get (IntTy, RegisterFuncParams, false ),
546
542
addUnderscoredPrefixToName (" RegisterFunction" ));
@@ -552,7 +548,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
552
548
for (auto &&I : EmittedKernels) {
553
549
llvm::Constant *KernelName =
554
550
makeConstantString (getDeviceSideName (cast<NamedDecl>(I.D )));
555
- llvm::Constant *NullPtr = llvm::ConstantPointerNull::get (VoidPtrTy );
551
+ llvm::Constant *NullPtr = llvm::ConstantPointerNull::get (PtrTy );
556
552
llvm::Value *Args[] = {
557
553
&GpuBinaryHandlePtr,
558
554
KernelHandles[I.Kernel ->getName ()],
@@ -575,33 +571,29 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
575
571
576
572
// void __cudaRegisterVar(void **, char *, char *, const char *,
577
573
// int, int, int, int)
578
- llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
579
- CharPtrTy, IntTy, VarSizeTy,
580
- IntTy, IntTy};
574
+ llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
575
+ IntTy, VarSizeTy, IntTy, IntTy};
581
576
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction (
582
577
llvm::FunctionType::get (VoidTy, RegisterVarParams, false ),
583
578
addUnderscoredPrefixToName (" RegisterVar" ));
584
579
// void __hipRegisterManagedVar(void **, char *, char *, const char *,
585
580
// size_t, unsigned)
586
- llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy ,
587
- CharPtrTy, VarSizeTy, IntTy};
581
+ llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy ,
582
+ PtrTy, VarSizeTy, IntTy};
588
583
llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction (
589
584
llvm::FunctionType::get (VoidTy, RegisterManagedVarParams, false ),
590
585
addUnderscoredPrefixToName (" RegisterManagedVar" ));
591
586
// void __cudaRegisterSurface(void **, const struct surfaceReference *,
592
587
// const void **, const char *, int, int);
593
588
llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction (
594
589
llvm::FunctionType::get (
595
- VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
596
- false ),
590
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false ),
597
591
addUnderscoredPrefixToName (" RegisterSurface" ));
598
592
// void __cudaRegisterTexture(void **, const struct textureReference *,
599
593
// const void **, const char *, int, int, int)
600
594
llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction (
601
595
llvm::FunctionType::get (
602
- VoidTy,
603
- {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
604
- false ),
596
+ VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false ),
605
597
addUnderscoredPrefixToName (" RegisterTexture" ));
606
598
for (auto &&Info : DeviceVars) {
607
599
llvm::GlobalVariable *Var = Info.Var ;
@@ -712,11 +704,11 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
712
704
713
705
// void ** __{cuda|hip}RegisterFatBinary(void *);
714
706
llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction (
715
- llvm::FunctionType::get (VoidPtrPtrTy, VoidPtrTy , false ),
707
+ llvm::FunctionType::get (PtrTy, PtrTy , false ),
716
708
addUnderscoredPrefixToName (" RegisterFatBinary" ));
717
709
// struct { int magic, int version, void * gpu_binary, void * dont_care };
718
710
llvm::StructType *FatbinWrapperTy =
719
- llvm::StructType::get (IntTy, IntTy, VoidPtrTy, VoidPtrTy );
711
+ llvm::StructType::get (IntTy, IntTy, PtrTy, PtrTy );
720
712
721
713
// Register GPU binary with the CUDA runtime, store returned handle in a
722
714
// global variable and save a reference in GpuBinaryHandle to be cleaned up
@@ -812,7 +804,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
812
804
// Data.
813
805
Values.add (FatBinStr);
814
806
// Unused in fatbin v1.
815
- Values.add (llvm::ConstantPointerNull::get (VoidPtrTy ));
807
+ Values.add (llvm::ConstantPointerNull::get (PtrTy ));
816
808
llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal (
817
809
addUnderscoredPrefixToName (" _fatbin_wrapper" ), CGM.getPointerAlign (),
818
810
/* constant*/ true );
@@ -835,9 +827,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
835
827
// The name, size, and initialization pattern of this variable is part
836
828
// of HIP ABI.
837
829
GpuBinaryHandle = new llvm::GlobalVariable (
838
- TheModule, VoidPtrPtrTy, /* isConstant=*/ false ,
839
- Linkage,
840
- /* Initializer=*/ llvm::ConstantPointerNull::get (VoidPtrPtrTy),
830
+ TheModule, PtrTy, /* isConstant=*/ false , Linkage,
831
+ /* Initializer=*/ llvm::ConstantPointerNull::get (PtrTy),
841
832
" __hip_gpubin_handle" );
842
833
if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
843
834
GpuBinaryHandle->setComdat (
@@ -847,7 +838,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
847
838
if (Linkage != llvm::GlobalValue::InternalLinkage)
848
839
GpuBinaryHandle->setVisibility (llvm::GlobalValue::HiddenVisibility);
849
840
Address GpuBinaryAddr (
850
- GpuBinaryHandle, VoidPtrPtrTy ,
841
+ GpuBinaryHandle, PtrTy ,
851
842
CharUnits::fromQuantity (GpuBinaryHandle->getAlignment ()));
852
843
{
853
844
auto *HandleValue = CtorBuilder.CreateLoad (GpuBinaryAddr);
@@ -879,8 +870,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
879
870
llvm::CallInst *RegisterFatbinCall =
880
871
CtorBuilder.CreateCall (RegisterFatbinFunc, FatbinWrapper);
881
872
GpuBinaryHandle = new llvm::GlobalVariable (
882
- TheModule, VoidPtrPtrTy , false , llvm::GlobalValue::InternalLinkage,
883
- llvm::ConstantPointerNull::get (VoidPtrPtrTy ), " __cuda_gpubin_handle" );
873
+ TheModule, PtrTy , false , llvm::GlobalValue::InternalLinkage,
874
+ llvm::ConstantPointerNull::get (PtrTy ), " __cuda_gpubin_handle" );
884
875
GpuBinaryHandle->setAlignment (CGM.getPointerAlign ().getAsAlign ());
885
876
CtorBuilder.CreateAlignedStore (RegisterFatbinCall, GpuBinaryHandle,
886
877
CGM.getPointerAlign ());
@@ -894,7 +885,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
894
885
CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
895
886
// void __cudaRegisterFatBinaryEnd(void **);
896
887
llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction (
897
- llvm::FunctionType::get (VoidTy, VoidPtrPtrTy , false ),
888
+ llvm::FunctionType::get (VoidTy, PtrTy , false ),
898
889
" __cudaRegisterFatBinaryEnd" );
899
890
CtorBuilder.CreateCall (RegisterFatbinEndFunc, RegisterFatbinCall);
900
891
}
@@ -966,7 +957,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
966
957
967
958
// void __cudaUnregisterFatBinary(void ** handle);
968
959
llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction (
969
- llvm::FunctionType::get (VoidTy, VoidPtrPtrTy , false ),
960
+ llvm::FunctionType::get (VoidTy, PtrTy , false ),
970
961
addUnderscoredPrefixToName (" UnregisterFatBinary" ));
971
962
972
963
llvm::Function *ModuleDtorFunc = llvm::Function::Create (
0 commit comments