diff --git a/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu b/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu index 8a3acbd587ca..f0cb656229bb 100644 --- a/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu +++ b/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu @@ -1,4 +1,7 @@ -void test(void **pDev, const void *symbol) { +#define MAX_CONST_SIZE 1024 +__constant__ char symbol[MAX_CONST_SIZE]; + +void test(void **pDev) { // Start cudaGetSymbolAddress(pDev /*void ***/, symbol /*const void **/); // End diff --git a/clang/include/clang/DPCT/DPCTOptions.inc b/clang/include/clang/DPCT/DPCTOptions.inc index afa05039ad58..9173ba488dc2 100644 --- a/clang/include/clang/DPCT/DPCTOptions.inc +++ b/clang/include/clang/DPCT/DPCTOptions.inc @@ -807,7 +807,7 @@ DPCT_ENUM_OPTION( DPCT_OPTION_ENUM_VALUE( "device_global", int(ExperimentalFeatures::Exp_DeviceGlobal), "Experimental extension that allows device scoped memory " - "allocations into SYCL that can\n" + "allocations into SYCL that can " "be accessed within a kernel using syntax similar to C++ global " "variables.\n", false), diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp index ff3349fa6159..40e6e0c38832 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp @@ -63,14 +63,12 @@ std::string MathFuncNameRewriter::getNewFuncName() { auto ContextFD = getImmediateOuterFuncDecl(Call); if (NamespaceStr == "std" && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) { + !isGlobalOrDeviceFuncDecl(ContextFD)) { return ""; } // For device functions else if ((FD->hasAttr() && !FD->hasAttr()) || - (ContextFD && (ContextFD->hasAttr() || - ContextFD->hasAttr()))) { + (ContextFD && isGlobalOrDeviceFuncDecl(ContextFD))) { if (SourceCalleeName == "abs") { // further check the type of the args. if (!Call->getArg(0)->getType()->isIntegerType()) { @@ -333,15 +331,12 @@ std::optional MathSimulatedRewriter::rewrite() { } auto ContextFD = getImmediateOuterFuncDecl(Call); - if (NamespaceStr == "std" && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) { + if (NamespaceStr == "std" && ContextFD && !isGlobalOrDeviceFuncDecl(ContextFD)) { return {}; } if (!FD->hasAttr() && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) + !isGlobalOrDeviceFuncDecl(ContextFD)) return Base::rewrite(); // Do not need to report warnings for pow, funnelshift, or drcp migrations diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h index b66d93f5df49..4f7393f19c3a 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h @@ -213,8 +213,7 @@ inline auto IsDirectCallerPureHost = [](const CallExpr *C) -> bool { } if (!ContextFD) return false; - if (!ContextFD->getAttr() && - !ContextFD->getAttr()) { + if (!isGlobalOrDeviceFuncDecl(ContextFD)) { return true; } return false; diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 356bb0b1dcfd..659bfee88a7c 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -5020,8 +5020,7 @@ void DeviceFunctionDeclRule::runRule( // We need skip lambda in host code, but cannot skip lambda in device code. if (const FunctionDecl *OuterMostFD = findTheOuterMostFunctionDecl(FD); - OuterMostFD && (!OuterMostFD->hasAttr() && - !OuterMostFD->hasAttr())) + OuterMostFD && !isGlobalOrDeviceFuncDecl(OuterMostFD)) return; if (FD->isVariadic()) { @@ -6813,9 +6812,17 @@ void MemoryMigrationRule::getSymbolAddressMigration( ExprAnalysis EA; EA.analyze(C->getArg(0)); auto StmtStrArg0 = EA.getReplacedString(); + const DeclRefExpr *Arg = + dyn_cast(C->getArg(1)->IgnoreImplicitAsWritten()); + const VarDecl *VD = dyn_cast(Arg->getDecl()); EA.analyze(C->getArg(1)); auto StmtStrArg1 = EA.getReplacedString(); - Replacement = "*(" + StmtStrArg0 + ")" + " = " + StmtStrArg1 + ".get_ptr()"; + if (VD && VD->isLocalVarDeclOrParm()) { + StmtStrArg1 = "const_cast(" + StmtStrArg1 + ")"; + } else { + StmtStrArg1 += ".get_ptr()"; + } + Replacement = "*(" + StmtStrArg0 + ")" + " = " + StmtStrArg1; requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation(new ReplaceStmt(C, std::move(Replacement))); } diff --git a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp index 29dbf8978cdb..5a3a2e0a56b3 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp @@ -216,9 +216,17 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { } } } - if (!HasTypeCasted && Decl->hasAttr() && - (MemVarRef->getType()->getTypeClass() == - Type::TypeClass::ConstantArray)) { + auto FD = dpct::DpctGlobalInfo::findAncestor(MemVarRef); + auto CE = dpct::DpctGlobalInfo::findAncestor(MemVarRef); + if (auto VD =dyn_cast(MemVarRef->getDecl()); FD && VD && + !VD->isLocalVarDeclOrParm() && + !isGlobalOrDeviceFuncDecl(FD)) { + if (CE && + !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) + emplaceTransformation(new InsertAfterStmt(MemVarRef, ".get_ptr()")); + } else if (!HasTypeCasted && Decl->hasAttr() && + (MemVarRef->getType()->getTypeClass() == + Type::TypeClass::ConstantArray)) { const Expr *RHS = getRHSOfTheNonConstAssignedVar(MemVarRef); if (RHS) { auto Range = GetReplRange(RHS); @@ -235,7 +243,7 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { if (VD == nullptr) return; auto Var = Global.findMemVarInfo(VD); - if (Func->hasAttr() || Func->hasAttr()) { + if (isGlobalOrDeviceFuncDecl(Func)) { if (DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern) { if (!Var) @@ -829,7 +837,7 @@ void MemVarAnalysisRule::runRule(const MatchFinder::MatchResult &Result) { return; } auto Var = MemVarInfo::buildMemVarInfo(VD); - if (Func->hasAttr() || Func->hasAttr()) { + if (isGlobalOrDeviceFuncDecl(Func)) { if (!(DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern)) { @@ -1025,7 +1033,7 @@ void ZeroLengthArrayRule::runRule(const MatchFinder::MatchResult &Result) { const clang::FunctionDecl *FD = DpctGlobalInfo::getParentFunction(TL); if (FD) { // Check if the array is in device code - if (!(FD->getAttr()) && !(FD->getAttr())) + if (!isGlobalOrDeviceFuncDecl(FD)) return; } } diff --git a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp index 0ba5467e158d..5943728864cc 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp @@ -762,7 +762,7 @@ const Expr *TextureRule::getAssignedBO(const Expr *E, ASTContext &Context) { bool TextureRule::processTexVarDeclInDevice(const VarDecl *VD) { if (auto FD = dyn_cast_or_null(VD->getParentFunctionOrMethod())) { - if (FD->hasAttr() || FD->hasAttr()) { + if (isGlobalOrDeviceFuncDecl(FD)) { auto Tex = DpctGlobalInfo::getInstance().insertTextureInfo(VD); auto DataType = Tex->getType()->getDataType(); @@ -1009,8 +1009,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { return; } if (auto FD = DpctGlobalInfo::getParentFunction(TL)) { - if ((FD->hasAttr() || FD->hasAttr()) && - !DpctGlobalInfo::useExtBindlessImages()) { + if (isGlobalOrDeviceFuncDecl(FD) && !DpctGlobalInfo::useExtBindlessImages()) { return; } } diff --git a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp index bcdb049d42e9..1d317403574f 100644 --- a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp @@ -1054,8 +1054,7 @@ void CubRule::processCubTypeDefOrUsing(const TypedefNameDecl *TD) { MapNames::getClNamespace() + "sub_group", SM)); } else if (CanonicalTypeStr.find("Block") != std::string::npos) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TD); - if (DeviceFuncDecl && (DeviceFuncDecl->hasAttr() || - DeviceFuncDecl->hasAttr())) { + if (DeviceFuncDecl && isGlobalOrDeviceFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); @@ -1692,8 +1691,7 @@ void CubRule::processTypeLoc(const TypeLoc *TL) { } else if (TypeName.find("class cub::BlockScan") == 0 || TypeName.find("class cub::BlockReduce") == 0) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TL); - if (DeviceFuncDecl && (DeviceFuncDecl->hasAttr() || - DeviceFuncDecl->hasAttr())) { + if (DeviceFuncDecl && isGlobalOrDeviceFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); diff --git a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp index b5e85b1f06b3..74dc1d79ad58 100644 --- a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp @@ -188,7 +188,7 @@ void ThrustAPIRule::thrustFuncMigration(const MatchFinder::MatchResult &Result, // thrust::count, thrust::equal) called in device function , should be // migrated to oneapi::dpl APIs without a policy on the SYCL side if (auto FD = DpctGlobalInfo::getParentFunction(CE)) { - if (FD->hasAttr() || FD->hasAttr()) { + if (isGlobalOrDeviceFuncDecl(FD)) { if (hasExecutionPolicy) { emplaceTransformation(removeArg(CE, 0, *Result.SourceManager)); } diff --git a/clang/lib/DPCT/Utility.cpp b/clang/lib/DPCT/Utility.cpp index 088fedd49ef5..716683d612a5 100644 --- a/clang/lib/DPCT/Utility.cpp +++ b/clang/lib/DPCT/Utility.cpp @@ -783,6 +783,11 @@ bool isCudaMemoryAllocation(const DeclRefExpr *Arg, const CallExpr *CE) { return false; } +bool isGlobalOrDeviceFuncDecl(const FunctionDecl *FD) { + if (FD->hasAttr() || FD->hasAttr()) + return true; + return false; +} /// This function traverses all the nodes in the AST represented by \param Root /// in a depth-first manner, until the node \param Sentinal is reached, to check /// if the pointer \param Arg to a piece of memory is used as lvalue after the diff --git a/clang/lib/DPCT/Utility.h b/clang/lib/DPCT/Utility.h index d8f35cbf018c..2c72a798bd41 100644 --- a/clang/lib/DPCT/Utility.h +++ b/clang/lib/DPCT/Utility.h @@ -526,6 +526,7 @@ bool isTypeInAnalysisScope(const clang::Type *TypePtr); bool isCubVar(const clang::VarDecl *VD); bool isCubTempStorageType(QualType T); bool isCubCollectiveRecordType(QualType T); +bool isGlobalOrDeviceFuncDecl(const FunctionDecl *FD); bool isExprUsed(const clang::Expr *E, bool &Result); bool isUserDefinedDecl(const clang::Decl *D); bool isLambda(const clang::FunctionDecl *FD); diff --git a/clang/test/dpct/cuda_const_pass_by_param.cu b/clang/test/dpct/cuda_const_pass_by_param.cu new file mode 100644 index 000000000000..51ad9dbbc27a --- /dev/null +++ b/clang/test/dpct/cuda_const_pass_by_param.cu @@ -0,0 +1,52 @@ + +// RUN: dpct --format-range=none --usm-level=none -out-root %T/cuda_const_pass_by_param %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only +// RUN: FileCheck %s --match-full-lines --input-file %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.cpp -o %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.o %} +#include +#include + +#define MAX_CONST_SIZE 1024 +__constant__ char device_const_buffer[MAX_CONST_SIZE]; + + +__host__ void* qudaGetSymbolAddress(const void* symbol) { + + void* ptr; + // CHECK: *(&ptr) = const_cast(symbol); + cudaGetSymbolAddress(&ptr, symbol); + return ptr; + +} + +__host__ void* qudaGetSymbolAddress2() { + + void* ptr; + // CHECK: *(&ptr) = device_const_buffer.get_ptr(); + cudaGetSymbolAddress(&ptr, device_const_buffer); + return ptr; + +} + + +template +__host__ void process_buffer(T* data) { + + if(data) printf("Processed: %f\n", static_cast(data[0])); +} + + +int main() { + float h_data[256]; + for(int i=0; i<256; i++) h_data[i] = i*1.0f; +// CHECK: dpct::dpct_memcpy(device_const_buffer.get_ptr(), h_data, sizeof(h_data)); + cudaMemcpyToSymbol(device_const_buffer, h_data, sizeof(h_data)); +// CHECK: void* host_ptr = qudaGetSymbolAddress(device_const_buffer.get_ptr()); + void* host_ptr = qudaGetSymbolAddress(device_const_buffer); + void* host_ptr2 = qudaGetSymbolAddress2(); + process_buffer(static_cast(host_ptr)); + cudaDeviceSynchronize(); + + return 0; +} + + diff --git a/clang/test/dpct/help_option_check/lin/help_advanced.txt b/clang/test/dpct/help_option_check/lin/help_advanced.txt index eec0dd19163c..dc122f7157eb 100644 --- a/clang/test/dpct/help_option_check/lin/help_advanced.txt +++ b/clang/test/dpct/help_option_check/lin/help_advanced.txt @@ -58,8 +58,7 @@ Advanced DPCT options =bindless_images - Experimental extension that allows use of bindless images APIs. =graph - Experimental extension that allows use of SYCL Graph APIs. =non-uniform-groups - Experimental extension that allows use of non-uniform groups. - =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can - be accessed within a kernel using syntax similar to C++ global variables. + =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can be accessed within a kernel using syntax similar to C++ global variables. =virtual_mem - Experimental extension that allows for mapping of an address range onto multiple allocations of physical memory. =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. diff --git a/clang/test/dpct/help_option_check/lin/help_all.txt b/clang/test/dpct/help_option_check/lin/help_all.txt index 0a759cf82505..22186a896daa 100644 --- a/clang/test/dpct/help_option_check/lin/help_all.txt +++ b/clang/test/dpct/help_option_check/lin/help_all.txt @@ -165,8 +165,7 @@ All DPCT options =bindless_images - Experimental extension that allows use of bindless images APIs. =graph - Experimental extension that allows use of SYCL Graph APIs. =non-uniform-groups - Experimental extension that allows use of non-uniform groups. - =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can - be accessed within a kernel using syntax similar to C++ global variables. + =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can be accessed within a kernel using syntax similar to C++ global variables. =virtual_mem - Experimental extension that allows for mapping of an address range onto multiple allocations of physical memory. =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. diff --git a/clang/test/dpct/kernel-call.cu b/clang/test/dpct/kernel-call.cu index c41e49e14970..b2b0a30dace7 100644 --- a/clang/test/dpct/kernel-call.cu +++ b/clang/test/dpct/kernel-call.cu @@ -406,7 +406,7 @@ void run_foo4(dim3 c, dim3 d) { //CHECK-NEXT: my_kernel(result_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result[10]); +//CHECK-NEXT: printf("%f ", result.get_ptr()[10]); //CHECK-NEXT:} __managed__ float result[32]; __global__ void my_kernel(float* result) { @@ -432,7 +432,7 @@ int run_foo5 () { //CHECK-NEXT: my_kernel(result2_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result2[10]); +//CHECK-NEXT: printf("%f ", result2.get_ptr()[10]); //CHECK-NEXT:} __managed__ float result2[32]; int run_foo6 () { @@ -453,7 +453,7 @@ int run_foo6 () { //CHECK-NEXT: my_kernel(result3_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result3[0]); +//CHECK-NEXT: printf("%f ", result3.get_ptr()[0]); //CHECK-NEXT:} __managed__ float result3; int run_foo7 () { @@ -482,7 +482,7 @@ int run_foo7 () { //CHECK-NEXT: my_kernel2(in_ct0, out_acc_ct1.get_raw_pointer()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", out[0]); +//CHECK-NEXT: printf("%f ", out.get_ptr()[0]); //CHECK-NEXT:} __managed__ float in; diff --git a/clang/test/dpct/kernel-usm.cu b/clang/test/dpct/kernel-usm.cu index 6e826bfa43ab..4897b95f8128 100644 --- a/clang/test/dpct/kernel-usm.cu +++ b/clang/test/dpct/kernel-usm.cu @@ -65,7 +65,7 @@ int main() { // CHECK-NEXT: my_kernel(result_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result[10]); +// CHECK-NEXT: printf("%f ", result.get_ptr()[10]); // CHECK-NEXT:} __managed__ __device__ float result[32]; __global__ void my_kernel(float* result) { @@ -92,7 +92,7 @@ int run_foo5 () { // CHECK-NEXT: my_kernel(result2_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result2[10]); +// CHECK-NEXT: printf("%f ", result2.get_ptr()[10]); // CHECK-NEXT:} __managed__ float result2[32]; int run_foo6 () { @@ -114,7 +114,7 @@ int run_foo6 () { // CHECK-NEXT: my_kernel(result3_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result3[0]); +// CHECK-NEXT: printf("%f ", result3.get_ptr()[0]); // CHECK-NEXT:} __managed__ float result3; int run_foo7 () { @@ -142,7 +142,7 @@ int run_foo7 () { // CHECK-NEXT: my_kernel2(in_ct0, out_ct1); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", out[0]); +// CHECK-NEXT: printf("%f ", out.get_ptr()[0]); // CHECK-NEXT:} __managed__ float in; __managed__ float out; diff --git a/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu b/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu index 0e509bddf17a..a61d62a10547 100644 --- a/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu +++ b/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu @@ -46,7 +46,7 @@ // CUDAGETSYMBOLADDRESS: CUDA API: // CUDAGETSYMBOLADDRESS-NEXT: cudaGetSymbolAddress(pDev /*void ***/, symbol /*const void **/); // CUDAGETSYMBOLADDRESS-NEXT: Is migrated to: -// CUDAGETSYMBOLADDRESS-NEXT: *(pDev) = symbol.get_ptr(); +// CUDAGETSYMBOLADDRESS-NEXT: *(pDev) = const_cast(symbol); // RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cudaGetSymbolSize | FileCheck %s -check-prefix=CUDAGETSYMBOLSIZE // CUDAGETSYMBOLSIZE: CUDA API: