From e5f333be0036947dfd27722f5953191092792ab7 Mon Sep 17 00:00:00 2001 From: "Chen, Sheng S" Date: Mon, 30 Jun 2025 15:37:20 +0800 Subject: [PATCH 1/5] [SYCLomatic] Fix the parse the const memoory througth parameter of function. Signed-off-by: Chen, Sheng S --- clang/include/clang/DPCT/DPCTOptions.inc | 2 +- clang/lib/DPCT/RulesLang/RulesLang.cpp | 10 ++++- .../RulesLang/RulesLangNoneAPIAndType.cpp | 10 +++-- clang/test/dpct/cuda_const_pass_by_param.cu | 44 +++++++++++++++++++ 4 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 clang/test/dpct/cuda_const_pass_by_param.cu diff --git a/clang/include/clang/DPCT/DPCTOptions.inc b/clang/include/clang/DPCT/DPCTOptions.inc index afa05039ad58..9173ba488dc2 100644 --- a/clang/include/clang/DPCT/DPCTOptions.inc +++ b/clang/include/clang/DPCT/DPCTOptions.inc @@ -807,7 +807,7 @@ DPCT_ENUM_OPTION( DPCT_OPTION_ENUM_VALUE( "device_global", int(ExperimentalFeatures::Exp_DeviceGlobal), "Experimental extension that allows device scoped memory " - "allocations into SYCL that can\n" + "allocations into SYCL that can " "be accessed within a kernel using syntax similar to C++ global " "variables.\n", false), diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index aec72c3a5d30..021c011ca08b 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -6978,9 +6978,17 @@ void MemoryMigrationRule::getSymbolAddressMigration( ExprAnalysis EA; EA.analyze(C->getArg(0)); auto StmtStrArg0 = EA.getReplacedString(); + const DeclRefExpr *Arg = + dyn_cast(C->getArg(1)->IgnoreImplicitAsWritten()); + const VarDecl *VD = dyn_cast(Arg->getDecl()); EA.analyze(C->getArg(1)); auto StmtStrArg1 = EA.getReplacedString(); - Replacement = "*(" + StmtStrArg0 + ")" + " = " + StmtStrArg1 + ".get_ptr()"; + if (VD->isLocalVarDeclOrParm()) { + StmtStrArg1 = "const_cast(" + StmtStrArg1 + ")"; + } else { + StmtStrArg1 += ".get_ptr()"; + } + Replacement = "*(" + StmtStrArg0 + ")" + " = " + StmtStrArg1; requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation(new ReplaceStmt(C, std::move(Replacement))); } diff --git a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp index 01cb30192c66..318b301387c0 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp @@ -216,9 +216,13 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { } } } - if (!HasTypeCasted && Decl->hasAttr() && - (MemVarRef->getType()->getTypeClass() == - Type::TypeClass::ConstantArray)) { + auto CE = dpct::DpctGlobalInfo::findAncestor(MemVarRef); + if (CE && !isa(CE) && + !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) { + emplaceTransformation(new InsertAfterStmt(MemVarRef, ".get_ptr()")); + } else if (!HasTypeCasted && Decl->hasAttr() && + (MemVarRef->getType()->getTypeClass() == + Type::TypeClass::ConstantArray)) { const Expr *RHS = getRHSOfTheNonConstAssignedVar(MemVarRef); if (RHS) { auto Range = GetReplRange(RHS); diff --git a/clang/test/dpct/cuda_const_pass_by_param.cu b/clang/test/dpct/cuda_const_pass_by_param.cu new file mode 100644 index 000000000000..f70d0391d0d7 --- /dev/null +++ b/clang/test/dpct/cuda_const_pass_by_param.cu @@ -0,0 +1,44 @@ + +// RUN: dpct --format-range=none --usm-level=none -out-root %T/cuda_const_pass_by_param %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only +// RUN: FileCheck %s --match-full-lines --input-file %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.cpp -o %T/cuda_const_pass_by_param/cuda_const_pass_by_param.dp.o %} +#include +#include + +#define MAX_CONST_SIZE 1024 +__constant__ char device_const_buffer[MAX_CONST_SIZE]; + + +__host__ void* qudaGetSymbolAddress(const void* symbol) { + + void* ptr; + // CHECK: *(&ptr) = const_cast(symbol); + cudaGetSymbolAddress(&ptr, symbol); + return ptr; + +} + + +template +__host__ void process_buffer(T* data) { + + if(data) printf("Processed: %f\n", static_cast(data[0])); +} + + + + +int main() { + float h_data[256]; + for(int i=0; i<256; i++) h_data[i] = i*1.0f; +// CHECK: dpct::dpct_memcpy(device_const_buffer.get_ptr(), h_data, sizeof(h_data)); + cudaMemcpyToSymbol(device_const_buffer, h_data, sizeof(h_data)); +// CHECK: void* host_ptr = qudaGetSymbolAddress(device_const_buffer.get_ptr()); + void* host_ptr = qudaGetSymbolAddress(device_const_buffer); + process_buffer(static_cast(host_ptr)); + cudaDeviceSynchronize(); + + return 0; +} + + From 21e854690cc1fc6b6df8be962cc3ddd8c20347d7 Mon Sep 17 00:00:00 2001 From: "Chen, Sheng S" Date: Tue, 1 Jul 2025 11:43:13 +0800 Subject: [PATCH 2/5] up Signed-off-by: Chen, Sheng S --- .../DPCT/Runtime/cudaGetSymbolAddress.cu | 5 ++++- .../DPCT/RulesLang/Math/CallExprRewriterMath.cpp | 13 ++++--------- .../DPCT/RulesLang/Math/CallExprRewriterMath.h | 3 +-- clang/lib/DPCT/RulesLang/RulesLang.cpp | 3 +-- .../DPCT/RulesLang/RulesLangNoneAPIAndType.cpp | 16 ++++++++++------ clang/lib/DPCT/RulesLang/RulesLangTexture.cpp | 5 ++--- clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp | 6 ++---- .../lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp | 2 +- clang/lib/DPCT/Utility.cpp | 5 +++++ clang/lib/DPCT/Utility.h | 1 + clang/test/dpct/cuda_const_pass_by_param.cu | 12 ++++++++++-- .../dpct/help_option_check/lin/help_advanced.txt | 3 +-- .../test/dpct/help_option_check/lin/help_all.txt | 3 +-- clang/test/dpct/kernel-call.cu | 8 ++++---- clang/test/dpct/kernel-usm.cu | 8 ++++---- 15 files changed, 51 insertions(+), 42 deletions(-) diff --git a/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu b/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu index 8a3acbd587ca..f0cb656229bb 100644 --- a/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu +++ b/clang/examples/DPCT/Runtime/cudaGetSymbolAddress.cu @@ -1,4 +1,7 @@ -void test(void **pDev, const void *symbol) { +#define MAX_CONST_SIZE 1024 +__constant__ char symbol[MAX_CONST_SIZE]; + +void test(void **pDev) { // Start cudaGetSymbolAddress(pDev /*void ***/, symbol /*const void **/); // End diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp index 0444ecc0b5b0..7263ff3a002a 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp @@ -63,14 +63,12 @@ std::string MathFuncNameRewriter::getNewFuncName() { auto ContextFD = getImmediateOuterFuncDecl(Call); if (NamespaceStr == "std" && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) { + !isCudaKernelFuncDecl(ContextFD)) { return ""; } // For device functions else if ((FD->hasAttr() && !FD->hasAttr()) || - (ContextFD && (ContextFD->hasAttr() || - ContextFD->hasAttr()))) { + (ContextFD && isCudaKernelFuncDecl(ContextFD))) { if (SourceCalleeName == "abs") { // further check the type of the args. if (!Call->getArg(0)->getType()->isIntegerType()) { @@ -333,15 +331,12 @@ std::optional MathSimulatedRewriter::rewrite() { } auto ContextFD = getImmediateOuterFuncDecl(Call); - if (NamespaceStr == "std" && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) { + if (NamespaceStr == "std" && ContextFD && !isCudaKernelFuncDecl(ContextFD)) { return {}; } if (!FD->hasAttr() && ContextFD && - !ContextFD->hasAttr() && - !ContextFD->hasAttr()) + !isCudaKernelFuncDecl(ContextFD)) return Base::rewrite(); // Do not need to report warnings for pow, funnelshift, or drcp migrations diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h index b66d93f5df49..ca270c3fa2a0 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h @@ -213,8 +213,7 @@ inline auto IsDirectCallerPureHost = [](const CallExpr *C) -> bool { } if (!ContextFD) return false; - if (!ContextFD->getAttr() && - !ContextFD->getAttr()) { + if (!isCudaKernelFuncDecl(ContextFD)) { return true; } return false; diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 021c011ca08b..974a80e512ba 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -5185,8 +5185,7 @@ void DeviceFunctionDeclRule::runRule( // We need skip lambda in host code, but cannot skip lambda in device code. if (const FunctionDecl *OuterMostFD = findTheOuterMostFunctionDecl(FD); - OuterMostFD && (!OuterMostFD->hasAttr() && - !OuterMostFD->hasAttr())) + OuterMostFD && !isCudaKernelFuncDecl(OuterMostFD)) return; if (FD->isVariadic()) { diff --git a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp index 318b301387c0..97f67047a6cf 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp @@ -216,10 +216,14 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { } } } + auto FD = dpct::DpctGlobalInfo::findAncestor(MemVarRef); auto CE = dpct::DpctGlobalInfo::findAncestor(MemVarRef); - if (CE && !isa(CE) && - !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) { - emplaceTransformation(new InsertAfterStmt(MemVarRef, ".get_ptr()")); + if (FD && + !dyn_cast(MemVarRef->getDecl())->isLocalVarDeclOrParm() && + !isCudaKernelFuncDecl(FD)) { + if (CE && + !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) + emplaceTransformation(new InsertAfterStmt(MemVarRef, ".get_ptr()")); } else if (!HasTypeCasted && Decl->hasAttr() && (MemVarRef->getType()->getTypeClass() == Type::TypeClass::ConstantArray)) { @@ -239,7 +243,7 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { if (VD == nullptr) return; auto Var = Global.findMemVarInfo(VD); - if (Func->hasAttr() || Func->hasAttr()) { + if (isCudaKernelFuncDecl(Func)) { if (DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern) { if (!Var) @@ -833,7 +837,7 @@ void MemVarAnalysisRule::runRule(const MatchFinder::MatchResult &Result) { return; } auto Var = MemVarInfo::buildMemVarInfo(VD); - if (Func->hasAttr() || Func->hasAttr()) { + if (isCudaKernelFuncDecl(Func)) { if (!(DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern)) { @@ -1029,7 +1033,7 @@ void ZeroLengthArrayRule::runRule(const MatchFinder::MatchResult &Result) { const clang::FunctionDecl *FD = DpctGlobalInfo::getParentFunction(TL); if (FD) { // Check if the array is in device code - if (!(FD->getAttr()) && !(FD->getAttr())) + if (!isCudaKernelFuncDecl(FD)) return; } } diff --git a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp index 23a634b3595e..d91490d99a2c 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp @@ -761,7 +761,7 @@ const Expr *TextureRule::getAssignedBO(const Expr *E, ASTContext &Context) { bool TextureRule::processTexVarDeclInDevice(const VarDecl *VD) { if (auto FD = dyn_cast_or_null(VD->getParentFunctionOrMethod())) { - if (FD->hasAttr() || FD->hasAttr()) { + if (isCudaKernelFuncDecl(FD)) { auto Tex = DpctGlobalInfo::getInstance().insertTextureInfo(VD); auto DataType = Tex->getType()->getDataType(); @@ -1008,8 +1008,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { return; } if (auto FD = DpctGlobalInfo::getParentFunction(TL)) { - if ((FD->hasAttr() || FD->hasAttr()) && - !DpctGlobalInfo::useExtBindlessImages()) { + if (isCudaKernelFuncDecl(FD) && !DpctGlobalInfo::useExtBindlessImages()) { return; } } diff --git a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp index bcdb049d42e9..ac530766f4d9 100644 --- a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp @@ -1054,8 +1054,7 @@ void CubRule::processCubTypeDefOrUsing(const TypedefNameDecl *TD) { MapNames::getClNamespace() + "sub_group", SM)); } else if (CanonicalTypeStr.find("Block") != std::string::npos) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TD); - if (DeviceFuncDecl && (DeviceFuncDecl->hasAttr() || - DeviceFuncDecl->hasAttr())) { + if (DeviceFuncDecl && isCudaKernelFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); @@ -1692,8 +1691,7 @@ void CubRule::processTypeLoc(const TypeLoc *TL) { } else if (TypeName.find("class cub::BlockScan") == 0 || TypeName.find("class cub::BlockReduce") == 0) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TL); - if (DeviceFuncDecl && (DeviceFuncDecl->hasAttr() || - DeviceFuncDecl->hasAttr())) { + if (DeviceFuncDecl && isCudaKernelFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); diff --git a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp index efe87c2a06c5..07654e0c8001 100644 --- a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp @@ -189,7 +189,7 @@ void ThrustAPIRule::thrustFuncMigration(const MatchFinder::MatchResult &Result, // thrust::count, thrust::equal) called in device function , should be // migrated to oneapi::dpl APIs without a policy on the SYCL side if (auto FD = DpctGlobalInfo::getParentFunction(CE)) { - if (FD->hasAttr() || FD->hasAttr()) { + if (isCudaKernelFuncDecl(FD)) { if (hasExecutionPolicy) { emplaceTransformation(removeArg(CE, 0, *Result.SourceManager)); } diff --git a/clang/lib/DPCT/Utility.cpp b/clang/lib/DPCT/Utility.cpp index e9b2e30b6e17..935c6b394673 100644 --- a/clang/lib/DPCT/Utility.cpp +++ b/clang/lib/DPCT/Utility.cpp @@ -783,6 +783,11 @@ bool isCudaMemoryAllocation(const DeclRefExpr *Arg, const CallExpr *CE) { return false; } +bool isCudaKernelFuncDecl(const FunctionDecl *FD) { + if (FD->hasAttr() || FD->hasAttr()) + return true; + return false; +} /// This function traverses all the nodes in the AST represented by \param Root /// in a depth-first manner, until the node \param Sentinal is reached, to check /// if the pointer \param Arg to a piece of memory is used as lvalue after the diff --git a/clang/lib/DPCT/Utility.h b/clang/lib/DPCT/Utility.h index d8f35cbf018c..2248a8cb490e 100644 --- a/clang/lib/DPCT/Utility.h +++ b/clang/lib/DPCT/Utility.h @@ -526,6 +526,7 @@ bool isTypeInAnalysisScope(const clang::Type *TypePtr); bool isCubVar(const clang::VarDecl *VD); bool isCubTempStorageType(QualType T); bool isCubCollectiveRecordType(QualType T); +bool isCudaKernelFuncDecl(const FunctionDecl *FD); bool isExprUsed(const clang::Expr *E, bool &Result); bool isUserDefinedDecl(const clang::Decl *D); bool isLambda(const clang::FunctionDecl *FD); diff --git a/clang/test/dpct/cuda_const_pass_by_param.cu b/clang/test/dpct/cuda_const_pass_by_param.cu index f70d0391d0d7..51ad9dbbc27a 100644 --- a/clang/test/dpct/cuda_const_pass_by_param.cu +++ b/clang/test/dpct/cuda_const_pass_by_param.cu @@ -18,6 +18,15 @@ __host__ void* qudaGetSymbolAddress(const void* symbol) { } +__host__ void* qudaGetSymbolAddress2() { + + void* ptr; + // CHECK: *(&ptr) = device_const_buffer.get_ptr(); + cudaGetSymbolAddress(&ptr, device_const_buffer); + return ptr; + +} + template __host__ void process_buffer(T* data) { @@ -26,8 +35,6 @@ __host__ void process_buffer(T* data) { } - - int main() { float h_data[256]; for(int i=0; i<256; i++) h_data[i] = i*1.0f; @@ -35,6 +42,7 @@ int main() { cudaMemcpyToSymbol(device_const_buffer, h_data, sizeof(h_data)); // CHECK: void* host_ptr = qudaGetSymbolAddress(device_const_buffer.get_ptr()); void* host_ptr = qudaGetSymbolAddress(device_const_buffer); + void* host_ptr2 = qudaGetSymbolAddress2(); process_buffer(static_cast(host_ptr)); cudaDeviceSynchronize(); diff --git a/clang/test/dpct/help_option_check/lin/help_advanced.txt b/clang/test/dpct/help_option_check/lin/help_advanced.txt index eec0dd19163c..dc122f7157eb 100644 --- a/clang/test/dpct/help_option_check/lin/help_advanced.txt +++ b/clang/test/dpct/help_option_check/lin/help_advanced.txt @@ -58,8 +58,7 @@ Advanced DPCT options =bindless_images - Experimental extension that allows use of bindless images APIs. =graph - Experimental extension that allows use of SYCL Graph APIs. =non-uniform-groups - Experimental extension that allows use of non-uniform groups. - =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can - be accessed within a kernel using syntax similar to C++ global variables. + =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can be accessed within a kernel using syntax similar to C++ global variables. =virtual_mem - Experimental extension that allows for mapping of an address range onto multiple allocations of physical memory. =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. diff --git a/clang/test/dpct/help_option_check/lin/help_all.txt b/clang/test/dpct/help_option_check/lin/help_all.txt index 0a759cf82505..22186a896daa 100644 --- a/clang/test/dpct/help_option_check/lin/help_all.txt +++ b/clang/test/dpct/help_option_check/lin/help_all.txt @@ -165,8 +165,7 @@ All DPCT options =bindless_images - Experimental extension that allows use of bindless images APIs. =graph - Experimental extension that allows use of SYCL Graph APIs. =non-uniform-groups - Experimental extension that allows use of non-uniform groups. - =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can - be accessed within a kernel using syntax similar to C++ global variables. + =device_global - Experimental extension that allows device scoped memory allocations into SYCL that can be accessed within a kernel using syntax similar to C++ global variables. =virtual_mem - Experimental extension that allows for mapping of an address range onto multiple allocations of physical memory. =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. diff --git a/clang/test/dpct/kernel-call.cu b/clang/test/dpct/kernel-call.cu index c41e49e14970..b2b0a30dace7 100644 --- a/clang/test/dpct/kernel-call.cu +++ b/clang/test/dpct/kernel-call.cu @@ -406,7 +406,7 @@ void run_foo4(dim3 c, dim3 d) { //CHECK-NEXT: my_kernel(result_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result[10]); +//CHECK-NEXT: printf("%f ", result.get_ptr()[10]); //CHECK-NEXT:} __managed__ float result[32]; __global__ void my_kernel(float* result) { @@ -432,7 +432,7 @@ int run_foo5 () { //CHECK-NEXT: my_kernel(result2_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result2[10]); +//CHECK-NEXT: printf("%f ", result2.get_ptr()[10]); //CHECK-NEXT:} __managed__ float result2[32]; int run_foo6 () { @@ -453,7 +453,7 @@ int run_foo6 () { //CHECK-NEXT: my_kernel(result3_acc_ct0.get_raw_pointer(), resultInGroup_acc_ct1.get_multi_ptr().get()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", result3[0]); +//CHECK-NEXT: printf("%f ", result3.get_ptr()[0]); //CHECK-NEXT:} __managed__ float result3; int run_foo7 () { @@ -482,7 +482,7 @@ int run_foo7 () { //CHECK-NEXT: my_kernel2(in_ct0, out_acc_ct1.get_raw_pointer()); //CHECK-NEXT: }); //CHECK-NEXT: }); -//CHECK-NEXT: printf("%f ", out[0]); +//CHECK-NEXT: printf("%f ", out.get_ptr()[0]); //CHECK-NEXT:} __managed__ float in; diff --git a/clang/test/dpct/kernel-usm.cu b/clang/test/dpct/kernel-usm.cu index 6e826bfa43ab..4897b95f8128 100644 --- a/clang/test/dpct/kernel-usm.cu +++ b/clang/test/dpct/kernel-usm.cu @@ -65,7 +65,7 @@ int main() { // CHECK-NEXT: my_kernel(result_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result[10]); +// CHECK-NEXT: printf("%f ", result.get_ptr()[10]); // CHECK-NEXT:} __managed__ __device__ float result[32]; __global__ void my_kernel(float* result) { @@ -92,7 +92,7 @@ int run_foo5 () { // CHECK-NEXT: my_kernel(result2_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result2[10]); +// CHECK-NEXT: printf("%f ", result2.get_ptr()[10]); // CHECK-NEXT:} __managed__ float result2[32]; int run_foo6 () { @@ -114,7 +114,7 @@ int run_foo6 () { // CHECK-NEXT: my_kernel(result3_ct0, resultInGroup_acc_ct1.get_multi_ptr().get()); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", result3[0]); +// CHECK-NEXT: printf("%f ", result3.get_ptr()[0]); // CHECK-NEXT:} __managed__ float result3; int run_foo7 () { @@ -142,7 +142,7 @@ int run_foo7 () { // CHECK-NEXT: my_kernel2(in_ct0, out_ct1); // CHECK-NEXT: }); // CHECK-NEXT: }); -// CHECK-NEXT: printf("%f ", out[0]); +// CHECK-NEXT: printf("%f ", out.get_ptr()[0]); // CHECK-NEXT:} __managed__ float in; __managed__ float out; From 2f172a7922df5fb6d4f2d780af07c9f0d9e1f25a Mon Sep 17 00:00:00 2001 From: "Chen, Sheng S" Date: Tue, 8 Jul 2025 11:11:56 +0800 Subject: [PATCH 3/5] update name Signed-off-by: Chen, Sheng S --- clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp | 8 ++++---- clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h | 2 +- clang/lib/DPCT/RulesLang/RulesLang.cpp | 2 +- clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp | 8 ++++---- clang/lib/DPCT/RulesLang/RulesLangTexture.cpp | 4 ++-- clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp | 4 ++-- clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp | 2 +- clang/lib/DPCT/Utility.cpp | 2 +- clang/lib/DPCT/Utility.h | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp index 7263ff3a002a..c0c240155596 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.cpp @@ -63,12 +63,12 @@ std::string MathFuncNameRewriter::getNewFuncName() { auto ContextFD = getImmediateOuterFuncDecl(Call); if (NamespaceStr == "std" && ContextFD && - !isCudaKernelFuncDecl(ContextFD)) { + !isGlobalOrDeviceFuncDecl(ContextFD)) { return ""; } // For device functions else if ((FD->hasAttr() && !FD->hasAttr()) || - (ContextFD && isCudaKernelFuncDecl(ContextFD))) { + (ContextFD && isGlobalOrDeviceFuncDecl(ContextFD))) { if (SourceCalleeName == "abs") { // further check the type of the args. if (!Call->getArg(0)->getType()->isIntegerType()) { @@ -331,12 +331,12 @@ std::optional MathSimulatedRewriter::rewrite() { } auto ContextFD = getImmediateOuterFuncDecl(Call); - if (NamespaceStr == "std" && ContextFD && !isCudaKernelFuncDecl(ContextFD)) { + if (NamespaceStr == "std" && ContextFD && !isGlobalOrDeviceFuncDecl(ContextFD)) { return {}; } if (!FD->hasAttr() && ContextFD && - !isCudaKernelFuncDecl(ContextFD)) + !isGlobalOrDeviceFuncDecl(ContextFD)) return Base::rewrite(); // Do not need to report warnings for pow, funnelshift, or drcp migrations diff --git a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h index ca270c3fa2a0..4f7393f19c3a 100644 --- a/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h +++ b/clang/lib/DPCT/RulesLang/Math/CallExprRewriterMath.h @@ -213,7 +213,7 @@ inline auto IsDirectCallerPureHost = [](const CallExpr *C) -> bool { } if (!ContextFD) return false; - if (!isCudaKernelFuncDecl(ContextFD)) { + if (!isGlobalOrDeviceFuncDecl(ContextFD)) { return true; } return false; diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 21a7e7ff8f9f..514d47f63bfa 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -5020,7 +5020,7 @@ void DeviceFunctionDeclRule::runRule( // We need skip lambda in host code, but cannot skip lambda in device code. if (const FunctionDecl *OuterMostFD = findTheOuterMostFunctionDecl(FD); - OuterMostFD && !isCudaKernelFuncDecl(OuterMostFD)) + OuterMostFD && !isGlobalOrDeviceFuncDecl(OuterMostFD)) return; if (FD->isVariadic()) { diff --git a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp index 4f8a00b24c3a..c3e4d5b94791 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp @@ -220,7 +220,7 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { auto CE = dpct::DpctGlobalInfo::findAncestor(MemVarRef); if (FD && !dyn_cast(MemVarRef->getDecl())->isLocalVarDeclOrParm() && - !isCudaKernelFuncDecl(FD)) { + !isGlobalOrDeviceFuncDecl(FD)) { if (CE && !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) emplaceTransformation(new InsertAfterStmt(MemVarRef, ".get_ptr()")); @@ -243,7 +243,7 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { if (VD == nullptr) return; auto Var = Global.findMemVarInfo(VD); - if (isCudaKernelFuncDecl(Func)) { + if (isGlobalOrDeviceFuncDecl(Func)) { if (DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern) { if (!Var) @@ -837,7 +837,7 @@ void MemVarAnalysisRule::runRule(const MatchFinder::MatchResult &Result) { return; } auto Var = MemVarInfo::buildMemVarInfo(VD); - if (isCudaKernelFuncDecl(Func)) { + if (isGlobalOrDeviceFuncDecl(Func)) { if (!(DpctGlobalInfo::useGroupLocalMemory() && VD->hasAttr() && VD->getStorageClass() != SC_Extern)) { @@ -1033,7 +1033,7 @@ void ZeroLengthArrayRule::runRule(const MatchFinder::MatchResult &Result) { const clang::FunctionDecl *FD = DpctGlobalInfo::getParentFunction(TL); if (FD) { // Check if the array is in device code - if (!isCudaKernelFuncDecl(FD)) + if (!isGlobalOrDeviceFuncDecl(FD)) return; } } diff --git a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp index d91490d99a2c..498a36e0a71f 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp @@ -761,7 +761,7 @@ const Expr *TextureRule::getAssignedBO(const Expr *E, ASTContext &Context) { bool TextureRule::processTexVarDeclInDevice(const VarDecl *VD) { if (auto FD = dyn_cast_or_null(VD->getParentFunctionOrMethod())) { - if (isCudaKernelFuncDecl(FD)) { + if (isGlobalOrDeviceFuncDecl(FD)) { auto Tex = DpctGlobalInfo::getInstance().insertTextureInfo(VD); auto DataType = Tex->getType()->getDataType(); @@ -1008,7 +1008,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { return; } if (auto FD = DpctGlobalInfo::getParentFunction(TL)) { - if (isCudaKernelFuncDecl(FD) && !DpctGlobalInfo::useExtBindlessImages()) { + if (isGlobalOrDeviceFuncDecl(FD) && !DpctGlobalInfo::useExtBindlessImages()) { return; } } diff --git a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp index ac530766f4d9..1d317403574f 100644 --- a/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/CUBAPIMigration.cpp @@ -1054,7 +1054,7 @@ void CubRule::processCubTypeDefOrUsing(const TypedefNameDecl *TD) { MapNames::getClNamespace() + "sub_group", SM)); } else if (CanonicalTypeStr.find("Block") != std::string::npos) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TD); - if (DeviceFuncDecl && isCudaKernelFuncDecl(DeviceFuncDecl)) { + if (DeviceFuncDecl && isGlobalOrDeviceFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); @@ -1691,7 +1691,7 @@ void CubRule::processTypeLoc(const TypeLoc *TL) { } else if (TypeName.find("class cub::BlockScan") == 0 || TypeName.find("class cub::BlockReduce") == 0) { auto DeviceFuncDecl = DpctGlobalInfo::findAncestor(TL); - if (DeviceFuncDecl && isCudaKernelFuncDecl(DeviceFuncDecl)) { + if (DeviceFuncDecl && isGlobalOrDeviceFuncDecl(DeviceFuncDecl)) { if (auto DI = DeviceFunctionDecl::LinkRedecls(DeviceFuncDecl)) { auto &Map = DpctGlobalInfo::getInstance().getCubPlaceholderIndexMap(); Map.insert({PlaceholderIndex, DI}); diff --git a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp index 07654e0c8001..998137ece339 100644 --- a/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp +++ b/clang/lib/DPCT/RulesLangLib/ThrustAPIMigration.cpp @@ -189,7 +189,7 @@ void ThrustAPIRule::thrustFuncMigration(const MatchFinder::MatchResult &Result, // thrust::count, thrust::equal) called in device function , should be // migrated to oneapi::dpl APIs without a policy on the SYCL side if (auto FD = DpctGlobalInfo::getParentFunction(CE)) { - if (isCudaKernelFuncDecl(FD)) { + if (isGlobalOrDeviceFuncDecl(FD)) { if (hasExecutionPolicy) { emplaceTransformation(removeArg(CE, 0, *Result.SourceManager)); } diff --git a/clang/lib/DPCT/Utility.cpp b/clang/lib/DPCT/Utility.cpp index 935c6b394673..6c9fd89c9eab 100644 --- a/clang/lib/DPCT/Utility.cpp +++ b/clang/lib/DPCT/Utility.cpp @@ -783,7 +783,7 @@ bool isCudaMemoryAllocation(const DeclRefExpr *Arg, const CallExpr *CE) { return false; } -bool isCudaKernelFuncDecl(const FunctionDecl *FD) { +bool isGlobalOrDeviceFuncDecl(const FunctionDecl *FD) { if (FD->hasAttr() || FD->hasAttr()) return true; return false; diff --git a/clang/lib/DPCT/Utility.h b/clang/lib/DPCT/Utility.h index 2248a8cb490e..2c72a798bd41 100644 --- a/clang/lib/DPCT/Utility.h +++ b/clang/lib/DPCT/Utility.h @@ -526,7 +526,7 @@ bool isTypeInAnalysisScope(const clang::Type *TypePtr); bool isCubVar(const clang::VarDecl *VD); bool isCubTempStorageType(QualType T); bool isCubCollectiveRecordType(QualType T); -bool isCudaKernelFuncDecl(const FunctionDecl *FD); +bool isGlobalOrDeviceFuncDecl(const FunctionDecl *FD); bool isExprUsed(const clang::Expr *E, bool &Result); bool isUserDefinedDecl(const clang::Decl *D); bool isLambda(const clang::FunctionDecl *FD); From 884f2631ecb9cdbfcc6a681f448be21f56d33e87 Mon Sep 17 00:00:00 2001 From: "Chen, Sheng S" Date: Thu, 10 Jul 2025 14:34:05 +0800 Subject: [PATCH 4/5] up Signed-off-by: Chen, Sheng S --- clang/lib/DPCT/RulesLang/RulesLang.cpp | 2 +- clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index e6266c7df70d..659bfee88a7c 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -6817,7 +6817,7 @@ void MemoryMigrationRule::getSymbolAddressMigration( const VarDecl *VD = dyn_cast(Arg->getDecl()); EA.analyze(C->getArg(1)); auto StmtStrArg1 = EA.getReplacedString(); - if (VD->isLocalVarDeclOrParm()) { + if (VD && VD->isLocalVarDeclOrParm()) { StmtStrArg1 = "const_cast(" + StmtStrArg1 + ")"; } else { StmtStrArg1 += ".get_ptr()"; diff --git a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp index c3e4d5b94791..5a3a2e0a56b3 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangNoneAPIAndType.cpp @@ -218,8 +218,8 @@ void MemVarRefMigrationRule::runRule(const MatchFinder::MatchResult &Result) { } auto FD = dpct::DpctGlobalInfo::findAncestor(MemVarRef); auto CE = dpct::DpctGlobalInfo::findAncestor(MemVarRef); - if (FD && - !dyn_cast(MemVarRef->getDecl())->isLocalVarDeclOrParm() && + if (auto VD =dyn_cast(MemVarRef->getDecl()); FD && VD && + !VD->isLocalVarDeclOrParm() && !isGlobalOrDeviceFuncDecl(FD)) { if (CE && !DpctGlobalInfo::isInCudaPath(CE->getCalleeDecl()->getBeginLoc())) From 20e023e54cba312a68c3de341bcb680d31c00664 Mon Sep 17 00:00:00 2001 From: "Chen, Sheng S" Date: Thu, 10 Jul 2025 15:17:34 +0800 Subject: [PATCH 5/5] up Signed-off-by: Chen, Sheng S --- .../dpct/query_api_mapping/Runtime/test_memory_management.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu b/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu index 0e509bddf17a..a61d62a10547 100644 --- a/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu +++ b/clang/test/dpct/query_api_mapping/Runtime/test_memory_management.cu @@ -46,7 +46,7 @@ // CUDAGETSYMBOLADDRESS: CUDA API: // CUDAGETSYMBOLADDRESS-NEXT: cudaGetSymbolAddress(pDev /*void ***/, symbol /*const void **/); // CUDAGETSYMBOLADDRESS-NEXT: Is migrated to: -// CUDAGETSYMBOLADDRESS-NEXT: *(pDev) = symbol.get_ptr(); +// CUDAGETSYMBOLADDRESS-NEXT: *(pDev) = const_cast(symbol); // RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=cudaGetSymbolSize | FileCheck %s -check-prefix=CUDAGETSYMBOLSIZE // CUDAGETSYMBOLSIZE: CUDA API: