From e6e72c29c1289c56d0f125274f08651bcec6b717 Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Thu, 10 Apr 2025 05:50:23 -0400 Subject: [PATCH 01/19] Migratiion of cudaGraphSetKernelNodeParams Signed-off-by: Daiyaan Ahmed --- .../DPCT/RuleInfra/APINamesTemplateType.inc | 9 +++ clang/lib/DPCT/RulesLang/APINamesGraph.inc | 22 ++++++ clang/lib/DPCT/RulesLang/MapNamesLang.cpp | 8 ++ clang/lib/DPCT/RulesLang/RulesLang.cpp | 25 +++--- clang/lib/DPCT/RulesLang/RulesLang.h | 4 + clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 78 ++++++++++++++++++- clang/lib/DPCT/SrcAPI/APINames.inc | 4 +- clang/lib/DPCT/SrcAPI/TypeNames.inc | 2 +- clang/runtime/dpct-rt/include/dpct/graph.hpp | 72 +++++++++++++++-- 9 files changed, 201 insertions(+), 23 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc index 45757ac75b06..5a645aac5994 100644 --- a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc +++ b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc @@ -507,6 +507,15 @@ TYPE_REWRITE_ENTRY( WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, STR("--use-experimental-features=graph")))) +TYPE_REWRITE_ENTRY( + "cudaKernelNodeParams", + TYPE_CONDITIONAL_FACTORY( + checkEnableGraphForType(), + TYPE_FACTORY(STR(MapNames::getDpctNamespace() + + "experimental::kernel_node_params")), + WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, + STR("--use-experimental-features=graph")))) + // Graphics Interop Handle TYPE_REWRITE_ENTRY( "cudaGraphicsResource", diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index 566460c831b6..d06b5444e0a8 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -104,3 +104,25 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphDestroy"), ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::get_kernel_node_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeGetParams"), + ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::set_kernel_node_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeSetParams"), + ARG("--use-experimental-features=graph")))) \ No newline at end of file diff --git a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp index ad6c56cf38f2..965382bc1c5e 100644 --- a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp +++ b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp @@ -362,5 +362,13 @@ const std::unordered_map {"sampler", HelperFeatureEnum::device_ext}, }; +// Graph kernel node params mapping +MapNamesLang::MapTy GraphRule::KernelNodeParamNames{ + {"gridDim", "grid_dim"}, + {"blockDim", "block_dim"}, + {"kernelParams", "kernel_params"}, + {"sharedMemBytes", "shared_mem_bytes"}, + {"func", "func"}}; + } // namespace dpct } // namespace clang \ No newline at end of file diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 012f4e0e2370..71b0a4591f9b 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -353,18 +353,19 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { this); MF.addMatcher( - typeLoc(loc(qualType(hasDeclaration(namedDecl(hasAnyName( - "cooperative_groups::__v1::coalesced_group", - "cooperative_groups::__v1::grid_group", - "cooperative_groups::__v1::thread_block_tile", "cudaGraph_t", - "cudaGraphExec_t", "cudaGraphNode_t", "cudaGraphicsResource", - "cudaGraphicsResource_t", "CUgraphicsResource", - "cudaExternalMemory_t", "cudaExternalMemoryHandleDesc", - "cudaExternalMemoryMipmappedArrayDesc", - "cudaExternalMemoryBufferDesc", "cudaExternalSemaphore_t", - "cudaExternalSemaphoreHandleDesc", - "cudaExternalSemaphoreSignalParams", - "cudaExternalSemaphoreWaitParams")))))) + typeLoc( + loc(qualType(hasDeclaration(namedDecl(hasAnyName( + "cooperative_groups::__v1::coalesced_group", + "cooperative_groups::__v1::grid_group", + "cooperative_groups::__v1::thread_block_tile", "cudaGraph_t", + "cudaGraphExec_t", "cudaGraphNode_t", "cudaGraphicsResource", + "cudaGraphicsResource_t", "CUgraphicsResource", + "cudaExternalMemory_t", "cudaExternalMemoryHandleDesc", + "cudaExternalMemoryMipmappedArrayDesc", + "cudaExternalMemoryBufferDesc", "cudaExternalSemaphore_t", + "cudaExternalSemaphoreHandleDesc", + "cudaExternalSemaphoreSignalParams", + "cudaExternalSemaphoreWaitParams", "cudaKernelNodeParams")))))) .bind("cudaTypeDefEA"), this); MF.addMatcher(varDecl(hasType(classTemplateSpecializationDecl( diff --git a/clang/lib/DPCT/RulesLang/RulesLang.h b/clang/lib/DPCT/RulesLang/RulesLang.h index a9e83884103d..77d9726d1ef9 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.h +++ b/clang/lib/DPCT/RulesLang/RulesLang.h @@ -999,6 +999,10 @@ class CompatWithClangRule : public NamedMigrationRule { }; class GraphRule : public NamedMigrationRule { + static MapNames::MapTy KernelNodeParamNames; + const Expr *getAssignedBO(const Expr *E, ASTContext &Context); + const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); + public: void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index 892a71e34d9c..d9e3eac44eb3 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -34,14 +34,48 @@ void GraphRule::registerMatcher(MatchFinder &MF) { "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", "cudaGraphAddDependencies", "cudaGraphExecUpdate", "cudaGraphNodeGetType", "cudaGraphGetNodes", - "cudaGraphGetRootNodes", "cudaGraphDestroy"); + "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphKernelNodeGetParams", + "cudaGraphKernelNodeSetParams"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), this); + + auto typeName = [&]() { return hasAnyName("cudaKernelNodeParams"); }; + MF.addMatcher( + memberExpr(hasObjectExpression(hasType(type(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(recordDecl(typeName())))))))) + .bind("Type"), + this); } void GraphRule::runRule(const MatchFinder::MatchResult &Result) { + if (auto ME = getNodeAsType(Result, "Type")) { + auto BaseTy = DpctGlobalInfo::getUnqualifiedTypeName( + ME->getBase()->getType().getDesugaredType(*Result.Context), + *Result.Context); + auto MemberName = ME->getMemberNameInfo().getAsString(); + if (BaseTy == "cudaKernelNodeParams") { + + auto FieldName = KernelNodeParamNames[MemberName]; + if (FieldName.empty()) { + report(ME->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, + DpctGlobalInfo::getOriginalTypeName(ME->getBase()->getType()) + + "::" + ME->getMemberDecl()->getName().str()); + return; + } + requestFeature(HelperFeatureEnum::device_ext); + if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + StringRef ReplacedArg = ""; + emplaceTransformation( + ReplaceMemberAssignAsSetMethod(BO, ME, FieldName, ReplacedArg)); + } else { + emplaceTransformation(new RenameFieldInMemberExpr( + ME, buildString("get_", FieldName, "()"))); + } + } + return; + } const CallExpr *CE = getNodeAsType(Result, "FunctionCall"); if (!CE) { return; @@ -51,5 +85,47 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { EA.applyAllSubExprRepl(); } +const Expr *GraphRule::getParentAsAssignedBO(const Expr *E, + ASTContext &Context) { + auto Parents = Context.getParents(*E); + if (Parents.size() > 0) + return getAssignedBO(Parents[0].get(), Context); + return nullptr; +} + +// Return the binary operator if E is the lhs of an assign expression, otherwise +// nullptr. +const Expr *GraphRule::getAssignedBO(const Expr *E, ASTContext &Context) { + if (dyn_cast(E)) { + // Continue finding parents when E is MemberExpr. + return getParentAsAssignedBO(E, Context); + } else if (auto ICE = dyn_cast(E)) { + // Stop finding parents and return nullptr when E is ImplicitCastExpr, + // except for ArrayToPointerDecay cast. + if (ICE->getCastKind() == CK_ArrayToPointerDecay) { + return getParentAsAssignedBO(E, Context); + } + } else if (auto ASE = dyn_cast(E)) { + // Continue finding parents when E is ArraySubscriptExpr, and remove + // subscript operator anyway for texture object's member. + emplaceTransformation(new ReplaceToken( + Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, + Context.getSourceManager(), + Context.getLangOpts()), + ASE->getRBracketLoc(), "")); + return getParentAsAssignedBO(E, Context); + } else if (auto BO = dyn_cast(E)) { + // If E is BinaryOperator, return E only when it is assign expression, + // otherwise return nullptr. + if (BO->getOpcode() == BO_Assign) + return BO; + } else if (auto COCE = dyn_cast(E)) { + if (COCE->getOperator() == OO_Equal) { + return COCE; + } + } + return nullptr; +} + } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 9866f93fcdbe..447e392a5393 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -456,12 +456,12 @@ ENTRY(cudaGraphInstantiateWithFlags, cudaGraphInstantiateWithFlags, false, NO_FL ENTRY(cudaGraphInstantiateWithParams, cudaGraphInstantiateWithParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeCopyAttributes, cudaGraphKernelNodeCopyAttributes, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeGetAttribute, cudaGraphKernelNodeGetAttribute, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeSetAttribute, cudaGraphKernelNodeSetAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetEnabled, cudaGraphKernelNodeSetEnabled, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetGridDim, cudaGraphKernelNodeSetGridDim, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetParam, cudaGraphKernelNodeSetParam, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeUpdatesApply, cudaGraphKernelNodeUpdatesApply, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphLaunch, cudaGraphLaunch, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphMemAllocNodeGetParams, cudaGraphMemAllocNodeGetParams, false, NO_FLAG, P4, "comment") diff --git a/clang/lib/DPCT/SrcAPI/TypeNames.inc b/clang/lib/DPCT/SrcAPI/TypeNames.inc index b9b76377e92f..6f4220d31450 100644 --- a/clang/lib/DPCT/SrcAPI/TypeNames.inc +++ b/clang/lib/DPCT/SrcAPI/TypeNames.inc @@ -41,7 +41,7 @@ ENTRY_TYPE(CUgraphNode, false, NO_FLAG, P4, "comment") ENTRY_TYPE(CUgraphicsResource, true, NO_FLAG, P4, "successful") // CUDA Runtime Library -ENTRY_TYPE(cudaKernelNodeParams, false, NO_FLAG, P4, "comment") +ENTRY_TYPE(cudaKernelNodeParams, true, NO_FLAG, P4, "Successful/DPCT1119") // cuDNN Library ENTRY_TYPE(cudnnReduceTensorIndices_t, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index f17fc869eef1..21d81db5b87e 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -8,9 +8,12 @@ #pragma once +#include "dpct/util.hpp" +#include "sycl/handler.hpp" #include #include #include +#include namespace dpct { namespace experimental { @@ -25,6 +28,28 @@ typedef sycl::ext::oneapi::experimental::command_graph< typedef sycl::ext::oneapi::experimental::node *node_ptr; +struct kernel_node_params { + dpct::dim3 block_dim; + dpct::dim3 grid_dim; + void *kernel_params; + void* func; + unsigned int shared_mem_bytes; + +public: + void set_block_dim(dpct::dim3 block_dim) { block_dim = block_dim; } + void set_grid_dim(dpct::dim3 grid_dim) { grid_dim = grid_dim; } + void set_kernel_params(void *kernel_params) { kernel_params = kernel_params; } + void set_func(void *func) { func = func; } + void set_shared_mem_bytes(unsigned int shared_mem_bytes) { + shared_mem_bytes = shared_mem_bytes; + } + dpct::dim3 get_block_dim() { return block_dim; } + dpct::dim3 get_grid_dim() { return grid_dim; } + void *get_kernel_params() { return kernel_params; } + void *get_func() { return func; } + unsigned int get_shared_mem_bytes() { return shared_mem_bytes; } +}; + namespace detail { class graph_mgr { public: @@ -39,6 +64,10 @@ class graph_mgr { return instance; } + std::unordered_map + kernel_node_params_map; + void begin_recording(sycl::queue *queue_ptr) { // Calling begin_recording on an already recording queue is a no-op in SYCL if (queue_graph_map.find(queue_ptr) != queue_graph_map.end()) { @@ -94,6 +123,18 @@ class graph_mgr { } } + void kernel_node_set_params( + dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *kernel_node_params) { + kernel_node_params_map[node] = kernel_node_params; + } + + void get_kernel_node_get_params( + dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *kernel_node_params) { + kernel_node_params = kernel_node_params_map[node]; + } + private: std::unordered_map queue_graph_map; std::unordered_map Date: Mon, 21 Apr 2025 01:12:32 -0400 Subject: [PATCH 02/19] test Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RuleInfra/MapNames.cpp | 2 + clang/lib/DPCT/RulesLang/APINamesGraph.inc | 22 ------ clang/lib/DPCT/RulesLang/RulesLang.cpp | 71 ++++++++++---------- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 28 ++++++-- clang/lib/DPCT/SrcAPI/APINames.inc | 4 +- clang/runtime/dpct-rt/include/dpct/graph.hpp | 34 ++-------- 6 files changed, 69 insertions(+), 92 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index 615dd9a43dae..ebfc871dd58e 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -640,6 +640,8 @@ void MapNames::setExplicitNamespaceMap( DpctGlobalInfo::useExtGraph() ? getClNamespace() + "ext::oneapi::experimental::node_type" : "cudaGraphNodeType")}, + {"cudaGraphExecUpdateResultInfo", std::make_shared("int")}, + {"cudaGraphExecUpdateResult", std::make_shared("int")}, {"CUmem_advise", std::make_shared("int")}, {"CUmemorytype", std::make_shared(getClNamespace() + "usm::alloc")}, diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index d06b5444e0a8..566460c831b6 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -104,25 +104,3 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphDestroy"), ARG("--use-experimental-features=graph")))) - -ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( - UseExtGraph, - CALL_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", - CALL(MapNames::getDpctNamespace() + - "experimental::get_kernel_node_params", - ARG(0), ARG(1))), - UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", - Diagnostics::TRY_EXPERIMENTAL_FEATURE, - ARG("cudaGraphKernelNodeGetParams"), - ARG("--use-experimental-features=graph")))) - -ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( - UseExtGraph, - CALL_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", - CALL(MapNames::getDpctNamespace() + - "experimental::set_kernel_node_params", - ARG(0), ARG(1))), - UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", - Diagnostics::TRY_EXPERIMENTAL_FEATURE, - ARG("cudaGraphKernelNodeSetParams"), - ARG("--use-experimental-features=graph")))) \ No newline at end of file diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 71b0a4591f9b..f409af2b76da 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -289,35 +289,36 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { "thrust::host_vector", "cublasHandle_t", "CUevent_st", "__half", "half", "__half2", "half2", "cudaMemoryAdvise", "cudaError_enum", "cudaDeviceProp", "cudaStreamCaptureStatus", - "cudaGraphExecUpdateResult", "cudaPitchedPtr", - "thrust::counting_iterator", "thrust::transform_iterator", - "thrust::permutation_iterator", "thrust::iterator_difference", - "cusolverDnHandle_t", "cusolverDnParams_t", "gesvdjInfo_t", - "syevjInfo_t", "thrust::device_malloc_allocator", - "thrust::divides", "thrust::tuple", "thrust::maximum", - "thrust::multiplies", "thrust::plus", "cudaDataType_t", - "cudaError_t", "CUresult", "CUdevice", "cudaEvent_t", - "cublasStatus_t", "cuComplex", "cuFloatComplex", - "cuDoubleComplex", "CUevent", "cublasFillMode_t", - "cublasDiagType_t", "cublasSideMode_t", "cublasOperation_t", - "cusolverStatus_t", "cusolverEigType_t", "cusolverEigMode_t", - "curandStatus_t", "cudaStream_t", "cusparseStatus_t", - "cusparseDiagType_t", "cusparseFillMode_t", "cusparseIndexBase_t", - "cusparseMatrixType_t", "cusparseAlgMode_t", - "cusparseOperation_t", "cusparseMatDescr_t", "cusparseHandle_t", - "CUcontext", "cublasPointerMode_t", "cusparsePointerMode_t", - "cublasGemmAlgo_t", "cusparseSolveAnalysisInfo_t", "cudaDataType", - "cublasDataType_t", "curandState_t", "curandState", - "curandStateXORWOW_t", "curandStateXORWOW", - "curandStatePhilox4_32_10_t", "curandStatePhilox4_32_10", - "curandStateMRG32k3a_t", "curandStateMRG32k3a", "thrust::minus", - "thrust::negate", "thrust::logical_or", "thrust::equal_to", - "thrust::less", "cudaSharedMemConfig", "curandGenerator_t", - "curandRngType_t", "curandOrdering_t", "cufftHandle", "cufftReal", - "cufftDoubleReal", "cufftComplex", "cufftDoubleComplex", - "cufftResult_t", "cufftResult", "cufftType_t", "cufftType", - "thrust::pair", "CUdeviceptr", "cudaDeviceAttr", "CUmodule", - "CUjit_option", "CUfunction", "cudaMemcpyKind", "cudaComputeMode", + "cudaGraphExecUpdateResult", "cudaGraphExecUpdateResultInfo", + "cudaPitchedPtr", "thrust::counting_iterator", + "thrust::transform_iterator", "thrust::permutation_iterator", + "thrust::iterator_difference", "cusolverDnHandle_t", + "cusolverDnParams_t", "gesvdjInfo_t", "syevjInfo_t", + "thrust::device_malloc_allocator", "thrust::divides", + "thrust::tuple", "thrust::maximum", "thrust::multiplies", + "thrust::plus", "cudaDataType_t", "cudaError_t", "CUresult", + "CUdevice", "cudaEvent_t", "cublasStatus_t", "cuComplex", + "cuFloatComplex", "cuDoubleComplex", "CUevent", + "cublasFillMode_t", "cublasDiagType_t", "cublasSideMode_t", + "cublasOperation_t", "cusolverStatus_t", "cusolverEigType_t", + "cusolverEigMode_t", "curandStatus_t", "cudaStream_t", + "cusparseStatus_t", "cusparseDiagType_t", "cusparseFillMode_t", + "cusparseIndexBase_t", "cusparseMatrixType_t", + "cusparseAlgMode_t", "cusparseOperation_t", "cusparseMatDescr_t", + "cusparseHandle_t", "CUcontext", "cublasPointerMode_t", + "cusparsePointerMode_t", "cublasGemmAlgo_t", + "cusparseSolveAnalysisInfo_t", "cudaDataType", "cublasDataType_t", + "curandState_t", "curandState", "curandStateXORWOW_t", + "curandStateXORWOW", "curandStatePhilox4_32_10_t", + "curandStatePhilox4_32_10", "curandStateMRG32k3a_t", + "curandStateMRG32k3a", "thrust::minus", "thrust::negate", + "thrust::logical_or", "thrust::equal_to", "thrust::less", + "cudaSharedMemConfig", "curandGenerator_t", "curandRngType_t", + "curandOrdering_t", "cufftHandle", "cufftReal", "cufftDoubleReal", + "cufftComplex", "cufftDoubleComplex", "cufftResult_t", + "cufftResult", "cufftType_t", "cufftType", "thrust::pair", + "CUdeviceptr", "cudaDeviceAttr", "CUmodule", "CUjit_option", + "CUfunction", "cudaMemcpyKind", "cudaComputeMode", "__nv_bfloat16", "cooperative_groups::__v1::thread_group", "cooperative_groups::__v1::thread_block", "libraryPropertyType_t", "libraryPropertyType", "cudaDataType_t", "cudaDataType", @@ -930,12 +931,6 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { } } - if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { - report(TL->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - CanonicalTypeStr); - return; - } - if (CanonicalTypeStr == "cudaGraphicsRegisterFlags" || CanonicalTypeStr == "cudaGraphicsMapFlags") { if (!DpctGlobalInfo::useExtBindlessImages()) { @@ -4577,6 +4572,9 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, bool isInsertWrapperRegister) { auto NLoc = DpctGlobalInfo::getSourceManager().getSpellingLoc( Node->getNameInfo().getBeginLoc()); + + std::cout <<"WRAPPER APPENDED: " << "\n"; + emplaceTransformation(new InsertText( NLoc.getLocWithOffset(Node->getNameInfo().getAsString().length()), "_wrapper")); @@ -7182,11 +7180,14 @@ ReplaceMemberAssignAsSetMethod(const Expr *E, const MemberExpr *ME, StringRef ExtraArg, StringRef ExtraFeild) { if (ReplacedArg.empty()) { if (auto RHS = getRhs(E)) { + StringRef c = ExprAnalysis::ref(RHS); + std::cout <<"Replaced String: "<< c.str() <<"\n"; return ReplaceMemberAssignAsSetMethod( getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ExprAnalysis::ref(RHS), ExtraArg, ExtraFeild); } } + std::cout << "Coming her!!!!!!!!!e\n"; return ReplaceMemberAssignAsSetMethod(getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ReplacedArg, ExtraArg); } diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index d9e3eac44eb3..462d343d5066 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -34,8 +34,7 @@ void GraphRule::registerMatcher(MatchFinder &MF) { "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", "cudaGraphAddDependencies", "cudaGraphExecUpdate", "cudaGraphNodeGetType", "cudaGraphGetNodes", - "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphKernelNodeGetParams", - "cudaGraphKernelNodeSetParams"); + "cudaGraphGetRootNodes", "cudaGraphDestroy"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), @@ -56,7 +55,6 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { *Result.Context); auto MemberName = ME->getMemberNameInfo().getAsString(); if (BaseTy == "cudaKernelNodeParams") { - auto FieldName = KernelNodeParamNames[MemberName]; if (FieldName.empty()) { report(ME->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, @@ -64,7 +62,29 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { "::" + ME->getMemberDecl()->getName().str()); return; } - requestFeature(HelperFeatureEnum::device_ext); + // if(FieldName == "func"){ + // if(auto BO = dyn_cast(getParentAsAssignedBO(ME, *Result.Context))){ + // const Expr *RHS = BO->getRHS(); + // const Expr *StrippedRHS = RHS->IgnoreParenCasts(); + // std::string RHSStr; + // llvm::raw_string_ostream OS(RHSStr); + // std::cout <<"RHSSTR: " <printPretty(OS, nullptr, Result.Context->getPrintingPolicy()); + + + // // Create the replacement string using dpct::wrapper_register + // auto ReplacementStr = "set_func.dpct::wrapper_register(&" + RHSStr + "_wrapper).get()"; + // std::cout<< "ReplacementSTR:" << ReplacementStr << "\n"; + + // // Replace the assignment with the set_func method call + // // emplaceTransformation(ReplaceMemberAssignAsSetMethod( + // // BO, ME, FieldName, ReplacementStr)); + // emplaceTransformation(new ReplaceText(getStmtExpansionSourceRange(RHS).getBegin(), + // ReplacementStr.length(), + // std::move(ReplacementStr))); + // return; + // } + // } if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { StringRef ReplacedArg = ""; emplaceTransformation( diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 447e392a5393..9866f93fcdbe 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -456,12 +456,12 @@ ENTRY(cudaGraphInstantiateWithFlags, cudaGraphInstantiateWithFlags, false, NO_FL ENTRY(cudaGraphInstantiateWithParams, cudaGraphInstantiateWithParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeCopyAttributes, cudaGraphKernelNodeCopyAttributes, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeGetAttribute, cudaGraphKernelNodeGetAttribute, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, true, NO_FLAG, P4, "Successful/DPCT1119") +ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetAttribute, cudaGraphKernelNodeSetAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetEnabled, cudaGraphKernelNodeSetEnabled, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetGridDim, cudaGraphKernelNodeSetGridDim, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetParam, cudaGraphKernelNodeSetParam, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, true, NO_FLAG, P4, "Successful/DPCT1119") +ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeUpdatesApply, cudaGraphKernelNodeUpdatesApply, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphLaunch, cudaGraphLaunch, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphMemAllocNodeGetParams, cudaGraphMemAllocNodeGetParams, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 21d81db5b87e..e1aeb49ea626 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -10,6 +10,7 @@ #include "dpct/util.hpp" #include "sycl/handler.hpp" +#include #include #include #include @@ -31,21 +32,21 @@ typedef sycl::ext::oneapi::experimental::node *node_ptr; struct kernel_node_params { dpct::dim3 block_dim; dpct::dim3 grid_dim; - void *kernel_params; + void **kernel_params; void* func; unsigned int shared_mem_bytes; public: void set_block_dim(dpct::dim3 block_dim) { block_dim = block_dim; } void set_grid_dim(dpct::dim3 grid_dim) { grid_dim = grid_dim; } - void set_kernel_params(void *kernel_params) { kernel_params = kernel_params; } + void set_kernel_params(void **kernel_params) { kernel_params = kernel_params; } void set_func(void *func) { func = func; } void set_shared_mem_bytes(unsigned int shared_mem_bytes) { shared_mem_bytes = shared_mem_bytes; } dpct::dim3 get_block_dim() { return block_dim; } dpct::dim3 get_grid_dim() { return grid_dim; } - void *get_kernel_params() { return kernel_params; } + void **get_kernel_params() { return kernel_params; } void *get_func() { return func; } unsigned int get_shared_mem_bytes() { return shared_mem_bytes; } }; @@ -123,18 +124,6 @@ class graph_mgr { } } - void kernel_node_set_params( - dpct::experimental::node_ptr node, - dpct::experimental::kernel_node_params *kernel_node_params) { - kernel_node_params_map[node] = kernel_node_params; - } - - void get_kernel_node_get_params( - dpct::experimental::node_ptr node, - dpct::experimental::kernel_node_params *kernel_node_params) { - kernel_node_params = kernel_node_params_map[node]; - } - private: std::unordered_map queue_graph_map; std::unordered_map Date: Wed, 23 Apr 2025 04:08:52 -0400 Subject: [PATCH 03/19] test2 Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 35 ++++---- clang/runtime/dpct-rt/include/dpct/graph.hpp | 88 +++++++++++++++----- 2 files changed, 83 insertions(+), 40 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index 462d343d5066..222fbade833c 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -55,35 +55,28 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { *Result.Context); auto MemberName = ME->getMemberNameInfo().getAsString(); if (BaseTy == "cudaKernelNodeParams") { + std::cout <<"NODE PARAMS FOUND\n"; + DpctGlobalInfo::setCVersionCUDALaunchUsed(); auto FieldName = KernelNodeParamNames[MemberName]; if (FieldName.empty()) { report(ME->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, DpctGlobalInfo::getOriginalTypeName(ME->getBase()->getType()) + "::" + ME->getMemberDecl()->getName().str()); return; + } // if(FieldName == "func"){ - // if(auto BO = dyn_cast(getParentAsAssignedBO(ME, *Result.Context))){ - // const Expr *RHS = BO->getRHS(); - // const Expr *StrippedRHS = RHS->IgnoreParenCasts(); - // std::string RHSStr; - // llvm::raw_string_ostream OS(RHSStr); - // std::cout <<"RHSSTR: " <printPretty(OS, nullptr, Result.Context->getPrintingPolicy()); - - - // // Create the replacement string using dpct::wrapper_register - // auto ReplacementStr = "set_func.dpct::wrapper_register(&" + RHSStr + "_wrapper).get()"; - // std::cout<< "ReplacementSTR:" << ReplacementStr << "\n"; - - // // Replace the assignment with the set_func method call - // // emplaceTransformation(ReplaceMemberAssignAsSetMethod( - // // BO, ME, FieldName, ReplacementStr)); - // emplaceTransformation(new ReplaceText(getStmtExpansionSourceRange(RHS).getBegin(), - // ReplacementStr.length(), - // std::move(ReplacementStr))); - // return; - // } + // Check for the binary operator and fetch the RHS + // Strip the explicit typecast if it exists + // Check for VarDecl on the StrippedRHS + // If not a VarDecl, then insert user warning + // Check for VarDecl Type to be a FunctionDecl + // If FunctionDecl, then + // VarDecl, get var name, Get kernel_node_params variable name + // Create the expression, hardcoded strting + // Create new replace object and emplace transformation (nodeParams.set_func((void*)dpct::wrapper_register(&incrementKernel_wrapper).get());) + // If VarDecl and not a FunctionDecl and if type of VarDecl is function pointer + // Create a hardcoded string (nodeParams.set_func(a.get())); // } if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { StringRef ReplacedArg = ""; diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index e1aeb49ea626..9f610f7ded45 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -8,13 +8,14 @@ #pragma once +#include "dpct/kernel.hpp" #include "dpct/util.hpp" #include "sycl/handler.hpp" +#include "sycl/queue.hpp" #include #include #include #include -#include namespace dpct { namespace experimental { @@ -33,16 +34,18 @@ struct kernel_node_params { dpct::dim3 block_dim; dpct::dim3 grid_dim; void **kernel_params; - void* func; + void *func; unsigned int shared_mem_bytes; public: - void set_block_dim(dpct::dim3 block_dim) { block_dim = block_dim; } - void set_grid_dim(dpct::dim3 grid_dim) { grid_dim = grid_dim; } - void set_kernel_params(void **kernel_params) { kernel_params = kernel_params; } - void set_func(void *func) { func = func; } + void set_block_dim(dpct::dim3 block_dim) { this->block_dim = block_dim; } + void set_grid_dim(dpct::dim3 grid_dim) { this->grid_dim = grid_dim; } + void set_kernel_params(void **kernel_params) { + this->kernel_params = kernel_params; + } + void set_func(void *func) { this->func = func; } void set_shared_mem_bytes(unsigned int shared_mem_bytes) { - shared_mem_bytes = shared_mem_bytes; + this->shared_mem_bytes = shared_mem_bytes; } dpct::dim3 get_block_dim() { return block_dim; } dpct::dim3 get_grid_dim() { return grid_dim; } @@ -65,10 +68,6 @@ class graph_mgr { return instance; } - std::unordered_map - kernel_node_params_map; - void begin_recording(sycl::queue *queue_ptr) { // Calling begin_recording on an already recording queue is a no-op in SYCL if (queue_graph_map.find(queue_ptr) != queue_graph_map.end()) { @@ -124,6 +123,36 @@ class graph_mgr { } } + void add_kernel_node(dpct::experimental::node_ptr *node, + dpct::experimental::command_graph_ptr graph, + dpct::experimental::node_ptr *dependencies, + std::size_t numberOfDependencies, + dpct::experimental::kernel_node_params *params) { + kernel_node_params_map[graph].push_back(params); + } + void launch(dpct::experimental::command_graph_exec_ptr execGraph, + sycl::queue *queue) { + auto graph = exec_graph_map[execGraph]; + for (auto kernel_params : kernel_node_params_map[graph]) { + graph->add([&](sycl::handler &cgh) { + cgh.host_task([=]() { + dpct::kernel_launcher::launch( + kernel_params->get_func(), kernel_params->get_grid_dim(), + kernel_params->get_block_dim(), + kernel_params->get_kernel_params(), + kernel_params->get_shared_mem_bytes(), queue); + }); + }); + } + auto final_graph = graph->finalize(); + queue->submit([&](sycl::handler &cgh) { cgh.ext_oneapi_graph(final_graph); }); + } + + void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, + dpct::experimental::command_graph_ptr graph) { + exec_graph_map[*execGraph] = graph; + } + private: std::unordered_map queue_graph_map; std::unordered_map> root_nodes_map; + std::unordered_map + exec_graph_map; + std::unordered_map> + kernel_node_params_map; }; } // namespace detail @@ -204,9 +239,9 @@ static void add_dependencies(dpct::experimental::command_graph_ptr graph, /// nodes will be assigned. /// \param [out] numberOfNodes The number of nodes in the graph. static void get_nodes(dpct::experimental::command_graph_ptr graph, - dpct::experimental::node_ptr *nodesArray, - std::size_t *numberOfNodes) { -detail::graph_mgr::instance().get_nodes(graph, nodesArray, numberOfNodes); + dpct::experimental::node_ptr *nodesArray, + std::size_t *numberOfNodes) { + detail::graph_mgr::instance().get_nodes(graph, nodesArray, numberOfNodes); } /// Gets the root nodes in the command graph. @@ -215,14 +250,29 @@ detail::graph_mgr::instance().get_nodes(graph, nodesArray, numberOfNodes); /// root nodes will be assigned. /// \param [out] numberOfNodes The number of root nodes in the graph. static void get_root_nodes(dpct::experimental::command_graph_ptr graph, - dpct::experimental::node_ptr *nodesArray, - std::size_t *numberOfNodes) { -detail::graph_mgr::instance().get_root_nodes(graph, nodesArray, - numberOfNodes); + dpct::experimental::node_ptr *nodesArray, + std::size_t *numberOfNodes) { + detail::graph_mgr::instance().get_root_nodes(graph, nodesArray, + numberOfNodes); +} + +static void add_kernel_node(dpct::experimental::node_ptr *node, + dpct::experimental::command_graph_ptr graph, + dpct::experimental::node_ptr *dependencies, + std::size_t numberOfDependencies, + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().add_kernel_node(node, graph, dependencies, + numberOfDependencies, params); } -static void add_kernel_node(dpct::experimental::node_ptr* node, dpct::experimental::node_ptr* dependencies, std::size_t &numberOfDependencies, dpct::experimental::kernel_node_params* params){ +static void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, + dpct::experimental::command_graph_ptr graph) { + detail::graph_mgr::instance().instantiate(execGraph, graph); +} +static void launch(dpct::experimental::command_graph_exec_ptr execGraph, + sycl::queue *queue) { + detail::graph_mgr::instance().launch(execGraph, queue); } } // namespace experimental From 819999941516b4f8e4b38d2ae9c06fe8600c917d Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Wed, 23 Apr 2025 21:45:19 -0400 Subject: [PATCH 04/19] test3 Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/ASTTraversal.cpp | 1 + clang/lib/DPCT/AnalysisInfo.cpp | 2 +- clang/lib/DPCT/AnalysisInfo.h | 6 +- clang/lib/DPCT/RulesLang/APINamesGraph.inc | 32 +++-- clang/lib/DPCT/RulesLang/RulesLang.cpp | 50 ++++--- clang/lib/DPCT/RulesLang/RulesLang.h | 6 + clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 108 +++++++++++--- clang/lib/DPCT/SrcAPI/APINames.inc | 2 +- clang/runtime/dpct-rt/include/dpct/graph.hpp | 142 +++++++++++++++---- 9 files changed, 263 insertions(+), 86 deletions(-) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 427f0c4fcc8c..8d6a87f856fb 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -158,6 +158,7 @@ REGISTER_RULE(TypeRemoveRule, PassKind::PK_Analysis) REGISTER_RULE(CompatWithClangRule, PassKind::PK_Migration) REGISTER_RULE(AssertRule, PassKind::PK_Migration) REGISTER_RULE(GraphRule, PassKind::PK_Migration) +REGISTER_RULE(GraphAnalysisRule, PassKind::PK_Analysis) REGISTER_RULE(GraphicsInteropRule, PassKind::PK_Migration) REGISTER_RULE(RulesLangAddrSpaceConvRule, PassKind::PK_Migration) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 67fe389b928e..69e578c7e739 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -2504,7 +2504,7 @@ unsigned DpctGlobalInfo::ExperimentalFlag = 0; unsigned DpctGlobalInfo::HelperFuncPreferenceFlag = 0; bool DpctGlobalInfo::AnalysisModeFlag = false; bool DpctGlobalInfo::UseSYCLCompatFlag = false; -bool DpctGlobalInfo::CVersionCUDALaunchUsedFlag = false; +bool DpctGlobalInfo::UseWrapperRegisterFnPtrFlag = false; unsigned int DpctGlobalInfo::ColorOption = 1; std::unordered_map> DpctGlobalInfo::CubPlaceholderIndexMap; diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index a04c596dc344..147f6119e25b 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1355,8 +1355,8 @@ class DpctGlobalInfo { static bool useNoQueueDevice() { return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice); } - static void setCVersionCUDALaunchUsed() { CVersionCUDALaunchUsedFlag = true; } - static bool isCVersionCUDALaunchUsed() { return CVersionCUDALaunchUsedFlag; } + static void setUseWrapperRegisterFnPtr() { UseWrapperRegisterFnPtrFlag = true; } + static bool useWrapperRegisterFnPtr() { return UseWrapperRegisterFnPtrFlag; } static void setUseSYCLCompat(bool Flag = true) { UseSYCLCompatFlag = Flag; } static bool useSYCLCompat() { return UseSYCLCompatFlag; } static bool useEnqueueBarrier() { @@ -1686,7 +1686,7 @@ class DpctGlobalInfo { static unsigned HelperFuncPreferenceFlag; static bool AnalysisModeFlag; static bool UseSYCLCompatFlag; - static bool CVersionCUDALaunchUsedFlag; + static bool UseWrapperRegisterFnPtrFlag; static unsigned int ColorOption; static std::unordered_map> CubPlaceholderIndexMap; diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index 566460c831b6..9c2e3f932f97 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -6,19 +6,16 @@ // //===----------------------------------------------------------------------===// -CONDITIONAL_FACTORY_ENTRY( +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY( - "cudaGraphInstantiate", DEREF(0), - NEW(MapNames::getClNamespace() + - "ext::oneapi::experimental::command_graph<" + - MapNames::getClNamespace() + - "ext::oneapi::experimental::graph_state::executable>", - MEMBER_CALL(ARG(1), true, "finalize")))), + CALL_FACTORY_ENTRY("cudaGraphInstantiate", + CALL(MapNames::getDpctNamespace() + + "experimental::instantiate", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphInstantiate", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphInstantiate"), - ARG("--use-experimental-features=graph"))) + ARG("--use-experimental-features=graph")))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, DELETE_FACTORY_ENTRY("cudaGraphExecDestroy", ARG(0)), @@ -29,8 +26,10 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - MEMBER_CALL_FACTORY_ENTRY("cudaGraphLaunch", ARG(1), true, - "ext_oneapi_graph", DEREF(0)), + CALL_FACTORY_ENTRY("cudaGraphLaunch", + CALL(MapNames::getDpctNamespace() + + "experimental::launch", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphLaunch", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphLaunch"), @@ -104,3 +103,14 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphDestroy"), ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphAddKernelNode", + CALL(MapNames::getDpctNamespace() + + "experimental::add_kernel_node", + ARG(0), ARG(1), ARG(2))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphAddKernelNode", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphAddKernelNode"), + ARG("--use-experimental-features=graph")))) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index f409af2b76da..1332fe411e53 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -4517,14 +4517,24 @@ void StreamAPICallRule::runRule(const MatchFinder::MatchResult &Result) { } void KernelCallRefRule::registerMatcher(ast_matchers::MatchFinder &MF) { + + auto cudaKernelNodeParamsMatcher = memberExpr(hasObjectExpression(hasType( + type(hasUnqualifiedDesugaredType(recordType(hasDeclaration(recordDecl(hasAnyName("cudaKernelNodeParams"))))))))); MF.addMatcher( - functionDecl( - forEachDescendant( - declRefExpr(allOf(to(functionDecl(hasAttr(attr::CUDAGlobal))), - unless(hasAncestor(cudaKernelCallExpr())))) - .bind("kernelRef"))) - .bind("outerFunc"), - this); + functionDecl( + forEachDescendant( + declRefExpr( + allOf( + to(functionDecl(hasAttr(attr::CUDAGlobal))), + unless(hasAncestor(cudaKernelCallExpr())) + ) + ).bind("kernelRef") + ), + unless(hasDescendant(cudaKernelNodeParamsMatcher)) + ).bind("outerFunc"), + this); + + MF.addMatcher(unresolvedLookupExpr(unless(hasAncestor(cudaKernelCallExpr()))) .bind("unresolvedRef"), this); @@ -4572,14 +4582,13 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, bool isInsertWrapperRegister) { auto NLoc = DpctGlobalInfo::getSourceManager().getSpellingLoc( Node->getNameInfo().getBeginLoc()); - - std::cout <<"WRAPPER APPENDED: " << "\n"; - + std::cout << "Inserting _wrapper at location: " << NLoc.printToString(DpctGlobalInfo::getSourceManager()) << "\n"; emplaceTransformation(new InsertText( NLoc.getLocWithOffset(Node->getNameInfo().getAsString().length()), "_wrapper")); if (!isInsertWrapperRegister) { + std::cout << "Not inserting wrapper_register\n"; return; } const Expr *E = Node; @@ -4595,6 +4604,7 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, E = COC; } } + std::cout << "Inserting wrapper_register with TypeRepl: " << TypeRepl << "\n"; emplaceTransformation(new InsertBeforeStmt( E, MapNames::getDpctNamespace() + "wrapper_register" + TypeRepl + "(")); emplaceTransformation(new InsertAfterStmt(E, ").get()")); @@ -4603,6 +4613,7 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, void KernelCallRefRule::runRule( const ast_matchers::MatchFinder::MatchResult &Result) { if (auto DRE = getAssistNodeAsType(Result, "kernelRef")) { + std::cout << "KernelRef matched\n"; const FunctionDecl *OuterFD = getAssistNodeAsType(Result, "outerFunc"); if (!OuterFD) { @@ -4636,7 +4647,7 @@ void KernelCallRefRule::runRule( (OuterFD->getTemplatedKind() == FunctionDecl::TemplatedKind::TK_FunctionTemplate)) { std::string TypeRepl; - if (DpctGlobalInfo::isCVersionCUDALaunchUsed()) { + if (DpctGlobalInfo::useWrapperRegisterFnPtr()) { if ((IsTemplateRelated && (!DRE->hasExplicitTemplateArgs() || (DRE->getNumTemplateArgs() <= TemplateParamNum))) || @@ -4645,7 +4656,7 @@ void KernelCallRefRule::runRule( } } insertWrapperPostfix( - DRE, std::move(TypeRepl), DpctGlobalInfo::isCVersionCUDALaunchUsed()); + DRE, std::move(TypeRepl), DpctGlobalInfo::useWrapperRegisterFnPtr()); } } if (auto ULE = @@ -4682,7 +4693,7 @@ void KernelCallRefRule::runRule( } } insertWrapperPostfix( - ULE, getTypeRepl(ULE), DpctGlobalInfo::isCVersionCUDALaunchUsed()); + ULE, getTypeRepl(ULE), DpctGlobalInfo::useWrapperRegisterFnPtr()); } } @@ -4955,7 +4966,7 @@ void KernelCallRule::runRule( if (!getAddressedRef(CalleeDRE)) { if (IsFuncTypeErased) { - DpctGlobalInfo::setCVersionCUDALaunchUsed(); + DpctGlobalInfo::setUseWrapperRegisterFnPtr(); } std::string ReplStr; llvm::raw_string_ostream OS(ReplStr); @@ -7178,16 +7189,23 @@ TextModification * ReplaceMemberAssignAsSetMethod(const Expr *E, const MemberExpr *ME, StringRef MethodName, StringRef ReplacedArg, StringRef ExtraArg, StringRef ExtraFeild) { + std::cout << "Entering ReplaceMemberAssignAsSetMethod (overloaded)\n"; + std::cout << "Expr: " << E->getStmtClassName() << "\n"; + std::cout << "MemberExpr: " << ME->getMemberNameInfo().getAsString() << "\n"; + std::cout << "MethodName: " << MethodName.str() << "\n"; + std::cout << "ReplacedArg: " << ReplacedArg.str() << "\n"; + std::cout << "ExtraArg: " << ExtraArg.str() << "\n"; + std::cout << "ExtraFeild: " << ExtraFeild.str() << "\n"; if (ReplacedArg.empty()) { if (auto RHS = getRhs(E)) { + std::cout << "RHS found: " << ExprAnalysis::ref(RHS) << "\n"; StringRef c = ExprAnalysis::ref(RHS); - std::cout <<"Replaced String: "<< c.str() <<"\n"; return ReplaceMemberAssignAsSetMethod( getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ExprAnalysis::ref(RHS), ExtraArg, ExtraFeild); } } - std::cout << "Coming her!!!!!!!!!e\n"; + std::cout << "ReplacedArg is not empty or RHS not found\n"; return ReplaceMemberAssignAsSetMethod(getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ReplacedArg, ExtraArg); } diff --git a/clang/lib/DPCT/RulesLang/RulesLang.h b/clang/lib/DPCT/RulesLang/RulesLang.h index 77d9726d1ef9..5382e25dcf36 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.h +++ b/clang/lib/DPCT/RulesLang/RulesLang.h @@ -1008,6 +1008,12 @@ class GraphRule : public NamedMigrationRule { void runRule(const ast_matchers::MatchFinder::MatchResult &Result); }; +class GraphAnalysisRule : public NamedMigrationRule { +public: + void registerMatcher(ast_matchers::MatchFinder &MF) override; + void runRule(const ast_matchers::MatchFinder::MatchResult &Result); +}; + class AssertRule : public NamedMigrationRule { public: void registerMatcher(ast_matchers::MatchFinder &MF) override; diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index 222fbade833c..ac4e0ad5f678 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -28,13 +28,37 @@ extern DpctOption AsyncHandler; namespace clang { namespace dpct { +void GraphAnalysisRule::registerMatcher(MatchFinder &MF) { + auto kernelNodeTypeName = [&]() { + return hasAnyName("cudaKernelNodeParams"); + }; + MF.addMatcher( + memberExpr( + hasObjectExpression(hasType(type(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(recordDecl(kernelNodeTypeName())))))))) + .bind("KernelNodeType"), + this); +} + +void GraphAnalysisRule::runRule(const MatchFinder::MatchResult &Result) { + if (auto ME = getNodeAsType(Result, "KernelNodeType")) { + auto BaseTy = DpctGlobalInfo::getUnqualifiedTypeName( + ME->getBase()->getType().getDesugaredType(*Result.Context), + *Result.Context); + auto MemberName = ME->getMemberNameInfo().getAsString(); + if (BaseTy == "cudaKernelNodeParams") { + DpctGlobalInfo::setUseWrapperRegisterFnPtr(); + } + } +} + void GraphRule::registerMatcher(MatchFinder &MF) { auto functionName = [&]() { - return hasAnyName("cudaGraphInstantiate", "cudaGraphLaunch", - "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", - "cudaGraphAddDependencies", "cudaGraphExecUpdate", - "cudaGraphNodeGetType", "cudaGraphGetNodes", - "cudaGraphGetRootNodes", "cudaGraphDestroy"); + return hasAnyName( + "cudaGraphInstantiate", "cudaGraphLaunch", "cudaGraphExecDestroy", + "cudaGraphAddEmptyNode", "cudaGraphAddDependencies", + "cudaGraphExecUpdate", "cudaGraphNodeGetType", "cudaGraphGetNodes", + "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphAddKernelNode"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), @@ -55,29 +79,67 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { *Result.Context); auto MemberName = ME->getMemberNameInfo().getAsString(); if (BaseTy == "cudaKernelNodeParams") { - std::cout <<"NODE PARAMS FOUND\n"; - DpctGlobalInfo::setCVersionCUDALaunchUsed(); auto FieldName = KernelNodeParamNames[MemberName]; if (FieldName.empty()) { report(ME->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, DpctGlobalInfo::getOriginalTypeName(ME->getBase()->getType()) + "::" + ME->getMemberDecl()->getName().str()); return; - } - // if(FieldName == "func"){ - // Check for the binary operator and fetch the RHS - // Strip the explicit typecast if it exists - // Check for VarDecl on the StrippedRHS - // If not a VarDecl, then insert user warning - // Check for VarDecl Type to be a FunctionDecl - // If FunctionDecl, then - // VarDecl, get var name, Get kernel_node_params variable name - // Create the expression, hardcoded strting - // Create new replace object and emplace transformation (nodeParams.set_func((void*)dpct::wrapper_register(&incrementKernel_wrapper).get());) - // If VarDecl and not a FunctionDecl and if type of VarDecl is function pointer - // Create a hardcoded string (nodeParams.set_func(a.get())); - // } + if (FieldName == "func") { + if (auto BO = dyn_cast( + getParentAsAssignedBO(ME, *Result.Context))) { + auto *LHS = BO->getLHS()->IgnoreCasts(); + if (auto *ME = dyn_cast(LHS)) { + std::cout << "Member Expr\n"; + // Get the base expression of the MemberExpr + auto *Base = ME->getBase()->IgnoreImpCasts(); + + // Check if the base is a DeclRefExpr + if (auto *DRE = dyn_cast(Base)) { + std::cout << "DeclRef Expr\n"; + // Get the variable declaration + if (auto *VD = dyn_cast(DRE->getDecl())) { + std::cout << "Base VarDecl Expr\n"; + // Get the variable name + std::string varName = VD->getNameAsString(); + + // Get the RHS of the assignment + clang::Expr *RHS = BO->getRHS()->IgnoreCasts(); + + // Check if RHS is a DeclRefExpr referring to a function + if (auto *RHS_DRE = dyn_cast(RHS)) { + std::cout << "RHS DRE Expr\n"; + if (auto *FD = dyn_cast(RHS_DRE->getDecl())) { + std::cout << "RHS FunctionDecl Expr\n"; + // Get the function name + std::string funcName = FD->getNameAsString(); + std::string wrapperName = funcName + "_wrapper"; + + // Construct the replacement expression + std::string ReplacementExpr = + varName + ".set_func((void*) dpct::wrapper_register(&" + + wrapperName + ").get());"; + std::cout << "Replacement String: " << ReplacementExpr + << "\n"; + std::string rp = "(void*) dpct::wrapper_register(&" + + wrapperName + ").get()"; + StringRef ReplacedArg = rp; + emplaceTransformation(ReplaceMemberAssignAsSetMethod( + BO, ME, FieldName, ReplacedArg)); + // Replace the original assignment with the new expression + // emplaceTransformation( + // new ReplaceToken(ME->getBeginLoc(), ME->getEndLoc(), + // std ::move(ReplacementExpr))); + return; + } + } + } + } + } + } + } + std::cout << "Coming here\n"; if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { StringRef ReplacedArg = ""; emplaceTransformation( @@ -106,8 +168,8 @@ const Expr *GraphRule::getParentAsAssignedBO(const Expr *E, return nullptr; } -// Return the binary operator if E is the lhs of an assign expression, otherwise -// nullptr. +// Return the binary operator if E is the lhs of an assign expression, +// otherwise nullptr. const Expr *GraphRule::getAssignedBO(const Expr *E, ASTContext &Context) { if (dyn_cast(E)) { // Continue finding parents when E is MemberExpr. diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 9866f93fcdbe..7bc9e2ebd2d1 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -404,7 +404,7 @@ ENTRY(cudaGraphAddEventWaitNode, cudaGraphAddEventWaitNode, false, NO_FLAG, P4, ENTRY(cudaGraphAddExternalSemaphoresSignalNode, cudaGraphAddExternalSemaphoresSignalNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddExternalSemaphoresWaitNode, cudaGraphAddExternalSemaphoresWaitNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddHostNode, cudaGraphAddHostNode, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphAddKernelNode, cudaGraphAddKernelNode, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphAddKernelNode, cudaGraphAddKernelNode, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphAddMemAllocNode, cudaGraphAddMemAllocNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddMemFreeNode, cudaGraphAddMemFreeNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddMemcpyNode, cudaGraphAddMemcpyNode, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 9f610f7ded45..3db232719834 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -10,7 +10,9 @@ #include "dpct/kernel.hpp" #include "dpct/util.hpp" +#include "sycl/ext/oneapi/experimental/graph.hpp" #include "sycl/handler.hpp" +#include "sycl/property_list.hpp" #include "sycl/queue.hpp" #include #include @@ -30,6 +32,23 @@ typedef sycl::ext::oneapi::experimental::command_graph< typedef sycl::ext::oneapi::experimental::node *node_ptr; +/// Adds dependencies between nodes in the command graph. +/// \param [in] graph A pointer to the command graph. +/// \param [in] fromNodes An array of node pointers representing +/// the source nodes. +/// \param [in] toNodes An array of node pointers representing +/// the destination nodes. +/// \param [in] numberOfDependencies The number of dependencies +/// to be added. +static void add_dependencies(dpct::experimental::command_graph_ptr graph, + const dpct::experimental::node_ptr *fromNodes, + const dpct::experimental::node_ptr *toNodes, + std::size_t numberOfDependencies) { + for (std::size_t i = 0; i < numberOfDependencies; i++) { + graph->make_edge(*fromNodes[i], *toNodes[i]); + } +} + struct kernel_node_params { dpct::dim3 block_dim; dpct::dim3 grid_dim; @@ -37,6 +56,8 @@ struct kernel_node_params { void *func; unsigned int shared_mem_bytes; + std::vector dependencies; + public: void set_block_dim(dpct::dim3 block_dim) { this->block_dim = block_dim; } void set_grid_dim(dpct::dim3 grid_dim) { this->grid_dim = grid_dim; } @@ -52,6 +73,26 @@ struct kernel_node_params { void **get_kernel_params() { return kernel_params; } void *get_func() { return func; } unsigned int get_shared_mem_bytes() { return shared_mem_bytes; } + + void add_depency(dpct::experimental::node_ptr dependency) { + dependencies.push_back(dependency); + } + std::vector get_dependencies() const { + return dependencies; + } + void update_dependency(dpct::experimental::node_ptr oldDependency, dpct::experimental::node_ptr newDependency){ + std::cout <<"Dependencies size in struct: "< 0) { + for (std::size_t i = 0; i < numberOfDependencies; i++) { + params->add_depency(dependencies[i]); + } + } + kernel_node_params_map[graph].emplace_back(*node, params); + std::cout << "second count for num of deps: " << params->get_dependencies().size() << "\n"; } void launch(dpct::experimental::command_graph_exec_ptr execGraph, sycl::queue *queue) { auto graph = exec_graph_map[execGraph]; - for (auto kernel_params : kernel_node_params_map[graph]) { - graph->add([&](sycl::handler &cgh) { - cgh.host_task([=]() { - dpct::kernel_launcher::launch( - kernel_params->get_func(), kernel_params->get_grid_dim(), - kernel_params->get_block_dim(), - kernel_params->get_kernel_params(), - kernel_params->get_shared_mem_bytes(), queue); - }); - }); + for (std::size_t i = 0; i < kernel_node_params_map[graph].size(); i++) { + std::cout<<"Num of nodes in graph: " << kernel_node_params_map[graph].size() << "\n"; + // for (auto &node_kernel_params_pair : kernel_node_params_map[graph]) { + auto node_kernel_params_pair = kernel_node_params_map[graph][i]; + auto node_params = node_kernel_params_pair.second; + auto dependency_ptrs = node_params->get_dependencies(); + std::cout <<"i right now: " << i <<"\n"; + std::cout << "Dependecnies size: " << dependency_ptrs.size() << "\n"; + if(i==1){ + std::cout <<"The dependency is: " << dependency_ptrs[0] <<"\n"; + } + std::vector dependencies; + + for (auto dep_ptr : dependency_ptrs) { + dependencies.push_back(*dep_ptr); + std::cout << "Dependecy deref & pushed\n"; + } + auto new_node = new sycl::ext::oneapi::experimental::node( + graph->add([&](sycl::handler &cgh) { + cgh.host_task([=]() { + dpct::kernel_launcher::launch( + node_params->get_func(), node_params->get_grid_dim(), + node_params->get_block_dim(), + node_params->get_kernel_params(), + node_params->get_shared_mem_bytes(), queue); + }); + }, sycl::ext::oneapi::experimental::property::node::depends_on(dependencies))); + std::cout << "new node is added to graph and dep\n"; + std::cout <<"Current i:" << i <<"\n"; + std::cout << "Size: " << kernel_node_params_map[graph].size() << "\n"; + if(i< kernel_node_params_map[graph].size()-1){ + std::cout <<"The -1 of i dependency address is: "<< kernel_node_params_map[graph][i+1].second->get_dependencies()[i] << "\n"; + kernel_node_params_map[graph][i+1].second->update_dependency(kernel_node_params_map[graph][i+1].second->get_dependencies()[i], new_node); + } + std::cout <<"new node addr: " << new_node << "\n"; + std::cout <<"old node addr:" << kernel_node_params_map[graph][i].first << "\n"; + kernel_node_params_map[graph][i].first = new_node; + std::cout <<"set node addr:" << kernel_node_params_map[graph][i].first << "\n"; + // if (i == 1) { + // dpct::experimental::node_ptr toNode[1] = {new_node}; + // dpct::experimental::node_ptr fromNode[1] = {dependency_ptrs[0]}; + // std::cout<<"i is 1 wokring\n"; + // for(dpct::experimental::node_ptr ptr: dependency_ptrs){ + // std::cout <<"Dep ptr: " <finalize(); - queue->submit([&](sycl::handler &cgh) { cgh.ext_oneapi_graph(final_graph); }); + queue->submit( + [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(final_graph); }); } void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, @@ -153,6 +241,9 @@ class graph_mgr { exec_graph_map[*execGraph] = graph; } + void kernel_node_get_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) {} + private: std::unordered_map queue_graph_map; std::unordered_map exec_graph_map; - std::unordered_map> + std::unordered_map< + dpct::experimental::command_graph_ptr, + std::vector>> kernel_node_params_map; }; } // namespace detail @@ -216,23 +309,6 @@ add_empty_node(dpct::experimental::node_ptr *newNode, dependencies)})); } -/// Adds dependencies between nodes in the command graph. -/// \param [in] graph A pointer to the command graph. -/// \param [in] fromNodes An array of node pointers representing -/// the source nodes. -/// \param [in] toNodes An array of node pointers representing -/// the destination nodes. -/// \param [in] numberOfDependencies The number of dependencies -/// to be added. -static void add_dependencies(dpct::experimental::command_graph_ptr graph, - const dpct::experimental::node_ptr *fromNodes, - const dpct::experimental::node_ptr *toNodes, - std::size_t numberOfDependencies) { - for (std::size_t i = 0; i < numberOfDependencies; i++) { - graph->make_edge(*fromNodes[i], *toNodes[i]); - } -} - /// Gets the nodes in the command graph. /// \param [in] graph A pointer to the command graph. /// \param [out] nodesArray An array of node pointers where the @@ -275,5 +351,9 @@ static void launch(dpct::experimental::command_graph_exec_ptr execGraph, detail::graph_mgr::instance().launch(execGraph, queue); } +static void +kernel_node_get_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) {} + } // namespace experimental } // namespace dpct From 089ff920fa9ff5f0bea0ef8138059e7c37285a84 Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Thu, 24 Apr 2025 03:02:57 -0400 Subject: [PATCH 05/19] test4 Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RulesLang/APINamesGraph.inc | 9 +- clang/lib/DPCT/RulesLang/MapNamesLang.cpp | 2 +- clang/lib/DPCT/RulesLang/RulesLang.cpp | 36 +--- clang/lib/DPCT/RulesLang/RulesLang.h | 12 +- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 45 +---- clang/runtime/dpct-rt/include/dpct/graph.hpp | 173 ++++++++----------- 6 files changed, 102 insertions(+), 175 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index 9c2e3f932f97..68e42ab8730d 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -26,10 +26,9 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - CALL_FACTORY_ENTRY("cudaGraphLaunch", - CALL(MapNames::getDpctNamespace() + - "experimental::launch", - ARG(0), ARG(1))), + CALL_FACTORY_ENTRY("cudaGraphLaunch", CALL(MapNames::getDpctNamespace() + + "experimental::launch", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphLaunch", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphLaunch"), @@ -109,7 +108,7 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( CALL_FACTORY_ENTRY("cudaGraphAddKernelNode", CALL(MapNames::getDpctNamespace() + "experimental::add_kernel_node", - ARG(0), ARG(1), ARG(2))), + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))), UNSUPPORT_FACTORY_ENTRY("cudaGraphAddKernelNode", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphAddKernelNode"), diff --git a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp index 965382bc1c5e..551d886083d5 100644 --- a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp +++ b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp @@ -371,4 +371,4 @@ MapNamesLang::MapTy GraphRule::KernelNodeParamNames{ {"func", "func"}}; } // namespace dpct -} // namespace clang \ No newline at end of file +} // namespace clang diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 1332fe411e53..b9ac5a712ac1 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -4518,22 +4518,14 @@ void StreamAPICallRule::runRule(const MatchFinder::MatchResult &Result) { void KernelCallRefRule::registerMatcher(ast_matchers::MatchFinder &MF) { - auto cudaKernelNodeParamsMatcher = memberExpr(hasObjectExpression(hasType( - type(hasUnqualifiedDesugaredType(recordType(hasDeclaration(recordDecl(hasAnyName("cudaKernelNodeParams"))))))))); MF.addMatcher( - functionDecl( - forEachDescendant( - declRefExpr( - allOf( - to(functionDecl(hasAttr(attr::CUDAGlobal))), - unless(hasAncestor(cudaKernelCallExpr())) - ) - ).bind("kernelRef") - ), - unless(hasDescendant(cudaKernelNodeParamsMatcher)) - ).bind("outerFunc"), - this); - + functionDecl( + forEachDescendant( + declRefExpr(allOf(to(functionDecl(hasAttr(attr::CUDAGlobal))), + unless(hasAncestor(cudaKernelCallExpr())))) + .bind("kernelRef"))) + .bind("outerFunc"), + this); MF.addMatcher(unresolvedLookupExpr(unless(hasAncestor(cudaKernelCallExpr()))) .bind("unresolvedRef"), @@ -4582,13 +4574,11 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, bool isInsertWrapperRegister) { auto NLoc = DpctGlobalInfo::getSourceManager().getSpellingLoc( Node->getNameInfo().getBeginLoc()); - std::cout << "Inserting _wrapper at location: " << NLoc.printToString(DpctGlobalInfo::getSourceManager()) << "\n"; emplaceTransformation(new InsertText( NLoc.getLocWithOffset(Node->getNameInfo().getAsString().length()), "_wrapper")); if (!isInsertWrapperRegister) { - std::cout << "Not inserting wrapper_register\n"; return; } const Expr *E = Node; @@ -4604,7 +4594,6 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, E = COC; } } - std::cout << "Inserting wrapper_register with TypeRepl: " << TypeRepl << "\n"; emplaceTransformation(new InsertBeforeStmt( E, MapNames::getDpctNamespace() + "wrapper_register" + TypeRepl + "(")); emplaceTransformation(new InsertAfterStmt(E, ").get()")); @@ -4613,7 +4602,6 @@ void KernelCallRefRule::insertWrapperPostfix(const T *Node, void KernelCallRefRule::runRule( const ast_matchers::MatchFinder::MatchResult &Result) { if (auto DRE = getAssistNodeAsType(Result, "kernelRef")) { - std::cout << "KernelRef matched\n"; const FunctionDecl *OuterFD = getAssistNodeAsType(Result, "outerFunc"); if (!OuterFD) { @@ -7189,23 +7177,13 @@ TextModification * ReplaceMemberAssignAsSetMethod(const Expr *E, const MemberExpr *ME, StringRef MethodName, StringRef ReplacedArg, StringRef ExtraArg, StringRef ExtraFeild) { - std::cout << "Entering ReplaceMemberAssignAsSetMethod (overloaded)\n"; - std::cout << "Expr: " << E->getStmtClassName() << "\n"; - std::cout << "MemberExpr: " << ME->getMemberNameInfo().getAsString() << "\n"; - std::cout << "MethodName: " << MethodName.str() << "\n"; - std::cout << "ReplacedArg: " << ReplacedArg.str() << "\n"; - std::cout << "ExtraArg: " << ExtraArg.str() << "\n"; - std::cout << "ExtraFeild: " << ExtraFeild.str() << "\n"; if (ReplacedArg.empty()) { if (auto RHS = getRhs(E)) { - std::cout << "RHS found: " << ExprAnalysis::ref(RHS) << "\n"; - StringRef c = ExprAnalysis::ref(RHS); return ReplaceMemberAssignAsSetMethod( getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ExprAnalysis::ref(RHS), ExtraArg, ExtraFeild); } } - std::cout << "ReplacedArg is not empty or RHS not found\n"; return ReplaceMemberAssignAsSetMethod(getStmtExpansionSourceRange(E).getEnd(), ME, MethodName, ReplacedArg, ExtraArg); } diff --git a/clang/lib/DPCT/RulesLang/RulesLang.h b/clang/lib/DPCT/RulesLang/RulesLang.h index 5382e25dcf36..cf48bcce2b5d 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.h +++ b/clang/lib/DPCT/RulesLang/RulesLang.h @@ -998,17 +998,17 @@ class CompatWithClangRule : public NamedMigrationRule { void runRule(const ast_matchers::MatchFinder::MatchResult &Result); }; -class GraphRule : public NamedMigrationRule { - static MapNames::MapTy KernelNodeParamNames; - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); - +class GraphAnalysisRule : public NamedMigrationRule { public: void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); }; -class GraphAnalysisRule : public NamedMigrationRule { +class GraphRule : public NamedMigrationRule { + static MapNames::MapTy KernelNodeParamNames; + const Expr *getAssignedBO(const Expr *E, ASTContext &Context); + const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); + public: void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index ac4e0ad5f678..dc1b012f9b3e 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -91,46 +91,20 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { getParentAsAssignedBO(ME, *Result.Context))) { auto *LHS = BO->getLHS()->IgnoreCasts(); if (auto *ME = dyn_cast(LHS)) { - std::cout << "Member Expr\n"; - // Get the base expression of the MemberExpr auto *Base = ME->getBase()->IgnoreImpCasts(); - - // Check if the base is a DeclRefExpr if (auto *DRE = dyn_cast(Base)) { - std::cout << "DeclRef Expr\n"; - // Get the variable declaration if (auto *VD = dyn_cast(DRE->getDecl())) { - std::cout << "Base VarDecl Expr\n"; - // Get the variable name - std::string varName = VD->getNameAsString(); - - // Get the RHS of the assignment - clang::Expr *RHS = BO->getRHS()->IgnoreCasts(); - - // Check if RHS is a DeclRefExpr referring to a function + std::string VarName = VD->getNameAsString(); + auto *RHS = BO->getRHS()->IgnoreCasts(); if (auto *RHS_DRE = dyn_cast(RHS)) { - std::cout << "RHS DRE Expr\n"; if (auto *FD = dyn_cast(RHS_DRE->getDecl())) { - std::cout << "RHS FunctionDecl Expr\n"; - // Get the function name - std::string funcName = FD->getNameAsString(); - std::string wrapperName = funcName + "_wrapper"; - - // Construct the replacement expression - std::string ReplacementExpr = - varName + ".set_func((void*) dpct::wrapper_register(&" + - wrapperName + ").get());"; - std::cout << "Replacement String: " << ReplacementExpr - << "\n"; - std::string rp = "(void*) dpct::wrapper_register(&" + - wrapperName + ").get()"; - StringRef ReplacedArg = rp; - emplaceTransformation(ReplaceMemberAssignAsSetMethod( - BO, ME, FieldName, ReplacedArg)); - // Replace the original assignment with the new expression - // emplaceTransformation( - // new ReplaceToken(ME->getBeginLoc(), ME->getEndLoc(), - // std ::move(ReplacementExpr))); + std::string FuncName = FD->getNameAsString(); + std::string WrapperName = FuncName; + std::string AccessOperator = VD->getType()->isPointerType() ? "->" : "."; + std::string ReplacementStr = VarName + AccessOperator + "set_func(" + "(void*) dpct::wrapper_register(&" + WrapperName ; + emplaceTransformation(new ReplaceToken(BO->getBeginLoc(), BO->getEndLoc(), std::move(ReplacementStr))); + emplaceTransformation(new InsertAfterStmt(BO, ")")); return; } } @@ -139,7 +113,6 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { } } } - std::cout << "Coming here\n"; if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { StringRef ReplacedArg = ""; emplaceTransformation( diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 3db232719834..57dcb953f1c4 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -8,13 +8,6 @@ #pragma once -#include "dpct/kernel.hpp" -#include "dpct/util.hpp" -#include "sycl/ext/oneapi/experimental/graph.hpp" -#include "sycl/handler.hpp" -#include "sycl/property_list.hpp" -#include "sycl/queue.hpp" -#include #include #include #include @@ -32,35 +25,20 @@ typedef sycl::ext::oneapi::experimental::command_graph< typedef sycl::ext::oneapi::experimental::node *node_ptr; -/// Adds dependencies between nodes in the command graph. -/// \param [in] graph A pointer to the command graph. -/// \param [in] fromNodes An array of node pointers representing -/// the source nodes. -/// \param [in] toNodes An array of node pointers representing -/// the destination nodes. -/// \param [in] numberOfDependencies The number of dependencies -/// to be added. -static void add_dependencies(dpct::experimental::command_graph_ptr graph, - const dpct::experimental::node_ptr *fromNodes, - const dpct::experimental::node_ptr *toNodes, - std::size_t numberOfDependencies) { - for (std::size_t i = 0; i < numberOfDependencies; i++) { - graph->make_edge(*fromNodes[i], *toNodes[i]); - } -} - struct kernel_node_params { - dpct::dim3 block_dim; - dpct::dim3 grid_dim; - void **kernel_params; - void *func; - unsigned int shared_mem_bytes; + dpct::dim3 block_dim{}; + dpct::dim3 grid_dim{}; + void **kernel_params{}; + void *func{}; + unsigned int shared_mem_bytes{}; - std::vector dependencies; + std::vector dependencies{}; public: - void set_block_dim(dpct::dim3 block_dim) { this->block_dim = block_dim; } - void set_grid_dim(dpct::dim3 grid_dim) { this->grid_dim = grid_dim; } + void set_block_dim(const dpct::dim3 &block_dim) { + this->block_dim = block_dim; + } + void set_grid_dim(const dpct::dim3 &grid_dim) { this->grid_dim = grid_dim; } void set_kernel_params(void **kernel_params) { this->kernel_params = kernel_params; } @@ -68,29 +46,24 @@ struct kernel_node_params { void set_shared_mem_bytes(unsigned int shared_mem_bytes) { this->shared_mem_bytes = shared_mem_bytes; } - dpct::dim3 get_block_dim() { return block_dim; } - dpct::dim3 get_grid_dim() { return grid_dim; } - void **get_kernel_params() { return kernel_params; } - void *get_func() { return func; } - unsigned int get_shared_mem_bytes() { return shared_mem_bytes; } - - void add_depency(dpct::experimental::node_ptr dependency) { - dependencies.push_back(dependency); + dpct::dim3 get_block_dim() const { return block_dim; } + dpct::dim3 get_grid_dim() const { return grid_dim; } + void **get_kernel_params() const { return kernel_params; } + void *get_func() const { return func; } + unsigned int get_shared_mem_bytes() const { return shared_mem_bytes; } + + void add_dependency(dpct::experimental::node_ptr dependency) { + dependencies.push_back(std::move(dependency)); } - std::vector get_dependencies() const { + const std::vector &get_dependencies() const { return dependencies; } - void update_dependency(dpct::experimental::node_ptr oldDependency, dpct::experimental::node_ptr newDependency){ - std::cout <<"Dependencies size in struct: "< 0) { - for (std::size_t i = 0; i < numberOfDependencies; i++) { - params->add_depency(dependencies[i]); - } + for (std::size_t i = 0; i < numberOfDependencies; i++) { + params->add_dependency(dependencies[i]); } - kernel_node_params_map[graph].emplace_back(*node, params); - std::cout << "second count for num of deps: " << params->get_dependencies().size() << "\n"; + graph_kernel_node_params_map[graph].emplace_back(*node, params); } + void launch(dpct::experimental::command_graph_exec_ptr execGraph, sycl::queue *queue) { + // Retrieve the graph associated with execGraph auto graph = exec_graph_map[execGraph]; - for (std::size_t i = 0; i < kernel_node_params_map[graph].size(); i++) { - std::cout<<"Num of nodes in graph: " << kernel_node_params_map[graph].size() << "\n"; - // for (auto &node_kernel_params_pair : kernel_node_params_map[graph]) { - auto node_kernel_params_pair = kernel_node_params_map[graph][i]; + auto &kernel_params_vector = graph_kernel_node_params_map[graph]; + for (std::size_t i = 0; i < kernel_params_vector.size(); i++) { + auto &node_kernel_params_pair = kernel_params_vector[i]; auto node_params = node_kernel_params_pair.second; - auto dependency_ptrs = node_params->get_dependencies(); - std::cout <<"i right now: " << i <<"\n"; - std::cout << "Dependecnies size: " << dependency_ptrs.size() << "\n"; - if(i==1){ - std::cout <<"The dependency is: " << dependency_ptrs[0] <<"\n"; - } + + const auto &dependency_ptrs = node_params->get_dependencies(); std::vector dependencies; - - for (auto dep_ptr : dependency_ptrs) { - dependencies.push_back(*dep_ptr); - std::cout << "Dependecy deref & pushed\n"; + dependencies.reserve(dependency_ptrs.size()); + for (const auto &dep_ptr : dependency_ptrs) { + if (dep_ptr) { + dependencies.push_back(*dep_ptr); + } } - auto new_node = new sycl::ext::oneapi::experimental::node( - graph->add([&](sycl::handler &cgh) { + auto new_node = new sycl::ext::oneapi::experimental::node(graph->add( + [&](sycl::handler &cgh) { cgh.host_task([=]() { dpct::kernel_launcher::launch( node_params->get_func(), node_params->get_grid_dim(), @@ -207,30 +174,16 @@ class graph_mgr { node_params->get_kernel_params(), node_params->get_shared_mem_bytes(), queue); }); - }, sycl::ext::oneapi::experimental::property::node::depends_on(dependencies))); - std::cout << "new node is added to graph and dep\n"; - std::cout <<"Current i:" << i <<"\n"; - std::cout << "Size: " << kernel_node_params_map[graph].size() << "\n"; - if(i< kernel_node_params_map[graph].size()-1){ - std::cout <<"The -1 of i dependency address is: "<< kernel_node_params_map[graph][i+1].second->get_dependencies()[i] << "\n"; - kernel_node_params_map[graph][i+1].second->update_dependency(kernel_node_params_map[graph][i+1].second->get_dependencies()[i], new_node); + }, + sycl::ext::oneapi::experimental::property::node::depends_on( + dependencies))); + if (i + 1 < kernel_params_vector.size()) { + auto &next_node_params = kernel_params_vector[i + 1].second; + auto next_dependency = next_node_params->get_dependencies()[i]; + next_node_params->update_dependency(next_dependency, new_node); } - std::cout <<"new node addr: " << new_node << "\n"; - std::cout <<"old node addr:" << kernel_node_params_map[graph][i].first << "\n"; - kernel_node_params_map[graph][i].first = new_node; - std::cout <<"set node addr:" << kernel_node_params_map[graph][i].first << "\n"; - // if (i == 1) { - // dpct::experimental::node_ptr toNode[1] = {new_node}; - // dpct::experimental::node_ptr fromNode[1] = {dependency_ptrs[0]}; - // std::cout<<"i is 1 wokring\n"; - // for(dpct::experimental::node_ptr ptr: dependency_ptrs){ - // std::cout <<"Dep ptr: " <finalize(); queue->submit( [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(final_graph); }); @@ -259,7 +212,10 @@ class graph_mgr { dpct::experimental::command_graph_ptr, std::vector>> - kernel_node_params_map; + graph_kernel_node_params_map; + std::unordered_map + node_params_map; }; } // namespace detail @@ -309,6 +265,23 @@ add_empty_node(dpct::experimental::node_ptr *newNode, dependencies)})); } +/// Adds dependencies between nodes in the command graph. +/// \param [in] graph A pointer to the command graph. +/// \param [in] fromNodes An array of node pointers representing +/// the source nodes. +/// \param [in] toNodes An array of node pointers representing +/// the destination nodes. +/// \param [in] numberOfDependencies The number of dependencies +/// to be added. +static void add_dependencies(dpct::experimental::command_graph_ptr graph, + const dpct::experimental::node_ptr *fromNodes, + const dpct::experimental::node_ptr *toNodes, + std::size_t numberOfDependencies) { + for (std::size_t i = 0; i < numberOfDependencies; i++) { + graph->make_edge(*fromNodes[i], *toNodes[i]); + } +} + /// Gets the nodes in the command graph. /// \param [in] graph A pointer to the command graph. /// \param [out] nodesArray An array of node pointers where the @@ -355,5 +328,9 @@ static void kernel_node_get_params(dpct::experimental::node_ptr node, dpct::experimental::kernel_node_params *params) {} +static void +kernel_node_set_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) {} + } // namespace experimental } // namespace dpct From a1c23118403a6144ef9569eac8278566a42e24a1 Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Mon, 28 Apr 2025 01:32:29 -0400 Subject: [PATCH 06/19] Rebase Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RuleInfra/MapNames.cpp | 51 +++++++++++++- clang/lib/DPCT/RulesLang/APINamesGraph.inc | 39 +++++++++-- clang/lib/DPCT/RulesLang/RulesLang.cpp | 13 +++- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 40 +++++++++-- clang/lib/DPCT/SrcAPI/APINames.inc | 4 +- clang/runtime/dpct-rt/include/dpct/graph.hpp | 70 +++++++++++++++++--- clang/test/dpct/cudaGraph_test.cu | 33 +++++---- 7 files changed, 211 insertions(+), 39 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index ebfc871dd58e..9ef3c2ee9219 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -640,8 +640,14 @@ void MapNames::setExplicitNamespaceMap( DpctGlobalInfo::useExtGraph() ? getClNamespace() + "ext::oneapi::experimental::node_type" : "cudaGraphNodeType")}, - {"cudaGraphExecUpdateResultInfo", std::make_shared("int")}, - {"cudaGraphExecUpdateResult", std::make_shared("int")}, + {"cudaGraphExecUpdateResultInfo", + std::make_shared(DpctGlobalInfo::useExtGraph() + ? "int" + : "cudaGraphExecUpdateResultInfo")}, + {"cudaGraphExecUpdateResult", + std::make_shared(DpctGlobalInfo::useExtGraph() + ? "int" + : "cudaGraphExecUpdateResultInfo")}, {"CUmem_advise", std::make_shared("int")}, {"CUmemorytype", std::make_shared(getClNamespace() + "usm::alloc")}, @@ -1153,6 +1159,47 @@ void MapNames::setExplicitNamespaceMap( ? getClNamespace() + "ext::oneapi::experimental::node_type::empty" : "cudaGraphNodeTypeEmpty")}, + {"cudaGraphExecUpdateSuccess", + std::make_shared( + DpctGlobalInfo::useExtGraph() ? "1" : "cudaGraphExecUpdateSuccess")}, + {"cudaGraphExecUpdateError", + std::make_shared( + DpctGlobalInfo::useExtGraph() ? "0" : "cudaGraphExecUpdateError")}, + {"cudaGraphExecUpdateErrorTopologyChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorTopologyChanged")}, + {"cudaGraphExecUpdateErrorNodeTypeChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorNodeTypeChanged")}, + {"cudaGraphExecUpdateErrorFunctionChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorFunctionChanged")}, + {"cudaGraphExecUpdateErrorParametersChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorParametersChanged")}, + {"cudaGraphExecUpdateErrorNotSupported", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorNotSupported")}, + {"cudaGraphExecUpdateErrorUnsupportedFunctionChange", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorUnsupportedFunctionChange")}, + {"cudaGraphExecUpdateErrorAttributesChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorAttributesChanged")}, // enum CUmem_advise_enum {"CU_MEM_ADVISE_SET_READ_MOSTLY", std::make_shared("0")}, {"CU_MEM_ADVISE_UNSET_READ_MOSTLY", std::make_shared("0")}, diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index 68e42ab8730d..42d5c69f625f 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -58,22 +58,25 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - MEMBER_CALL_FACTORY_ENTRY("cudaGraphExecUpdate", ARG(0), true, "update", - DEREF(1)), + CALL_FACTORY_ENTRY("cudaGraphExecUpdate", + CALL(MapNames::getDpctNamespace() + + "experimental::update", + ARG(0), ARG(1), ARG(2))), UNSUPPORT_FACTORY_ENTRY("cudaGraphExecUpdate", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphExecUpdate"), ARG("--use-experimental-features=graph")))) -CONDITIONAL_FACTORY_ENTRY( +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY("cudaGraphNodeGetType", DEREF(1), - MEMBER_CALL(ARG(0), true, - "get_type"))), + CALL_FACTORY_ENTRY("cudaGraphNodeGetType", + CALL(MapNames::getDpctNamespace() + + "experimental::get_node_type", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphNodeGetType", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphNodeGetType"), - ARG("--use-experimental-features=graph"))) + ARG("--use-experimental-features=graph")))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, @@ -113,3 +116,25 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphAddKernelNode"), ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::kernel_node_get_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeGetParams"), + ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::kernel_node_set_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeSetParams"), + ARG("--use-experimental-features=graph")))) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index b9ac5a712ac1..3ef637949dcf 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -1930,7 +1930,7 @@ void EnumConstantRule::registerMatcher(MatchFinder &MF) { "cufftType", "cudaMemoryType", "CUctx_flags_enum", "CUpointer_attribute_enum", "CUmemorytype_enum", "cudaGraphicsMapFlags", "cudaGraphicsRegisterFlags", - "cudaGraphNodeType", "CUdevice_P2PAttribute_enum"))), + "cudaGraphNodeType", "CUdevice_P2PAttribute_enum", "cudaGraphExecUpdateResult"))), matchesName("CUDNN_.*"), matchesName("CUSOLVER_.*"))))) .bind("EnumConstant"), this); @@ -2050,7 +2050,16 @@ void EnumConstantRule::runRule(const MatchFinder::MatchResult &Result) { EnumName == "cudaGraphNodeTypeMemset" || EnumName == "cudaGraphNodeTypeHost" || EnumName == "cudaGraphNodeTypeGraph" || - EnumName == "cudaGraphNodeTypeEmpty")) { + EnumName == "cudaGraphNodeTypeEmpty" || + EnumName == "cudaGraphExecUpdateSuccess" || + EnumName == "cudaGraphExecUpdateError" || + EnumName == "cudaGraphExecUpdateErrorTopologyChanged" || + EnumName == "cudaGraphExecUpdateErrorNodeTypeChanged" || + EnumName == "cudaGraphExecUpdateErrorFunctionChanged" || + EnumName == "cudaGraphExecUpdateErrorParametersChanged" || + EnumName == "cudaGraphExecUpdateErrorNotSupported" || + EnumName == "cudaGraphExecUpdateErrorUnsupportedFunctionChange" || + EnumName == "cudaGraphExecUpdateErrorAttributesChanged")) { report(E->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, EnumName, "--use-experimental-features=graph"); return; diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index dc1b012f9b3e..3caf6847b743 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -58,7 +58,8 @@ void GraphRule::registerMatcher(MatchFinder &MF) { "cudaGraphInstantiate", "cudaGraphLaunch", "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", "cudaGraphAddDependencies", "cudaGraphExecUpdate", "cudaGraphNodeGetType", "cudaGraphGetNodes", - "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphAddKernelNode"); + "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphAddKernelNode", + "cudaGraphKernelNodeGetParams", "cudaGraphKernelNodeSetParams"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), @@ -70,6 +71,12 @@ void GraphRule::registerMatcher(MatchFinder &MF) { recordType(hasDeclaration(recordDecl(typeName())))))))) .bind("Type"), this); + + MF.addMatcher(memberExpr(hasObjectExpression(hasType( + asString("cudaGraphExecUpdateResultInfo"))), + member(hasName("result"))) + .bind("execUpdateResult"), + this); } void GraphRule::runRule(const MatchFinder::MatchResult &Result) { @@ -100,10 +107,16 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { if (auto *FD = dyn_cast(RHS_DRE->getDecl())) { std::string FuncName = FD->getNameAsString(); std::string WrapperName = FuncName; - std::string AccessOperator = VD->getType()->isPointerType() ? "->" : "."; - std::string ReplacementStr = VarName + AccessOperator + "set_func(" - "(void*) dpct::wrapper_register(&" + WrapperName ; - emplaceTransformation(new ReplaceToken(BO->getBeginLoc(), BO->getEndLoc(), std::move(ReplacementStr))); + std::string AccessOperator = + VD->getType()->isPointerType() ? "->" : "."; + std::string ReplacementStr = + VarName + AccessOperator + + "set_func(" + "(void*) dpct::wrapper_register(&" + + WrapperName; + emplaceTransformation( + new ReplaceToken(BO->getBeginLoc(), BO->getEndLoc(), + std::move(ReplacementStr))); emplaceTransformation(new InsertAfterStmt(BO, ")")); return; } @@ -124,6 +137,23 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { } return; } + if (auto ME = getNodeAsType(Result, "execUpdateResult")) { + auto MD = ME->getMemberDecl(); + const Expr *Base = ME->getBase(); + if (MD->getNameAsString() == "result") { + if (auto *DRE = dyn_cast(Base)) { + SourceLocation StartLoc = Base->getBeginLoc(); + SourceLocation EndLoc = ME->getEndLoc(); + const SourceManager &SM = *Result.SourceManager; + EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, SM, + Result.Context->getLangOpts()); + std::string VarNameStr = DRE->getNameInfo().getAsString(); + emplaceTransformation( + new ReplaceToken(StartLoc, EndLoc, std::move(VarNameStr))); + } + } + return; + } const CallExpr *CE = getNodeAsType(Result, "FunctionCall"); if (!CE) { return; diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 7bc9e2ebd2d1..e3650473c858 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -456,12 +456,12 @@ ENTRY(cudaGraphInstantiateWithFlags, cudaGraphInstantiateWithFlags, false, NO_FL ENTRY(cudaGraphInstantiateWithParams, cudaGraphInstantiateWithParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeCopyAttributes, cudaGraphKernelNodeCopyAttributes, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeGetAttribute, cudaGraphKernelNodeGetAttribute, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeSetAttribute, cudaGraphKernelNodeSetAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetEnabled, cudaGraphKernelNodeSetEnabled, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetGridDim, cudaGraphKernelNodeSetGridDim, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetParam, cudaGraphKernelNodeSetParam, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeUpdatesApply, cudaGraphKernelNodeUpdatesApply, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphLaunch, cudaGraphLaunch, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphMemAllocNodeGetParams, cudaGraphMemAllocNodeGetParams, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 57dcb953f1c4..5a6f211727ea 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -33,6 +33,12 @@ struct kernel_node_params { unsigned int shared_mem_bytes{}; std::vector dependencies{}; + kernel_node_params() = default; + kernel_node_params(const kernel_node_params &other) + : block_dim(other.block_dim), grid_dim(other.grid_dim), + kernel_params(other.kernel_params), func(other.func), + shared_mem_bytes(other.shared_mem_bytes), + dependencies(other.dependencies) {} public: void set_block_dim(const dpct::dim3 &block_dim) { @@ -142,6 +148,7 @@ class graph_mgr { dpct::experimental::node_ptr *dependencies, std::size_t numberOfDependencies, dpct::experimental::kernel_node_params *params) { + node_graph_params_map[*node] = std::make_pair(graph, params); for (std::size_t i = 0; i < numberOfDependencies; i++) { params->add_dependency(dependencies[i]); } @@ -156,7 +163,6 @@ class graph_mgr { for (std::size_t i = 0; i < kernel_params_vector.size(); i++) { auto &node_kernel_params_pair = kernel_params_vector[i]; auto node_params = node_kernel_params_pair.second; - const auto &dependency_ptrs = node_params->get_dependencies(); std::vector dependencies; dependencies.reserve(dependency_ptrs.size()); @@ -184,9 +190,12 @@ class graph_mgr { } node_kernel_params_pair.first = new_node; } - auto final_graph = graph->finalize(); + execGraph = new sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::executable>( + graph->finalize( + sycl::ext::oneapi::experimental::property::graph::updatable{})); queue->submit( - [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(final_graph); }); + [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(*execGraph); }); } void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, @@ -195,7 +204,31 @@ class graph_mgr { } void kernel_node_get_params(dpct::experimental::node_ptr node, - dpct::experimental::kernel_node_params *params) {} + dpct::experimental::kernel_node_params *params) { + auto it = node_graph_params_map.find(node); + if (it == node_graph_params_map.end()) { + return; + } + *params = *(it->second.second); + } + + void kernel_node_set_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) { + node_graph_params_map[node].second = params; + } + + void get_node_type(dpct::experimental::node_ptr node, + sycl::ext::oneapi::experimental::node_type *nodeType) { + if (node_graph_params_map.find(node) != node_graph_params_map.end()) { + *nodeType = sycl::ext::oneapi::experimental::node_type::kernel; + } else { + if (node) { + *nodeType = node->get_type(); + } else { + *nodeType = sycl::ext::oneapi::experimental::node_type::empty; + } + } + } private: std::unordered_map queue_graph_map; @@ -214,8 +247,9 @@ class graph_mgr { dpct::experimental::kernel_node_params *>>> graph_kernel_node_params_map; std::unordered_map - node_params_map; + std::pair> + node_graph_params_map; }; } // namespace detail @@ -326,11 +360,31 @@ static void launch(dpct::experimental::command_graph_exec_ptr execGraph, static void kernel_node_get_params(dpct::experimental::node_ptr node, - dpct::experimental::kernel_node_params *params) {} + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().kernel_node_get_params(node, params); +} static void kernel_node_set_params(dpct::experimental::node_ptr node, - dpct::experimental::kernel_node_params *params) {} + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().kernel_node_set_params(node, params); +} + +static void +get_node_type(dpct::experimental::node_ptr node, + sycl::ext::oneapi::experimental::node_type *nodeType) { + detail::graph_mgr::instance().get_node_type(node, nodeType); +} + +static void update(dpct::experimental::command_graph_exec_ptr graphExec, + dpct::experimental::command_graph_ptr graph, + int *updateResultInfo) { + graphExec->update(*graph); + if (!graphExec) { + *updateResultInfo = 0; + } + *updateResultInfo = 1; +} } // namespace experimental } // namespace dpct diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index f0b5742386f7..2496d4b2e185 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -96,10 +96,10 @@ int main() { // CHECK: dpct::experimental::add_dependencies(graph, node10, node11, 1); cudaGraphAddDependencies(graph, node10, node11, 1); - // CHECK: execGraph = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); - // CHECK-NEXT: *execGraph2 = new sycl::ext::oneapi::experimental::command_graph(graph->finalize()); - // CHECK-NEXT: **execGraph3 = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(**execGraph3 = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()))); + // CHECK: dpct::experimental::instantiate(&execGraph, *graph2); + // CHECK-NEXT: dpct::experimental::instantiate(execGraph2, graph); + // CHECK-NEXT: dpct::experimental::instantiate(*execGraph3, *graph2); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::instantiate(*execGraph3, *graph2))); cudaGraphInstantiate(&execGraph, *graph2, nullptr, nullptr, 0); cudaGraphInstantiate(execGraph2, graph, nullptr, nullptr, 0); cudaGraphInstantiate(*execGraph3, *graph2, nullptr, nullptr, 0); @@ -110,24 +110,31 @@ int main() { cudaStream_t *stream2; - // CHECK: stream->ext_oneapi_graph(*execGraph); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(stream->ext_oneapi_graph(*execGraph))); - // CHECK-NEXT: (*stream2)->ext_oneapi_graph(**execGraph2); + // CHECK: dpct::experimental::launch(execGraph, stream); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::launch(execGraph, stream))); + // CHECK-NEXT: dpct::experimental::launch(*execGraph2, *stream2); cudaGraphLaunch(execGraph, stream); CUDA_CHECK_THROW(cudaGraphLaunch(execGraph, stream)); cudaGraphLaunch(*execGraph2, *stream2); #ifndef DNO_BUILD_TEST - // CHECK: execGraph->update(*graph); - cudaGraphExecUpdate(execGraph, graph, nullptr, nullptr); - // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(execGraph->update(*graph))); - CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, nullptr, nullptr)); + cudaGraphExecUpdateResultInfo updateResult; + // CHECK: dpct::experimental::update(execGraph, graph, &updateResult); + cudaGraphExecUpdate(execGraph, graph, &updateResult); + + // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::update(execGraph, graph, &updateResult))); + CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, &updateResult)); + + // CHECK: if(updateResult == 1){} + // CHECK-NEXT: if(updateResult == 0){} + if(updateResult.result == cudaGraphExecUpdateSuccess){} + if(updateResult.result == cudaGraphExecUpdateErrorTopologyChanged){} #endif // CHECK: sycl::ext::oneapi::experimental::node_type nodeType; - // CHECK-NEXT: nodeType = node->get_type(); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(nodeType = node->get_type())); + // CHECK-NEXT: dpct::experimental::get_node_type(node, &nodeType); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::get_node_type(node, &nodeType))); cudaGraphNodeType nodeType; cudaGraphNodeGetType(node, &nodeType); CUDA_CHECK_THROW(cudaGraphNodeGetType(node, &nodeType)); From 14d7608cf9959a2e72577741da50e48da5f979d2 Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Mon, 28 Apr 2025 01:30:05 -0400 Subject: [PATCH 07/19] Fix clang format Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/AnalysisInfo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 147f6119e25b..6352604534ce 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1355,7 +1355,9 @@ class DpctGlobalInfo { static bool useNoQueueDevice() { return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice); } - static void setUseWrapperRegisterFnPtr() { UseWrapperRegisterFnPtrFlag = true; } + static void setUseWrapperRegisterFnPtr() { + UseWrapperRegisterFnPtrFlag = true; + } static bool useWrapperRegisterFnPtr() { return UseWrapperRegisterFnPtrFlag; } static void setUseSYCLCompat(bool Flag = true) { UseSYCLCompatFlag = Flag; } static bool useSYCLCompat() { return UseSYCLCompatFlag; } From b53d57171b5eff34d66ecf157b8a065f725e99d6 Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Mon, 28 Apr 2025 02:24:10 -0400 Subject: [PATCH 08/19] Fix clang formaT Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RulesLang/RulesLang.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 3ef637949dcf..55f8ec459a4a 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -1930,7 +1930,8 @@ void EnumConstantRule::registerMatcher(MatchFinder &MF) { "cufftType", "cudaMemoryType", "CUctx_flags_enum", "CUpointer_attribute_enum", "CUmemorytype_enum", "cudaGraphicsMapFlags", "cudaGraphicsRegisterFlags", - "cudaGraphNodeType", "CUdevice_P2PAttribute_enum", "cudaGraphExecUpdateResult"))), + "cudaGraphNodeType", "CUdevice_P2PAttribute_enum", + "cudaGraphExecUpdateResult"))), matchesName("CUDNN_.*"), matchesName("CUSOLVER_.*"))))) .bind("EnumConstant"), this); From be0f9811722b890b0ed9bc401a7cba7b5af4283f Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Mon, 28 Apr 2025 17:31:27 -0400 Subject: [PATCH 09/19] Fix default test Signed-off-by: Daiyaan Ahmed --- clang/lib/DPCT/RuleInfra/MapNames.cpp | 2 +- clang/lib/DPCT/RulesLang/RulesLang.cpp | 8 ++++++++ clang/runtime/dpct-rt/include/dpct/graph.hpp | 2 +- clang/test/dpct/cudaGraph_test_default_option.cu | 11 +++++++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index 9ef3c2ee9219..2102611a762f 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -647,7 +647,7 @@ void MapNames::setExplicitNamespaceMap( {"cudaGraphExecUpdateResult", std::make_shared(DpctGlobalInfo::useExtGraph() ? "int" - : "cudaGraphExecUpdateResultInfo")}, + : "cudaGraphExecUpdateResult")}, {"CUmem_advise", std::make_shared("int")}, {"CUmemorytype", std::make_shared(getClNamespace() + "usm::alloc")}, diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 55f8ec459a4a..2b08762620a5 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -931,6 +931,14 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { } } + if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { + if (!DpctGlobalInfo::useExtGraph()) { + report(TL->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, + "cudaGraphExecUpdateResult", + "--use-experimental-features=graph"); + } + } + if (CanonicalTypeStr == "cudaGraphicsRegisterFlags" || CanonicalTypeStr == "cudaGraphicsMapFlags") { if (!DpctGlobalInfo::useExtBindlessImages()) { diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 5a6f211727ea..fe12d2fee824 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -59,7 +59,7 @@ struct kernel_node_params { unsigned int get_shared_mem_bytes() const { return shared_mem_bytes; } void add_dependency(dpct::experimental::node_ptr dependency) { - dependencies.push_back(std::move(dependency)); + dependencies.push_back(dependency); } const std::vector &get_dependencies() const { return dependencies; diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 79029603d65c..c727157b5483 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -71,6 +71,17 @@ int main() { // CHECK-NEXT: */ cudaGraphAddDependencies(graph, NULL, NULL, 0); + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaKernelNodeParams is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaKernelNodeParams params; + params.blockDim = dim3(10); + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphAddKernelNode is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphAddKernelNode(&node, graph, nullptr, 0, ¶ms); + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphGetNodes is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ From 13f08ea29d4866f2f94aeffa4971cc0f332bb3be Mon Sep 17 00:00:00 2001 From: Daiyaan Ahmed Date: Mon, 28 Apr 2025 18:02:32 -0400 Subject: [PATCH 10/19] Add test for cudaKernelNodeParams Signed-off-by: Daiyaan Ahmed --- clang/test/dpct/cudaGraph_test.cu | 50 ++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index 2496d4b2e185..32009095e1c5 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -10,6 +10,24 @@ cudaError_t _result = x; \ } while (0) +__global__ void myKernel(int *data) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < 10) { + data[idx] += 1; + } +} + +// CHECK: void myKernel_wrapper(int* data) { +// CHECK: sycl::queue queue = *dpct::kernel_launcher::_que; +// CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size; +// CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr; +// CHECK: queue.parallel_for( +// CHECK: nr, +// CHECK: [=](sycl::nd_item<3> item_ct1) { +// CHECK: myKernel(data); +// CHECK: }); +// CHECK: } + int main() { // CHECK: dpct::experimental::command_graph_ptr graph; // CHECK-NEXT: dpct::experimental::command_graph_ptr *graph2; @@ -70,6 +88,26 @@ int main() { // CHECK: dpct::experimental::add_empty_node(&node, graph, node10, 1); cudaGraphAddEmptyNode(&node, graph, node10, 1); + // CHECK: dpct::experimental::kernel_node_params params = {}; + // CHECK-NEXT: params.set_func((void *)dpct::wrapper_register(&myKernel_wrapper).get()); + // CHECK-NEXT: params.set_block_dim(dpct::dim3(10)); + // CHECK-NEXT: params.set_grid_dim(dpct::dim3(1)); + // CHECK-NEXT: params.set_shared_mem_bytes(0); + // CHECK-NEXT: void *kernelArgs[] = {}; + // CHECK-NEXT: params.set_kernel_params(kernelArgs); + cudaKernelNodeParams params = {}; + params.func = (void *)myKernel; + params.blockDim = dim3(10); + params.gridDim = dim3(1); + params.sharedMemBytes = 0; + void *kernelArgs[] = {}; + params.kernelParams = kernelArgs; + + // CHECK: void *function = (void *)dpct::wrapper_register(myKernel_wrapper).get(); + // CHECK-NEXT: params.set_func(function); + void *function = (void *)myKernel; + params.func = function; + size_t numNodes; // CHECK: dpct::experimental::get_nodes(graph, node4, &numNodes); @@ -126,10 +164,14 @@ int main() { // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::update(execGraph, graph, &updateResult))); CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, &updateResult)); - // CHECK: if(updateResult == 1){} - // CHECK-NEXT: if(updateResult == 0){} - if(updateResult.result == cudaGraphExecUpdateSuccess){} - if(updateResult.result == cudaGraphExecUpdateErrorTopologyChanged){} + // CHECK: if(updateResult == 1) { + // CHECK-NEXT: } + // CHECK-NEXT: if(updateResult == 0) { + // CHECK-NEXT: } + if (updateResult.result == cudaGraphExecUpdateSuccess) { + } + if (updateResult.result == cudaGraphExecUpdateErrorTopologyChanged) { + } #endif // CHECK: sycl::ext::oneapi::experimental::node_type nodeType; From 9f997805ff631a04fea2584d19e0416c6dc48fd2 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 29 Apr 2025 15:15:39 +0800 Subject: [PATCH 11/19] Fix LIT test Signed-off-by: Ahmed, Daiyaan --- clang/test/dpct/cudaGraph_test.cu | 2 +- clang/test/dpct/cudaGraph_test_default_option.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index 32009095e1c5..1c518a66ec02 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -17,7 +17,7 @@ __global__ void myKernel(int *data) { } } -// CHECK: void myKernel_wrapper(int* data) { +// CHECK: void myKernel_wrapper(int *data) { // CHECK: sycl::queue queue = *dpct::kernel_launcher::_que; // CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size; // CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr; diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index c727157b5483..545b29cf5522 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -103,7 +103,7 @@ int main() { cudaGraphLaunch(execGraph, stream); // CHECK: /* - // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdate is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaGraphExecUpdateResult status; From 327e2d5f17129610e9e7987d1b64467a553a2778 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 29 Apr 2025 08:20:43 +0800 Subject: [PATCH 12/19] Fix LIT format Signed-off-by: Ahmed, Daiyaan --- clang/test/dpct/cudaGraph_test.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index 1c518a66ec02..8cb4b7dd905f 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -10,14 +10,14 @@ cudaError_t _result = x; \ } while (0) -__global__ void myKernel(int *data) { +__global__ void myKernel(int* data) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < 10) { data[idx] += 1; } } -// CHECK: void myKernel_wrapper(int *data) { +// CHECK: void myKernel_wrapper(int * data) { // CHECK: sycl::queue queue = *dpct::kernel_launcher::_que; // CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size; // CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr; @@ -89,7 +89,7 @@ int main() { cudaGraphAddEmptyNode(&node, graph, node10, 1); // CHECK: dpct::experimental::kernel_node_params params = {}; - // CHECK-NEXT: params.set_func((void *)dpct::wrapper_register(&myKernel_wrapper).get()); + // CHECK-NEXT: params.set_func((void*) dpct::wrapper_register(&myKernel_wrapper).get()); // CHECK-NEXT: params.set_block_dim(dpct::dim3(10)); // CHECK-NEXT: params.set_grid_dim(dpct::dim3(1)); // CHECK-NEXT: params.set_shared_mem_bytes(0); @@ -164,9 +164,9 @@ int main() { // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::update(execGraph, graph, &updateResult))); CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, &updateResult)); - // CHECK: if(updateResult == 1) { + // CHECK: if (updateResult == 1) { // CHECK-NEXT: } - // CHECK-NEXT: if(updateResult == 0) { + // CHECK-NEXT: if (updateResult == 0) { // CHECK-NEXT: } if (updateResult.result == cudaGraphExecUpdateSuccess) { } From b094b16d018aa42c53ed8e4002cbad64ff1b6988 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 29 Apr 2025 09:38:53 +0800 Subject: [PATCH 13/19] Fix LIT test Signed-off-by: Ahmed, Daiyaan --- clang/test/dpct/cudaGraph_test_default_option.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 545b29cf5522..97ae16a269b7 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -103,7 +103,7 @@ int main() { cudaGraphLaunch(execGraph, stream); // CHECK: /* - // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdate is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaGraphExecUpdateResult status; From 9bdd0a8cb5d3586e94ef954327ce8a95a9120eac Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 29 Apr 2025 12:30:57 +0800 Subject: [PATCH 14/19] Fix test for dim3 Signed-off-by: Ahmed, Daiyaan --- clang/runtime/dpct-rt/include/dpct/graph.hpp | 12 ++---- clang/test/dpct/dim3.cu | 43 +++++--------------- 2 files changed, 13 insertions(+), 42 deletions(-) diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index fe12d2fee824..f00e3519c3b1 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -26,19 +26,13 @@ typedef sycl::ext::oneapi::experimental::command_graph< typedef sycl::ext::oneapi::experimental::node *node_ptr; struct kernel_node_params { - dpct::dim3 block_dim{}; - dpct::dim3 grid_dim{}; - void **kernel_params{}; void *func{}; + dpct::dim3 grid_dim{}; + dpct::dim3 block_dim{}; unsigned int shared_mem_bytes{}; + void **kernel_params{}; std::vector dependencies{}; - kernel_node_params() = default; - kernel_node_params(const kernel_node_params &other) - : block_dim(other.block_dim), grid_dim(other.grid_dim), - kernel_params(other.kernel_params), func(other.func), - shared_mem_bytes(other.shared_mem_bytes), - dependencies(other.dependencies) {} public: void set_block_dim(const dpct::dim3 &block_dim) { diff --git a/clang/test/dpct/dim3.cu b/clang/test/dpct/dim3.cu index e93fe3e3bb44..9965c5d1d945 100644 --- a/clang/test/dpct/dim3.cu +++ b/clang/test/dpct/dim3.cu @@ -1,50 +1,27 @@ // UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 // UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 -// RUN: dpct --format-range=none -out-root %T/dim3 %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/dim3 %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only // RUN: FileCheck --input-file %T/dim3/dim3.dp.cpp --match-full-lines %s #include int main() { - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam0 = {}; + // CHECK: dpct::experimental::kernel_node_params kernelNodeParam0 = {}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam1 = {0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam2 = {0, 0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam3 = {0, 0, 0}; cudaKernelNodeParams kernelNodeParam0 = {}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam1 = {0}; cudaKernelNodeParams kernelNodeParam1 = {0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam2 = {0, 0}; cudaKernelNodeParams kernelNodeParam2 = {0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam4{}; + + // CHECK: dpct::experimental::kernel_node_params kernelNodeParam4{}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam5{0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam6{0, 0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam7{0, 0, 0}; cudaKernelNodeParams kernelNodeParam4{}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam5{0}; cudaKernelNodeParams kernelNodeParam5{0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam6{0, 0}; cudaKernelNodeParams kernelNodeParam6{0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; } From a07e14fcfde241851c0ccf41a2547b77bb83e891 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 29 Apr 2025 17:19:05 +0800 Subject: [PATCH 15/19] Reduce levels in graph rule for func Signed-off-by: Ahmed, Daiyaan --- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 68 ++++++++++++--------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index 3caf6847b743..df09097af76a 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -94,37 +94,45 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { return; } if (FieldName == "func") { - if (auto BO = dyn_cast( - getParentAsAssignedBO(ME, *Result.Context))) { - auto *LHS = BO->getLHS()->IgnoreCasts(); - if (auto *ME = dyn_cast(LHS)) { - auto *Base = ME->getBase()->IgnoreImpCasts(); - if (auto *DRE = dyn_cast(Base)) { - if (auto *VD = dyn_cast(DRE->getDecl())) { - std::string VarName = VD->getNameAsString(); - auto *RHS = BO->getRHS()->IgnoreCasts(); - if (auto *RHS_DRE = dyn_cast(RHS)) { - if (auto *FD = dyn_cast(RHS_DRE->getDecl())) { - std::string FuncName = FD->getNameAsString(); - std::string WrapperName = FuncName; - std::string AccessOperator = - VD->getType()->isPointerType() ? "->" : "."; - std::string ReplacementStr = - VarName + AccessOperator + - "set_func(" - "(void*) dpct::wrapper_register(&" + - WrapperName; - emplaceTransformation( - new ReplaceToken(BO->getBeginLoc(), BO->getEndLoc(), - std::move(ReplacementStr))); - emplaceTransformation(new InsertAfterStmt(BO, ")")); - return; - } - } - } - } - } + auto BO = dyn_cast( + getParentAsAssignedBO(ME, *Result.Context)); + if (!BO) { + return; } + auto *LHS = BO->getLHS()->IgnoreCasts(); + auto *ME_LHS = dyn_cast(LHS); + if (!ME_LHS) { + return; + } + auto *Base = ME_LHS->getBase()->IgnoreImpCasts(); + auto *DRE = dyn_cast(Base); + if (!DRE) { + return; + } + auto *VD = dyn_cast(DRE->getDecl()); + if (!VD) { + return; + } + std::string VarName = VD->getNameAsString(); + auto *RHS = BO->getRHS()->IgnoreCasts(); + auto *RHS_DRE = dyn_cast(RHS); + if (!RHS_DRE) { + return; + } + auto *FD = dyn_cast(RHS_DRE->getDecl()); + if (!FD) { + return; + } + std::string FuncName = FD->getNameAsString(); + std::string WrapperName = FuncName; + std::string AccessOperator = + VD->getType()->isPointerType() ? "->" : "."; + std::string ReplacementStr = + VarName + AccessOperator + + "set_func((void*) dpct::wrapper_register(&" + WrapperName; + emplaceTransformation(new ReplaceToken( + BO->getBeginLoc(), BO->getEndLoc(), std::move(ReplacementStr))); + emplaceTransformation(new InsertAfterStmt(BO, ")")); } if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { StringRef ReplacedArg = ""; From 325155f52cc60efeb509301edd389587c6542ed5 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Wed, 30 Apr 2025 04:00:05 +0800 Subject: [PATCH 16/19] Fix GPU graph test Signed-off-by: Ahmed, Daiyaan --- clang/runtime/dpct-rt/include/dpct/graph.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index f00e3519c3b1..86ec072efe6c 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -186,8 +186,7 @@ class graph_mgr { } execGraph = new sycl::ext::oneapi::experimental::command_graph< sycl::ext::oneapi::experimental::graph_state::executable>( - graph->finalize( - sycl::ext::oneapi::experimental::property::graph::updatable{})); + graph->finalize()); queue->submit( [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(*execGraph); }); } From d45fafaa0fb7d0f5ce711b65f5acd9c84a65f5ee Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 6 May 2025 07:32:39 +0800 Subject: [PATCH 17/19] Address comments Signed-off-by: Ahmed, Daiyaan --- .../DPCT/RuleInfra/APINamesTemplateType.inc | 7 + clang/lib/DPCT/RuleInfra/MapNames.cpp | 4 - clang/lib/DPCT/RulesLang/RulesLang.cpp | 129 ++++++++++++------ clang/lib/DPCT/RulesLang/RulesLang.h | 11 +- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 64 ++------- .../RulesLang/RulesLangGraphicsInterop.cpp | 53 +------ clang/lib/DPCT/RulesLang/RulesLangTexture.cpp | 51 +------ clang/test/dpct/cudaGraph_test.cu | 27 +++- .../dpct/cudaGraph_test_default_option.cu | 15 +- 9 files changed, 153 insertions(+), 208 deletions(-) diff --git a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc index 5a645aac5994..a0f5ace03da7 100644 --- a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc +++ b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc @@ -507,6 +507,13 @@ TYPE_REWRITE_ENTRY( WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, STR("--use-experimental-features=graph")))) +TYPE_REWRITE_ENTRY( + "cudaGraphExecUpdateResultInfo", + TYPE_CONDITIONAL_FACTORY( + checkEnableGraphForType(), TYPE_FACTORY(STR("int")), + WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, + STR("--use-experimental-features=graph")))) + TYPE_REWRITE_ENTRY( "cudaKernelNodeParams", TYPE_CONDITIONAL_FACTORY( diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index 2102611a762f..e30e8c783f4f 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -640,10 +640,6 @@ void MapNames::setExplicitNamespaceMap( DpctGlobalInfo::useExtGraph() ? getClNamespace() + "ext::oneapi::experimental::node_type" : "cudaGraphNodeType")}, - {"cudaGraphExecUpdateResultInfo", - std::make_shared(DpctGlobalInfo::useExtGraph() - ? "int" - : "cudaGraphExecUpdateResultInfo")}, {"cudaGraphExecUpdateResult", std::make_shared(DpctGlobalInfo::useExtGraph() ? "int" diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 2b08762620a5..90fd2a32d585 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -289,36 +289,35 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { "thrust::host_vector", "cublasHandle_t", "CUevent_st", "__half", "half", "__half2", "half2", "cudaMemoryAdvise", "cudaError_enum", "cudaDeviceProp", "cudaStreamCaptureStatus", - "cudaGraphExecUpdateResult", "cudaGraphExecUpdateResultInfo", - "cudaPitchedPtr", "thrust::counting_iterator", - "thrust::transform_iterator", "thrust::permutation_iterator", - "thrust::iterator_difference", "cusolverDnHandle_t", - "cusolverDnParams_t", "gesvdjInfo_t", "syevjInfo_t", - "thrust::device_malloc_allocator", "thrust::divides", - "thrust::tuple", "thrust::maximum", "thrust::multiplies", - "thrust::plus", "cudaDataType_t", "cudaError_t", "CUresult", - "CUdevice", "cudaEvent_t", "cublasStatus_t", "cuComplex", - "cuFloatComplex", "cuDoubleComplex", "CUevent", - "cublasFillMode_t", "cublasDiagType_t", "cublasSideMode_t", - "cublasOperation_t", "cusolverStatus_t", "cusolverEigType_t", - "cusolverEigMode_t", "curandStatus_t", "cudaStream_t", - "cusparseStatus_t", "cusparseDiagType_t", "cusparseFillMode_t", - "cusparseIndexBase_t", "cusparseMatrixType_t", - "cusparseAlgMode_t", "cusparseOperation_t", "cusparseMatDescr_t", - "cusparseHandle_t", "CUcontext", "cublasPointerMode_t", - "cusparsePointerMode_t", "cublasGemmAlgo_t", - "cusparseSolveAnalysisInfo_t", "cudaDataType", "cublasDataType_t", - "curandState_t", "curandState", "curandStateXORWOW_t", - "curandStateXORWOW", "curandStatePhilox4_32_10_t", - "curandStatePhilox4_32_10", "curandStateMRG32k3a_t", - "curandStateMRG32k3a", "thrust::minus", "thrust::negate", - "thrust::logical_or", "thrust::equal_to", "thrust::less", - "cudaSharedMemConfig", "curandGenerator_t", "curandRngType_t", - "curandOrdering_t", "cufftHandle", "cufftReal", "cufftDoubleReal", - "cufftComplex", "cufftDoubleComplex", "cufftResult_t", - "cufftResult", "cufftType_t", "cufftType", "thrust::pair", - "CUdeviceptr", "cudaDeviceAttr", "CUmodule", "CUjit_option", - "CUfunction", "cudaMemcpyKind", "cudaComputeMode", + "cudaGraphExecUpdateResult", "cudaPitchedPtr", + "thrust::counting_iterator", "thrust::transform_iterator", + "thrust::permutation_iterator", "thrust::iterator_difference", + "cusolverDnHandle_t", "cusolverDnParams_t", "gesvdjInfo_t", + "syevjInfo_t", "thrust::device_malloc_allocator", + "thrust::divides", "thrust::tuple", "thrust::maximum", + "thrust::multiplies", "thrust::plus", "cudaDataType_t", + "cudaError_t", "CUresult", "CUdevice", "cudaEvent_t", + "cublasStatus_t", "cuComplex", "cuFloatComplex", + "cuDoubleComplex", "CUevent", "cublasFillMode_t", + "cublasDiagType_t", "cublasSideMode_t", "cublasOperation_t", + "cusolverStatus_t", "cusolverEigType_t", "cusolverEigMode_t", + "curandStatus_t", "cudaStream_t", "cusparseStatus_t", + "cusparseDiagType_t", "cusparseFillMode_t", "cusparseIndexBase_t", + "cusparseMatrixType_t", "cusparseAlgMode_t", + "cusparseOperation_t", "cusparseMatDescr_t", "cusparseHandle_t", + "CUcontext", "cublasPointerMode_t", "cusparsePointerMode_t", + "cublasGemmAlgo_t", "cusparseSolveAnalysisInfo_t", "cudaDataType", + "cublasDataType_t", "curandState_t", "curandState", + "curandStateXORWOW_t", "curandStateXORWOW", + "curandStatePhilox4_32_10_t", "curandStatePhilox4_32_10", + "curandStateMRG32k3a_t", "curandStateMRG32k3a", "thrust::minus", + "thrust::negate", "thrust::logical_or", "thrust::equal_to", + "thrust::less", "cudaSharedMemConfig", "curandGenerator_t", + "curandRngType_t", "curandOrdering_t", "cufftHandle", "cufftReal", + "cufftDoubleReal", "cufftComplex", "cufftDoubleComplex", + "cufftResult_t", "cufftResult", "cufftType_t", "cufftType", + "thrust::pair", "CUdeviceptr", "cudaDeviceAttr", "CUmodule", + "CUjit_option", "CUfunction", "cudaMemcpyKind", "cudaComputeMode", "__nv_bfloat16", "cooperative_groups::__v1::thread_group", "cooperative_groups::__v1::thread_block", "libraryPropertyType_t", "libraryPropertyType", "cudaDataType_t", "cudaDataType", @@ -354,19 +353,19 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { this); MF.addMatcher( - typeLoc( - loc(qualType(hasDeclaration(namedDecl(hasAnyName( - "cooperative_groups::__v1::coalesced_group", - "cooperative_groups::__v1::grid_group", - "cooperative_groups::__v1::thread_block_tile", "cudaGraph_t", - "cudaGraphExec_t", "cudaGraphNode_t", "cudaGraphicsResource", - "cudaGraphicsResource_t", "CUgraphicsResource", - "cudaExternalMemory_t", "cudaExternalMemoryHandleDesc", - "cudaExternalMemoryMipmappedArrayDesc", - "cudaExternalMemoryBufferDesc", "cudaExternalSemaphore_t", - "cudaExternalSemaphoreHandleDesc", - "cudaExternalSemaphoreSignalParams", - "cudaExternalSemaphoreWaitParams", "cudaKernelNodeParams")))))) + typeLoc(loc(qualType(hasDeclaration(namedDecl(hasAnyName( + "cooperative_groups::__v1::coalesced_group", + "cooperative_groups::__v1::grid_group", + "cooperative_groups::__v1::thread_block_tile", "cudaGraph_t", + "cudaGraphExec_t", "cudaGraphNode_t", "cudaGraphicsResource", + "cudaGraphicsResource_t", "CUgraphicsResource", + "cudaExternalMemory_t", "cudaExternalMemoryHandleDesc", + "cudaExternalMemoryMipmappedArrayDesc", + "cudaExternalMemoryBufferDesc", "cudaExternalSemaphore_t", + "cudaExternalSemaphoreHandleDesc", + "cudaExternalSemaphoreSignalParams", + "cudaExternalSemaphoreWaitParams", "cudaKernelNodeParams", + "cudaGraphExecUpdateResultInfo")))))) .bind("cudaTypeDefEA"), this); MF.addMatcher(varDecl(hasType(classTemplateSpecializationDecl( @@ -2737,6 +2736,50 @@ const VarDecl *getAssignTargetDecl(const Stmt *E) { return nullptr; } +const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule) { + auto Parents = Context.getParents(*E); + if (Parents.size() > 0) + return getAssignedBO(Parents[0].get(), Context, Rule); + return nullptr; +} + +// Return the binary operator if E is the lhs of an assign expression, +// otherwise nullptr. +const Expr *getAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule) { + if (dyn_cast(E)) { + // Continue finding parents when E is MemberExpr. + return getParentAsAssignedBO(E, Context, Rule); + } else if (auto ICE = dyn_cast(E)) { + // Stop finding parents and return nullptr when E is ImplicitCastExpr, + // except for ArrayToPointerDecay cast. + if (ICE->getCastKind() == CK_ArrayToPointerDecay) { + return getParentAsAssignedBO(E, Context, Rule); + } + } else if (auto ASE = dyn_cast(E)) { + // Continue finding parents when E is ArraySubscriptExpr, and remove + // subscript operator anyway for texture object's member. + Rule->emplaceTransformation(new ReplaceToken( + Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, + Context.getSourceManager(), + Context.getLangOpts()), + ASE->getRBracketLoc(), "")); + return getParentAsAssignedBO(E, Context, Rule); + } else if (auto BO = dyn_cast(E)) { + // If E is BinaryOperator, return E only when it is assign expression, + // otherwise return nullptr. + auto Opcode = BO->getOpcode(); + if (Opcode == BO_Assign || Opcode == BO_OrAssign) + return BO; + } else if (auto COCE = dyn_cast(E)) { + if (COCE->getOperator() == OO_Equal) { + return COCE; + } + } + return nullptr; +} + const VarDecl *EventQueryTraversal::getAssignTarget(const CallExpr *Call) { auto ParentMap = Context.getParents(*Call); if (ParentMap.size() == 0) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.h b/clang/lib/DPCT/RulesLang/RulesLang.h index cf48bcce2b5d..6cc2135c80a5 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.h +++ b/clang/lib/DPCT/RulesLang/RulesLang.h @@ -40,6 +40,11 @@ TextModification *ReplaceMemberAssignAsSetMethod(const Expr *E, StringRef ExtraArg = "", StringRef ExtraFeild = ""); +const Expr *getAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule); +const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule); + /// Migration rule for iteration space built-in variables (threadIdx, etc). class IterationSpaceBuiltinRule : public NamedMigrationRule { @@ -853,8 +858,6 @@ class TextureMemberSetRule : public NamedMigrationRule { /// Texture migration rule class TextureRule : public NamedMigrationRule { // Get the binary operator if E is lhs of an assign expression. - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); bool removeExtraMemberAccess(const MemberExpr *ME); void replaceTextureMember(const MemberExpr *ME, ASTContext &Context, SourceManager &SM); @@ -1006,8 +1009,6 @@ class GraphAnalysisRule : public NamedMigrationRule { class GraphRule : public NamedMigrationRule { static MapNames::MapTy KernelNodeParamNames; - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); public: void registerMatcher(ast_matchers::MatchFinder &MF) override; @@ -1023,8 +1024,6 @@ class AssertRule : public NamedMigrationRule { class GraphicsInteropRule : public NamedMigrationRule { static MapNames::MapTy ExtResMemHandleDescNames, ExtResSemParamsNames; - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); void replaceExtResMemHandleDataExpr(const MemberExpr *ME, ASTContext &Context); void replaceExtResSemParamsDataExpr(const MemberExpr *ME, diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index df09097af76a..eb1536da10b2 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -72,11 +72,12 @@ void GraphRule::registerMatcher(MatchFinder &MF) { .bind("Type"), this); - MF.addMatcher(memberExpr(hasObjectExpression(hasType( - asString("cudaGraphExecUpdateResultInfo"))), - member(hasName("result"))) - .bind("execUpdateResult"), - this); + MF.addMatcher( + memberExpr(hasObjectExpression( + hasType(asString("cudaGraphExecUpdateResultInfo"))), + member(hasAnyName("result", "errorNode", "errorFromNode"))) + .bind("execUpdateResult"), + this); } void GraphRule::runRule(const MatchFinder::MatchResult &Result) { @@ -94,8 +95,13 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { return; } if (FieldName == "func") { - auto BO = dyn_cast( - getParentAsAssignedBO(ME, *Result.Context)); + auto BinaryOp = getParentAsAssignedBO(ME, *Result.Context, this); + if (!BinaryOp) { + emplaceTransformation(new RenameFieldInMemberExpr( + ME, buildString("get_", FieldName, "()"))); + return; + } + auto BO = dyn_cast(BinaryOp); if (!BO) { return; } @@ -134,7 +140,7 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { BO->getBeginLoc(), BO->getEndLoc(), std::move(ReplacementStr))); emplaceTransformation(new InsertAfterStmt(BO, ")")); } - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { StringRef ReplacedArg = ""; emplaceTransformation( ReplaceMemberAssignAsSetMethod(BO, ME, FieldName, ReplacedArg)); @@ -171,47 +177,5 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { EA.applyAllSubExprRepl(); } -const Expr *GraphRule::getParentAsAssignedBO(const Expr *E, - ASTContext &Context) { - auto Parents = Context.getParents(*E); - if (Parents.size() > 0) - return getAssignedBO(Parents[0].get(), Context); - return nullptr; -} - -// Return the binary operator if E is the lhs of an assign expression, -// otherwise nullptr. -const Expr *GraphRule::getAssignedBO(const Expr *E, ASTContext &Context) { - if (dyn_cast(E)) { - // Continue finding parents when E is MemberExpr. - return getParentAsAssignedBO(E, Context); - } else if (auto ICE = dyn_cast(E)) { - // Stop finding parents and return nullptr when E is ImplicitCastExpr, - // except for ArrayToPointerDecay cast. - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - return getParentAsAssignedBO(E, Context); - } - } else if (auto ASE = dyn_cast(E)) { - // Continue finding parents when E is ArraySubscriptExpr, and remove - // subscript operator anyway for texture object's member. - emplaceTransformation(new ReplaceToken( - Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, - Context.getSourceManager(), - Context.getLangOpts()), - ASE->getRBracketLoc(), "")); - return getParentAsAssignedBO(E, Context); - } else if (auto BO = dyn_cast(E)) { - // If E is BinaryOperator, return E only when it is assign expression, - // otherwise return nullptr. - if (BO->getOpcode() == BO_Assign) - return BO; - } else if (auto COCE = dyn_cast(E)) { - if (COCE->getOperator() == OO_Equal) { - return COCE; - } - } - return nullptr; -} - } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp index 17ebb94ab61e..34b48357dba1 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp @@ -104,7 +104,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { StringRef ReplacedArg = ""; if (FieldName == "flags") { @@ -150,7 +150,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { StringRef ReplacedArg = ""; if (FieldName == "image_type") { @@ -196,7 +196,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(BO, ME, FieldName)); } else { @@ -273,7 +273,7 @@ void GraphicsInteropRule::replaceExtResMemHandleDataExpr(const MemberExpr *ME, } requestFeature(HelperFeatureEnum::device_ext); - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (AssignedBO) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(AssignedBO, ME, FieldName)); @@ -328,7 +328,7 @@ void GraphicsInteropRule::replaceExtResSemParamsDataExpr(const MemberExpr *ME, } requestFeature(HelperFeatureEnum::device_ext); - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (AssignedBO) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(AssignedBO, ME, FieldName)); @@ -338,48 +338,5 @@ void GraphicsInteropRule::replaceExtResSemParamsDataExpr(const MemberExpr *ME, } } -const Expr *GraphicsInteropRule::getParentAsAssignedBO(const Expr *E, - ASTContext &Context) { - auto Parents = Context.getParents(*E); - if (Parents.size() > 0) - return getAssignedBO(Parents[0].get(), Context); - return nullptr; -} - -// Return the binary operator if E is the lhs of an assign expression, otherwise -// nullptr. -const Expr *GraphicsInteropRule::getAssignedBO(const Expr *E, - ASTContext &Context) { - if (dyn_cast(E)) { - // Continue finding parents when E is MemberExpr. - return getParentAsAssignedBO(E, Context); - } else if (auto ICE = dyn_cast(E)) { - // Stop finding parents and return nullptr when E is ImplicitCastExpr, - // except for ArrayToPointerDecay cast. - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - return getParentAsAssignedBO(E, Context); - } - } else if (auto ASE = dyn_cast(E)) { - // Continue finding parents when E is ArraySubscriptExpr, and remove - // subscript operator anyway for texture object's member. - emplaceTransformation(new ReplaceToken( - Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, - Context.getSourceManager(), - Context.getLangOpts()), - ASE->getRBracketLoc(), "")); - return getParentAsAssignedBO(E, Context); - } else if (auto BO = dyn_cast(E)) { - // If E is BinaryOperator, return E only when it is assign expression, - // otherwise return nullptr. - if (BO->getOpcode() == BO_Assign) - return BO; - } else if (auto COCE = dyn_cast(E)) { - if (COCE->getOperator() == OO_Equal) { - return COCE; - } - } - return nullptr; -} - } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp index 23a634b3595e..d70dfa58e22d 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp @@ -648,7 +648,7 @@ bool TextureRule::tryMerge(const MemberExpr *ME, const Expr *BO) { void TextureRule::replaceTextureMember(const MemberExpr *ME, ASTContext &Context, SourceManager &SM) { - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (!DpctGlobalInfo::useExtBindlessImages() && tryMerge(ME, AssignedBO)) return; @@ -715,49 +715,6 @@ void TextureRule::replaceTextureMember(const MemberExpr *ME, } } -const Expr *TextureRule::getParentAsAssignedBO(const Expr *E, - ASTContext &Context) { - auto Parents = Context.getParents(*E); - if (Parents.size() > 0) - return getAssignedBO(Parents[0].get(), Context); - return nullptr; -} - -// Return the binary operator if E is the lhs of an assign expression, otherwise -// nullptr. -const Expr *TextureRule::getAssignedBO(const Expr *E, ASTContext &Context) { - if (dyn_cast(E)) { - // Continue finding parents when E is MemberExpr. - return getParentAsAssignedBO(E, Context); - } else if (auto ICE = dyn_cast(E)) { - // Stop finding parents and return nullptr when E is ImplicitCastExpr, - // except for ArrayToPointerDecay cast. - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - return getParentAsAssignedBO(E, Context); - } - } else if (auto ASE = dyn_cast(E)) { - // Continue finding parents when E is ArraySubscriptExpr, and remove - // subscript operator anyway for texture object's member. - emplaceTransformation(new ReplaceToken( - Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, - Context.getSourceManager(), - Context.getLangOpts()), - ASE->getRBracketLoc(), "")); - return getParentAsAssignedBO(E, Context); - } else if (auto BO = dyn_cast(E)) { - // If E is BinaryOperator, return E only when it is assign expression, - // otherwise return nullptr. - auto Opcode = BO->getOpcode(); - if (Opcode == BO_Assign || Opcode == BO_OrAssign) - return BO; - } else if (auto COCE = dyn_cast(E)) { - if (COCE->getOperator() == OO_Equal) { - return COCE; - } - } - return nullptr; -} - bool TextureRule::processTexVarDeclInDevice(const VarDecl *VD) { if (auto FD = dyn_cast_or_null(VD->getParentFunctionOrMethod())) { @@ -867,7 +824,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { removeExtraMemberAccess(ME); replaceResourceDataExpr(getParentMemberExpr(ME), *Result.Context); } else if (MemberName == "resType") { - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation( ReplaceMemberAssignAsSetMethod(BO, ME, "data_type")); @@ -899,7 +856,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { static std::map ExtraArgMap = { {"x", "1"}, {"y", "2"}, {"z", "3"}, {"w", "4"}, {"f", ""}}; std::string MemberName = ME->getMemberNameInfo().getAsString(); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { requestFeature(HelperFeatureEnum::device_ext); requestFeature(MethodNameToSetFeatureMap.at(MemberName)); emplaceTransformation(ReplaceMemberAssignAsSetMethod( @@ -1031,7 +988,7 @@ void TextureRule::replaceResourceDataExpr(const MemberExpr *ME, removeExtraMemberAccess(ME); - auto AssignedBO = getParentAsAssignedBO(TopMember, Context); + auto AssignedBO = getParentAsAssignedBO(TopMember, Context, this); auto FieldName = ResourceTypeNames[TopMember->getMemberNameInfo().getAsString()]; if (FieldName.empty() || diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index 8cb4b7dd905f..a42687818161 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -1,5 +1,5 @@ -// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 -// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1, cuda-11.2, cuda-11.3, cuda-11.4, cuda-11.5, cuda-11.6, cuda-11.7, cuda-11.8 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1, v11.2, v11.3, v11.4, v11.5, v11.6, v11.7, v11.8 // RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/cudaGraph_test %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 // RUN: FileCheck --input-file %T/cudaGraph_test/cudaGraph_test.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -DNO_BUILD_TEST -fsycl %T/cudaGraph_test/cudaGraph_test.dp.cpp -o %T/cudaGraph_test/cudaGraph_test.dp.o %} @@ -103,11 +103,17 @@ int main() { void *kernelArgs[] = {}; params.kernelParams = kernelArgs; - // CHECK: void *function = (void *)dpct::wrapper_register(myKernel_wrapper).get(); + // CHECK: void* function = (void*) dpct::wrapper_register(myKernel_wrapper).get(); // CHECK-NEXT: params.set_func(function); - void *function = (void *)myKernel; + void* function = (void*) myKernel; params.func = function; + // CHECK: dpct::dim3 blockDim = params.get_block_dim(); + dim3 blockDim = params.blockDim; + + // CHECK: void* func2 = params.get_func(); + void* func2 = params.func; + size_t numNodes; // CHECK: dpct::experimental::get_nodes(graph, node4, &numNodes); @@ -155,9 +161,12 @@ int main() { CUDA_CHECK_THROW(cudaGraphLaunch(execGraph, stream)); cudaGraphLaunch(*execGraph2, *stream2); -#ifndef DNO_BUILD_TEST - + // CHECK: int updateResult; cudaGraphExecUpdateResultInfo updateResult; + + // CHECK: int result; + cudaGraphExecUpdateResult result; + // CHECK: dpct::experimental::update(execGraph, graph, &updateResult); cudaGraphExecUpdate(execGraph, graph, &updateResult); @@ -172,7 +181,11 @@ int main() { } if (updateResult.result == cudaGraphExecUpdateErrorTopologyChanged) { } -#endif + + // CHECK: if (updateResult != nullptr) { + // CHECK-NEXT: } + if (updateResult.errorFromNode != nullptr) { + } // CHECK: sycl::ext::oneapi::experimental::node_type nodeType; // CHECK-NEXT: dpct::experimental::get_node_type(node, &nodeType); diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 97ae16a269b7..940ded25d4b7 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -1,5 +1,5 @@ -// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 -// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1, cuda-11.2, cuda-11.3, cuda-11.4, cuda-11.5, cuda-11.6, cuda-11.7, cuda-11.8 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1, v11.2, v11.3, v11.4, v11.5, v11.6, v11.7, v11.8 // RUN: dpct --format-range=none -out-root %T/cudaGraph_test_default_option %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 // RUN: FileCheck --input-file %T/cudaGraph_test_default_option/cudaGraph_test_default_option.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -DNO_BUILD_TEST -fsycl %T/cudaGraph_test_default_option/cudaGraph_test_default_option.dp.cpp -o %T/cudaGraph_test_default_option/cudaGraph_test.dp.o %} @@ -102,6 +102,10 @@ int main() { // CHECK-NEXT: */ cudaGraphLaunch(execGraph, stream); + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResultInfo is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphExecUpdateResultInfo updateResult; + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ @@ -110,7 +114,7 @@ int main() { // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdate is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ - cudaGraphExecUpdate(execGraph, graph, nullptr, &status); + cudaGraphExecUpdate(execGraph, graph, &updateResult); // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecDestroy is not supported, please try to remigrate with option: --use-experimental-features=graph. @@ -132,6 +136,11 @@ int main() { // CHECK-NEXT: */ nodeType = cudaGraphNodeTypeKernel; + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaKernelNodeParams is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaKernelNodeParams kernelNodeParam0 = {}; + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphDestroy is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ From d25730a4c95151cdf8340f60e6e6bdb00e2cdc63 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 6 May 2025 09:32:49 +0800 Subject: [PATCH 18/19] Fix LIT test Signed-off-by: Ahmed, Daiyaan --- clang/lib/DPCT/RulesLang/RulesLangGraph.cpp | 10 +++++++++- clang/test/dpct/cudaGraph_test_default_option.cu | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index eb1536da10b2..2395c106e802 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -125,6 +125,12 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { if (!RHS_DRE) { return; } + if (auto RhsVarDecl = dyn_cast(RHS_DRE->getDecl())) { + StringRef ReplacedArg = ""; + emplaceTransformation( + ReplaceMemberAssignAsSetMethod(BO, ME, FieldName, ReplacedArg)); + return; + } auto *FD = dyn_cast(RHS_DRE->getDecl()); if (!FD) { return; @@ -154,7 +160,9 @@ void GraphRule::runRule(const MatchFinder::MatchResult &Result) { if (auto ME = getNodeAsType(Result, "execUpdateResult")) { auto MD = ME->getMemberDecl(); const Expr *Base = ME->getBase(); - if (MD->getNameAsString() == "result") { + if (MD->getNameAsString() == "result" || + MD->getNameAsString() == "errorNode" || + MD->getNameAsString() == "errorFromNode") { if (auto *DRE = dyn_cast(Base)) { SourceLocation StartLoc = Base->getBeginLoc(); SourceLocation EndLoc = ME->getEndLoc(); diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 940ded25d4b7..157bcd86650f 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -102,7 +102,8 @@ int main() { // CHECK-NEXT: */ cudaGraphLaunch(execGraph, stream); - // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResultInfo is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResultInfo is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaGraphExecUpdateResultInfo updateResult; From 7b2dd6c5a48639ae99e0891a9933a600b2b93414 Mon Sep 17 00:00:00 2001 From: "Ahmed, Daiyaan" Date: Tue, 6 May 2025 09:56:34 +0800 Subject: [PATCH 19/19] Fix Build LIT test Signed-off-by: Ahmed, Daiyaan --- clang/test/dpct/cudaGraph_test.cu | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index a42687818161..46c337378bc9 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -182,11 +182,6 @@ int main() { if (updateResult.result == cudaGraphExecUpdateErrorTopologyChanged) { } - // CHECK: if (updateResult != nullptr) { - // CHECK-NEXT: } - if (updateResult.errorFromNode != nullptr) { - } - // CHECK: sycl::ext::oneapi::experimental::node_type nodeType; // CHECK-NEXT: dpct::experimental::get_node_type(node, &nodeType); // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::get_node_type(node, &nodeType)));