diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 427f0c4fcc8c..8d6a87f856fb 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -158,6 +158,7 @@ REGISTER_RULE(TypeRemoveRule, PassKind::PK_Analysis) REGISTER_RULE(CompatWithClangRule, PassKind::PK_Migration) REGISTER_RULE(AssertRule, PassKind::PK_Migration) REGISTER_RULE(GraphRule, PassKind::PK_Migration) +REGISTER_RULE(GraphAnalysisRule, PassKind::PK_Analysis) REGISTER_RULE(GraphicsInteropRule, PassKind::PK_Migration) REGISTER_RULE(RulesLangAddrSpaceConvRule, PassKind::PK_Migration) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 67fe389b928e..69e578c7e739 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -2504,7 +2504,7 @@ unsigned DpctGlobalInfo::ExperimentalFlag = 0; unsigned DpctGlobalInfo::HelperFuncPreferenceFlag = 0; bool DpctGlobalInfo::AnalysisModeFlag = false; bool DpctGlobalInfo::UseSYCLCompatFlag = false; -bool DpctGlobalInfo::CVersionCUDALaunchUsedFlag = false; +bool DpctGlobalInfo::UseWrapperRegisterFnPtrFlag = false; unsigned int DpctGlobalInfo::ColorOption = 1; std::unordered_map> DpctGlobalInfo::CubPlaceholderIndexMap; diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index a04c596dc344..6352604534ce 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1355,8 +1355,10 @@ class DpctGlobalInfo { static bool useNoQueueDevice() { return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice); } - static void setCVersionCUDALaunchUsed() { CVersionCUDALaunchUsedFlag = true; } - static bool isCVersionCUDALaunchUsed() { return CVersionCUDALaunchUsedFlag; } + static void setUseWrapperRegisterFnPtr() { + UseWrapperRegisterFnPtrFlag = true; + } + static bool useWrapperRegisterFnPtr() { return UseWrapperRegisterFnPtrFlag; } static void setUseSYCLCompat(bool Flag = true) { UseSYCLCompatFlag = Flag; } static bool useSYCLCompat() { return UseSYCLCompatFlag; } static bool useEnqueueBarrier() { @@ -1686,7 +1688,7 @@ class DpctGlobalInfo { static unsigned HelperFuncPreferenceFlag; static bool AnalysisModeFlag; static bool UseSYCLCompatFlag; - static bool CVersionCUDALaunchUsedFlag; + static bool UseWrapperRegisterFnPtrFlag; static unsigned int ColorOption; static std::unordered_map> CubPlaceholderIndexMap; diff --git a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc index 45757ac75b06..a0f5ace03da7 100644 --- a/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc +++ b/clang/lib/DPCT/RuleInfra/APINamesTemplateType.inc @@ -507,6 +507,22 @@ TYPE_REWRITE_ENTRY( WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, STR("--use-experimental-features=graph")))) +TYPE_REWRITE_ENTRY( + "cudaGraphExecUpdateResultInfo", + TYPE_CONDITIONAL_FACTORY( + checkEnableGraphForType(), TYPE_FACTORY(STR("int")), + WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, + STR("--use-experimental-features=graph")))) + +TYPE_REWRITE_ENTRY( + "cudaKernelNodeParams", + TYPE_CONDITIONAL_FACTORY( + checkEnableGraphForType(), + TYPE_FACTORY(STR(MapNames::getDpctNamespace() + + "experimental::kernel_node_params")), + WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, + STR("--use-experimental-features=graph")))) + // Graphics Interop Handle TYPE_REWRITE_ENTRY( "cudaGraphicsResource", diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index 615dd9a43dae..e30e8c783f4f 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -640,6 +640,10 @@ void MapNames::setExplicitNamespaceMap( DpctGlobalInfo::useExtGraph() ? getClNamespace() + "ext::oneapi::experimental::node_type" : "cudaGraphNodeType")}, + {"cudaGraphExecUpdateResult", + std::make_shared(DpctGlobalInfo::useExtGraph() + ? "int" + : "cudaGraphExecUpdateResult")}, {"CUmem_advise", std::make_shared("int")}, {"CUmemorytype", std::make_shared(getClNamespace() + "usm::alloc")}, @@ -1151,6 +1155,47 @@ void MapNames::setExplicitNamespaceMap( ? getClNamespace() + "ext::oneapi::experimental::node_type::empty" : "cudaGraphNodeTypeEmpty")}, + {"cudaGraphExecUpdateSuccess", + std::make_shared( + DpctGlobalInfo::useExtGraph() ? "1" : "cudaGraphExecUpdateSuccess")}, + {"cudaGraphExecUpdateError", + std::make_shared( + DpctGlobalInfo::useExtGraph() ? "0" : "cudaGraphExecUpdateError")}, + {"cudaGraphExecUpdateErrorTopologyChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorTopologyChanged")}, + {"cudaGraphExecUpdateErrorNodeTypeChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorNodeTypeChanged")}, + {"cudaGraphExecUpdateErrorFunctionChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorFunctionChanged")}, + {"cudaGraphExecUpdateErrorParametersChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorParametersChanged")}, + {"cudaGraphExecUpdateErrorNotSupported", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorNotSupported")}, + {"cudaGraphExecUpdateErrorUnsupportedFunctionChange", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorUnsupportedFunctionChange")}, + {"cudaGraphExecUpdateErrorAttributesChanged", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? "0" + : "cudaGraphExecUpdateErrorAttributesChanged")}, // enum CUmem_advise_enum {"CU_MEM_ADVISE_SET_READ_MOSTLY", std::make_shared("0")}, {"CU_MEM_ADVISE_UNSET_READ_MOSTLY", std::make_shared("0")}, diff --git a/clang/lib/DPCT/RulesLang/APINamesGraph.inc b/clang/lib/DPCT/RulesLang/APINamesGraph.inc index 566460c831b6..42d5c69f625f 100644 --- a/clang/lib/DPCT/RulesLang/APINamesGraph.inc +++ b/clang/lib/DPCT/RulesLang/APINamesGraph.inc @@ -6,19 +6,16 @@ // //===----------------------------------------------------------------------===// -CONDITIONAL_FACTORY_ENTRY( +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY( - "cudaGraphInstantiate", DEREF(0), - NEW(MapNames::getClNamespace() + - "ext::oneapi::experimental::command_graph<" + - MapNames::getClNamespace() + - "ext::oneapi::experimental::graph_state::executable>", - MEMBER_CALL(ARG(1), true, "finalize")))), + CALL_FACTORY_ENTRY("cudaGraphInstantiate", + CALL(MapNames::getDpctNamespace() + + "experimental::instantiate", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphInstantiate", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphInstantiate"), - ARG("--use-experimental-features=graph"))) + ARG("--use-experimental-features=graph")))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, DELETE_FACTORY_ENTRY("cudaGraphExecDestroy", ARG(0)), @@ -29,8 +26,9 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - MEMBER_CALL_FACTORY_ENTRY("cudaGraphLaunch", ARG(1), true, - "ext_oneapi_graph", DEREF(0)), + CALL_FACTORY_ENTRY("cudaGraphLaunch", CALL(MapNames::getDpctNamespace() + + "experimental::launch", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphLaunch", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphLaunch"), @@ -60,22 +58,25 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - MEMBER_CALL_FACTORY_ENTRY("cudaGraphExecUpdate", ARG(0), true, "update", - DEREF(1)), + CALL_FACTORY_ENTRY("cudaGraphExecUpdate", + CALL(MapNames::getDpctNamespace() + + "experimental::update", + ARG(0), ARG(1), ARG(2))), UNSUPPORT_FACTORY_ENTRY("cudaGraphExecUpdate", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphExecUpdate"), ARG("--use-experimental-features=graph")))) -CONDITIONAL_FACTORY_ENTRY( +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, - ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY("cudaGraphNodeGetType", DEREF(1), - MEMBER_CALL(ARG(0), true, - "get_type"))), + CALL_FACTORY_ENTRY("cudaGraphNodeGetType", + CALL(MapNames::getDpctNamespace() + + "experimental::get_node_type", + ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY("cudaGraphNodeGetType", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphNodeGetType"), - ARG("--use-experimental-features=graph"))) + ARG("--use-experimental-features=graph")))) ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( UseExtGraph, @@ -104,3 +105,36 @@ ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphDestroy"), ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphAddKernelNode", + CALL(MapNames::getDpctNamespace() + + "experimental::add_kernel_node", + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphAddKernelNode", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphAddKernelNode"), + ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::kernel_node_get_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeGetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeGetParams"), + ARG("--use-experimental-features=graph")))) + +ASSIGNABLE_FACTORY(CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + CALL(MapNames::getDpctNamespace() + + "experimental::kernel_node_set_params", + ARG(0), ARG(1))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphKernelNodeSetParams", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphKernelNodeSetParams"), + ARG("--use-experimental-features=graph")))) diff --git a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp index ad6c56cf38f2..551d886083d5 100644 --- a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp +++ b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp @@ -362,5 +362,13 @@ const std::unordered_map {"sampler", HelperFeatureEnum::device_ext}, }; +// Graph kernel node params mapping +MapNamesLang::MapTy GraphRule::KernelNodeParamNames{ + {"gridDim", "grid_dim"}, + {"blockDim", "block_dim"}, + {"kernelParams", "kernel_params"}, + {"sharedMemBytes", "shared_mem_bytes"}, + {"func", "func"}}; + } // namespace dpct -} // namespace clang \ No newline at end of file +} // namespace clang diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 012f4e0e2370..90fd2a32d585 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -364,7 +364,8 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { "cudaExternalMemoryBufferDesc", "cudaExternalSemaphore_t", "cudaExternalSemaphoreHandleDesc", "cudaExternalSemaphoreSignalParams", - "cudaExternalSemaphoreWaitParams")))))) + "cudaExternalSemaphoreWaitParams", "cudaKernelNodeParams", + "cudaGraphExecUpdateResultInfo")))))) .bind("cudaTypeDefEA"), this); MF.addMatcher(varDecl(hasType(classTemplateSpecializationDecl( @@ -930,9 +931,11 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { } if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { - report(TL->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - CanonicalTypeStr); - return; + if (!DpctGlobalInfo::useExtGraph()) { + report(TL->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, + "cudaGraphExecUpdateResult", + "--use-experimental-features=graph"); + } } if (CanonicalTypeStr == "cudaGraphicsRegisterFlags" || @@ -1934,7 +1937,8 @@ void EnumConstantRule::registerMatcher(MatchFinder &MF) { "cufftType", "cudaMemoryType", "CUctx_flags_enum", "CUpointer_attribute_enum", "CUmemorytype_enum", "cudaGraphicsMapFlags", "cudaGraphicsRegisterFlags", - "cudaGraphNodeType", "CUdevice_P2PAttribute_enum"))), + "cudaGraphNodeType", "CUdevice_P2PAttribute_enum", + "cudaGraphExecUpdateResult"))), matchesName("CUDNN_.*"), matchesName("CUSOLVER_.*"))))) .bind("EnumConstant"), this); @@ -2054,7 +2058,16 @@ void EnumConstantRule::runRule(const MatchFinder::MatchResult &Result) { EnumName == "cudaGraphNodeTypeMemset" || EnumName == "cudaGraphNodeTypeHost" || EnumName == "cudaGraphNodeTypeGraph" || - EnumName == "cudaGraphNodeTypeEmpty")) { + EnumName == "cudaGraphNodeTypeEmpty" || + EnumName == "cudaGraphExecUpdateSuccess" || + EnumName == "cudaGraphExecUpdateError" || + EnumName == "cudaGraphExecUpdateErrorTopologyChanged" || + EnumName == "cudaGraphExecUpdateErrorNodeTypeChanged" || + EnumName == "cudaGraphExecUpdateErrorFunctionChanged" || + EnumName == "cudaGraphExecUpdateErrorParametersChanged" || + EnumName == "cudaGraphExecUpdateErrorNotSupported" || + EnumName == "cudaGraphExecUpdateErrorUnsupportedFunctionChange" || + EnumName == "cudaGraphExecUpdateErrorAttributesChanged")) { report(E->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, EnumName, "--use-experimental-features=graph"); return; @@ -2723,6 +2736,50 @@ const VarDecl *getAssignTargetDecl(const Stmt *E) { return nullptr; } +const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule) { + auto Parents = Context.getParents(*E); + if (Parents.size() > 0) + return getAssignedBO(Parents[0].get(), Context, Rule); + return nullptr; +} + +// Return the binary operator if E is the lhs of an assign expression, +// otherwise nullptr. +const Expr *getAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule) { + if (dyn_cast(E)) { + // Continue finding parents when E is MemberExpr. + return getParentAsAssignedBO(E, Context, Rule); + } else if (auto ICE = dyn_cast(E)) { + // Stop finding parents and return nullptr when E is ImplicitCastExpr, + // except for ArrayToPointerDecay cast. + if (ICE->getCastKind() == CK_ArrayToPointerDecay) { + return getParentAsAssignedBO(E, Context, Rule); + } + } else if (auto ASE = dyn_cast(E)) { + // Continue finding parents when E is ArraySubscriptExpr, and remove + // subscript operator anyway for texture object's member. + Rule->emplaceTransformation(new ReplaceToken( + Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, + Context.getSourceManager(), + Context.getLangOpts()), + ASE->getRBracketLoc(), "")); + return getParentAsAssignedBO(E, Context, Rule); + } else if (auto BO = dyn_cast(E)) { + // If E is BinaryOperator, return E only when it is assign expression, + // otherwise return nullptr. + auto Opcode = BO->getOpcode(); + if (Opcode == BO_Assign || Opcode == BO_OrAssign) + return BO; + } else if (auto COCE = dyn_cast(E)) { + if (COCE->getOperator() == OO_Equal) { + return COCE; + } + } + return nullptr; +} + const VarDecl *EventQueryTraversal::getAssignTarget(const CallExpr *Call) { auto ParentMap = Context.getParents(*Call); if (ParentMap.size() == 0) @@ -4521,6 +4578,7 @@ void StreamAPICallRule::runRule(const MatchFinder::MatchResult &Result) { } void KernelCallRefRule::registerMatcher(ast_matchers::MatchFinder &MF) { + MF.addMatcher( functionDecl( forEachDescendant( @@ -4529,6 +4587,7 @@ void KernelCallRefRule::registerMatcher(ast_matchers::MatchFinder &MF) { .bind("kernelRef"))) .bind("outerFunc"), this); + MF.addMatcher(unresolvedLookupExpr(unless(hasAncestor(cudaKernelCallExpr()))) .bind("unresolvedRef"), this); @@ -4637,7 +4696,7 @@ void KernelCallRefRule::runRule( (OuterFD->getTemplatedKind() == FunctionDecl::TemplatedKind::TK_FunctionTemplate)) { std::string TypeRepl; - if (DpctGlobalInfo::isCVersionCUDALaunchUsed()) { + if (DpctGlobalInfo::useWrapperRegisterFnPtr()) { if ((IsTemplateRelated && (!DRE->hasExplicitTemplateArgs() || (DRE->getNumTemplateArgs() <= TemplateParamNum))) || @@ -4646,7 +4705,7 @@ void KernelCallRefRule::runRule( } } insertWrapperPostfix( - DRE, std::move(TypeRepl), DpctGlobalInfo::isCVersionCUDALaunchUsed()); + DRE, std::move(TypeRepl), DpctGlobalInfo::useWrapperRegisterFnPtr()); } } if (auto ULE = @@ -4683,7 +4742,7 @@ void KernelCallRefRule::runRule( } } insertWrapperPostfix( - ULE, getTypeRepl(ULE), DpctGlobalInfo::isCVersionCUDALaunchUsed()); + ULE, getTypeRepl(ULE), DpctGlobalInfo::useWrapperRegisterFnPtr()); } } @@ -4956,7 +5015,7 @@ void KernelCallRule::runRule( if (!getAddressedRef(CalleeDRE)) { if (IsFuncTypeErased) { - DpctGlobalInfo::setCVersionCUDALaunchUsed(); + DpctGlobalInfo::setUseWrapperRegisterFnPtr(); } std::string ReplStr; llvm::raw_string_ostream OS(ReplStr); diff --git a/clang/lib/DPCT/RulesLang/RulesLang.h b/clang/lib/DPCT/RulesLang/RulesLang.h index a9e83884103d..6cc2135c80a5 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.h +++ b/clang/lib/DPCT/RulesLang/RulesLang.h @@ -40,6 +40,11 @@ TextModification *ReplaceMemberAssignAsSetMethod(const Expr *E, StringRef ExtraArg = "", StringRef ExtraFeild = ""); +const Expr *getAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule); +const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context, + MigrationRule *Rule); + /// Migration rule for iteration space built-in variables (threadIdx, etc). class IterationSpaceBuiltinRule : public NamedMigrationRule { @@ -853,8 +858,6 @@ class TextureMemberSetRule : public NamedMigrationRule { /// Texture migration rule class TextureRule : public NamedMigrationRule { // Get the binary operator if E is lhs of an assign expression. - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); bool removeExtraMemberAccess(const MemberExpr *ME); void replaceTextureMember(const MemberExpr *ME, ASTContext &Context, SourceManager &SM); @@ -998,7 +1001,15 @@ class CompatWithClangRule : public NamedMigrationRule { void runRule(const ast_matchers::MatchFinder::MatchResult &Result); }; +class GraphAnalysisRule : public NamedMigrationRule { +public: + void registerMatcher(ast_matchers::MatchFinder &MF) override; + void runRule(const ast_matchers::MatchFinder::MatchResult &Result); +}; + class GraphRule : public NamedMigrationRule { + static MapNames::MapTy KernelNodeParamNames; + public: void registerMatcher(ast_matchers::MatchFinder &MF) override; void runRule(const ast_matchers::MatchFinder::MatchResult &Result); @@ -1013,8 +1024,6 @@ class AssertRule : public NamedMigrationRule { class GraphicsInteropRule : public NamedMigrationRule { static MapNames::MapTy ExtResMemHandleDescNames, ExtResSemParamsNames; - const Expr *getAssignedBO(const Expr *E, ASTContext &Context); - const Expr *getParentAsAssignedBO(const Expr *E, ASTContext &Context); void replaceExtResMemHandleDataExpr(const MemberExpr *ME, ASTContext &Context); void replaceExtResSemParamsDataExpr(const MemberExpr *ME, diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp index 892a71e34d9c..2395c106e802 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraph.cpp @@ -28,20 +28,154 @@ extern DpctOption AsyncHandler; namespace clang { namespace dpct { +void GraphAnalysisRule::registerMatcher(MatchFinder &MF) { + auto kernelNodeTypeName = [&]() { + return hasAnyName("cudaKernelNodeParams"); + }; + MF.addMatcher( + memberExpr( + hasObjectExpression(hasType(type(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(recordDecl(kernelNodeTypeName())))))))) + .bind("KernelNodeType"), + this); +} + +void GraphAnalysisRule::runRule(const MatchFinder::MatchResult &Result) { + if (auto ME = getNodeAsType(Result, "KernelNodeType")) { + auto BaseTy = DpctGlobalInfo::getUnqualifiedTypeName( + ME->getBase()->getType().getDesugaredType(*Result.Context), + *Result.Context); + auto MemberName = ME->getMemberNameInfo().getAsString(); + if (BaseTy == "cudaKernelNodeParams") { + DpctGlobalInfo::setUseWrapperRegisterFnPtr(); + } + } +} + void GraphRule::registerMatcher(MatchFinder &MF) { auto functionName = [&]() { - return hasAnyName("cudaGraphInstantiate", "cudaGraphLaunch", - "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", - "cudaGraphAddDependencies", "cudaGraphExecUpdate", - "cudaGraphNodeGetType", "cudaGraphGetNodes", - "cudaGraphGetRootNodes", "cudaGraphDestroy"); + return hasAnyName( + "cudaGraphInstantiate", "cudaGraphLaunch", "cudaGraphExecDestroy", + "cudaGraphAddEmptyNode", "cudaGraphAddDependencies", + "cudaGraphExecUpdate", "cudaGraphNodeGetType", "cudaGraphGetNodes", + "cudaGraphGetRootNodes", "cudaGraphDestroy", "cudaGraphAddKernelNode", + "cudaGraphKernelNodeGetParams", "cudaGraphKernelNodeSetParams"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), this); + + auto typeName = [&]() { return hasAnyName("cudaKernelNodeParams"); }; + MF.addMatcher( + memberExpr(hasObjectExpression(hasType(type(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(recordDecl(typeName())))))))) + .bind("Type"), + this); + + MF.addMatcher( + memberExpr(hasObjectExpression( + hasType(asString("cudaGraphExecUpdateResultInfo"))), + member(hasAnyName("result", "errorNode", "errorFromNode"))) + .bind("execUpdateResult"), + this); } void GraphRule::runRule(const MatchFinder::MatchResult &Result) { + if (auto ME = getNodeAsType(Result, "Type")) { + auto BaseTy = DpctGlobalInfo::getUnqualifiedTypeName( + ME->getBase()->getType().getDesugaredType(*Result.Context), + *Result.Context); + auto MemberName = ME->getMemberNameInfo().getAsString(); + if (BaseTy == "cudaKernelNodeParams") { + auto FieldName = KernelNodeParamNames[MemberName]; + if (FieldName.empty()) { + report(ME->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, + DpctGlobalInfo::getOriginalTypeName(ME->getBase()->getType()) + + "::" + ME->getMemberDecl()->getName().str()); + return; + } + if (FieldName == "func") { + auto BinaryOp = getParentAsAssignedBO(ME, *Result.Context, this); + if (!BinaryOp) { + emplaceTransformation(new RenameFieldInMemberExpr( + ME, buildString("get_", FieldName, "()"))); + return; + } + auto BO = dyn_cast(BinaryOp); + if (!BO) { + return; + } + auto *LHS = BO->getLHS()->IgnoreCasts(); + auto *ME_LHS = dyn_cast(LHS); + if (!ME_LHS) { + return; + } + auto *Base = ME_LHS->getBase()->IgnoreImpCasts(); + auto *DRE = dyn_cast(Base); + if (!DRE) { + return; + } + auto *VD = dyn_cast(DRE->getDecl()); + if (!VD) { + return; + } + std::string VarName = VD->getNameAsString(); + auto *RHS = BO->getRHS()->IgnoreCasts(); + auto *RHS_DRE = dyn_cast(RHS); + if (!RHS_DRE) { + return; + } + if (auto RhsVarDecl = dyn_cast(RHS_DRE->getDecl())) { + StringRef ReplacedArg = ""; + emplaceTransformation( + ReplaceMemberAssignAsSetMethod(BO, ME, FieldName, ReplacedArg)); + return; + } + auto *FD = dyn_cast(RHS_DRE->getDecl()); + if (!FD) { + return; + } + std::string FuncName = FD->getNameAsString(); + std::string WrapperName = FuncName; + std::string AccessOperator = + VD->getType()->isPointerType() ? "->" : "."; + std::string ReplacementStr = + VarName + AccessOperator + + "set_func((void*) dpct::wrapper_register(&" + WrapperName; + emplaceTransformation(new ReplaceToken( + BO->getBeginLoc(), BO->getEndLoc(), std::move(ReplacementStr))); + emplaceTransformation(new InsertAfterStmt(BO, ")")); + } + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { + StringRef ReplacedArg = ""; + emplaceTransformation( + ReplaceMemberAssignAsSetMethod(BO, ME, FieldName, ReplacedArg)); + } else { + emplaceTransformation(new RenameFieldInMemberExpr( + ME, buildString("get_", FieldName, "()"))); + } + } + return; + } + if (auto ME = getNodeAsType(Result, "execUpdateResult")) { + auto MD = ME->getMemberDecl(); + const Expr *Base = ME->getBase(); + if (MD->getNameAsString() == "result" || + MD->getNameAsString() == "errorNode" || + MD->getNameAsString() == "errorFromNode") { + if (auto *DRE = dyn_cast(Base)) { + SourceLocation StartLoc = Base->getBeginLoc(); + SourceLocation EndLoc = ME->getEndLoc(); + const SourceManager &SM = *Result.SourceManager; + EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, SM, + Result.Context->getLangOpts()); + std::string VarNameStr = DRE->getNameInfo().getAsString(); + emplaceTransformation( + new ReplaceToken(StartLoc, EndLoc, std::move(VarNameStr))); + } + } + return; + } const CallExpr *CE = getNodeAsType(Result, "FunctionCall"); if (!CE) { return; diff --git a/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp b/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp index 17ebb94ab61e..34b48357dba1 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangGraphicsInterop.cpp @@ -104,7 +104,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { StringRef ReplacedArg = ""; if (FieldName == "flags") { @@ -150,7 +150,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { StringRef ReplacedArg = ""; if (FieldName == "image_type") { @@ -196,7 +196,7 @@ void GraphicsInteropRule::runRule( } requestFeature(HelperFeatureEnum::device_ext); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(BO, ME, FieldName)); } else { @@ -273,7 +273,7 @@ void GraphicsInteropRule::replaceExtResMemHandleDataExpr(const MemberExpr *ME, } requestFeature(HelperFeatureEnum::device_ext); - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (AssignedBO) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(AssignedBO, ME, FieldName)); @@ -328,7 +328,7 @@ void GraphicsInteropRule::replaceExtResSemParamsDataExpr(const MemberExpr *ME, } requestFeature(HelperFeatureEnum::device_ext); - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (AssignedBO) { emplaceTransformation( ReplaceMemberAssignAsSetMethod(AssignedBO, ME, FieldName)); @@ -338,48 +338,5 @@ void GraphicsInteropRule::replaceExtResSemParamsDataExpr(const MemberExpr *ME, } } -const Expr *GraphicsInteropRule::getParentAsAssignedBO(const Expr *E, - ASTContext &Context) { - auto Parents = Context.getParents(*E); - if (Parents.size() > 0) - return getAssignedBO(Parents[0].get(), Context); - return nullptr; -} - -// Return the binary operator if E is the lhs of an assign expression, otherwise -// nullptr. -const Expr *GraphicsInteropRule::getAssignedBO(const Expr *E, - ASTContext &Context) { - if (dyn_cast(E)) { - // Continue finding parents when E is MemberExpr. - return getParentAsAssignedBO(E, Context); - } else if (auto ICE = dyn_cast(E)) { - // Stop finding parents and return nullptr when E is ImplicitCastExpr, - // except for ArrayToPointerDecay cast. - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - return getParentAsAssignedBO(E, Context); - } - } else if (auto ASE = dyn_cast(E)) { - // Continue finding parents when E is ArraySubscriptExpr, and remove - // subscript operator anyway for texture object's member. - emplaceTransformation(new ReplaceToken( - Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, - Context.getSourceManager(), - Context.getLangOpts()), - ASE->getRBracketLoc(), "")); - return getParentAsAssignedBO(E, Context); - } else if (auto BO = dyn_cast(E)) { - // If E is BinaryOperator, return E only when it is assign expression, - // otherwise return nullptr. - if (BO->getOpcode() == BO_Assign) - return BO; - } else if (auto COCE = dyn_cast(E)) { - if (COCE->getOperator() == OO_Equal) { - return COCE; - } - } - return nullptr; -} - } // namespace dpct } // namespace clang diff --git a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp index 23a634b3595e..d70dfa58e22d 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangTexture.cpp @@ -648,7 +648,7 @@ bool TextureRule::tryMerge(const MemberExpr *ME, const Expr *BO) { void TextureRule::replaceTextureMember(const MemberExpr *ME, ASTContext &Context, SourceManager &SM) { - auto AssignedBO = getParentAsAssignedBO(ME, Context); + auto AssignedBO = getParentAsAssignedBO(ME, Context, this); if (!DpctGlobalInfo::useExtBindlessImages() && tryMerge(ME, AssignedBO)) return; @@ -715,49 +715,6 @@ void TextureRule::replaceTextureMember(const MemberExpr *ME, } } -const Expr *TextureRule::getParentAsAssignedBO(const Expr *E, - ASTContext &Context) { - auto Parents = Context.getParents(*E); - if (Parents.size() > 0) - return getAssignedBO(Parents[0].get(), Context); - return nullptr; -} - -// Return the binary operator if E is the lhs of an assign expression, otherwise -// nullptr. -const Expr *TextureRule::getAssignedBO(const Expr *E, ASTContext &Context) { - if (dyn_cast(E)) { - // Continue finding parents when E is MemberExpr. - return getParentAsAssignedBO(E, Context); - } else if (auto ICE = dyn_cast(E)) { - // Stop finding parents and return nullptr when E is ImplicitCastExpr, - // except for ArrayToPointerDecay cast. - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - return getParentAsAssignedBO(E, Context); - } - } else if (auto ASE = dyn_cast(E)) { - // Continue finding parents when E is ArraySubscriptExpr, and remove - // subscript operator anyway for texture object's member. - emplaceTransformation(new ReplaceToken( - Lexer::getLocForEndOfToken(ASE->getLHS()->getEndLoc(), 0, - Context.getSourceManager(), - Context.getLangOpts()), - ASE->getRBracketLoc(), "")); - return getParentAsAssignedBO(E, Context); - } else if (auto BO = dyn_cast(E)) { - // If E is BinaryOperator, return E only when it is assign expression, - // otherwise return nullptr. - auto Opcode = BO->getOpcode(); - if (Opcode == BO_Assign || Opcode == BO_OrAssign) - return BO; - } else if (auto COCE = dyn_cast(E)) { - if (COCE->getOperator() == OO_Equal) { - return COCE; - } - } - return nullptr; -} - bool TextureRule::processTexVarDeclInDevice(const VarDecl *VD) { if (auto FD = dyn_cast_or_null(VD->getParentFunctionOrMethod())) { @@ -867,7 +824,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { removeExtraMemberAccess(ME); replaceResourceDataExpr(getParentMemberExpr(ME), *Result.Context); } else if (MemberName == "resType") { - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { requestFeature(HelperFeatureEnum::device_ext); emplaceTransformation( ReplaceMemberAssignAsSetMethod(BO, ME, "data_type")); @@ -899,7 +856,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { static std::map ExtraArgMap = { {"x", "1"}, {"y", "2"}, {"z", "3"}, {"w", "4"}, {"f", ""}}; std::string MemberName = ME->getMemberNameInfo().getAsString(); - if (auto BO = getParentAsAssignedBO(ME, *Result.Context)) { + if (auto BO = getParentAsAssignedBO(ME, *Result.Context, this)) { requestFeature(HelperFeatureEnum::device_ext); requestFeature(MethodNameToSetFeatureMap.at(MemberName)); emplaceTransformation(ReplaceMemberAssignAsSetMethod( @@ -1031,7 +988,7 @@ void TextureRule::replaceResourceDataExpr(const MemberExpr *ME, removeExtraMemberAccess(ME); - auto AssignedBO = getParentAsAssignedBO(TopMember, Context); + auto AssignedBO = getParentAsAssignedBO(TopMember, Context, this); auto FieldName = ResourceTypeNames[TopMember->getMemberNameInfo().getAsString()]; if (FieldName.empty() || diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 9866f93fcdbe..e3650473c858 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -404,7 +404,7 @@ ENTRY(cudaGraphAddEventWaitNode, cudaGraphAddEventWaitNode, false, NO_FLAG, P4, ENTRY(cudaGraphAddExternalSemaphoresSignalNode, cudaGraphAddExternalSemaphoresSignalNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddExternalSemaphoresWaitNode, cudaGraphAddExternalSemaphoresWaitNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddHostNode, cudaGraphAddHostNode, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphAddKernelNode, cudaGraphAddKernelNode, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphAddKernelNode, cudaGraphAddKernelNode, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphAddMemAllocNode, cudaGraphAddMemAllocNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddMemFreeNode, cudaGraphAddMemFreeNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddMemcpyNode, cudaGraphAddMemcpyNode, false, NO_FLAG, P4, "comment") @@ -456,12 +456,12 @@ ENTRY(cudaGraphInstantiateWithFlags, cudaGraphInstantiateWithFlags, false, NO_FL ENTRY(cudaGraphInstantiateWithParams, cudaGraphInstantiateWithParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeCopyAttributes, cudaGraphKernelNodeCopyAttributes, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeGetAttribute, cudaGraphKernelNodeGetAttribute, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeGetParams, cudaGraphKernelNodeGetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeSetAttribute, cudaGraphKernelNodeSetAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetEnabled, cudaGraphKernelNodeSetEnabled, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetGridDim, cudaGraphKernelNodeSetGridDim, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphKernelNodeSetParam, cudaGraphKernelNodeSetParam, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphKernelNodeSetParams, cudaGraphKernelNodeSetParams, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphKernelNodeUpdatesApply, cudaGraphKernelNodeUpdatesApply, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphLaunch, cudaGraphLaunch, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphMemAllocNodeGetParams, cudaGraphMemAllocNodeGetParams, false, NO_FLAG, P4, "comment") diff --git a/clang/lib/DPCT/SrcAPI/TypeNames.inc b/clang/lib/DPCT/SrcAPI/TypeNames.inc index b9b76377e92f..6f4220d31450 100644 --- a/clang/lib/DPCT/SrcAPI/TypeNames.inc +++ b/clang/lib/DPCT/SrcAPI/TypeNames.inc @@ -41,7 +41,7 @@ ENTRY_TYPE(CUgraphNode, false, NO_FLAG, P4, "comment") ENTRY_TYPE(CUgraphicsResource, true, NO_FLAG, P4, "successful") // CUDA Runtime Library -ENTRY_TYPE(cudaKernelNodeParams, false, NO_FLAG, P4, "comment") +ENTRY_TYPE(cudaKernelNodeParams, true, NO_FLAG, P4, "Successful/DPCT1119") // cuDNN Library ENTRY_TYPE(cudnnReduceTensorIndices_t, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index f17fc869eef1..86ec072efe6c 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -25,6 +25,49 @@ typedef sycl::ext::oneapi::experimental::command_graph< typedef sycl::ext::oneapi::experimental::node *node_ptr; +struct kernel_node_params { + void *func{}; + dpct::dim3 grid_dim{}; + dpct::dim3 block_dim{}; + unsigned int shared_mem_bytes{}; + void **kernel_params{}; + + std::vector dependencies{}; + +public: + void set_block_dim(const dpct::dim3 &block_dim) { + this->block_dim = block_dim; + } + void set_grid_dim(const dpct::dim3 &grid_dim) { this->grid_dim = grid_dim; } + void set_kernel_params(void **kernel_params) { + this->kernel_params = kernel_params; + } + void set_func(void *func) { this->func = func; } + void set_shared_mem_bytes(unsigned int shared_mem_bytes) { + this->shared_mem_bytes = shared_mem_bytes; + } + dpct::dim3 get_block_dim() const { return block_dim; } + dpct::dim3 get_grid_dim() const { return grid_dim; } + void **get_kernel_params() const { return kernel_params; } + void *get_func() const { return func; } + unsigned int get_shared_mem_bytes() const { return shared_mem_bytes; } + + void add_dependency(dpct::experimental::node_ptr dependency) { + dependencies.push_back(dependency); + } + const std::vector &get_dependencies() const { + return dependencies; + } + void update_dependency(const dpct::experimental::node_ptr &oldDependency, + const dpct::experimental::node_ptr &newDependency) { + auto it = + std::find(dependencies.begin(), dependencies.end(), oldDependency); + if (it != dependencies.end()) { + *it = newDependency; + } + } +}; + namespace detail { class graph_mgr { public: @@ -94,6 +137,92 @@ class graph_mgr { } } + void add_kernel_node(dpct::experimental::node_ptr *node, + dpct::experimental::command_graph_ptr graph, + dpct::experimental::node_ptr *dependencies, + std::size_t numberOfDependencies, + dpct::experimental::kernel_node_params *params) { + node_graph_params_map[*node] = std::make_pair(graph, params); + for (std::size_t i = 0; i < numberOfDependencies; i++) { + params->add_dependency(dependencies[i]); + } + graph_kernel_node_params_map[graph].emplace_back(*node, params); + } + + void launch(dpct::experimental::command_graph_exec_ptr execGraph, + sycl::queue *queue) { + // Retrieve the graph associated with execGraph + auto graph = exec_graph_map[execGraph]; + auto &kernel_params_vector = graph_kernel_node_params_map[graph]; + for (std::size_t i = 0; i < kernel_params_vector.size(); i++) { + auto &node_kernel_params_pair = kernel_params_vector[i]; + auto node_params = node_kernel_params_pair.second; + const auto &dependency_ptrs = node_params->get_dependencies(); + std::vector dependencies; + dependencies.reserve(dependency_ptrs.size()); + for (const auto &dep_ptr : dependency_ptrs) { + if (dep_ptr) { + dependencies.push_back(*dep_ptr); + } + } + auto new_node = new sycl::ext::oneapi::experimental::node(graph->add( + [&](sycl::handler &cgh) { + cgh.host_task([=]() { + dpct::kernel_launcher::launch( + node_params->get_func(), node_params->get_grid_dim(), + node_params->get_block_dim(), + node_params->get_kernel_params(), + node_params->get_shared_mem_bytes(), queue); + }); + }, + sycl::ext::oneapi::experimental::property::node::depends_on( + dependencies))); + if (i + 1 < kernel_params_vector.size()) { + auto &next_node_params = kernel_params_vector[i + 1].second; + auto next_dependency = next_node_params->get_dependencies()[i]; + next_node_params->update_dependency(next_dependency, new_node); + } + node_kernel_params_pair.first = new_node; + } + execGraph = new sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::executable>( + graph->finalize()); + queue->submit( + [&](sycl::handler &cgh) { cgh.ext_oneapi_graph(*execGraph); }); + } + + void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, + dpct::experimental::command_graph_ptr graph) { + exec_graph_map[*execGraph] = graph; + } + + void kernel_node_get_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) { + auto it = node_graph_params_map.find(node); + if (it == node_graph_params_map.end()) { + return; + } + *params = *(it->second.second); + } + + void kernel_node_set_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) { + node_graph_params_map[node].second = params; + } + + void get_node_type(dpct::experimental::node_ptr node, + sycl::ext::oneapi::experimental::node_type *nodeType) { + if (node_graph_params_map.find(node) != node_graph_params_map.end()) { + *nodeType = sycl::ext::oneapi::experimental::node_type::kernel; + } else { + if (node) { + *nodeType = node->get_type(); + } else { + *nodeType = sycl::ext::oneapi::experimental::node_type::empty; + } + } + } + private: std::unordered_map queue_graph_map; std::unordered_map> root_nodes_map; + std::unordered_map + exec_graph_map; + std::unordered_map< + dpct::experimental::command_graph_ptr, + std::vector>> + graph_kernel_node_params_map; + std::unordered_map> + node_graph_params_map; }; } // namespace detail @@ -191,5 +332,52 @@ static void get_root_nodes(dpct::experimental::command_graph_ptr graph, numberOfNodes); } +static void add_kernel_node(dpct::experimental::node_ptr *node, + dpct::experimental::command_graph_ptr graph, + dpct::experimental::node_ptr *dependencies, + std::size_t numberOfDependencies, + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().add_kernel_node(node, graph, dependencies, + numberOfDependencies, params); +} + +static void instantiate(dpct::experimental::command_graph_exec_ptr *execGraph, + dpct::experimental::command_graph_ptr graph) { + detail::graph_mgr::instance().instantiate(execGraph, graph); +} + +static void launch(dpct::experimental::command_graph_exec_ptr execGraph, + sycl::queue *queue) { + detail::graph_mgr::instance().launch(execGraph, queue); +} + +static void +kernel_node_get_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().kernel_node_get_params(node, params); +} + +static void +kernel_node_set_params(dpct::experimental::node_ptr node, + dpct::experimental::kernel_node_params *params) { + detail::graph_mgr::instance().kernel_node_set_params(node, params); +} + +static void +get_node_type(dpct::experimental::node_ptr node, + sycl::ext::oneapi::experimental::node_type *nodeType) { + detail::graph_mgr::instance().get_node_type(node, nodeType); +} + +static void update(dpct::experimental::command_graph_exec_ptr graphExec, + dpct::experimental::command_graph_ptr graph, + int *updateResultInfo) { + graphExec->update(*graph); + if (!graphExec) { + *updateResultInfo = 0; + } + *updateResultInfo = 1; +} + } // namespace experimental } // namespace dpct diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index f0b5742386f7..46c337378bc9 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -1,5 +1,5 @@ -// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 -// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1, cuda-11.2, cuda-11.3, cuda-11.4, cuda-11.5, cuda-11.6, cuda-11.7, cuda-11.8 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1, v11.2, v11.3, v11.4, v11.5, v11.6, v11.7, v11.8 // RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/cudaGraph_test %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 // RUN: FileCheck --input-file %T/cudaGraph_test/cudaGraph_test.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -DNO_BUILD_TEST -fsycl %T/cudaGraph_test/cudaGraph_test.dp.cpp -o %T/cudaGraph_test/cudaGraph_test.dp.o %} @@ -10,6 +10,24 @@ cudaError_t _result = x; \ } while (0) +__global__ void myKernel(int* data) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < 10) { + data[idx] += 1; + } +} + +// CHECK: void myKernel_wrapper(int * data) { +// CHECK: sycl::queue queue = *dpct::kernel_launcher::_que; +// CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size; +// CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr; +// CHECK: queue.parallel_for( +// CHECK: nr, +// CHECK: [=](sycl::nd_item<3> item_ct1) { +// CHECK: myKernel(data); +// CHECK: }); +// CHECK: } + int main() { // CHECK: dpct::experimental::command_graph_ptr graph; // CHECK-NEXT: dpct::experimental::command_graph_ptr *graph2; @@ -70,6 +88,32 @@ int main() { // CHECK: dpct::experimental::add_empty_node(&node, graph, node10, 1); cudaGraphAddEmptyNode(&node, graph, node10, 1); + // CHECK: dpct::experimental::kernel_node_params params = {}; + // CHECK-NEXT: params.set_func((void*) dpct::wrapper_register(&myKernel_wrapper).get()); + // CHECK-NEXT: params.set_block_dim(dpct::dim3(10)); + // CHECK-NEXT: params.set_grid_dim(dpct::dim3(1)); + // CHECK-NEXT: params.set_shared_mem_bytes(0); + // CHECK-NEXT: void *kernelArgs[] = {}; + // CHECK-NEXT: params.set_kernel_params(kernelArgs); + cudaKernelNodeParams params = {}; + params.func = (void *)myKernel; + params.blockDim = dim3(10); + params.gridDim = dim3(1); + params.sharedMemBytes = 0; + void *kernelArgs[] = {}; + params.kernelParams = kernelArgs; + + // CHECK: void* function = (void*) dpct::wrapper_register(myKernel_wrapper).get(); + // CHECK-NEXT: params.set_func(function); + void* function = (void*) myKernel; + params.func = function; + + // CHECK: dpct::dim3 blockDim = params.get_block_dim(); + dim3 blockDim = params.blockDim; + + // CHECK: void* func2 = params.get_func(); + void* func2 = params.func; + size_t numNodes; // CHECK: dpct::experimental::get_nodes(graph, node4, &numNodes); @@ -96,10 +140,10 @@ int main() { // CHECK: dpct::experimental::add_dependencies(graph, node10, node11, 1); cudaGraphAddDependencies(graph, node10, node11, 1); - // CHECK: execGraph = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); - // CHECK-NEXT: *execGraph2 = new sycl::ext::oneapi::experimental::command_graph(graph->finalize()); - // CHECK-NEXT: **execGraph3 = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(**execGraph3 = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()))); + // CHECK: dpct::experimental::instantiate(&execGraph, *graph2); + // CHECK-NEXT: dpct::experimental::instantiate(execGraph2, graph); + // CHECK-NEXT: dpct::experimental::instantiate(*execGraph3, *graph2); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::instantiate(*execGraph3, *graph2))); cudaGraphInstantiate(&execGraph, *graph2, nullptr, nullptr, 0); cudaGraphInstantiate(execGraph2, graph, nullptr, nullptr, 0); cudaGraphInstantiate(*execGraph3, *graph2, nullptr, nullptr, 0); @@ -110,24 +154,37 @@ int main() { cudaStream_t *stream2; - // CHECK: stream->ext_oneapi_graph(*execGraph); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(stream->ext_oneapi_graph(*execGraph))); - // CHECK-NEXT: (*stream2)->ext_oneapi_graph(**execGraph2); + // CHECK: dpct::experimental::launch(execGraph, stream); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::launch(execGraph, stream))); + // CHECK-NEXT: dpct::experimental::launch(*execGraph2, *stream2); cudaGraphLaunch(execGraph, stream); CUDA_CHECK_THROW(cudaGraphLaunch(execGraph, stream)); cudaGraphLaunch(*execGraph2, *stream2); -#ifndef DNO_BUILD_TEST - // CHECK: execGraph->update(*graph); - cudaGraphExecUpdate(execGraph, graph, nullptr, nullptr); + // CHECK: int updateResult; + cudaGraphExecUpdateResultInfo updateResult; + + // CHECK: int result; + cudaGraphExecUpdateResult result; + + // CHECK: dpct::experimental::update(execGraph, graph, &updateResult); + cudaGraphExecUpdate(execGraph, graph, &updateResult); + + // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::update(execGraph, graph, &updateResult))); + CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, &updateResult)); - // CHECK: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(execGraph->update(*graph))); - CUDA_CHECK_THROW(cudaGraphExecUpdate(execGraph, graph, nullptr, nullptr)); -#endif + // CHECK: if (updateResult == 1) { + // CHECK-NEXT: } + // CHECK-NEXT: if (updateResult == 0) { + // CHECK-NEXT: } + if (updateResult.result == cudaGraphExecUpdateSuccess) { + } + if (updateResult.result == cudaGraphExecUpdateErrorTopologyChanged) { + } // CHECK: sycl::ext::oneapi::experimental::node_type nodeType; - // CHECK-NEXT: nodeType = node->get_type(); - // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(nodeType = node->get_type())); + // CHECK-NEXT: dpct::experimental::get_node_type(node, &nodeType); + // CHECK-NEXT: CUDA_CHECK_THROW(DPCT_CHECK_ERROR(dpct::experimental::get_node_type(node, &nodeType))); cudaGraphNodeType nodeType; cudaGraphNodeGetType(node, &nodeType); CUDA_CHECK_THROW(cudaGraphNodeGetType(node, &nodeType)); diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 79029603d65c..157bcd86650f 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -1,5 +1,5 @@ -// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 -// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1, cuda-11.2, cuda-11.3, cuda-11.4, cuda-11.5, cuda-11.6, cuda-11.7, cuda-11.8 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1, v11.2, v11.3, v11.4, v11.5, v11.6, v11.7, v11.8 // RUN: dpct --format-range=none -out-root %T/cudaGraph_test_default_option %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 // RUN: FileCheck --input-file %T/cudaGraph_test_default_option/cudaGraph_test_default_option.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -DNO_BUILD_TEST -fsycl %T/cudaGraph_test_default_option/cudaGraph_test_default_option.dp.cpp -o %T/cudaGraph_test_default_option/cudaGraph_test.dp.o %} @@ -71,6 +71,17 @@ int main() { // CHECK-NEXT: */ cudaGraphAddDependencies(graph, NULL, NULL, 0); + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaKernelNodeParams is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaKernelNodeParams params; + params.blockDim = dim3(10); + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphAddKernelNode is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphAddKernelNode(&node, graph, nullptr, 0, ¶ms); + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphGetNodes is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ @@ -92,14 +103,19 @@ int main() { cudaGraphLaunch(execGraph, stream); // CHECK: /* - // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResultInfo is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphExecUpdateResultInfo updateResult; + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaGraphExecUpdateResult status; // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdate is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ - cudaGraphExecUpdate(execGraph, graph, nullptr, &status); + cudaGraphExecUpdate(execGraph, graph, &updateResult); // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecDestroy is not supported, please try to remigrate with option: --use-experimental-features=graph. @@ -121,6 +137,11 @@ int main() { // CHECK-NEXT: */ nodeType = cudaGraphNodeTypeKernel; + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaKernelNodeParams is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaKernelNodeParams kernelNodeParam0 = {}; + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphDestroy is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ diff --git a/clang/test/dpct/dim3.cu b/clang/test/dpct/dim3.cu index e93fe3e3bb44..9965c5d1d945 100644 --- a/clang/test/dpct/dim3.cu +++ b/clang/test/dpct/dim3.cu @@ -1,50 +1,27 @@ // UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 // UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 -// RUN: dpct --format-range=none -out-root %T/dim3 %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/dim3 %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only // RUN: FileCheck --input-file %T/dim3/dim3.dp.cpp --match-full-lines %s #include int main() { - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam0 = {}; + // CHECK: dpct::experimental::kernel_node_params kernelNodeParam0 = {}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam1 = {0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam2 = {0, 0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam3 = {0, 0, 0}; cudaKernelNodeParams kernelNodeParam0 = {}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam1 = {0}; cudaKernelNodeParams kernelNodeParam1 = {0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam2 = {0, 0}; cudaKernelNodeParams kernelNodeParam2 = {0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam4{}; + + // CHECK: dpct::experimental::kernel_node_params kernelNodeParam4{}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam5{0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam6{0, 0}; + // CHECK-NEXT: dpct::experimental::kernel_node_params kernelNodeParam7{0, 0, 0}; cudaKernelNodeParams kernelNodeParam4{}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam5{0}; cudaKernelNodeParams kernelNodeParam5{0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam6{0, 0}; cudaKernelNodeParams kernelNodeParam6{0, 0}; - // CHECK: /* - // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. - // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; }