diff --git a/clang/include/clang/DPCT/DPCTOptions.inc b/clang/include/clang/DPCT/DPCTOptions.inc index afa05039ad58..8bdd852a412d 100644 --- a/clang/include/clang/DPCT/DPCTOptions.inc +++ b/clang/include/clang/DPCT/DPCTOptions.inc @@ -811,11 +811,12 @@ DPCT_ENUM_OPTION( "be accessed within a kernel using syntax similar to C++ global " "variables.\n", false), - DPCT_OPTION_ENUM_VALUE( - "virtual_mem", int(ExperimentalFeatures::Exp_VirtualMemory), - "Experimental extension that allows for mapping of an address range onto " - "multiple allocations of physical memory.", - false), + DPCT_OPTION_ENUM_VALUE("virtual_mem", + int(ExperimentalFeatures::Exp_VirtualMemory), + "Experimental extension that allows for mapping " + "of an address range onto " + "multiple allocations of physical memory.", + false), DPCT_OPTION_ENUM_VALUE( "in_order_queue_events", int(ExperimentalFeatures::Exp_InOrderQueueEvents), @@ -838,7 +839,13 @@ DPCT_ENUM_OPTION( "level_zero", int(ExperimentalFeatures::Exp_LevelZero), "Experimental migration feature that enables the use of Level Zero " "APIs to migrate target code, like CUDA Inter-Process " - "Communication (IPC) APIs.\n", false), + "Communication (IPC) APIs.\n", + false), + DPCT_OPTION_ENUM_VALUE("async_alloc", + int(ExperimentalFeatures::Exp_AsyncAlloc), + "Experimental extension that allows use of SYCL " + "async allocation APIs.\n", + false), DPCT_OPTION_ENUM_VALUE( "all", int(ExperimentalFeatures::Exp_All), "Enable all experimental extensions listed in this option.\n", diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 4eedfb910a25..57a7228fd460 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1352,6 +1352,9 @@ class DpctGlobalInfo { static bool useExtLevelZero() { return getUsingExperimental(); } + static bool useExtAsyncAlloc() { + return getUsingExperimental(); + } static bool useExtPrefetch() { return getUsingExperimental(); } diff --git a/clang/lib/DPCT/CommandOption/ValidateArguments.h b/clang/lib/DPCT/CommandOption/ValidateArguments.h index d7ac5f463211..f6b04b582c74 100644 --- a/clang/lib/DPCT/CommandOption/ValidateArguments.h +++ b/clang/lib/DPCT/CommandOption/ValidateArguments.h @@ -103,6 +103,7 @@ enum class ExperimentalFeatures : unsigned int { Exp_NonStandardSYCLBuiltins, Exp_Prefetch, Exp_LevelZero, + Exp_AsyncAlloc, Exp_All }; enum class HelperFuncPreference : unsigned int { NoQueueDevice = 0 }; diff --git a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h index f7a2c8e545fe..628c7e9b5dcb 100644 --- a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h @@ -1827,6 +1827,10 @@ inline auto UseExtLevelZero = [](const CallExpr *C) -> bool { return DpctGlobalInfo::useExtLevelZero(); }; +inline auto UseExtAsyncAlloc = [](const CallExpr *C) -> bool { + return DpctGlobalInfo::useExtAsyncAlloc(); +}; + inline auto UseExtGraph = [](const CallExpr *C) -> bool { return DpctGlobalInfo::useExtGraph(); }; diff --git a/clang/lib/DPCT/RulesInclude/HeaderTypes.inc b/clang/lib/DPCT/RulesInclude/HeaderTypes.inc index fb9ab4d0f0c6..6a566e8b573b 100644 --- a/clang/lib/DPCT/RulesInclude/HeaderTypes.inc +++ b/clang/lib/DPCT/RulesInclude/HeaderTypes.inc @@ -52,6 +52,8 @@ STD_HEADER(DL, "") #endif STD_HEADER(SHMEM, "") STD_HEADER(SHMEMX, "") +STD_HEADER(AsyncAlloc, + "") ONEDPL_HEADER(Algorithm, "") ONEDPL_HEADER(Execution, "") diff --git a/clang/lib/DPCT/RulesLang/APINamesMemory.inc b/clang/lib/DPCT/RulesLang/APINamesMemory.inc index 3d959fb3fd23..91a477b3c6a4 100644 --- a/clang/lib/DPCT/RulesLang/APINamesMemory.inc +++ b/clang/lib/DPCT/RulesLang/APINamesMemory.inc @@ -993,6 +993,47 @@ CONDITIONAL_FACTORY_ENTRY( "memcpy", false), MEM_ARG(0), MEM_ARG(1), ARG(2), ARG(3)))))) +CONDITIONAL_FACTORY_ENTRY( + checkIsUSM(), + CONDITIONAL_FACTORY_ENTRY( + CheckArgCount(3), + CONDITIONAL_FACTORY_ENTRY( + UseExtAsyncAlloc, + ASSIGNABLE_FACTORY(HEADER_INSERT_FACTORY( + HeaderType::HT_AsyncAlloc, + ASSIGN_FACTORY_ENTRY( + "cudaMallocAsync", DEREF(0), + CALL(MapNames::getClNamespace() + + "ext::oneapi::experimental::async_malloc", + DEREF(2), + ARG(MapNames::getClNamespace() + "usm::alloc::device"), + ARG(1))))), + UNSUPPORT_FACTORY_ENTRY( + "cudaMallocAsync", Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaMallocAsync"), + ARG("--use-experimental-features=virtual_mem"))), + UNSUPPORT_FACTORY_ENTRY("cudaMallocAsync", + Diagnostics::API_NOT_MIGRATED, + ARG("cudaMallocAsync"))), + UNSUPPORT_FACTORY_ENTRY("cudaMallocAsync", Diagnostics::API_NOT_MIGRATED, + ARG("cudaMallocAsync"))) +CONDITIONAL_FACTORY_ENTRY( + checkIsUSM(), + CONDITIONAL_FACTORY_ENTRY( + UseExtAsyncAlloc, + ASSIGNABLE_FACTORY(HEADER_INSERT_FACTORY( + HeaderType::HT_AsyncAlloc, + CALL_FACTORY_ENTRY("cudaFreeAsync", + CALL(MapNames::getClNamespace() + + "ext::oneapi::experimental::async_free", + DEREF(1), ARG(0))))), + UNSUPPORT_FACTORY_ENTRY( + "cudaFreeAsync", Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaFreeAsync"), + ARG("--use-experimental-features=virtual_mem"))), + UNSUPPORT_FACTORY_ENTRY("cudaFreeAsync", Diagnostics::API_NOT_MIGRATED, + ARG("cudaFreeAsync"))) + #define CUDA_FREE(NAME) \ CONDITIONAL_FACTORY_ENTRY( \ hasManagedAttr(0), \ diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 1fcf1a6f149d..595b6125a8a8 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -5784,7 +5784,7 @@ void MemoryMigrationRule::mallocMigration( } else if (Name == "cudaHostAlloc" || Name == "cudaMallocHost" || Name == "cuMemHostAlloc" || Name == "cuMemAllocHost_v2" || Name == "cuMemAllocPitch_v2" || Name == "cudaMallocPitch" || - Name == "cudaMallocMipmappedArray") { + Name == "cudaMallocMipmappedArray" || Name == "cudaMallocAsync") { ExprAnalysis EA(C); emplaceTransformation(EA.getReplacement()); EA.applyAllSubExprRepl(); @@ -6815,7 +6815,7 @@ void MemoryMigrationRule::registerMatcher(MatchFinder &MF) { "cuMemsetD8_v2", "cuMemsetD8Async", "cudaMallocMipmappedArray", "cudaGetMipmappedArrayLevel", "cudaFreeMipmappedArray", "cudaMemcpyPeer", "cudaMemcpyPeerAsync", "cuMemcpyPeer", - "cuMemcpyPeerAsync"); + "cuMemcpyPeerAsync", "cudaMallocAsync", "cudaFreeAsync"); }; MF.addMatcher(callExpr(allOf(callee(functionDecl(memoryAPI())), parentStmt())) @@ -6905,7 +6905,8 @@ void MemoryMigrationRule::runRule(const MatchFinder::MatchResult &Result) { Name.compare("cudaMallocMipmappedArray") && Name.compare("cudaGetMipmappedArrayLevel") && Name.compare("cudaFreeMipmappedArray") && Name.compare("cudaMemcpy") && - Name.compare("cudaFree") && Name.compare("cublasFree")) { + Name.compare("cudaFree") && Name.compare("cublasFree") && + Name.compare("cudaMallocAsync") && Name.compare("cudaFreeAsync")) { requestFeature(HelperFeatureEnum::device_ext); insertAroundStmt(C, MapNames::getCheckErrorMacroName() + "(", ")"); } else if (IsAssigned && !Name.compare("cudaMemAdvise") && @@ -6968,6 +6969,7 @@ MemoryMigrationRule::MemoryMigrationRule() { const CallExpr *, const UnresolvedLookupExpr *, bool)>> Dispatcher{ {"cudaMalloc", &MemoryMigrationRule::mallocMigration}, + {"cudaMallocAsync", &MemoryMigrationRule::mallocMigration}, {"cuMemAlloc_v2", &MemoryMigrationRule::mallocMigration}, {"cudaHostAlloc", &MemoryMigrationRule::mallocMigration}, {"cudaMallocHost", &MemoryMigrationRule::mallocMigration}, @@ -7030,6 +7032,7 @@ MemoryMigrationRule::MemoryMigrationRule() { {"cuMemcpyDtoA_v2", &MemoryMigrationRule::arrayMigration}, {"cuMemcpyAtoA_v2", &MemoryMigrationRule::arrayMigration}, {"cudaFree", &MemoryMigrationRule::freeMigration}, + {"cudaFreeAsync", &MemoryMigrationRule::freeMigration}, {"cuMemFree_v2", &MemoryMigrationRule::freeMigration}, {"cudaFreeArray", &MemoryMigrationRule::freeMigration}, {"cudaFreeMipmappedArray", &MemoryMigrationRule::freeMigration}, diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 17d1872b487b..f384ebe1d970 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -237,8 +237,8 @@ ENTRY(cudaMemcpyToArray, cudaMemcpyToArray, true, NO_FLAG, P0, "Successful") ENTRY(cudaMemcpyToArrayAsync, cudaMemcpyToArrayAsync, true, NO_FLAG, P0, "Successful") // stream ordered memory allocator functions of runtime API -ENTRY(cudaFreeAsync, cudaFreeAsync, false, NO_FLAG, P7, "comment") -ENTRY(cudaMallocAsync, cudaMallocAsync, false, NO_FLAG, P7, "comment") +ENTRY(cudaFreeAsync, cudaFreeAsync, true, NO_FLAG, P7, "comment") +ENTRY(cudaMallocAsync, cudaMallocAsync, true, NO_FLAG, P7, "comment") ENTRY(cudaMallocFromPoolAsync, cudaMallocFromPoolAsync, false, NO_FLAG, P7, "comment") ENTRY(cudaMemPoolCreate, cudaMemPoolCreate, false, NO_FLAG, P4, "comment") ENTRY(cudaMemPoolDestroy, cudaMemPoolDestroy, false, NO_FLAG, P7, "comment") diff --git a/clang/test/dpct/async_alloc.cu b/clang/test/dpct/async_alloc.cu new file mode 100644 index 000000000000..9a80e8db5ba3 --- /dev/null +++ b/clang/test/dpct/async_alloc.cu @@ -0,0 +1,23 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1 +// RUN: dpct --format-range=none --out-root %T/async_alloc %s --cuda-include-path="%cuda-path/include" --use-experimental-features=async_alloc +// RUN: FileCheck --match-full-lines --input-file %T/async_alloc/async_alloc.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/async_alloc/async_alloc.dp.cpp -o %T/async_alloc/async_alloc.dp.o %} + +// CHECK: #include + +void foo_1(float *f, cudaStream_t hStream) { + // CHECK: cudaMemPool_t memPool; + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaMallocAsync is not supported. + // CHECK-NEXT: */ + // CHECK-NEXT: cudaMallocAsync(&f, 1024, memPool, hStream); + // CHECK: f = sycl::ext::oneapi::experimental::async_malloc(*hStream, sycl::usm::alloc::device, 1024); + // CHECK-NEXT: sycl::ext::oneapi::experimental::async_free(*hStream, f); +#ifndef NO_BUILD_TEST + cudaMemPool_t memPool; + cudaMallocAsync(&f, 1024, memPool, hStream); +#endif + cudaMallocAsync(&f, 1024, hStream); + cudaFreeAsync(f, hStream); +} diff --git a/clang/test/dpct/async_alloc_no_ext.cu b/clang/test/dpct/async_alloc_no_ext.cu new file mode 100644 index 000000000000..9d5db3931e66 --- /dev/null +++ b/clang/test/dpct/async_alloc_no_ext.cu @@ -0,0 +1,27 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1 +// RUN: dpct --format-range=none --out-root %T/async_alloc_no_ext %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck --match-full-lines --input-file %T/async_alloc_no_ext/async_alloc_no_ext.dp.cpp %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/async_alloc_no_ext/async_alloc_no_ext.dp.cpp -o %T/async_alloc_no_ext/async_alloc_no_ext.dp.o %} + +void foo_1(float *f, cudaStream_t hStream) { + // CHECK: cudaMemPool_t memPool; + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaMallocAsync is not supported. + // CHECK-NEXT: */ + // CHECK-NEXT: cudaMallocAsync(&f, 1024, memPool, hStream); + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaMallocAsync is not supported, please try to remigrate with option: --use-experimental-features=virtual_mem. + // CHECK-NEXT: */ + // CHECK-NEXT: cudaMallocAsync(&f, 1024, hStream); + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaFreeAsync is not supported, please try to remigrate with option: --use-experimental-features=virtual_mem. + // CHECK-NEXT: */ + // CHECK-NEXT: cudaFreeAsync(f, hStream); +#ifndef NO_BUILD_TEST + cudaMemPool_t memPool; + cudaMallocAsync(&f, 1024, memPool, hStream); + cudaMallocAsync(&f, 1024, hStream); + cudaFreeAsync(f, hStream); +#endif +} diff --git a/clang/test/dpct/help_option_check/lin/help_advanced.txt b/clang/test/dpct/help_option_check/lin/help_advanced.txt index eec0dd19163c..5e699c861c3f 100644 --- a/clang/test/dpct/help_option_check/lin/help_advanced.txt +++ b/clang/test/dpct/help_option_check/lin/help_advanced.txt @@ -65,6 +65,7 @@ Advanced DPCT options =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. + =async_alloc - Experimental extension that allows use of SYCL async allocation APIs. =all - Enable all experimental extensions listed in this option. ... Paths of input source files. These paths are looked up in the compilation database. diff --git a/clang/test/dpct/help_option_check/lin/help_all.txt b/clang/test/dpct/help_option_check/lin/help_all.txt index bc535d587eec..b292496f7026 100644 --- a/clang/test/dpct/help_option_check/lin/help_all.txt +++ b/clang/test/dpct/help_option_check/lin/help_all.txt @@ -172,6 +172,7 @@ All DPCT options =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. + =async_alloc - Experimental extension that allows use of SYCL async allocation APIs. =all - Enable all experimental extensions listed in this option. --use-explicit-namespace= - Define the namespaces to use explicitly in generated code. The is a comma separated list. Default: dpct/syclcompat, sycl. diff --git a/clang/test/dpct/help_option_check/lin/help_option_check.cpp b/clang/test/dpct/help_option_check/lin/help_option_check.cpp index 7ee2698c29d6..88b4f1c3e6be 100644 --- a/clang/test/dpct/help_option_check/lin/help_option_check.cpp +++ b/clang/test/dpct/help_option_check/lin/help_option_check.cpp @@ -4,8 +4,8 @@ // RUN: cd %T/help_option_check // RUN: dpct --help > output.txt -// RUN: diff --strip-trailing-cr %S/help_all.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_all.txt %T/help_option_check/output.txt // RUN: dpct --help=basic > output.txt -// RUN: diff --strip-trailing-cr %S/help_basic.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_basic.txt %T/help_option_check/output.txt // RUN: dpct --help=advanced > output.txt -// RUN: diff --strip-trailing-cr %S/help_advanced.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_advanced.txt %T/help_option_check/output.txt diff --git a/clang/test/dpct/help_option_check/win/help_advanced.txt b/clang/test/dpct/help_option_check/win/help_advanced.txt index 965b8f959536..6543d07d1711 100644 --- a/clang/test/dpct/help_option_check/win/help_advanced.txt +++ b/clang/test/dpct/help_option_check/win/help_advanced.txt @@ -65,6 +65,7 @@ Advanced DPCT options =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. + =async_alloc - Experimental extension that allows use of SYCL async allocation APIs. =all - Enable all experimental extensions listed in this option. ... Paths of input source files. These paths are looked up in the compilation database. diff --git a/clang/test/dpct/help_option_check/win/help_all.txt b/clang/test/dpct/help_option_check/win/help_all.txt index 7beb74a521c1..010ea0db6e54 100644 --- a/clang/test/dpct/help_option_check/win/help_all.txt +++ b/clang/test/dpct/help_option_check/win/help_all.txt @@ -171,6 +171,7 @@ All DPCT options =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. + =async_alloc - Experimental extension that allows use of SYCL async allocation APIs. =all - Enable all experimental extensions listed in this option. --use-explicit-namespace= - Define the namespaces to use explicitly in generated code. The is a comma separated list. Default: dpct/syclcompat, sycl. diff --git a/clang/test/dpct/help_option_check/win/help_option_check.cpp b/clang/test/dpct/help_option_check/win/help_option_check.cpp index b4d8e3c60181..e87b7f897d0c 100644 --- a/clang/test/dpct/help_option_check/win/help_option_check.cpp +++ b/clang/test/dpct/help_option_check/win/help_option_check.cpp @@ -4,8 +4,8 @@ // RUN: cd %T/help_option_check // RUN: dpct --help > output.txt -// RUN: diff --strip-trailing-cr %S/help_all.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_all.txt %T/help_option_check/output.txt // RUN: dpct --help=basic > output.txt -// RUN: diff --strip-trailing-cr %S/help_basic.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_basic.txt %T/help_option_check/output.txt // RUN: dpct --help=advanced > output.txt -// RUN: diff --strip-trailing-cr %S/help_advanced.txt %T/help_option_check/output.txt >> %T/diff.txt +// RUN: diff --strip-trailing-cr %S/help_advanced.txt %T/help_option_check/output.txt