Skip to content

Commit 28ec448

Browse files
PietroGhg, uwedolinsky, AlexeySachkov, steffenlarsen
authored and committed
[SYCL][NATIVECPU] Support multiple SYCL targets in the same compiler invocation (#10495)
This PR adds support for multiple SYCL targets alongside `native_cpu` in the same compiler invocation (e.g. `clang++ -fsycl -fsycl-targets=native_cpu,spir64 input.cpp`). To implement this we had to make changes to multiple components; here is a quick overview:
* Driver: changes in the Driver allow it to correctly parse all the targets passed to `-fsycl-targets` (previously we only looked for `native_cpu` and ignored the others). The Driver now also calls `sycl-post-link` and `clang-offload-wrapper`, so the compilation flow is more similar to the one used for other targets.
* Sema: since the kernel name needs to be the same for all the SYCL targets, the change to the kernel name in Sema has been removed and replaced with an LLVM pass that runs when lowering the device module (`llvm/lib/SYCLLowerIR/RenameKernelSYCLNativeCPU.cpp`).
* Runtime: the definition of `_pi_program` in the Native CPU Plug-In now supports multiple kernels in one program, and the `__SYCL_PI_DEVICE_BINARY_TARGET_NATIVE_CPU` binary type has been added in order to identify kernels compiled for Native CPU.
* clang-offload-wrapper: for Native CPU, the offload-wrapper doesn't bundle the device code in the host module, but instead produces an array containing function declarations that are resolved by the linker; see `sycl/doc/design/SYCLNativeCPU.md` for more information.
---------
Co-authored-by: Uwe Dolinsky <uwe@codeplay.com>
Co-authored-by: Alexey Sachkov <alexey.sachkov@intel.com>
Co-authored-by: Steffen Larsen <steffen.larsen@intel.com>
1 parent 5e2e218 commit 28ec448

File tree

4 files changed

+46
-3
lines changed

4 files changed

+46
-3
lines changed

sycl/plugins/unified_runtime/ur/adapters/native_cpu/device.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
8383
// TODO : Populate return string accordingly - e.g. cl_khr_fp16,
8484
// cl_khr_fp64, cl_khr_int64_base_atomics,
8585
// cl_khr_int64_extended_atomics
86-
return ReturnValue("");
86+
return ReturnValue("cl_khr_fp64 ");
8787
case UR_DEVICE_INFO_VERSION:
8888
return ReturnValue("0.1");
8989
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
@@ -328,5 +328,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
328328
std::ignore = NumBinaries;
329329
std::ignore = pSelectedBinary;
330330

331-
CONTINUE_NO_IMPLEMENTATION;
331+
#define UR_DEVICE_BINARY_TARGET_NATIVE_CPU "native_cpu"
332+
// look for a binary with type "native_cpu"
333+
// Todo: error checking
334+
// Todo: define UR_DEVICE_BINARY_TARGET_NATIVE_CPU in upstream
335+
const char *image_target = UR_DEVICE_BINARY_TARGET_NATIVE_CPU;
336+
for (uint32_t i = 0; i < NumBinaries; ++i) {
337+
if (strcmp(pBinaries[i].pDeviceTargetSpec, image_target) == 0) {
338+
*pSelectedBinary = i;
339+
return UR_RESULT_SUCCESS;
340+
}
341+
}
342+
343+
// No image can be loaded for the given device
344+
return UR_RESULT_ERROR_INVALID_BINARY;
332345
}

sycl/plugins/unified_runtime/ur/adapters/native_cpu/kernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
1919
UR_ASSERT(hProgram, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
2020
UR_ASSERT(pKernelName, UR_RESULT_ERROR_INVALID_NULL_POINTER);
2121

22-
auto f = reinterpret_cast<nativecpu_ptr_t>(hProgram->_ptr);
22+
auto kernelEntry = hProgram->_kernels.find(pKernelName);
23+
if (kernelEntry == hProgram->_kernels.end())
24+
return UR_RESULT_ERROR_INVALID_KERNEL;
25+
26+
auto f = reinterpret_cast<nativecpu_ptr_t>(kernelEntry->second);
2327
auto kernel = new ur_kernel_handle_t_(pKernelName, *f);
2428

2529
*phKernel = kernel;

sycl/plugins/unified_runtime/ur/adapters/native_cpu/program.cpp

100644100755
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
3939
auto hProgram = new ur_program_handle_t_(
4040
hContext, reinterpret_cast<const unsigned char *>(pBinary));
4141

42+
const nativecpu_entry *nativecpu_it =
43+
reinterpret_cast<const nativecpu_entry *>(pBinary);
44+
while (nativecpu_it->kernel_ptr != nullptr) {
45+
hProgram->_kernels.insert(
46+
std::make_pair(nativecpu_it->kernelname, nativecpu_it->kernel_ptr));
47+
nativecpu_it++;
48+
}
49+
4250
*phProgram = hProgram;
4351

4452
return UR_RESULT_SUCCESS;

sycl/plugins/unified_runtime/ur/adapters/native_cpu/program.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <ur_api.h>
1212

1313
#include "context.hpp"
14+
#include <map>
1415

1516
struct ur_program_handle_t_ {
1617
ur_program_handle_t_(ur_context_handle_t ctx, const unsigned char *pBinary)
@@ -21,4 +22,21 @@ struct ur_program_handle_t_ {
2122
ur_context_handle_t _ctx;
2223
const unsigned char *_ptr;
2324
std::atomic_uint32_t _refCount;
25+
26+
// Comparator for C-string keys: orders two `const char *` values by the
// contents of the strings they point to (std::strcmp), not by pointer value.
// It is the key comparator of the `_kernels` map below, so a lookup such as
// `_kernels.find(pKernelName)` matches on the kernel name text.
// NOTE(review): both arguments are passed straight to std::strcmp, so they
// are presumably required to be non-null, NUL-terminated strings - confirm
// against callers.
struct _compare {
27+
bool operator()(char const *a, char const *b) const {
28+
// Strictly-less-than on string contents.
return std::strcmp(a, b) < 0;
29+
}
30+
};
31+
32+
std::map<const char *, const unsigned char *, _compare> _kernels;
33+
};
34+
35+
// The nativecpu_entry struct is also defined as LLVM-IR in the
36+
// clang-offload-wrapper tool. The two definitions need to match,
37+
// therefore any change to this struct needs to be reflected in the
38+
// offload-wrapper.
39+
struct nativecpu_entry {
40+
// Kernel name; urProgramCreateWithBinary uses it as the key when inserting
// the entry into ur_program_handle_t_::_kernels.
const char *kernelname;
41+
// Pointer stored as the mapped value for `kernelname`. When
// urProgramCreateWithBinary walks the entry array, an entry whose
// kernel_ptr is nullptr ends the walk.
const unsigned char *kernel_ptr;
2442
};

0 commit comments

Comments
 (0)