Skip to content

Commit 7e2f297

Browse files
authored
[SYCL] Implement backend content extension (#16633)
Implement [sycl_ext_oneapi_device_image_backend_content](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_image_backend_content.asciidoc).
1 parent 19d54ff commit 7e2f297

File tree

11 files changed

+367
-15
lines changed

11 files changed

+367
-15
lines changed

sycl/doc/extensions/proposed/sycl_ext_oneapi_device_image_backend_content.asciidoc renamed to sycl/doc/extensions/experimental/sycl_ext_oneapi_device_image_backend_content.asciidoc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,10 @@ the SYCL specification refer to that revision.
4444

4545
== Status
4646

47-
This is a proposed extension specification, intended to gather community
48-
feedback.
49-
Interfaces defined in this specification may not be implemented yet or may be
50-
in a preliminary state.
51-
The specification itself may also change in incompatible ways before it is
52-
finalized.
47+
This is an experimental extension specification, intended to provide early
48+
access to features and gather community feedback. Interfaces defined in this
49+
specification are implemented in {dpcpp}, but they are not finalized and may
50+
change incompatibly in future versions of {dpcpp} without prior notice.
5351
*Shipping software products should not rely on APIs defined in this
5452
specification.*
5553

@@ -101,7 +99,7 @@ class device_image {
10199
backend ext_oneapi_get_backend() const noexcept;
102100
std::vector<std::byte> ext_oneapi_get_backend_content() const;
103101
104-
std::span<std::byte> ext_oneapi_get_backend_content_view() const; // Requires C++20
102+
std::span<const std::byte> ext_oneapi_get_backend_content_view() const; // Requires C++20
105103
106104
/*...*/
107105
};
@@ -148,15 +146,15 @@ See below for a description of the formats used by {dpcpp}.
148146
a@
149147
[source,c++]
150148
----
151-
std::span<std::byte> ext_oneapi_get_content_backend_view() const;
149+
std::span<const std::byte> ext_oneapi_get_content_backend_view() const;
152150
----
153151
!====
154152

155153
Minimum C++ Version: {cpp}20
156154

157155
_Constraints:_ Available only when `State` is `bundle_state::executable`.
158156

159-
_Returns:_ A view of the raw backend content for this device image.
157+
_Returns:_ An immutable view of the raw backend content for this device image.
160158
The data behind this view has the same lifetime as the `device_image` object.
161159
The format of this data is implementation-defined.
162160
See below for a description of the formats used by {dpcpp}.

sycl/include/sycl/kernel_bundle.hpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@
2626
#include <sycl/ext/oneapi/properties/property.hpp> // build_options
2727
#include <sycl/ext/oneapi/properties/property_value.hpp> // and log
2828

29-
#include <array> // for array
30-
#include <cstddef> // for std::byte
31-
#include <cstring> // for size_t, memcpy
32-
#include <functional> // for function
33-
#include <iterator> // for distance
34-
#include <memory> // for shared_ptr, operator==, hash
29+
#include <array> // for array
30+
#include <cstddef> // for std::byte
31+
#include <cstring> // for size_t, memcpy
32+
#include <functional> // for function
33+
#include <iterator> // for distance
34+
#include <memory> // for shared_ptr, operator==, hash
35+
#if __has_include(<span>)
36+
#include <span>
37+
#endif
3538
#include <string> // for string
3639
#include <type_traits> // for enable_if_t, remove_refer...
3740
#include <utility> // for move
@@ -123,6 +126,13 @@ class __SYCL_EXPORT device_image_plain {
123126

124127
template <class T>
125128
friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
129+
130+
backend ext_oneapi_get_backend_impl() const noexcept;
131+
132+
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
133+
std::pair<const std::byte *, const std::byte *>
134+
ext_oneapi_get_backend_content_view_impl() const;
135+
#endif // HAS_STD_BYTE
126136
};
127137
} // namespace detail
128138

@@ -145,6 +155,30 @@ class device_image : public detail::device_image_plain,
145155
return device_image_plain::has_kernel(KernelID, Dev);
146156
}
147157

158+
backend ext_oneapi_get_backend() const noexcept {
159+
return device_image_plain::ext_oneapi_get_backend_impl();
160+
}
161+
162+
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
163+
template <sycl::bundle_state T = State,
164+
typename = std::enable_if_t<T == bundle_state::executable>>
165+
std::vector<std::byte> ext_oneapi_get_backend_content() const {
166+
const auto view =
167+
device_image_plain::ext_oneapi_get_backend_content_view_impl();
168+
return std::vector(view.first, view.second);
169+
}
170+
171+
#ifdef __cpp_lib_span
172+
template <sycl::bundle_state T = State,
173+
typename = std::enable_if_t<T == bundle_state::executable>>
174+
std::span<const std::byte> ext_oneapi_get_backend_content_view() const {
175+
const auto view =
176+
device_image_plain::ext_oneapi_get_backend_content_view_impl();
177+
return std::span<const std::byte>{view.first, view.second};
178+
}
179+
#endif // __cpp_lib_span
180+
#endif // _HAS_STD_BYTE
181+
148182
private:
149183
device_image(detail::DeviceImageImplPtr Impl)
150184
: device_image_plain(std::move(Impl)) {}

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ inline namespace _V1 {
113113
#define SYCL_EXT_ONEAPI_WORK_GROUP_SCRATCH_MEMORY 1
114114
#define SYCL_EXT_ONEAPI_WORK_GROUP_STATIC 1
115115
#define SYCL_EXT_ONEAPI_NUM_COMPUTE_UNITS 1
116+
#define SYCL_EXT_ONEAPI_DEVICE_IMAGE_BACKEND_CONTENT 1
116117
// In progress yet
117118
#define SYCL_EXT_ONEAPI_ATOMIC16 0
118119

sycl/source/kernel_bundle.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,19 @@ ur_native_handle_t device_image_plain::getNative() const {
4545
return impl->getNative();
4646
}
4747

48+
backend device_image_plain::ext_oneapi_get_backend_impl() const noexcept {
49+
return impl->get_context().get_backend();
50+
}
51+
52+
std::pair<const std::byte *, const std::byte *>
53+
device_image_plain::ext_oneapi_get_backend_content_view_impl() const {
54+
return std::make_pair(
55+
reinterpret_cast<const std::byte *>(
56+
impl->get_bin_image_ref()->getRawData().BinaryStart),
57+
reinterpret_cast<const std::byte *>(
58+
impl->get_bin_image_ref()->getRawData().BinaryEnd));
59+
}
60+
4861
////////////////////////////
4962
///// kernel_bundle_plain
5063
///////////////////////////
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %{build} %cuda_options -o %t.out
2+
// RUN: %{run} %t.out
3+
// REQUIRES: cuda, cuda_dev_kit
4+
5+
#include <cuda.h>
6+
#include <sycl/backend.hpp>
7+
#include <sycl/detail/core.hpp>
8+
#include <vector>
9+
10+
int main() {
11+
sycl::queue q;
12+
sycl::context ctxt = q.get_context();
13+
sycl::kernel_id k_id = sycl::get_kernel_id<class mykernel>();
14+
auto bundle =
15+
sycl::get_kernel_bundle<sycl::bundle_state::executable>(ctxt, {k_id});
16+
assert(!bundle.empty());
17+
sycl::kernel krn = bundle.get_kernel(k_id);
18+
sycl::buffer<int> buf(sycl::range<1>(1));
19+
q.submit([&](sycl::handler &cgh) {
20+
sycl::accessor acc(buf, cgh);
21+
cgh.single_task<class mykernel>(krn, [=]() { acc[0] = 42; });
22+
});
23+
const auto img = *(bundle.begin());
24+
const auto bytes = img.ext_oneapi_get_backend_content();
25+
CUmodule m;
26+
CUresult result =
27+
cuModuleLoadData(&m, reinterpret_cast<const void *>(bytes.data()));
28+
assert(result == CUDA_SUCCESS);
29+
return 0;
30+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// REQUIRES: level_zero, level_zero_dev_kit, aspect-usm_shared_allocations
2+
// RUN: %{build} %level_zero_options -fno-sycl-dead-args-optimization -o %t.out
3+
// RUN: %{run} %t.out
4+
//
5+
#include <level_zero/ze_api.h>
6+
#include <sycl/detail/core.hpp>
7+
#include <sycl/ext/oneapi/backend/level_zero.hpp>
8+
#include <sycl/ext/oneapi/free_function_queries.hpp>
9+
#include <sycl/usm.hpp>
10+
#include <vector>
11+
12+
namespace syclext = sycl::ext::oneapi;
13+
namespace syclexp = sycl::ext::oneapi::experimental;
14+
15+
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
16+
(syclexp::nd_range_kernel<1>)) void iota(int *ptr) {
17+
size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
18+
ptr[id] = 42;
19+
}
20+
21+
int main() {
22+
sycl::queue q;
23+
sycl::context ctxt = q.get_context();
24+
sycl::device d = ctxt.get_devices()[0];
25+
// The following ifndef is required due to a number of limitations of free
26+
// function kernels. See CMPLRLLVM-61498.
27+
// TODO: Remove it once these limitations are no longer there.
28+
#ifndef __SYCL_DEVICE_ONLY__
29+
// First, run the kernel using the SYCL API.
30+
auto bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(ctxt);
31+
sycl::kernel_id iota_id = syclexp::get_kernel_id<iota>();
32+
sycl::kernel k_iota = bundle.get_kernel(iota_id);
33+
int *ptr = sycl::malloc_shared<int>(1, q);
34+
*ptr = 0;
35+
q.submit([&](sycl::handler &cgh) {
36+
cgh.set_args(ptr);
37+
cgh.parallel_for(sycl::nd_range{{1}, {1}}, k_iota);
38+
}).wait();
39+
40+
// Now, run the kernel by first getting its image as an executable,
41+
// making an L0 kernel out of it and then making a SYCL kernel out of
42+
// the L0 kernel. Run this kernel on the SYCL API and verify
43+
// that it has the same result as the kernel that was run directly on SYCL
44+
// API. First, get a kernel bundle that contains the kernel "iota".
45+
auto exe_bndl = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
46+
ctxt, {d},
47+
[&](const sycl::device_image<sycl::bundle_state::executable> &img) {
48+
return img.has_kernel(iota_id, d);
49+
});
50+
assert(!exe_bndl.empty());
51+
std::vector<std::byte> bytes;
52+
const sycl::device_image<sycl::bundle_state::executable> &img =
53+
*(exe_bndl.begin());
54+
bytes = img.ext_oneapi_get_backend_content();
55+
56+
auto ZeContext = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(ctxt);
57+
auto ZeDevice = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(d);
58+
59+
ze_result_t status;
60+
ze_module_desc_t moduleDesc = {
61+
ZE_STRUCTURE_TYPE_MODULE_DESC,
62+
nullptr,
63+
ZE_MODULE_FORMAT_IL_SPIRV,
64+
bytes.size(),
65+
reinterpret_cast<unsigned char *>(bytes.data()),
66+
nullptr,
67+
nullptr};
68+
ze_module_handle_t ZeModule;
69+
status = zeModuleCreate(ZeContext, ZeDevice, &moduleDesc, &ZeModule, nullptr);
70+
assert(status == ZE_RESULT_SUCCESS);
71+
72+
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0,
73+
"__sycl_kernel_iota"};
74+
ze_kernel_handle_t ZeKernel;
75+
status = zeKernelCreate(ZeModule, &kernelDesc, &ZeKernel);
76+
assert(status == ZE_RESULT_SUCCESS);
77+
sycl::kernel k_iota_twin =
78+
sycl::make_kernel<sycl::backend::ext_oneapi_level_zero>(
79+
{sycl::make_kernel_bundle<sycl::backend::ext_oneapi_level_zero,
80+
sycl::bundle_state::executable>({ZeModule},
81+
ctxt),
82+
ZeKernel},
83+
ctxt);
84+
int *ptr_twin = sycl::malloc_shared<int>(1, q);
85+
*ptr_twin = 1;
86+
q.submit([&](sycl::handler &cgh) {
87+
cgh.set_args(ptr_twin);
88+
cgh.parallel_for(sycl::nd_range{{1}, {1}}, k_iota_twin);
89+
}).wait();
90+
assert(*ptr_twin == *ptr);
91+
sycl::free(ptr, q);
92+
sycl::free(ptr_twin, q);
93+
#endif
94+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// REQUIRES: opencl, opencl_icd, aspect-usm_shared_allocations
2+
// RUN: %{build} %opencl_lib -fno-sycl-dead-args-optimization -o %t.out
3+
// RUN: %{run} %t.out
4+
//
5+
#include <sycl/backend.hpp>
6+
#include <sycl/detail/cl.h>
7+
#include <sycl/detail/core.hpp>
8+
#include <sycl/ext/oneapi/free_function_queries.hpp>
9+
#include <sycl/usm.hpp>
10+
#include <vector>
11+
12+
namespace syclext = sycl::ext::oneapi;
13+
namespace syclexp = sycl::ext::oneapi::experimental;
14+
15+
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
16+
(syclexp::nd_range_kernel<1>)) void iota(int *ptr) {
17+
size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
18+
ptr[id] = 42;
19+
}
20+
21+
int main() {
22+
sycl::queue q;
23+
sycl::context ctxt = q.get_context();
24+
sycl::device d = ctxt.get_devices()[0];
25+
// The following ifndef is required due to a number of limitations of free
26+
// function kernels. See CMPLRLLVM-61498.
27+
// TODO: Remove it once these limitations are no longer there.
28+
#ifndef __SYCL_DEVICE_ONLY__
29+
// First, run the kernel using the SYCL API.
30+
31+
auto bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(ctxt);
32+
sycl::kernel_id iota_id = syclexp::get_kernel_id<iota>();
33+
sycl::kernel k_iota = bundle.get_kernel(iota_id);
34+
35+
int *ptr = sycl::malloc_shared<int>(1, q);
36+
*ptr = 0;
37+
q.submit([&](sycl::handler &cgh) {
38+
cgh.set_args(ptr);
39+
cgh.parallel_for(sycl::nd_range{{1}, {1}}, k_iota);
40+
}).wait();
41+
// Now, run the kernel by first getting its image as an executable,
42+
// making an OCL kernel out of it and then making a SYCL kernel out of
43+
// the OCL kernel. Run this kernel on the SYCL API and verify
44+
// that it has the same result as the kernel that was run directly on SYCL
45+
// API. First, get a kernel bundle that contains the kernel "iota".
46+
auto exe_bndl = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
47+
ctxt, {d},
48+
[&](const sycl::device_image<sycl::bundle_state::executable> &img) {
49+
return img.has_kernel(iota_id, d);
50+
});
51+
assert(!exe_bndl.empty());
52+
std::vector<std::byte> bytes;
53+
const sycl::device_image<sycl::bundle_state::executable> &img =
54+
*(exe_bndl.begin());
55+
bytes = img.ext_oneapi_get_backend_content();
56+
std::cout << bytes.size() << std::endl;
57+
auto clContext = sycl::get_native<sycl::backend::opencl>(ctxt);
58+
auto clDevice = sycl::get_native<sycl::backend::opencl>(d);
59+
60+
cl_int status;
61+
auto clProgram = clCreateProgramWithIL(
62+
clContext, reinterpret_cast<unsigned char *>(bytes.data()), bytes.size(),
63+
&status);
64+
assert(status == CL_SUCCESS);
65+
status = clBuildProgram(clProgram, 1, &clDevice, "", nullptr, nullptr);
66+
assert(status == CL_SUCCESS);
67+
auto clKernel = clCreateKernel(clProgram, "__sycl_kernel_iota", &status);
68+
assert(status == CL_SUCCESS);
69+
sycl::kernel k_iota_twin =
70+
sycl::make_kernel<sycl::backend::opencl>(clKernel, ctxt);
71+
int *ptr_twin = sycl::malloc_shared<int>(1, q);
72+
*ptr_twin = 1;
73+
q.submit([&](sycl::handler &cgh) {
74+
cgh.set_args(ptr_twin);
75+
cgh.parallel_for(sycl::nd_range{{1}, {1}}, k_iota_twin);
76+
}).wait();
77+
assert(*ptr_twin == *ptr);
78+
sycl::free(ptr, q);
79+
sycl::free(ptr_twin, q);
80+
#endif
81+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: %{build} -std=c++20 -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include <sycl/detail/core.hpp>
5+
#include <sycl/kernel_bundle.hpp>
6+
#include <type_traits>
7+
8+
int main() {
9+
sycl::queue q;
10+
sycl::context ctxt = q.get_context();
11+
sycl::buffer<int> buf(sycl::range<1>(1));
12+
sycl::kernel_id k_id = sycl::get_kernel_id<class mykernel>();
13+
auto bundle =
14+
sycl::get_kernel_bundle<sycl::bundle_state::executable>(ctxt, {k_id});
15+
assert(!bundle.empty());
16+
sycl::kernel krn = bundle.get_kernel(k_id);
17+
q.submit([&](sycl::handler &cgh) {
18+
sycl::accessor acc(buf, cgh);
19+
cgh.single_task<class mykernel>(krn, [=]() { acc[0] = 42; });
20+
});
21+
sycl::backend backend;
22+
std::vector<std::byte> bytes;
23+
#ifdef __cpp_lib_span
24+
std::span<const std::byte> bytes_view;
25+
#endif
26+
for (const auto &img : bundle) {
27+
// Check that all 3 functions of the api return correct types and compile.
28+
// Furthermore, check that the backend corresponds to the backend of the
29+
// bundle Check that the view of the content is indeed equal to the
30+
// content.
31+
static_assert(std::is_same_v<decltype(img.ext_oneapi_get_backend()),
32+
decltype(backend)>);
33+
static_assert(std::is_same_v<decltype(img.ext_oneapi_get_backend_content()),
34+
decltype(bytes)>);
35+
backend = img.ext_oneapi_get_backend();
36+
assert(backend == bundle.get_backend());
37+
bytes = img.ext_oneapi_get_backend_content();
38+
#ifdef __cpp_lib_span
39+
static_assert(
40+
std ::is_same_v<decltype(img.ext_oneapi_get_backend_content_view()),
41+
decltype(bytes_view)>);
42+
bytes_view = img.ext_oneapi_get_backend_content_view();
43+
assert(bytes_view.size() == bytes.size());
44+
for (size_t i = 0; i < bytes.size(); ++i) {
45+
assert(bytes[i] == bytes_view[i]);
46+
}
47+
#endif
48+
}
49+
return 0;
50+
}

0 commit comments

Comments
 (0)