Skip to content

Commit 13ff78e

Browse files
[SYCL] Make handler.hpp independent from kernel_bundle.hpp (#16012)
I was looking into ways of splitting `sycl.hpp` into finer-grained headers (with the intention of proposing such a split as a KHR extension or SYCL-Next feature), and I decided to measure the impact of individual header files on compilation time. I started my investigation with `kernel_bundle.hpp`. Looking at [zjin-lcf/HeCBench](https://github.com/zjin-lcf/HeCBench), I do not see any benchmarks that use it, so it seems like a good candidate for being an opt-in header. To do the measurements, I decided to drop `#include <kernel_bundle.hpp>` from `sycl.hpp` and then compare the compilation time of two empty files including `sycl.hpp` (the modified one and the original one). As it turns out, dropping that include is not easy, because so many other headers depend on it. I eventually succeeded, and I see a ~200ms improvement in device compilation time when `kernel_bundle.hpp` is not included at all. However, for my experiments I also made some other hacks which I'm unable to push into the repo, such as dropping backend-specific headers as well. Specifically, the Level Zero (L0) backend interop uses `kernel_bundle` as a struct member in some of its input and return types, which requires a full definition of `kernel_bundle`. The SYCL spec ([6.3.7. Adding a backend](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#_adding_a_backend)) allows backend interop APIs to be put into separate headers, and I think that we should take advantage of this in the future and drop those headers from `sycl.hpp` (ideally without causing many regressions).
1 parent ac207a1 commit 13ff78e

File tree

81 files changed

+150
-52
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+150
-52
lines changed

sycl/include/sycl/backend.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include <sycl/exception.hpp> // for make_error_code
2424
#include <sycl/feature_test.hpp> // for SYCL_BACKEND_OP...
2525
#include <sycl/image.hpp> // for image, image_al...
26-
#include <sycl/kernel_bundle.hpp> // for kernel_bundle
2726
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
2827
#include <sycl/platform.hpp> // for platform, get_n...
2928
#include <sycl/queue.hpp> // for queue, get_native
@@ -56,6 +55,7 @@
5655
namespace sycl {
5756
inline namespace _V1 {
5857

58+
template <bundle_state State> class kernel_bundle;
5959
class property_list;
6060

6161
namespace detail {
@@ -141,13 +141,15 @@ auto get_native(const queue &Obj) -> backend_return_t<BackendName, queue> {
141141
int32_t IsImmCmdList;
142142
ur_native_handle_t Handle = Obj.getNative(IsImmCmdList);
143143
backend_return_t<BackendName, queue> RetVal;
144+
#if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO
144145
if constexpr (BackendName == backend::ext_oneapi_level_zero)
145146
RetVal = IsImmCmdList
146147
? backend_return_t<BackendName, queue>{reinterpret_cast<
147148
ze_command_list_handle_t>(Handle)}
148149
: backend_return_t<BackendName, queue>{
149150
reinterpret_cast<ze_command_queue_handle_t>(Handle)};
150151
else
152+
#endif
151153
RetVal = reinterpret_cast<backend_return_t<BackendName, queue>>(Handle);
152154

153155
return RetVal;

sycl/include/sycl/detail/backend_traits_cuda.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include <sycl/detail/backend_traits.hpp>
1919
#include <sycl/device.hpp>
2020
#include <sycl/event.hpp>
21-
#include <sycl/kernel_bundle.hpp>
2221
#include <sycl/queue.hpp>
2322

2423
typedef int CUdevice;

sycl/include/sycl/detail/backend_traits_hip.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include <sycl/detail/backend_traits.hpp>
1919
#include <sycl/device.hpp>
2020
#include <sycl/event.hpp>
21-
#include <sycl/kernel_bundle.hpp>
2221
#include <sycl/queue.hpp>
2322

2423
typedef int HIPdevice;

sycl/include/sycl/detail/backend_traits_level_zero.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,12 @@
2121
#include <sycl/device.hpp> // for device
2222
#include <sycl/event.hpp> // for event
2323
#include <sycl/ext/oneapi/backend/level_zero_ownership.hpp> // for ownership
24-
#include <sycl/handler.hpp> // for buffer
2524
#include <sycl/image.hpp> // for image
2625
#include <sycl/kernel.hpp> // for kernel
2726
#include <sycl/kernel_bundle.hpp> // for kernel_b...
2827
#include <sycl/kernel_bundle_enums.hpp> // for bundle_s...
2928
#include <sycl/platform.hpp> // for platform
3029
#include <sycl/property_list.hpp> // for property...
31-
#include <sycl/queue.hpp> // for queue
3230
#include <sycl/range.hpp> // for range
3331

3432
#include <variant> // for variant
@@ -46,6 +44,8 @@ typedef struct _ze_module_handle_t *ze_module_handle_t;
4644

4745
namespace sycl {
4846
inline namespace _V1 {
47+
class queue;
48+
4949
namespace detail {
5050

5151
// Forward declarations

sycl/include/sycl/detail/backend_traits_opencl.hpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,20 @@
2222
#include <sycl/detail/ur.hpp> // for assertion and ur handles
2323
#include <sycl/device.hpp> // for device
2424
#include <sycl/event.hpp> // for event
25-
#include <sycl/handler.hpp> // for buffer
2625
#include <sycl/kernel.hpp> // for kernel
27-
#include <sycl/kernel_bundle.hpp> // for kernel_bundle
2826
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
2927
#include <sycl/platform.hpp> // for platform
30-
#include <sycl/queue.hpp> // for queue
3128

3229
#include <vector> // for vector
3330

3431
namespace sycl {
3532
inline namespace _V1 {
33+
34+
template <bundle_state State> class kernel_bundle;
35+
class queue;
36+
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
37+
class buffer;
38+
3639
namespace detail {
3740

3841
// TODO the interops for context, device, event, platform and program
@@ -54,13 +57,15 @@ template <> struct interop<backend::opencl, platform> {
5457
using type = cl_platform_id;
5558
};
5659

57-
template <typename DataT, int Dimensions, typename AllocatorT>
58-
struct BackendInput<backend::opencl, buffer<DataT, Dimensions, AllocatorT>> {
60+
template <typename DataT, int Dimensions, typename AllocatorT, typename Enable>
61+
struct BackendInput<backend::opencl,
62+
buffer<DataT, Dimensions, AllocatorT, Enable>> {
5963
using type = cl_mem;
6064
};
6165

62-
template <typename DataT, int Dimensions, typename AllocatorT>
63-
struct BackendReturn<backend::opencl, buffer<DataT, Dimensions, AllocatorT>> {
66+
template <typename DataT, int Dimensions, typename AllocatorT, typename Enable>
67+
struct BackendReturn<backend::opencl,
68+
buffer<DataT, Dimensions, AllocatorT, Enable>> {
6469
using type = std::vector<cl_mem>;
6570
};
6671

sycl/include/sycl/ext/oneapi/backend/level_zero.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <sycl/ext/oneapi/backend/level_zero_ownership.hpp> // for ownership
2525
#include <sycl/image.hpp> // for image
2626
#include <sycl/kernel.hpp> // for kernel
27-
#include <sycl/kernel_bundle.hpp> // for kernel_bu...
2827
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
2928
#include <sycl/platform.hpp> // for platform
3029
#include <sycl/properties/image_properties.hpp> // for image
@@ -39,6 +38,9 @@
3938

4039
namespace sycl {
4140
inline namespace _V1 {
41+
42+
template <bundle_state State> class kernel_bundle;
43+
4244
namespace ext::oneapi::level_zero::detail {
4345
__SYCL_EXPORT device make_device(const platform &Platform,
4446
ur_native_handle_t NativeHandle);

sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include <sycl/detail/backend_traits.hpp>
2020
#include <sycl/device.hpp>
2121
#include <sycl/event.hpp>
22-
#include <sycl/kernel_bundle.hpp>
2322
#include <sycl/queue.hpp>
2423

2524
#include <vector>

sycl/include/sycl/ext/oneapi/get_kernel_info.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,20 @@
1111
#include <sycl/detail/export.hpp>
1212
#include <sycl/detail/info_desc_helpers.hpp>
1313
#include <sycl/device.hpp>
14+
#include <sycl/kernel_bundle_enums.hpp>
1415
#include <sycl/queue.hpp>
1516

17+
#include <vector>
18+
1619
namespace sycl {
1720
inline namespace _V1 {
21+
22+
template <bundle_state State> class kernel_bundle;
23+
24+
template <typename KernelName, bundle_state State>
25+
kernel_bundle<State> get_kernel_bundle(const context &,
26+
const std::vector<device> &);
27+
1828
namespace ext::oneapi {
1929

2030
template <typename KernelName, typename Param>

sycl/include/sycl/ext/oneapi/owner_less.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <sycl/event.hpp> // for event
1717
#include <sycl/ext/oneapi/weak_object.hpp> // for weak_object
1818
#include <sycl/kernel.hpp> // for kernel
19-
#include <sycl/kernel_bundle.hpp> // for kernel_id
2019
#include <sycl/kernel_bundle_enums.hpp> // for bundle_state
2120
#include <sycl/platform.hpp> // for platform
2221
#include <sycl/properties/image_properties.hpp> // for sampled_i...
@@ -25,6 +24,10 @@
2524

2625
namespace sycl {
2726
inline namespace _V1 {
27+
class kernel_id;
28+
template <bundle_state State> class kernel_bundle;
29+
template <bundle_state State> class device_image;
30+
2831
namespace ext::oneapi {
2932

3033
namespace detail {

sycl/include/sycl/handler.hpp

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
#include <sycl/id.hpp>
4444
#include <sycl/item.hpp>
4545
#include <sycl/kernel.hpp>
46-
#include <sycl/kernel_bundle.hpp>
4746
#include <sycl/kernel_bundle_enums.hpp>
4847
#include <sycl/kernel_handler.hpp>
4948
#include <sycl/nd_item.hpp>
@@ -141,6 +140,7 @@ inline namespace _V1 {
141140

142141
// Forward declaration
143142

143+
template <bundle_state State> class kernel_bundle;
144144
class handler;
145145
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
146146
class buffer;
@@ -162,6 +162,7 @@ class graph_impl;
162162
} // namespace ext::oneapi::experimental::detail
163163
namespace detail {
164164

165+
class kernel_bundle_impl;
165166
class work_group_memory_impl;
166167
class handler_impl;
167168
class kernel_impl;
@@ -1710,36 +1711,15 @@ class __SYCL_EXPORT handler {
17101711
handler &operator=(const handler &) = delete;
17111712
handler &operator=(handler &&) = delete;
17121713

1714+
// Out-of-class definition within kernel_bundle.hpp
17131715
template <auto &SpecName>
17141716
void set_specialization_constant(
1715-
typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {
1716-
1717-
setStateSpecConstSet();
1718-
1719-
std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
1720-
getOrInsertHandlerKernelBundle(/*Insert=*/true);
1721-
1722-
detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1723-
KernelBundleImplPtr)
1724-
.set_specialization_constant<SpecName>(Value);
1725-
}
1717+
typename std::remove_reference_t<decltype(SpecName)>::value_type Value);
17261718

1719+
// Out-of-class definition within kernel_bundle.hpp
17271720
template <auto &SpecName>
17281721
typename std::remove_reference_t<decltype(SpecName)>::value_type
1729-
get_specialization_constant() const {
1730-
1731-
if (isStateExplicitKernelBundle())
1732-
throw sycl::exception(make_error_code(errc::invalid),
1733-
"Specialization constants cannot be read after "
1734-
"explicitly setting the used kernel bundle");
1735-
1736-
std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
1737-
getOrInsertHandlerKernelBundle(/*Insert=*/true);
1738-
1739-
return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1740-
KernelBundleImplPtr)
1741-
.get_specialization_constant<SpecName>();
1742-
}
1722+
get_specialization_constant() const;
17431723

17441724
void
17451725
use_kernel_bundle(const kernel_bundle<bundle_state::executable> &ExecBundle);

0 commit comments

Comments
 (0)