Skip to content

Commit 15f00b8

Browse files
authored
[SYCL] Avoid unnecessary queue copies in enqueue functions (#17963)
Reduce the number of `sycl::queue` copies made when enqueueing commands through the free-function enqueue API.
1 parent 24c4df9 commit 15f00b8

File tree

1 file changed

+27
-18
lines changed

1 file changed

+27
-18
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ template <typename CommandGroupFunc>
124124
void submit(queue Q, CommandGroupFunc &&CGF,
125125
const sycl::detail::code_location &CodeLoc =
126126
sycl::detail::code_location::current()) {
127-
submit(Q, empty_properties_t{}, std::forward<CommandGroupFunc>(CGF), CodeLoc);
127+
submit(std::move(Q), empty_properties_t{},
128+
std::forward<CommandGroupFunc>(CGF), CodeLoc);
128129
}
129130

130131
template <typename CommandGroupFunc, typename PropertiesT>
@@ -139,7 +140,7 @@ template <typename CommandGroupFunc>
139140
event submit_with_event(queue Q, CommandGroupFunc &&CGF,
140141
const sycl::detail::code_location &CodeLoc =
141142
sycl::detail::code_location::current()) {
142-
return submit_with_event(Q, empty_properties_t{},
143+
return submit_with_event(std::move(Q), empty_properties_t{},
143144
std::forward<CommandGroupFunc>(CGF), CodeLoc);
144145
}
145146

@@ -153,8 +154,8 @@ void single_task(queue Q, const KernelType &KernelObj,
153154
const sycl::detail::code_location &CodeLoc =
154155
sycl::detail::code_location::current()) {
155156
submit(
156-
Q, [&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); },
157-
CodeLoc);
157+
std::move(Q),
158+
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); }, CodeLoc);
158159
}
159160

160161
template <typename... ArgsT>
@@ -165,7 +166,7 @@ void single_task(handler &CGH, const kernel &KernelObj, ArgsT &&...Args) {
165166

166167
template <typename... ArgsT>
167168
void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
168-
submit(Q, [&](handler &CGH) {
169+
submit(std::move(Q), [&](handler &CGH) {
169170
single_task(CGH, KernelObj, std::forward<ArgsT>(Args)...);
170171
});
171172
}
@@ -183,7 +184,7 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
183184
typename KernelType, typename... ReductionsT>
184185
void parallel_for(queue Q, range<Dimensions> Range, const KernelType &KernelObj,
185186
ReductionsT &&...Reductions) {
186-
submit(Q, [&](handler &CGH) {
187+
submit(std::move(Q), [&](handler &CGH) {
187188
parallel_for<KernelName>(CGH, Range, KernelObj,
188189
std::forward<ReductionsT>(Reductions)...);
189190
});
@@ -206,7 +207,7 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
206207
typename Properties, typename KernelType, typename... ReductionsT>
207208
void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
208209
const KernelType &KernelObj, ReductionsT &&...Reductions) {
209-
submit(Q, [&](handler &CGH) {
210+
submit(std::move(Q), [&](handler &CGH) {
210211
parallel_for<KernelName>(CGH, Config, KernelObj,
211212
std::forward<ReductionsT>(Reductions)...);
212213
});
@@ -222,7 +223,7 @@ void parallel_for(handler &CGH, range<Dimensions> Range,
222223
template <int Dimensions, typename... ArgsT>
223224
void parallel_for(queue Q, range<Dimensions> Range, const kernel &KernelObj,
224225
ArgsT &&...Args) {
225-
submit(Q, [&](handler &CGH) {
226+
submit(std::move(Q), [&](handler &CGH) {
226227
parallel_for(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
227228
});
228229
}
@@ -242,7 +243,7 @@ void parallel_for(handler &CGH,
242243
template <int Dimensions, typename Properties, typename... ArgsT>
243244
void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
244245
const kernel &KernelObj, ArgsT &&...Args) {
245-
submit(Q, [&](handler &CGH) {
246+
submit(std::move(Q), [&](handler &CGH) {
246247
parallel_for(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
247248
});
248249
}
@@ -259,7 +260,7 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
259260
typename KernelType, typename... ReductionsT>
260261
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
261262
ReductionsT &&...Reductions) {
262-
submit(Q, [&](handler &CGH) {
263+
submit(std::move(Q), [&](handler &CGH) {
263264
nd_launch<KernelName>(CGH, Range, KernelObj,
264265
std::forward<ReductionsT>(Reductions)...);
265266
});
@@ -283,7 +284,7 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
283284
typename Properties, typename KernelType, typename... ReductionsT>
284285
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
285286
const KernelType &KernelObj, ReductionsT &&...Reductions) {
286-
submit(Q, [&](handler &CGH) {
287+
submit(std::move(Q), [&](handler &CGH) {
287288
nd_launch<KernelName>(CGH, Config, KernelObj,
288289
std::forward<ReductionsT>(Reductions)...);
289290
});
@@ -299,7 +300,7 @@ void nd_launch(handler &CGH, nd_range<Dimensions> Range,
299300
template <int Dimensions, typename... ArgsT>
300301
void nd_launch(queue Q, nd_range<Dimensions> Range, const kernel &KernelObj,
301302
ArgsT &&...Args) {
302-
submit(Q, [&](handler &CGH) {
303+
submit(std::move(Q), [&](handler &CGH) {
303304
nd_launch(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
304305
});
305306
}
@@ -319,7 +320,7 @@ void nd_launch(handler &CGH,
319320
template <int Dimensions, typename Properties, typename... ArgsT>
320321
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
321322
const kernel &KernelObj, ArgsT &&...Args) {
322-
submit(Q, [&](handler &CGH) {
323+
submit(std::move(Q), [&](handler &CGH) {
323324
nd_launch(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
324325
});
325326
}
@@ -341,7 +342,9 @@ template <typename T>
341342
void copy(queue Q, const T *Src, T *Dest, size_t Count,
342343
const sycl::detail::code_location &CodeLoc =
343344
sycl::detail::code_location::current()) {
344-
submit(Q, [&](handler &CGH) { copy<T>(CGH, Src, Dest, Count); }, CodeLoc);
345+
submit(
346+
std::move(Q), [&](handler &CGH) { copy<T>(CGH, Src, Dest, Count); },
347+
CodeLoc);
345348
}
346349

347350
inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) {
@@ -361,7 +364,9 @@ template <typename T>
361364
void fill(sycl::queue Q, T *Ptr, const T &Pattern, size_t Count,
362365
const sycl::detail::code_location &CodeLoc =
363366
sycl::detail::code_location::current()) {
364-
submit(Q, [&](handler &CGH) { fill<T>(CGH, Ptr, Pattern, Count); }, CodeLoc);
367+
submit(
368+
std::move(Q), [&](handler &CGH) { fill<T>(CGH, Ptr, Pattern, Count); },
369+
CodeLoc);
365370
}
366371

367372
inline void prefetch(handler &CGH, void *Ptr, size_t NumBytes) {
@@ -371,7 +376,9 @@ inline void prefetch(handler &CGH, void *Ptr, size_t NumBytes) {
371376
inline void prefetch(queue Q, void *Ptr, size_t NumBytes,
372377
const sycl::detail::code_location &CodeLoc =
373378
sycl::detail::code_location::current()) {
374-
submit(Q, [&](handler &CGH) { prefetch(CGH, Ptr, NumBytes); }, CodeLoc);
379+
submit(
380+
std::move(Q), [&](handler &CGH) { prefetch(CGH, Ptr, NumBytes); },
381+
CodeLoc);
375382
}
376383

377384
inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) {
@@ -386,7 +393,7 @@ inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); }
386393

387394
inline void barrier(queue Q, const sycl::detail::code_location &CodeLoc =
388395
sycl::detail::code_location::current()) {
389-
submit(Q, [&](handler &CGH) { barrier(CGH); }, CodeLoc);
396+
submit(std::move(Q), [&](handler &CGH) { barrier(CGH); }, CodeLoc);
390397
}
391398

392399
inline void partial_barrier(handler &CGH, const std::vector<event> &Events) {
@@ -396,7 +403,9 @@ inline void partial_barrier(handler &CGH, const std::vector<event> &Events) {
396403
inline void partial_barrier(queue Q, const std::vector<event> &Events,
397404
const sycl::detail::code_location &CodeLoc =
398405
sycl::detail::code_location::current()) {
399-
submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); }, CodeLoc);
406+
submit(
407+
std::move(Q), [&](handler &CGH) { partial_barrier(CGH, Events); },
408+
CodeLoc);
400409
}
401410

402411
inline void execute_graph(queue Q, command_graph<graph_state::executable> &G,

0 commit comments

Comments
 (0)