6
6
//
7
7
// ===----------------------------------------------------------------------===//
8
8
9
- #include < sycl/detail/cg_types.hpp>
9
+ #include < array>
10
+ #include < cstdint>
10
11
11
12
#include " ur_api.h"
12
13
13
14
#include " common.hpp"
14
15
#include " kernel.hpp"
15
16
#include " memory.hpp"
16
17
17
namespace native_cpu {
/// Launch-range descriptor that always holds three dimensions.
///
/// The first `WorkDim` entries of each range are copied from the caller's
/// arrays; the remaining entries are padded so that code consuming the
/// descriptor can index all three dimensions unconditionally.
struct NDRDescT {
  using RangeT = std::array<size_t, 3>;

  // Members are value-initialised so no element is ever read while
  // indeterminate (the padding loop below reads LocalSize[0]).
  uint32_t WorkDim = 0;
  RangeT GlobalOffset{};
  RangeT GlobalSize{};
  RangeT LocalSize{};

  /// \param WorkDim          number of dimensions supplied by the caller.
  /// \param GlobalWorkOffset per-dimension offsets; a null pointer is treated
  ///                         as an all-zero offset.
  /// \param GlobalWorkSize   per-dimension global sizes; must point to at
  ///                         least `WorkDim` elements.
  /// \param LocalWorkSize    per-dimension local sizes; a null pointer selects
  ///                         a local size of 1 in every supplied dimension.
  NDRDescT(uint32_t WorkDim, const size_t *GlobalWorkOffset,
           const size_t *GlobalWorkSize, const size_t *LocalWorkSize)
      // The parameter shadows the member, so the member has to be initialised
      // explicitly here; otherwise it would be left unset.
      : WorkDim(WorkDim) {
    const uint32_t MaxDims = static_cast<uint32_t>(GlobalSize.size());
    // Never read or write more than the arrays can hold, even if the caller
    // hands in an out-of-range dimension count.
    const uint32_t UsedDims = WorkDim < MaxDims ? WorkDim : MaxDims;

    for (uint32_t I = 0; I < UsedDims; I++) {
      GlobalOffset[I] = GlobalWorkOffset ? GlobalWorkOffset[I] : 0;
      GlobalSize[I] = GlobalWorkSize[I];
      LocalSize[I] = LocalWorkSize ? LocalWorkSize[I] : 1;
    }

    // Pad the unused dimensions: one global item at offset zero. The local
    // size is 1 when the first local size is non-zero and 0 otherwise.
    for (uint32_t I = UsedDims; I < MaxDims; I++) {
      GlobalSize[I] = 1;
      LocalSize[I] = LocalSize[0] ? 1 : 0;
      GlobalOffset[I] = 0;
    }
  }
};
} // namespace native_cpu
43
40
44
41
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch (
45
42
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
@@ -62,11 +59,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
62
59
63
60
// TODO: add proper error checking
64
61
// TODO: add proper event dep management
65
- sycl::detail:: NDRDescT ndr =
66
- getNDRDesc (workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize);
62
+ native_cpu:: NDRDescT ndr (workDim, pGlobalWorkOffset, pGlobalWorkSize,
63
+ pLocalWorkSize);
67
64
hKernel->handleLocalArgs ();
68
65
69
- __nativecpu_state state (ndr.GlobalSize [0 ], ndr.GlobalSize [1 ],
66
+ native_cpu::state state (ndr.GlobalSize [0 ], ndr.GlobalSize [1 ],
70
67
ndr.GlobalSize [2 ], ndr.LocalSize [0 ], ndr.LocalSize [1 ],
71
68
ndr.LocalSize [2 ], ndr.GlobalOffset [0 ],
72
69
ndr.GlobalOffset [1 ], ndr.GlobalOffset [2 ]);
@@ -124,7 +121,7 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
124
121
ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch,
125
122
size_t HostRowPitch, size_t HostSlicePitch,
126
123
typename std::conditional<IsRead, void *, const void *>::type DstMem,
127
- pi_uint32 , const ur_event_handle_t *, ur_event_handle_t *) {
124
+ uint32_t , const ur_event_handle_t *, ur_event_handle_t *) {
128
125
// TODO: events, blocking, check other constraints, performance optimizations
129
126
// More sharing with level_zero where possible
130
127
0 commit comments