Skip to content

Commit 0868c6e

Browse files
PietroGhgmartygrant
authored andcommitted
[SYCL] [NATIVECPU] Remove dependencies to sycl in UR adapter (#11685)
This PR removes dependencies to the sycl headers/library in the Native CPU UR adapter. `sycl/include/sycl/detail/native_cpu.hpp` has been moved to `sycl/plugins/unified_runtime/ur/adapters/native_cpu/nativecpu_state.hpp`, and the definitions of work item builtins has been moved from that header to the compiler, which now emits them in the `PrepareSYCLNativeCPUPass`.
1 parent 833e4e3 commit 0868c6e

File tree

3 files changed

+96
-34
lines changed

3 files changed

+96
-34
lines changed

sycl/plugins/unified_runtime/ur/adapters/native_cpu/enqueue.cpp

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,37 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <sycl/detail/cg_types.hpp>
9+
#include <array>
10+
#include <cstdint>
1011

1112
#include "ur_api.h"
1213

1314
#include "common.hpp"
1415
#include "kernel.hpp"
1516
#include "memory.hpp"
1617

17-
sycl::detail::NDRDescT getNDRDesc(uint32_t WorkDim,
18-
const size_t *GlobalWorkOffset,
19-
const size_t *GlobalWorkSize,
20-
const size_t *LocalWorkSize) {
21-
// Todo: we flip indexes here, I'm not sure we should, if we don't we need to
22-
// un-flip them in the spirv builtins definitions as well
23-
sycl::detail::NDRDescT Res;
24-
switch (WorkDim) {
25-
case 1:
26-
Res.set<1>(sycl::nd_range<1>({GlobalWorkSize[0]}, {LocalWorkSize[0]},
27-
{GlobalWorkOffset[0]}));
28-
break;
29-
case 2:
30-
Res.set<2>(sycl::nd_range<2>({GlobalWorkSize[0], GlobalWorkSize[1]},
31-
{LocalWorkSize[0], LocalWorkSize[1]},
32-
{GlobalWorkOffset[0], GlobalWorkOffset[1]}));
33-
break;
34-
case 3:
35-
Res.set<3>(sycl::nd_range<3>(
36-
{GlobalWorkSize[0], GlobalWorkSize[1], GlobalWorkSize[2]},
37-
{LocalWorkSize[0], LocalWorkSize[1], LocalWorkSize[2]},
38-
{GlobalWorkOffset[0], GlobalWorkOffset[1], GlobalWorkOffset[2]}));
39-
break;
18+
namespace native_cpu {
19+
struct NDRDescT {
20+
using RangeT = std::array<size_t, 3>;
21+
uint32_t WorkDim;
22+
RangeT GlobalOffset;
23+
RangeT GlobalSize;
24+
RangeT LocalSize;
25+
NDRDescT(uint32_t WorkDim, const size_t *GlobalWorkOffset,
26+
const size_t *GlobalWorkSize, const size_t *LocalWorkSize) {
27+
for (uint32_t I = 0; I < WorkDim; I++) {
28+
GlobalOffset[I] = GlobalWorkOffset[I];
29+
GlobalSize[I] = GlobalWorkSize[I];
30+
LocalSize[I] = LocalWorkSize[I];
31+
}
32+
for (uint32_t I = WorkDim; I < 3; I++) {
33+
GlobalSize[I] = 1;
34+
LocalSize[I] = LocalSize[0] ? 1 : 0;
35+
GlobalOffset[I] = 0;
36+
}
4037
}
41-
return Res;
42-
}
38+
};
39+
} // namespace native_cpu
4340

4441
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
4542
ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
@@ -62,11 +59,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
6259

6360
// TODO: add proper error checking
6461
// TODO: add proper event dep management
65-
sycl::detail::NDRDescT ndr =
66-
getNDRDesc(workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize);
62+
native_cpu::NDRDescT ndr(workDim, pGlobalWorkOffset, pGlobalWorkSize,
63+
pLocalWorkSize);
6764
hKernel->handleLocalArgs();
6865

69-
__nativecpu_state state(ndr.GlobalSize[0], ndr.GlobalSize[1],
66+
native_cpu::state state(ndr.GlobalSize[0], ndr.GlobalSize[1],
7067
ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1],
7168
ndr.LocalSize[2], ndr.GlobalOffset[0],
7269
ndr.GlobalOffset[1], ndr.GlobalOffset[2]);
@@ -124,7 +121,7 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
124121
ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch,
125122
size_t HostRowPitch, size_t HostSlicePitch,
126123
typename std::conditional<IsRead, void *, const void *>::type DstMem,
127-
pi_uint32, const ur_event_handle_t *, ur_event_handle_t *) {
124+
uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
128125
// TODO: events, blocking, check other constraints, performance optimizations
129126
// More sharing with level_zero where possible
130127

sycl/plugins/unified_runtime/ur/adapters/native_cpu/kernel.hpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,21 @@
99
#pragma once
1010

1111
#include "common.hpp"
12-
#include <sycl/detail/native_cpu.hpp>
12+
#include "nativecpu_state.hpp"
1313
#include <ur_api.h>
1414

15-
using nativecpu_kernel_t = void(const sycl::detail::NativeCPUArgDesc *,
16-
__nativecpu_state *);
15+
namespace native_cpu {
16+
17+
struct NativeCPUArgDesc {
18+
void *MPtr;
19+
20+
NativeCPUArgDesc(void *Ptr) : MPtr(Ptr){};
21+
};
22+
23+
} // namespace native_cpu
24+
25+
using nativecpu_kernel_t = void(const native_cpu::NativeCPUArgDesc *,
26+
native_cpu::state *);
1727
using nativecpu_ptr_t = nativecpu_kernel_t *;
1828
using nativecpu_task_t = std::function<nativecpu_kernel_t>;
1929

@@ -31,7 +41,7 @@ struct ur_kernel_handle_t_ : RefCounted {
3141

3242
const char *_name;
3343
nativecpu_task_t _subhandler;
34-
std::vector<sycl::detail::NativeCPUArgDesc> _args;
44+
std::vector<native_cpu::NativeCPUArgDesc> _args;
3545
std::vector<local_arg_info_t> _localArgInfo;
3646

3747
// To be called before enqueing the kernel.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===-------------- nativecpu_state.hpp - SYCL Native CPU state -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===-----------------------------------------------------------------===//
8+
#pragma once
9+
#include <cstdlib>
10+
namespace native_cpu {
11+
12+
struct state {
13+
size_t MGlobal_id[3];
14+
size_t MGlobal_range[3];
15+
size_t MWorkGroup_size[3];
16+
size_t MWorkGroup_id[3];
17+
size_t MLocal_id[3];
18+
size_t MNumGroups[3];
19+
size_t MGlobalOffset[3];
20+
state(size_t globalR0, size_t globalR1, size_t globalR2, size_t localR0,
21+
size_t localR1, size_t localR2, size_t globalO0, size_t globalO1,
22+
size_t globalO2)
23+
: MGlobal_range{globalR0, globalR1, globalR2},
24+
MWorkGroup_size{localR0, localR1, localR2},
25+
MNumGroups{globalR0 / localR0, globalR1 / localR1, globalR2 / localR2},
26+
MGlobalOffset{globalO0, globalO1, globalO2} {
27+
MGlobal_id[0] = 0;
28+
MGlobal_id[1] = 0;
29+
MGlobal_id[2] = 0;
30+
MWorkGroup_id[0] = 0;
31+
MWorkGroup_id[1] = 0;
32+
MWorkGroup_id[2] = 0;
33+
MLocal_id[0] = 0;
34+
MLocal_id[1] = 0;
35+
MLocal_id[2] = 0;
36+
}
37+
38+
void update(size_t group0, size_t group1, size_t group2, size_t local0,
39+
size_t local1, size_t local2) {
40+
MWorkGroup_id[0] = group0;
41+
MWorkGroup_id[1] = group1;
42+
MWorkGroup_id[2] = group2;
43+
MLocal_id[0] = local0;
44+
MLocal_id[1] = local1;
45+
MLocal_id[2] = local2;
46+
MGlobal_id[0] =
47+
MWorkGroup_size[0] * MWorkGroup_id[0] + MLocal_id[0] + MGlobalOffset[0];
48+
MGlobal_id[1] =
49+
MWorkGroup_size[1] * MWorkGroup_id[1] + MLocal_id[1] + MGlobalOffset[1];
50+
MGlobal_id[2] =
51+
MWorkGroup_size[2] * MWorkGroup_id[2] + MLocal_id[2] + MGlobalOffset[2];
52+
}
53+
};
54+
55+
} // namespace native_cpu

0 commit comments

Comments
 (0)