Skip to content

Commit 382b27d

Browse files
uwedolinsky, martygrant
authored and committed
[SYCL][NATIVECPU] Initial support for memory copy operations (#10808)
This PR adds some initial support for memory copy operations to the NativeCPU device. It also fixes global offsets and the query for queue properties in the NativeCPU PI/UR adapter.
1 parent 28ec448 commit 382b27d

File tree

2 files changed

+77
-78
lines changed

2 files changed

+77
-78
lines changed

sycl/plugins/unified_runtime/ur/adapters/native_cpu/device.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
121121
case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION:
122122
return ReturnValue("");
123123
case UR_DEVICE_INFO_QUEUE_PROPERTIES:
124-
return ReturnValue(ur_queue_properties_t{});
124+
return ReturnValue(
125+
ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE |
126+
UR_QUEUE_FLAG_PROFILING_ENABLE));
125127
case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
126128
struct {
127129
size_t Arr[3];

sycl/plugins/unified_runtime/ur/adapters/native_cpu/enqueue.cpp

Lines changed: 74 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -115,39 +115,77 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
115115
DIE_NO_IMPLEMENTATION;
116116
}
117117

118+
template <bool IsRead>
119+
static inline ur_result_t enqueueMemBufferReadWriteRect_impl(
120+
ur_queue_handle_t, ur_mem_handle_t Buff, bool,
121+
ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset,
122+
ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch,
123+
size_t HostRowPitch, size_t HostSlicePitch,
124+
typename std::conditional<IsRead, void *, const void *>::type DstMem,
125+
pi_uint32, const ur_event_handle_t *, ur_event_handle_t *) {
126+
// TODO: events, blocking, check other constraints, performance optimizations
127+
// More sharing with level_zero where possible
128+
129+
if (BufferRowPitch == 0)
130+
BufferRowPitch = region.width;
131+
if (BufferSlicePitch == 0)
132+
BufferSlicePitch = BufferRowPitch * region.height;
133+
if (HostRowPitch == 0)
134+
HostRowPitch = region.width;
135+
if (HostSlicePitch == 0)
136+
HostSlicePitch = HostRowPitch * region.height;
137+
for (size_t w = 0; w < region.width; w++)
138+
for (size_t h = 0; h < region.height; h++)
139+
for (size_t d = 0; d < region.depth; d++) {
140+
size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch +
141+
(h + BufferOffset.y) * BufferRowPitch + w +
142+
BufferOffset.x;
143+
size_t host_origin = (d + HostOffset.z) * HostSlicePitch +
144+
(h + HostOffset.y) * HostRowPitch + w +
145+
HostOffset.x;
146+
int8_t &host_mem = ur_cast<int8_t *>(DstMem)[host_origin];
147+
int8_t &buff_mem = ur_cast<int8_t *>(Buff->_mem)[buff_orign];
148+
if (IsRead)
149+
host_mem = buff_mem;
150+
else
151+
buff_mem = host_mem;
152+
}
153+
return UR_RESULT_SUCCESS;
154+
}
155+
156+
static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr,
157+
const void *SrcPtr, size_t Size,
158+
uint32_t numEventsInWaitList,
159+
const ur_event_handle_t *EventWaitList,
160+
ur_event_handle_t *Event) {
161+
// todo: non-blocking, events, UR integration
162+
std::ignore = hQueue;
163+
std::ignore = numEventsInWaitList;
164+
if (SrcPtr != DstPtr && Size)
165+
memmove(DstPtr, SrcPtr, Size);
166+
return UR_RESULT_SUCCESS;
167+
}
168+
118169
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
119170
ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
120171
size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
121172
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
122-
std::ignore = hQueue;
123-
std::ignore = hBuffer;
124173
std::ignore = blockingRead;
125-
std::ignore = offset;
126-
std::ignore = size;
127-
std::ignore = pDst;
128-
std::ignore = numEventsInWaitList;
129-
std::ignore = phEventWaitList;
130-
std::ignore = phEvent;
131174

132-
// TODO: is it ok to have this as no-op?
133-
return UR_RESULT_SUCCESS;
175+
void *FromPtr = /*Src*/ hBuffer->_mem + offset;
176+
return doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList,
177+
phEventWaitList, phEvent);
134178
}
135179

136180
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
137181
ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
138182
size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
139183
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
140-
std::ignore = hQueue;
141-
std::ignore = hBuffer;
142184
std::ignore = blockingWrite;
143-
std::ignore = offset;
144-
std::ignore = size;
145-
std::ignore = pSrc;
146-
std::ignore = numEventsInWaitList;
147-
std::ignore = phEventWaitList;
148-
std::ignore = phEvent;
149185

150-
DIE_NO_IMPLEMENTATION;
186+
void *ToPtr = hBuffer->_mem + offset;
187+
return doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList,
188+
phEventWaitList, phEvent);
151189
}
152190

153191
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
@@ -157,22 +195,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
157195
size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
158196
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
159197
ur_event_handle_t *phEvent) {
160-
std::ignore = hQueue;
161-
std::ignore = hBuffer;
162-
std::ignore = blockingRead;
163-
std::ignore = bufferOrigin;
164-
std::ignore = hostOrigin;
165-
std::ignore = region;
166-
std::ignore = bufferRowPitch;
167-
std::ignore = bufferSlicePitch;
168-
std::ignore = hostRowPitch;
169-
std::ignore = hostSlicePitch;
170-
std::ignore = pDst;
171-
std::ignore = numEventsInWaitList;
172-
std::ignore = phEventWaitList;
173-
std::ignore = phEvent;
174-
175-
DIE_NO_IMPLEMENTATION;
198+
return enqueueMemBufferReadWriteRect_impl<true /*read*/>(
199+
hQueue, hBuffer, blockingRead, bufferOrigin, hostOrigin, region,
200+
bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst,
201+
numEventsInWaitList, phEventWaitList, phEvent);
176202
}
177203

178204
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
@@ -182,40 +208,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
182208
size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
183209
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
184210
ur_event_handle_t *phEvent) {
185-
std::ignore = hQueue;
186-
std::ignore = hBuffer;
187-
std::ignore = blockingWrite;
188-
std::ignore = bufferOrigin;
189-
std::ignore = hostOrigin;
190-
std::ignore = region;
191-
std::ignore = bufferRowPitch;
192-
std::ignore = bufferSlicePitch;
193-
std::ignore = hostRowPitch;
194-
std::ignore = hostSlicePitch;
195-
std::ignore = pSrc;
196-
std::ignore = numEventsInWaitList;
197-
std::ignore = phEventWaitList;
198-
std::ignore = phEvent;
199-
200-
DIE_NO_IMPLEMENTATION;
211+
return enqueueMemBufferReadWriteRect_impl<false /*write*/>(
212+
hQueue, hBuffer, blockingWrite, bufferOrigin, hostOrigin, region,
213+
bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc,
214+
numEventsInWaitList, phEventWaitList, phEvent);
201215
}
202216

203217
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
204218
ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
205219
ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
206220
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
207221
ur_event_handle_t *phEvent) {
208-
std::ignore = hQueue;
209-
std::ignore = hBufferSrc;
210-
std::ignore = hBufferDst;
211-
std::ignore = srcOffset;
212-
std::ignore = dstOffset;
213-
std::ignore = size;
214-
std::ignore = numEventsInWaitList;
215-
std::ignore = phEventWaitList;
216-
std::ignore = phEvent;
217-
218-
DIE_NO_IMPLEMENTATION;
222+
const void *SrcPtr = hBufferSrc->_mem + srcOffset;
223+
void *DstPtr = hBufferDst->_mem + dstOffset;
224+
return doCopy_impl(hQueue, DstPtr, SrcPtr, size, numEventsInWaitList,
225+
phEventWaitList, phEvent);
219226
}
220227

221228
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
@@ -225,21 +232,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
225232
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
226233
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
227234
ur_event_handle_t *phEvent) {
228-
std::ignore = hQueue;
229-
std::ignore = hBufferSrc;
230-
std::ignore = hBufferDst;
231-
std::ignore = srcOrigin;
232-
std::ignore = dstOrigin;
233-
std::ignore = region;
234-
std::ignore = srcRowPitch;
235-
std::ignore = srcSlicePitch;
236-
std::ignore = dstRowPitch;
237-
std::ignore = dstSlicePitch;
238-
std::ignore = numEventsInWaitList;
239-
std::ignore = phEvent;
240-
std::ignore = phEventWaitList;
241-
242-
DIE_NO_IMPLEMENTATION;
235+
return enqueueMemBufferReadWriteRect_impl<true /*read*/>(
236+
hQueue, hBufferSrc, false /*todo: check blocking*/, srcOrigin,
237+
/*HostOffset*/ dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch,
238+
dstSlicePitch, hBufferDst->_mem, numEventsInWaitList, phEventWaitList,
239+
phEvent);
243240
}
244241

245242
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(

0 commit comments

Comments
 (0)