@@ -115,39 +115,77 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
115
115
DIE_NO_IMPLEMENTATION;
116
116
}
117
117
118
+ template <bool IsRead>
119
+ static inline ur_result_t enqueueMemBufferReadWriteRect_impl (
120
+ ur_queue_handle_t , ur_mem_handle_t Buff, bool ,
121
+ ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset,
122
+ ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch,
123
+ size_t HostRowPitch, size_t HostSlicePitch,
124
+ typename std::conditional<IsRead, void *, const void *>::type DstMem,
125
+ pi_uint32, const ur_event_handle_t *, ur_event_handle_t *) {
126
+ // TODO: events, blocking, check other constraints, performance optimizations
127
+ // More sharing with level_zero where possible
128
+
129
+ if (BufferRowPitch == 0 )
130
+ BufferRowPitch = region.width ;
131
+ if (BufferSlicePitch == 0 )
132
+ BufferSlicePitch = BufferRowPitch * region.height ;
133
+ if (HostRowPitch == 0 )
134
+ HostRowPitch = region.width ;
135
+ if (HostSlicePitch == 0 )
136
+ HostSlicePitch = HostRowPitch * region.height ;
137
+ for (size_t w = 0 ; w < region.width ; w++)
138
+ for (size_t h = 0 ; h < region.height ; h++)
139
+ for (size_t d = 0 ; d < region.depth ; d++) {
140
+ size_t buff_orign = (d + BufferOffset.z ) * BufferSlicePitch +
141
+ (h + BufferOffset.y ) * BufferRowPitch + w +
142
+ BufferOffset.x ;
143
+ size_t host_origin = (d + HostOffset.z ) * HostSlicePitch +
144
+ (h + HostOffset.y ) * HostRowPitch + w +
145
+ HostOffset.x ;
146
+ int8_t &host_mem = ur_cast<int8_t *>(DstMem)[host_origin];
147
+ int8_t &buff_mem = ur_cast<int8_t *>(Buff->_mem )[buff_orign];
148
+ if (IsRead)
149
+ host_mem = buff_mem;
150
+ else
151
+ buff_mem = host_mem;
152
+ }
153
+ return UR_RESULT_SUCCESS;
154
+ }
155
+
156
+ static inline ur_result_t doCopy_impl (ur_queue_handle_t hQueue, void *DstPtr,
157
+ const void *SrcPtr, size_t Size,
158
+ uint32_t numEventsInWaitList,
159
+ const ur_event_handle_t *EventWaitList,
160
+ ur_event_handle_t *Event) {
161
+ // todo: non-blocking, events, UR integration
162
+ std::ignore = hQueue;
163
+ std::ignore = numEventsInWaitList;
164
+ if (SrcPtr != DstPtr && Size)
165
+ memmove (DstPtr, SrcPtr, Size);
166
+ return UR_RESULT_SUCCESS;
167
+ }
168
+
118
169
/// Copies `size` bytes out of `hBuffer`'s `_mem` storage, starting at
/// `_mem + offset`, into `pDst`.
///
/// The work is delegated to doCopy_impl, which performs the copy before
/// returning; `blockingRead` is therefore not consulted.
/// NOTE(review): `offset` is applied with plain pointer arithmetic on `_mem`,
/// so it is a byte offset only if `_mem` is a byte pointer - confirm.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
    size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  std::ignore = blockingRead;

  const void *Source = hBuffer->_mem + offset;
  return doCopy_impl(hQueue, /*DstPtr=*/pDst, /*SrcPtr=*/Source, size,
                     numEventsInWaitList, phEventWaitList, phEvent);
}
135
179
136
180
/// Copies `size` bytes from `pSrc` into `hBuffer`'s `_mem` storage, starting
/// at `_mem + offset`.
///
/// The work is delegated to doCopy_impl, which performs the copy before
/// returning; `blockingWrite` is therefore not consulted.
/// NOTE(review): `offset` is applied with plain pointer arithmetic on `_mem`,
/// so it is a byte offset only if `_mem` is a byte pointer - confirm.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
    size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  std::ignore = blockingWrite;

  void *Destination = hBuffer->_mem + offset;
  return doCopy_impl(hQueue, /*DstPtr=*/Destination, /*SrcPtr=*/pSrc, size,
                     numEventsInWaitList, phEventWaitList, phEvent);
}
152
190
153
191
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect (
@@ -157,22 +195,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
157
195
size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
158
196
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
159
197
ur_event_handle_t *phEvent) {
160
- std::ignore = hQueue;
161
- std::ignore = hBuffer;
162
- std::ignore = blockingRead;
163
- std::ignore = bufferOrigin;
164
- std::ignore = hostOrigin;
165
- std::ignore = region;
166
- std::ignore = bufferRowPitch;
167
- std::ignore = bufferSlicePitch;
168
- std::ignore = hostRowPitch;
169
- std::ignore = hostSlicePitch;
170
- std::ignore = pDst;
171
- std::ignore = numEventsInWaitList;
172
- std::ignore = phEventWaitList;
173
- std::ignore = phEvent;
174
-
175
- DIE_NO_IMPLEMENTATION;
198
+ return enqueueMemBufferReadWriteRect_impl<true /* read*/ >(
199
+ hQueue, hBuffer, blockingRead, bufferOrigin, hostOrigin, region,
200
+ bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst,
201
+ numEventsInWaitList, phEventWaitList, phEvent);
176
202
}
177
203
178
204
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect (
@@ -182,40 +208,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
182
208
size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
183
209
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
184
210
ur_event_handle_t *phEvent) {
185
- std::ignore = hQueue;
186
- std::ignore = hBuffer;
187
- std::ignore = blockingWrite;
188
- std::ignore = bufferOrigin;
189
- std::ignore = hostOrigin;
190
- std::ignore = region;
191
- std::ignore = bufferRowPitch;
192
- std::ignore = bufferSlicePitch;
193
- std::ignore = hostRowPitch;
194
- std::ignore = hostSlicePitch;
195
- std::ignore = pSrc;
196
- std::ignore = numEventsInWaitList;
197
- std::ignore = phEventWaitList;
198
- std::ignore = phEvent;
199
-
200
- DIE_NO_IMPLEMENTATION;
211
+ return enqueueMemBufferReadWriteRect_impl<false /* write*/ >(
212
+ hQueue, hBuffer, blockingWrite, bufferOrigin, hostOrigin, region,
213
+ bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc,
214
+ numEventsInWaitList, phEventWaitList, phEvent);
201
215
}
202
216
203
217
/// Copies `size` bytes from `hBufferSrc`'s `_mem` storage at `srcOffset` to
/// `hBufferDst`'s `_mem` storage at `dstOffset`.
///
/// The work is delegated to doCopy_impl, which uses `memmove`, so the two
/// ranges may overlap (for example when both handles name the same buffer).
/// NOTE(review): the offsets are applied with plain pointer arithmetic on
/// `_mem`, so they are byte offsets only if `_mem` is a byte pointer - confirm.
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
    ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  void *To = hBufferDst->_mem + dstOffset;
  const void *From = hBufferSrc->_mem + srcOffset;
  return doCopy_impl(hQueue, /*DstPtr=*/To, /*SrcPtr=*/From, size,
                     numEventsInWaitList, phEventWaitList, phEvent);
}
220
227
221
228
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect (
@@ -225,21 +232,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
225
232
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
226
233
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
227
234
ur_event_handle_t *phEvent) {
228
- std::ignore = hQueue;
229
- std::ignore = hBufferSrc;
230
- std::ignore = hBufferDst;
231
- std::ignore = srcOrigin;
232
- std::ignore = dstOrigin;
233
- std::ignore = region;
234
- std::ignore = srcRowPitch;
235
- std::ignore = srcSlicePitch;
236
- std::ignore = dstRowPitch;
237
- std::ignore = dstSlicePitch;
238
- std::ignore = numEventsInWaitList;
239
- std::ignore = phEvent;
240
- std::ignore = phEventWaitList;
241
-
242
- DIE_NO_IMPLEMENTATION;
235
+ return enqueueMemBufferReadWriteRect_impl<true /* read*/ >(
236
+ hQueue, hBufferSrc, false /* todo: check blocking*/ , srcOrigin,
237
+ /* HostOffset*/ dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch,
238
+ dstSlicePitch, hBufferDst->_mem , numEventsInWaitList, phEventWaitList,
239
+ phEvent);
243
240
}
244
241
245
242
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill (
0 commit comments