Skip to content

Commit 6a292f7

Browse files
Merge pull request #1154 from IntelPython/handle-allocation-failure-in-device-allocate-and-pack
Handle device_allocate_and_pack allocation failure
2 parents bd3eb4d + ee08ce8 commit 6a292f7

File tree

5 files changed

+65
-21
lines changed

5 files changed

+65
-21
lines changed

dpctl/tensor/libtensor/include/utils/offset_utils.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,10 @@ device_allocate_and_pack(sycl::queue q,
106106
auto sz = packed_shape_strides_owner->size();
107107
indT *shape_strides = sycl::malloc_device<indT>(sz, q);
108108

109+
if (shape_strides == nullptr) {
110+
return std::make_tuple(shape_strides, 0, sycl::event());
111+
}
112+
109113
sycl::event copy_ev =
110114
q.copy<indT>(packed_shape_strides_owner->data(), shape_strides, sz);
111115

dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp

Lines changed: 49 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -207,16 +207,20 @@ size_t py_mask_positions(dpctl::tensor::usm_ndarray mask,
207207
std::vector<sycl::event> host_task_events;
208208

209209
using dpctl::tensor::offset_utils::device_allocate_and_pack;
210-
auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
210+
const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
211211
exec_q, host_task_events, simplified_shape, simplified_strides);
212212
py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
213+
if (shape_strides == nullptr) {
214+
sycl::event::wait(host_task_events);
215+
throw std::runtime_error("Unexpected error");
216+
}
213217
sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
214218

215219
if (2 * static_cast<size_t>(nd) != std::get<1>(ptr_size_event_tuple)) {
216220
copy_shape_ev.wait();
217221
sycl::event::wait(host_task_events);
218222
sycl::free(shape_strides, exec_q);
219-
throw std::runtime_error("Unexacted error");
223+
throw std::runtime_error("Unexpected error");
220224
}
221225

222226
std::vector<sycl::event> dependent_events;
@@ -390,10 +394,14 @@ py_extract(dpctl::tensor::usm_ndarray src,
390394
masked_extract_all_slices_strided_impl_dispatch_vector[src_typeid];
391395

392396
using dpctl::tensor::offset_utils::device_allocate_and_pack;
393-
auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
394-
exec_q, host_task_events, src_shape_vec, src_strides_vec);
397+
const auto &ptr_size_event_tuple1 =
398+
device_allocate_and_pack<py::ssize_t>(
399+
exec_q, host_task_events, src_shape_vec, src_strides_vec);
395400
py::ssize_t *packed_src_shape_strides =
396401
std::get<0>(ptr_size_event_tuple1);
402+
if (packed_src_shape_strides == nullptr) {
403+
throw std::runtime_error("Unable to allocated device memory");
404+
}
397405
sycl::event copy_src_shape_strides_ev =
398406
std::get<2>(ptr_size_event_tuple1);
399407

@@ -476,17 +484,27 @@ py_extract(dpctl::tensor::usm_ndarray src,
476484
simplified_ortho_dst_strides, ortho_src_offset, ortho_dst_offset);
477485

478486
using dpctl::tensor::offset_utils::device_allocate_and_pack;
479-
auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
480-
exec_q, host_task_events, simplified_ortho_shape,
481-
simplified_ortho_src_strides, simplified_ortho_dst_strides);
487+
const auto &ptr_size_event_tuple1 =
488+
device_allocate_and_pack<py::ssize_t>(
489+
exec_q, host_task_events, simplified_ortho_shape,
490+
simplified_ortho_src_strides, simplified_ortho_dst_strides);
482491
py::ssize_t *packed_ortho_src_dst_shape_strides =
483492
std::get<0>(ptr_size_event_tuple1);
493+
if (packed_ortho_src_dst_shape_strides == nullptr) {
494+
throw std::runtime_error("Unable to allocate device memory");
495+
}
484496
sycl::event copy_shape_strides_ev1 = std::get<2>(ptr_size_event_tuple1);
485497

486-
auto ptr_size_event_tuple2 = device_allocate_and_pack<py::ssize_t>(
487-
exec_q, host_task_events, masked_src_shape, masked_src_strides);
498+
const auto &ptr_size_event_tuple2 =
499+
device_allocate_and_pack<py::ssize_t>(
500+
exec_q, host_task_events, masked_src_shape, masked_src_strides);
488501
py::ssize_t *packed_masked_src_shape_strides =
489502
std::get<0>(ptr_size_event_tuple2);
503+
if (packed_masked_src_shape_strides == nullptr) {
504+
copy_shape_strides_ev1.wait();
505+
sycl::free(packed_ortho_src_dst_shape_strides, exec_q);
506+
throw std::runtime_error("Unable to allocate device memory");
507+
}
490508
sycl::event copy_shape_strides_ev2 = std::get<2>(ptr_size_event_tuple2);
491509

492510
assert(masked_dst_shape.size() == 1);
@@ -691,10 +709,14 @@ py_place(dpctl::tensor::usm_ndarray dst,
691709
masked_place_all_slices_strided_impl_dispatch_vector[dst_typeid];
692710

693711
using dpctl::tensor::offset_utils::device_allocate_and_pack;
694-
auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
695-
exec_q, host_task_events, dst_shape_vec, dst_strides_vec);
712+
const auto &ptr_size_event_tuple1 =
713+
device_allocate_and_pack<py::ssize_t>(
714+
exec_q, host_task_events, dst_shape_vec, dst_strides_vec);
696715
py::ssize_t *packed_dst_shape_strides =
697716
std::get<0>(ptr_size_event_tuple1);
717+
if (packed_dst_shape_strides == nullptr) {
718+
throw std::runtime_error("Unable to allocate device memory");
719+
}
698720
sycl::event copy_dst_shape_strides_ev =
699721
std::get<2>(ptr_size_event_tuple1);
700722

@@ -777,17 +799,26 @@ py_place(dpctl::tensor::usm_ndarray dst,
777799
simplified_ortho_rhs_strides, ortho_dst_offset, ortho_rhs_offset);
778800

779801
using dpctl::tensor::offset_utils::device_allocate_and_pack;
780-
auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
781-
exec_q, host_task_events, simplified_ortho_shape,
782-
simplified_ortho_dst_strides, simplified_ortho_rhs_strides);
802+
const auto &ptr_size_event_tuple1 =
803+
device_allocate_and_pack<py::ssize_t>(
804+
exec_q, host_task_events, simplified_ortho_shape,
805+
simplified_ortho_dst_strides, simplified_ortho_rhs_strides);
783806
py::ssize_t *packed_ortho_dst_rhs_shape_strides =
784807
std::get<0>(ptr_size_event_tuple1);
808+
if (packed_ortho_dst_rhs_shape_strides == nullptr) {
809+
throw std::runtime_error("Unable to allocate device memory");
810+
}
785811
sycl::event copy_shape_strides_ev1 = std::get<2>(ptr_size_event_tuple1);
786812

787813
auto ptr_size_event_tuple2 = device_allocate_and_pack<py::ssize_t>(
788814
exec_q, host_task_events, masked_dst_shape, masked_dst_strides);
789815
py::ssize_t *packed_masked_dst_shape_strides =
790816
std::get<0>(ptr_size_event_tuple2);
817+
if (packed_masked_dst_shape_strides == nullptr) {
818+
copy_shape_strides_ev1.wait();
819+
sycl::free(packed_ortho_dst_rhs_shape_strides, exec_q);
820+
throw std::runtime_error("Unable to allocate device memory");
821+
}
791822
sycl::event copy_shape_strides_ev2 = std::get<2>(ptr_size_event_tuple2);
792823

793824
assert(masked_rhs_shape.size() == 1);
@@ -922,15 +953,15 @@ std::pair<sycl::event, sycl::event> py_nonzero(
922953
host_task_events.reserve(2);
923954

924955
using dpctl::tensor::offset_utils::device_allocate_and_pack;
925-
auto mask_shape_copying_tuple = device_allocate_and_pack<py::ssize_t>(
926-
exec_q, host_task_events, mask_shape);
956+
const auto &mask_shape_copying_tuple =
957+
device_allocate_and_pack<py::ssize_t>(exec_q, host_task_events,
958+
mask_shape);
927959
py::ssize_t *src_shape_device_ptr = std::get<0>(mask_shape_copying_tuple);
928-
sycl::event copy_ev = std::get<2>(mask_shape_copying_tuple);
929-
930960
if (src_shape_device_ptr == nullptr) {
931961
sycl::event::wait(host_task_events);
932962
throw std::runtime_error("Device allocation failed");
933963
}
964+
sycl::event copy_ev = std::get<2>(mask_shape_copying_tuple);
934965

935966
std::vector<sycl::event> all_deps;
936967
all_deps.reserve(depends.size() + 1);

dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,10 +248,13 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src,
248248
host_task_events.reserve(2);
249249

250250
using dpctl::tensor::offset_utils::device_allocate_and_pack;
251-
auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
251+
const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
252252
exec_q, host_task_events, simplified_shape, simplified_src_strides,
253253
simplified_dst_strides);
254254
py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
255+
if (shape_strides == nullptr) {
256+
throw std::runtime_error("Unable to allocate device memory");
257+
}
255258
sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
256259

257260
sycl::event copy_and_cast_generic_ev = copy_and_cast_fn(

dpctl/tensor/libtensor/source/copy_for_reshape.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,10 +137,13 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src,
137137

138138
// shape_strides = [src_shape, src_strides, dst_shape, dst_strides]
139139
using dpctl::tensor::offset_utils::device_allocate_and_pack;
140-
auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
140+
const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
141141
exec_q, host_task_events, src_shape, src_strides, dst_shape,
142142
dst_strides);
143143
py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
144+
if (shape_strides == nullptr) {
145+
throw std::runtime_error("Unable to allocate device memory");
146+
}
144147
sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
145148

146149
char *src_data = src.get_data();

dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,10 +215,13 @@ void copy_numpy_ndarray_into_usm_ndarray(
215215

216216
// Copy shape strides into device memory
217217
using dpctl::tensor::offset_utils::device_allocate_and_pack;
218-
auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
218+
const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
219219
exec_q, host_task_events, simplified_shape, simplified_src_strides,
220220
simplified_dst_strides);
221221
py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
222+
if (shape_strides == nullptr) {
223+
throw std::runtime_error("Unable to allocate device memory");
224+
}
222225
sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
223226

224227
// Get implementation function pointer

0 commit comments

Comments
 (0)