@@ -207,16 +207,20 @@ size_t py_mask_positions(dpctl::tensor::usm_ndarray mask,
     std::vector<sycl::event> host_task_events;
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
+    const auto &ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t>(
         exec_q, host_task_events, simplified_shape, simplified_strides);
     py::ssize_t *shape_strides = std::get<0>(ptr_size_event_tuple);
+    if (shape_strides == nullptr) {
+        sycl::event::wait(host_task_events);
+        throw std::runtime_error("Unexpected error");
+    }
     sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
 
     if (2 * static_cast<size_t>(nd) != std::get<1>(ptr_size_event_tuple)) {
         copy_shape_ev.wait();
         sycl::event::wait(host_task_events);
         sycl::free(shape_strides, exec_q);
-        throw std::runtime_error("Unexacted error");
+        throw std::runtime_error("Unexpected error");
     }
 
     std::vector<sycl::event> dependent_events;
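
Context for the hunk above: judging from the surrounding std::get<> calls, device_allocate_and_pack packs the host-side shape/stride vectors into one USM device allocation and returns a (pointer, packed-element-count, copy-event) tuple; the new guard handles the pointer coming back null. A minimal self-contained sketch of that guard, using plain SYCL USM calls in place of the dpctl helper (the names q, host_tasks, and n are illustrative, not identifiers from this patch):

#include <sycl/sycl.hpp>
#include <stdexcept>
#include <vector>

// Sketch only: a null USM pointer signals allocation failure, so settle
// any host tasks already submitted and throw before the pointer is used.
long *checked_device_alloc(sycl::queue &q,
                           std::vector<sycl::event> &host_tasks,
                           std::size_t n)
{
    long *p = sycl::malloc_device<long>(n, q);
    if (p == nullptr) {
        sycl::event::wait(host_tasks);
        throw std::runtime_error("Unexpected error");
    }
    return p;
}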
@@ -390,10 +394,14 @@ py_extract(dpctl::tensor::usm_ndarray src,
         masked_extract_all_slices_strided_impl_dispatch_vector[src_typeid];
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, src_shape_vec, src_strides_vec);
+    const auto &ptr_size_event_tuple1 =
+        device_allocate_and_pack<py::ssize_t>(
+            exec_q, host_task_events, src_shape_vec, src_strides_vec);
     py::ssize_t *packed_src_shape_strides =
         std::get<0>(ptr_size_event_tuple1);
+    if (packed_src_shape_strides == nullptr) {
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_src_shape_strides_ev =
         std::get<2>(ptr_size_event_tuple1);
 
@@ -476,17 +484,27 @@ py_extract(dpctl::tensor::usm_ndarray src,
         simplified_ortho_dst_strides, ortho_src_offset, ortho_dst_offset);
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, simplified_ortho_shape,
-        simplified_ortho_src_strides, simplified_ortho_dst_strides);
+    const auto &ptr_size_event_tuple1 =
+        device_allocate_and_pack<py::ssize_t>(
+            exec_q, host_task_events, simplified_ortho_shape,
+            simplified_ortho_src_strides, simplified_ortho_dst_strides);
     py::ssize_t *packed_ortho_src_dst_shape_strides =
         std::get<0>(ptr_size_event_tuple1);
+    if (packed_ortho_src_dst_shape_strides == nullptr) {
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_shape_strides_ev1 = std::get<2>(ptr_size_event_tuple1);
 
-    auto ptr_size_event_tuple2 = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, masked_src_shape, masked_src_strides);
+    const auto &ptr_size_event_tuple2 =
+        device_allocate_and_pack<py::ssize_t>(
+            exec_q, host_task_events, masked_src_shape, masked_src_strides);
     py::ssize_t *packed_masked_src_shape_strides =
         std::get<0>(ptr_size_event_tuple2);
+    if (packed_masked_src_shape_strides == nullptr) {
+        copy_shape_strides_ev1.wait();
+        sycl::free(packed_ortho_src_dst_shape_strides, exec_q);
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_shape_strides_ev2 = std::get<2>(ptr_size_event_tuple2);
 
     assert(masked_dst_shape.size() == 1);
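
The hunk above adds the same guard twice, with one extra obligation: when the second packing allocation fails, the first one must be unwound before throwing. A hedged sketch of that two-stage discipline, again with plain SYCL USM standing in for device_allocate_and_pack and all names illustrative:

#include <sycl/sycl.hpp>
#include <stdexcept>
#include <vector>

// Two-stage allocation sketch: on a second-stage failure, wait for the
// first buffer's copy and free it so nothing leaks, then throw.
void two_stage_pack_sketch(sycl::queue &q, const std::vector<long> &host_a,
                           const std::vector<long> &host_b)
{
    long *dev_a = sycl::malloc_device<long>(host_a.size(), q);
    if (dev_a == nullptr) {
        throw std::runtime_error("Unable to allocate device memory");
    }
    sycl::event copy_ev1 = q.copy(host_a.data(), dev_a, host_a.size());

    long *dev_b = sycl::malloc_device<long>(host_b.size(), q);
    if (dev_b == nullptr) {
        // Mirror the patch: settle the first copy, release the first
        // buffer, then report the failure.
        copy_ev1.wait();
        sycl::free(dev_a, q);
        throw std::runtime_error("Unable to allocate device memory");
    }
    sycl::event copy_ev2 = q.copy(host_b.data(), dev_b, host_b.size());

    // ... kernels depending on copy_ev1/copy_ev2 would be submitted here ...
    sycl::event::wait({copy_ev1, copy_ev2});
    sycl::free(dev_b, q);
    sycl::free(dev_a, q);
}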
@@ -691,10 +709,14 @@ py_place(dpctl::tensor::usm_ndarray dst,
         masked_place_all_slices_strided_impl_dispatch_vector[dst_typeid];
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, dst_shape_vec, dst_strides_vec);
+    const auto &ptr_size_event_tuple1 =
+        device_allocate_and_pack<py::ssize_t>(
+            exec_q, host_task_events, dst_shape_vec, dst_strides_vec);
     py::ssize_t *packed_dst_shape_strides =
         std::get<0>(ptr_size_event_tuple1);
+    if (packed_dst_shape_strides == nullptr) {
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_dst_shape_strides_ev =
         std::get<2>(ptr_size_event_tuple1);
 
@@ -777,17 +799,26 @@ py_place(dpctl::tensor::usm_ndarray dst,
         simplified_ortho_rhs_strides, ortho_dst_offset, ortho_rhs_offset);
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto ptr_size_event_tuple1 = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, simplified_ortho_shape,
-        simplified_ortho_dst_strides, simplified_ortho_rhs_strides);
+    const auto &ptr_size_event_tuple1 =
+        device_allocate_and_pack<py::ssize_t>(
+            exec_q, host_task_events, simplified_ortho_shape,
+            simplified_ortho_dst_strides, simplified_ortho_rhs_strides);
     py::ssize_t *packed_ortho_dst_rhs_shape_strides =
         std::get<0>(ptr_size_event_tuple1);
+    if (packed_ortho_dst_rhs_shape_strides == nullptr) {
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_shape_strides_ev1 = std::get<2>(ptr_size_event_tuple1);
 
     auto ptr_size_event_tuple2 = device_allocate_and_pack<py::ssize_t>(
         exec_q, host_task_events, masked_dst_shape, masked_dst_strides);
     py::ssize_t *packed_masked_dst_shape_strides =
         std::get<0>(ptr_size_event_tuple2);
+    if (packed_masked_dst_shape_strides == nullptr) {
+        copy_shape_strides_ev1.wait();
+        sycl::free(packed_ortho_dst_rhs_shape_strides, exec_q);
+        throw std::runtime_error("Unable to allocate device memory");
+    }
     sycl::event copy_shape_strides_ev2 = std::get<2>(ptr_size_event_tuple2);
 
     assert(masked_rhs_shape.size() == 1);
@@ -922,15 +953,15 @@ std::pair<sycl::event, sycl::event> py_nonzero(
     host_task_events.reserve(2);
 
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
-    auto mask_shape_copying_tuple = device_allocate_and_pack<py::ssize_t>(
-        exec_q, host_task_events, mask_shape);
+    const auto &mask_shape_copying_tuple =
+        device_allocate_and_pack<py::ssize_t>(exec_q, host_task_events,
+                                              mask_shape);
     py::ssize_t *src_shape_device_ptr = std::get<0>(mask_shape_copying_tuple);
-    sycl::event copy_ev = std::get<2>(mask_shape_copying_tuple);
-
     if (src_shape_device_ptr == nullptr) {
         sycl::event::wait(host_task_events);
         throw std::runtime_error("Device allocation failed");
     }
+    sycl::event copy_ev = std::get<2>(mask_shape_copying_tuple);
 
     std::vector<sycl::event> all_deps;
     all_deps.reserve(depends.size() + 1);
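
The py_nonzero hunk is a pure reordering: the copy event is now read out of the tuple only after the packed pointer has been validated, so the failure path never touches it. Schematically, with the tuple layout (pointer, count, event) inferred from the std::get<> indices and all names illustrative:

#include <sycl/sycl.hpp>
#include <stdexcept>
#include <tuple>
#include <vector>

// Validate the packed pointer first; only then extract the copy event
// and append it to the dependency list for the downstream kernels.
std::vector<sycl::event>
deps_after_validation(const std::tuple<long *, long, sycl::event> &tup,
                      std::vector<sycl::event> &host_task_events,
                      const std::vector<sycl::event> &depends)
{
    if (std::get<0>(tup) == nullptr) {
        sycl::event::wait(host_task_events);
        throw std::runtime_error("Device allocation failed");
    }
    sycl::event copy_ev = std::get<2>(tup);

    std::vector<sycl::event> all_deps;
    all_deps.reserve(depends.size() + 1);
    all_deps.insert(all_deps.end(), depends.begin(), depends.end());
    all_deps.push_back(copy_ev);
    return all_deps;
}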