Skip to content

Commit 49bb3a8

Browse files
Set execution status of CUB device functions to error code (NVIDIA#4511)
1 parent 34de5af commit 49bb3a8

File tree

8 files changed

+43
-35
lines changed

8 files changed

+43
-35
lines changed

c/parallel/src/for.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,9 @@ CUresult cccl_device_for(
147147

148148
try
149149
{
150-
pushed = try_push_context();
151-
Invoke(d_data, num_items, op, build.cc, (CUfunction) build.static_kernel, stream);
150+
pushed = try_push_context();
151+
auto exec_status = Invoke(d_data, num_items, op, build.cc, (CUfunction) build.static_kernel, stream);
152+
error = static_cast<CUresult>(exec_status);
152153
}
153154
catch (...)
154155
{

c/parallel/src/merge_sort.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ CUresult cccl_device_merge_sort(
493493
CUdevice cu_device;
494494
check(cuCtxGetDevice(&cu_device));
495495

496-
cub::DispatchMergeSort<
496+
auto exec_status = cub::DispatchMergeSort<
497497
indirect_arg_t,
498498
indirect_arg_t,
499499
indirect_arg_t,
@@ -517,6 +517,8 @@ CUresult cccl_device_merge_sort(
517517
{build},
518518
cub::detail::CudaDriverLauncherFactory{cu_device, build.cc},
519519
{d_out_keys.value_type.size});
520+
521+
error = static_cast<CUresult>(exec_status);
520522
}
521523
catch (const std::exception& exc)
522524
{

c/parallel/src/radix_sort.cu

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -724,14 +724,15 @@ CUresult cccl_device_radix_sort_impl(
724724
cub::DoubleBuffer<indirect_arg_t> d_values_buffer(
725725
*static_cast<indirect_arg_t**>(&val_arg_in), *static_cast<indirect_arg_t**>(&val_arg_out));
726726

727-
cub::DispatchRadixSort<Order,
728-
indirect_arg_t,
729-
indirect_arg_t,
730-
OffsetT,
731-
indirect_arg_t,
732-
radix_sort::dynamic_radix_sort_policy_t<&radix_sort::get_policy>,
733-
radix_sort::radix_sort_kernel_source,
734-
cub::detail::CudaDriverLauncherFactory>::
727+
auto exec_status = cub::DispatchRadixSort<
728+
Order,
729+
indirect_arg_t,
730+
indirect_arg_t,
731+
OffsetT,
732+
indirect_arg_t,
733+
radix_sort::dynamic_radix_sort_policy_t<&radix_sort::get_policy>,
734+
radix_sort::radix_sort_kernel_source,
735+
cub::detail::CudaDriverLauncherFactory>::
735736
Dispatch(
736737
d_temp_storage,
737738
*temp_storage_bytes,
@@ -748,6 +749,7 @@ CUresult cccl_device_radix_sort_impl(
748749
{d_keys_in.value_type.size});
749750

750751
*selector = d_keys_buffer.selector;
752+
error = static_cast<CUresult>(exec_status);
751753
}
752754
catch (const std::exception& exc)
753755
{

c/parallel/src/reduce.cu

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -375,16 +375,17 @@ CUresult cccl_device_reduce(
375375
CUdevice cu_device;
376376
check(cuCtxGetDevice(&cu_device));
377377

378-
cub::DispatchReduce<indirect_arg_t, // InputIteratorT
379-
indirect_arg_t, // OutputIteratorT
380-
::cuda::std::size_t, // OffsetT
381-
indirect_arg_t, // ReductionOpT
382-
indirect_arg_t, // InitT
383-
void, // AccumT
384-
::cuda::std::__identity, // TransformOpT
385-
reduce::dynamic_reduce_policy_t<&reduce::get_policy>, // PolicyHub
386-
reduce::reduce_kernel_source, // KernelSource
387-
cub::detail::CudaDriverLauncherFactory>:: // KernelLauncherFactory
378+
auto exec_status = cub::DispatchReduce<
379+
indirect_arg_t, // InputIteratorT
380+
indirect_arg_t, // OutputIteratorT
381+
::cuda::std::size_t, // OffsetT
382+
indirect_arg_t, // ReductionOpT
383+
indirect_arg_t, // InitT
384+
void, // AccumT
385+
::cuda::std::__identity, // TransformOpT
386+
reduce::dynamic_reduce_policy_t<&reduce::get_policy>, // PolicyHub
387+
reduce::reduce_kernel_source, // KernelSource
388+
cub::detail::CudaDriverLauncherFactory>:: // KernelLauncherFactory
388389
Dispatch(
389390
d_temp_storage,
390391
*temp_storage_bytes,
@@ -398,6 +399,8 @@ CUresult cccl_device_reduce(
398399
{build},
399400
cub::detail::CudaDriverLauncherFactory{cu_device, build.cc},
400401
{reduce::get_accumulator_type(op, d_in, init)});
402+
403+
error = static_cast<CUresult>(exec_status);
401404
}
402405
catch (const std::exception& exc)
403406
{

c/parallel/src/scan.cu

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,8 @@ CUresult cccl_device_scan(
368368

369369
CUdevice cu_device;
370370
check(cuCtxGetDevice(&cu_device));
371-
auto cuda_error = cub::DispatchScan<
371+
372+
auto exec_status = cub::DispatchScan<
372373
indirect_arg_t,
373374
indirect_arg_t,
374375
indirect_arg_t,
@@ -391,11 +392,8 @@ CUresult cccl_device_scan(
391392
{build},
392393
cub::detail::CudaDriverLauncherFactory{cu_device, build.cc},
393394
{scan::get_accumulator_type(op, d_in, init)});
394-
if (cuda_error != cudaSuccess)
395-
{
396-
const char* errorString = cudaGetErrorString(cuda_error); // Get the error string
397-
std::cerr << "CUDA error: " << errorString << std::endl;
398-
}
395+
396+
error = static_cast<CUresult>(exec_status);
399397
}
400398
catch (const std::exception& exc)
401399
{

c/parallel/src/segmented_reduce.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ CUresult cccl_device_segmented_reduce(
404404
CUdevice cu_device;
405405
check(cuCtxGetDevice(&cu_device));
406406

407-
cub::DispatchSegmentedReduce<
407+
auto exec_status = cub::DispatchSegmentedReduce<
408408
indirect_arg_t, // InputIteratorT
409409
indirect_arg_t, // OutputIteratorT
410410
indirect_arg_t, // BeginSegmentIteratorT
@@ -430,6 +430,8 @@ CUresult cccl_device_segmented_reduce(
430430
/* kernel_source */ {build},
431431
/* launcher_factory &*/ cub::detail::CudaDriverLauncherFactory{cu_device, build.cc},
432432
/* policy */ {segmented_reduce::get_accumulator_type(op, d_in, init)});
433+
434+
error = static_cast<CUresult>(exec_status);
433435
}
434436
catch (const std::exception& exc)
435437
{

c/parallel/src/transform.cu

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,8 @@ CUresult cccl_device_binary_transform(
529529

530530
CUdevice cu_device;
531531
check(cuCtxGetDevice(&cu_device));
532-
auto cuda_error = cub::detail::transform::dispatch_t<
532+
533+
auto exec_status = cub::detail::transform::dispatch_t<
533534
cub::detail::transform::requires_stable_address::no, // TODO implement yes
534535
OffsetT,
535536
::cuda::std::tuple<indirect_arg_t, indirect_arg_t>,
@@ -545,11 +546,8 @@ CUresult cccl_device_binary_transform(
545546
stream,
546547
{build},
547548
cub::detail::CudaDriverLauncherFactory{cu_device, build.cc});
548-
if (cuda_error != cudaSuccess)
549-
{
550-
const char* errorString = cudaGetErrorString(cuda_error); // Get the error string
551-
std::cerr << "CUDA error: " << errorString << std::endl;
552-
}
549+
550+
error = static_cast<CUresult>(exec_status);
553551
}
554552
catch (const std::exception& exc)
555553
{

c/parallel/src/unique_by_key.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ CUresult cccl_device_unique_by_key(
440440
CUdevice cu_device;
441441
check(cuCtxGetDevice(&cu_device));
442442

443-
cub::DispatchUniqueByKey<
443+
auto exec_status = cub::DispatchUniqueByKey<
444444
indirect_arg_t,
445445
indirect_arg_t,
446446
indirect_arg_t,
@@ -466,6 +466,8 @@ CUresult cccl_device_unique_by_key(
466466
{build},
467467
cub::detail::CudaDriverLauncherFactory{cu_device, build.cc},
468468
{d_keys_in.value_type.size});
469+
470+
error = static_cast<CUresult>(exec_status);
469471
}
470472
catch (const std::exception& exc)
471473
{

0 commit comments

Comments
 (0)