Skip to content

Commit 963efd4

Browse files
In RadixSort, InvokePass must not mutate d_begin_offsets, d_end_offsets
This is because InvokePass may be called multiple times by InvokePasses due to the algorithmic nature of radix sorting. With this change, InvokePass creates local copies of `d_begin_offsets` and `d_end_offsets` and advances these copies in place if necessary.
1 parent 925a8c7 commit 963efd4

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

cub/cub/device/dispatch/dispatch_radix_sort.cuh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,9 @@ struct DispatchSegmentedRadixSort
12641264
return cudaErrorInvalidValue;
12651265
}
12661266

1267+
BeginOffsetIteratorT begin_offsets_current_it = d_begin_offsets;
1268+
EndOffsetIteratorT end_offsets_current_it = d_end_offsets;
1269+
12671270
// Iterate over chunks of segments
12681271
for (::cuda::std::int64_t invocation_index = 0; invocation_index < num_invocations; invocation_index++)
12691272
{
@@ -1293,8 +1296,8 @@ struct DispatchSegmentedRadixSort
12931296
d_keys_out,
12941297
d_values_in,
12951298
d_values_out,
1296-
d_begin_offsets,
1297-
d_end_offsets,
1299+
begin_offsets_current_it,
1300+
end_offsets_current_it,
12981301
current_bit,
12991302
pass_bits,
13001303
decomposer);
@@ -1308,8 +1311,8 @@ struct DispatchSegmentedRadixSort
13081311

13091312
if (invocation_index + 1 < num_invocations)
13101313
{
1311-
detail::advance_iterators_inplace_if_supported(d_begin_offsets, num_current_segments);
1312-
detail::advance_iterators_inplace_if_supported(d_end_offsets, num_current_segments);
1314+
detail::advance_iterators_inplace_if_supported(begin_offsets_current_it, num_current_segments);
1315+
detail::advance_iterators_inplace_if_supported(end_offsets_current_it, num_current_segments);
13131316
}
13141317

13151318
// Sync the stream if specified to flush runtime errors

0 commit comments

Comments
 (0)