Skip to content

Commit d8771ec

Browse files
authored
Work around memory allocation issues on Windows for large size test cases (#690)
Radix sort and merge sort test cases that use the largest data sizes hang or cause an SEH exception on Windows when using the latest version of the HIP SDK. Unfortunately, the HIP SDK issue interferes with our current strategies for skipping tests that require too much memory for a device (querying available device memory or examining the return value from hipMalloc), making them unreliable. While we wait for a fix, this change introduces a workaround by reducing the maximum size these test cases can use on Windows.
1 parent 6a6b05c commit d8771ec

File tree

3 files changed

+33
-4
lines changed

3 files changed

+33
-4
lines changed

test/rocprim/test_device_merge_sort.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,15 @@ void testLargeIndices()
423423
// at least some sizes that fit into device memory.
424424
using config = rocprim::merge_sort_config<256, 256, 1, 128, 128, 1, (1 << 17)>;
425425

426-
for(size_t size : test_utils::get_large_sizes(seeds[0]))
426+
// On Windows, sizes above 2^34 cause issues that we can't currently catch by examining
427+
// the hipMalloc return value or querying available memory. Work around this for now
428+
// by setting a different maximum size for that platform.
429+
#if defined(_WIN32)
430+
const size_t max_pow2 = 34;
431+
#else
432+
const size_t max_pow2 = 37;
433+
#endif
434+
for(size_t size : test_utils::get_large_sizes<max_pow2>(seeds[0]))
427435
{
428436
SCOPED_TRACE(testing::Message() << "with size = " << size);
429437

test/rocprim/test_device_radix_sort.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,16 @@ inline void sort_keys_large_sizes()
14091409

14101410
// Currently, CI enforces a hard limit of 96 GB on memory allocations.
14111411
// Temporarily use sizes that will require less space than the limit.
1412-
const std::vector<size_t> sizes = test_utils::get_large_sizes<35>(seeds[0]);
1412+
// On Windows, sizes above 2^34 (that are still under the 96 GB limit)
1413+
// can hang due to issues that we can't currently catch by examining
1414+
// the hipMalloc return value or querying available memory. Work around
1415+
// this for now by setting a different maximum size for that platform.
1416+
#if defined(_WIN32)
1417+
const size_t max_pow2 = 34;
1418+
#else
1419+
const size_t max_pow2 = 35;
1420+
#endif
1421+
const std::vector<size_t> sizes = test_utils::get_large_sizes<max_pow2>(seeds[0]);
14131422
for(const size_t size : sizes)
14141423
{
14151424
SCOPED_TRACE(testing::Message() << "with size = " << size);

test/rocprim/test_utils_data_generation.hpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <iterator>
3636
#include <random>
3737
#include <vector>
38+
#include <algorithm>
3839

3940
namespace test_utils {
4041

@@ -505,7 +506,7 @@ std::vector<size_t> get_sizes(T seed_value)
505506
template<unsigned int MaxPow2 = 37, class T>
506507
std::vector<size_t> get_large_sizes(T seed_value)
507508
{
508-
std::vector<size_t> sizes = {
509+
std::vector<size_t> test_sizes = {
509510
(size_t{1} << 30) - 1,
510511
size_t{1} << 31,
511512
(size_t{1} << 32) - 15,
@@ -519,7 +520,18 @@ std::vector<size_t> get_large_sizes(T seed_value)
519520
(size_t{1} << 30) + 1,
520521
(size_t{1} << MaxPow2) - 2,
521522
seed_value);
522-
sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end());
523+
524+
std::vector<size_t> sizes(test_sizes.size() + random_sizes.size());
525+
int count = 0;
526+
auto predicate = [&count](const size_t& val) {
527+
const bool result = (val <= (size_t{1} << MaxPow2));
528+
count += (result ? 1 : 0);
529+
return result;
530+
};
531+
std::copy_if(test_sizes.begin(), test_sizes.end(), sizes.begin(), predicate);
532+
std::copy_if(random_sizes.begin(), random_sizes.end(), sizes.begin() + count, predicate);
533+
sizes.resize(count);
534+
523535
std::sort(sizes.begin(), sizes.end());
524536
return sizes;
525537
}

0 commit comments

Comments
 (0)