diff --git a/ci/test_cuda_parallel_python.sh b/ci/test_cuda_parallel_python.sh index d663db9457b..dbfbee3df22 100755 --- a/ci/test_cuda_parallel_python.sh +++ b/ci/test_cuda_parallel_python.sh @@ -20,4 +20,4 @@ python -m pip install "${CUDA_PARALLEL_WHEEL_PATH}[test]" # Run tests cd "/home/coder/cccl/python/cuda_parallel/tests/" -python -m pytest -n auto -v +python -m pytest -n 6 -v -m "not large" diff --git a/python/cuda_parallel/pyproject.toml b/python/cuda_parallel/pyproject.toml index 5234e5e6bb4..7c3da2d35cb 100644 --- a/python/cuda_parallel/pyproject.toml +++ b/python/cuda_parallel/pyproject.toml @@ -76,4 +76,7 @@ extend = "../../pyproject.toml" known-first-party = ["cuda.parallel"] [tool.pytest.ini_options] -markers = ["no_verify_sass: skip SASS verification check"] +markers = [ + "no_verify_sass: skip SASS verification check", + "large: tests requiring large device memory allocations", +] diff --git a/python/cuda_parallel/tests/test_radix_sort.py b/python/cuda_parallel/tests/test_radix_sort.py index 2262db5917d..a8a3706816e 100644 --- a/python/cuda_parallel/tests/test_radix_sort.py +++ b/python/cuda_parallel/tests/test_radix_sort.py @@ -12,6 +12,13 @@ import cuda.parallel.experimental.algorithms as algorithms + +def get_mark(dt, log_size): + if log_size < 20: + return tuple() + return pytest.mark.large + + DTYPE_LIST = [ np.uint8, np.uint16, @@ -27,7 +34,11 @@ PROBLEM_SIZES = [2, 10, 20] -DTYPE_SIZE = [(dt, 2**log_size) for dt in DTYPE_LIST for log_size in PROBLEM_SIZES] +DTYPE_SIZE = [ + pytest.param(dt, 2**log_size, marks=get_mark(dt, log_size)) + for dt in DTYPE_LIST + for log_size in PROBLEM_SIZES +] def random_array(size, dtype, max_value=None) -> np.typing.NDArray: @@ -251,7 +262,7 @@ def test_radix_sort_pairs_double_buffer(dtype, num_items): # These tests take longer to execute so we reduce the number of test cases DTYPE_SIZE_BIT_WINDOW = [ - (dt, 2**log_size) + pytest.param(dt, 2**log_size, marks=get_mark(dt, log_size)) for dt in [np.uint8, np.int16, np.uint32, np.int64, np.float64] for log_size in [2, 24] ] diff --git a/python/cuda_parallel/tests/test_reduce.py b/python/cuda_parallel/tests/test_reduce.py index 60536e134f9..c775d11116b 100644 --- a/python/cuda_parallel/tests/test_reduce.py +++ b/python/cuda_parallel/tests/test_reduce.py @@ -31,8 +31,14 @@ def type_to_problem_sizes(dtype): raise ValueError("Unsupported dtype") +def get_mark(dt, log_size): + if log_size + np.log2(np.dtype(dt).itemsize) < 21: + return tuple() + return pytest.mark.large + + dtype_size_pairs = [ - (dt, 2**log_size) + pytest.param(dt, 2**log_size, marks=get_mark(dt, log_size)) for dt in [np.uint8, np.uint16, np.uint32, np.uint64] for log_size in type_to_problem_sizes(dt) ] diff --git a/python/cuda_parallel/tests/test_unique_by_key.py b/python/cuda_parallel/tests/test_unique_by_key.py index e55a64c414c..d258f7b6179 100644 --- a/python/cuda_parallel/tests/test_unique_by_key.py +++ b/python/cuda_parallel/tests/test_unique_by_key.py @@ -25,10 +25,19 @@ np.float64, ] + +def get_mark(dt, log_size): + if log_size < 20: + return tuple() + return pytest.mark.large + + PROBLEM_SIZES = [2, 8, 16, 22] DTYPE_SIZE_PAIRS = [ - (dt, 2**log_size) for dt in DTYPE_LIST for log_size in PROBLEM_SIZES + pytest.param(dt, 2**log_size, marks=get_mark(dt, log_size)) + for dt in DTYPE_LIST + for log_size in PROBLEM_SIZES ]