Skip to content

Commit 5cb12f6

Browse files
CUDA: fix sum.cu compilation for CUDA < 11.7 (ggml-org#9562)
1 parent: d39e267 · commit: 5cb12f6

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

ggml/src/ggml-cuda/sum.cu

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,21 @@
1-
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
1+
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
2+
#define USE_CUB
3+
#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
4+
5+
#ifdef USE_CUB
26
// On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh.
37
// For this reason CUB must be included BEFORE anything else.
48
#include <cub/cub.cuh>
59
using namespace cub;
6-
#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
10+
#endif // USE_CUB
711

812
#include "sumrows.cuh"
913
#include "sum.cuh"
1014

1115
#include <cstdint>
1216

1317
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
14-
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
18+
#ifdef USE_CUB
1519
size_t tmp_size = 0;
1620
DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream);
1721
ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
@@ -21,7 +25,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int
2125
// For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
2226
sum_rows_f32_cuda(x, dst, ne, 1, stream);
2327
GGML_UNUSED(pool);
24-
#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
28+
#endif // USE_CUB
2529
}
2630

2731
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

0 commit comments

Comments
 (0)