From af8945e88ebb15790151b4caec2a43f464ac9f8e Mon Sep 17 00:00:00 2001
From: cudawarped <12133430+cudawarped@users.noreply.github.com>
Date: Wed, 25 Jun 2025 15:35:35 +0300
Subject: [PATCH] cudev: Add _shfl_down implementation for long long and unsigned long long for CUDA Toolkit versions < 9.0

---
 .../include/opencv2/cudev/warp/shuffle.hpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
index 0de5351fff..3e3bbb55cf 100644
--- a/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
+++ b/modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
@@ -334,12 +334,30 @@ __device__ __forceinline__ uint shfl_down(uint val, uint delta, int width = warp
 
 __device__ __forceinline__ signed long long shfl_down(signed long long val, uint delta, int width = warpSize)
 {
+    // CUDA Toolkit < 9.0 path: shuffle the 64-bit value as two 32-bit halves through the int overload.
+#if !defined(__CUDACC_VER_MAJOR__) || (__CUDACC_VER_MAJOR__ < 9)
+    union { long long ll; int2 i2; } u;
+    u.ll = val;
+    u.i2.x = __shfl_down(u.i2.x, delta, width);
+    u.i2.y = __shfl_down(u.i2.y, delta, width);
+    return u.ll;
+#else
     return __shfl_down(val, delta, width);
+#endif
 }
 
 __device__ __forceinline__ unsigned long long shfl_down(unsigned long long val, uint delta, int width = warpSize)
 {
-    return (unsigned long long) __shfl_down(val, delta, width);
+    // CUDA Toolkit < 9.0 path: shuffle each 32-bit half separately; the int casts select the int overload.
+#if !defined(__CUDACC_VER_MAJOR__) || (__CUDACC_VER_MAJOR__ < 9)
+    union { unsigned long long ull; uint2 u2; } u;
+    u.ull = val;
+    u.u2.x = __shfl_down(static_cast<int>(u.u2.x), delta, width);
+    u.u2.y = __shfl_down(static_cast<int>(u.u2.y), delta, width);
+    return u.ull;
+#else
+    return __shfl_down(val, delta, width);
+#endif
 }
 
 __device__ __forceinline__ float shfl_down(float val, uint delta, int width = warpSize)