|
53 | 53 | #include "opencv2/core/cuda/saturate_cast.hpp"
|
54 | 54 | #include "opencv2/core/cuda/border_interpolate.hpp"
|
55 | 55 |
|
| 56 | + |
| 57 | +// The CUB library is used for the Median Filter with Wavelet Matrix, |
| 58 | +// which has become a standard library since CUDA 11. |
| 59 | +#include "wavelet_matrix_feature_support_checks.h" |
| 60 | +#ifdef __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__ |
| 61 | +#include "wavelet_matrix_multi.cuh" |
| 62 | +#include "wavelet_matrix_2d.cuh" |
| 63 | +#include "wavelet_matrix_float_supporter.cuh" |
| 64 | +#endif |
| 65 | + |
| 66 | + |
56 | 67 | namespace cv { namespace cuda { namespace device
|
57 | 68 | {
|
58 | 69 | __device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
|
@@ -334,4 +345,72 @@ namespace cv { namespace cuda { namespace device
|
334 | 345 |
|
335 | 346 | }}}
|
336 | 347 |
|
| 348 | + |
| 349 | +#ifdef __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__ |
| 350 | +namespace cv { namespace cuda { namespace device |
| 351 | + { |
| 352 | + using namespace wavelet_matrix_median; |
| 353 | + |
| 354 | + template<int CH_NUM, typename T> |
| 355 | + void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<T> src, PtrStepSz<T> dst, int radius,cudaStream_t stream){ |
| 356 | + |
| 357 | + constexpr bool is_float = std::is_same<T, float>::value; |
| 358 | + constexpr static int WORD_SIZE = 32; |
| 359 | + constexpr static int ThW = (std::is_same<T, uint8_t>::value ? 8 : 4); |
| 360 | + constexpr static int ThH = (std::is_same<T, uint8_t>::value ? 64 : 256); |
| 361 | + using XYIdxT = uint32_t; |
| 362 | + using XIdxT = uint16_t; |
| 363 | + using WM_T = typename std::conditional<is_float, uint32_t, T>::type; |
| 364 | + using MedianResT = typename std::conditional<is_float, T, std::nullptr_t>::type; |
| 365 | + using WM2D_IMPL = WaveletMatrix2dCu5C<WM_T, CH_NUM, WaveletMatrixMultiCu4G<XIdxT, 512>, 512, WORD_SIZE>; |
| 366 | + |
| 367 | + CV_Assert(src.cols == dst.cols); |
| 368 | + CV_Assert(dst.step % sizeof(T) == 0); |
| 369 | + |
| 370 | + WM2D_IMPL WM_cuda(src.rows, src.cols, is_float, false); |
| 371 | + WM_cuda.res_cu = reinterpret_cast<WM_T*>(dst.ptr()); |
| 372 | + |
| 373 | + const size_t line_num = src.cols * CH_NUM; |
| 374 | + if (is_float) { |
| 375 | + WMMedianFloatSupporter::WMMedianFloatSupporter<float, CH_NUM, XYIdxT> float_supporter(src.rows, src.cols); |
| 376 | + float_supporter.alloc(); |
| 377 | + for (int y = 0; y < src.rows; ++y) { |
| 378 | + cudaMemcpy(float_supporter.val_in_cu + y * line_num, src.ptr(y), line_num * sizeof(T), cudaMemcpyDeviceToDevice); |
| 379 | + } |
| 380 | + const auto p = WM_cuda.get_nowcu_and_buf_byte_div32(); |
| 381 | + float_supporter.sort_and_set((XYIdxT*)p.first, p.second); |
| 382 | + WM_cuda.construct(nullptr, stream, true); |
| 383 | + WM_cuda.template median2d<ThW, ThH, MedianResT, false>(radius, dst.step / sizeof(T), (MedianResT*)float_supporter.get_res_table(), stream); |
| 384 | + } else { |
| 385 | + for (int y = 0; y < src.rows; ++y) { |
| 386 | + cudaMemcpy(WM_cuda.src_cu + y * line_num, src.ptr(y), line_num * sizeof(T), cudaMemcpyDeviceToDevice); |
| 387 | + } |
| 388 | + WM_cuda.construct(nullptr, stream); |
| 389 | + WM_cuda.template median2d<ThW, ThH, MedianResT, false>(radius, dst.step / sizeof(T), nullptr, stream); |
| 390 | + } |
| 391 | + WM_cuda.res_cu = nullptr; |
| 392 | + if (!stream) { |
| 393 | + cudaSafeCall( cudaDeviceSynchronize() ); |
| 394 | + } |
| 395 | + } |
| 396 | + |
| 397 | + template<typename T> |
| 398 | + void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<T> src, PtrStepSz<T> dst, int radius, const int num_channels, cudaStream_t stream){ |
| 399 | + if (num_channels == 1) { |
| 400 | + medianFiltering_wavelet_matrix_gpu<1>(src, dst, radius, stream); |
| 401 | + } else if (num_channels == 3) { |
| 402 | + medianFiltering_wavelet_matrix_gpu<3>(src, dst, radius, stream); |
| 403 | + } else if (num_channels == 4) { |
| 404 | + medianFiltering_wavelet_matrix_gpu<4>(src, dst, radius, stream); |
| 405 | + } else { |
| 406 | + CV_Assert(num_channels == 1 || num_channels == 3 || num_channels == 4); |
| 407 | + } |
| 408 | + } |
| 409 | + |
| 410 | + template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<uint8_t> src, PtrStepSz<uint8_t> dst, int radius, const int num_channels, cudaStream_t stream); |
| 411 | + template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<uint16_t> src, PtrStepSz<uint16_t> dst, int radius, const int num_channels, cudaStream_t stream); |
| 412 | + template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<float> src, PtrStepSz<float> dst, int radius, const int num_channels, cudaStream_t stream); |
| 413 | +}}} |
| 414 | +#endif // __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__ |
| 415 | + |
337 | 416 | #endif
|
0 commit comments