Skip to content

Commit 7fd584f

Browse files
committed
Merge remote-tracking branch 'upstream/4.x' into '5.x'
2 parents 8b4dac2 + b236c71 commit 7fd584f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3017
-194
lines changed

.github/workflows/PR-5.x.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ jobs:
1515
Ubuntu2204-x64:
1616
uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-5.x-U22.yaml@main
1717

18+
Ubuntu2404-x64:
19+
uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-5.x-U24.yaml@main
20+
1821
Ubuntu2004-x64-CUDA:
1922
uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-5.x-U20-Cuda.yaml@main
2023

modules/bgsegm/test/test_backgroundsubtractor_gbh.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ void CV_BackgroundSubtractorTest::run(int)
2929
{
3030
int code = cvtest::TS::OK;
3131
RNG& rng = ts->get_rng();
32-
int type = ((unsigned int)rng)%7; //!< pick a random type, 0 - 6, defined in types_c.h
33-
int channels = 1 + ((unsigned int)rng)%4; //!< random number of channels from 1 to 4.
32+
int type = ((unsigned int)rng) % 3;
33+
type = (type == 0) ? CV_8U : (type == 1) ? CV_16U : CV_32F; // 8U, 16U, 32F
34+
int channels = ((unsigned int)rng)%3;
35+
channels = (channels == 2) ? 4 : channels; // 1, 3, 4
3436
int channelsAndType = CV_MAKETYPE(type,channels);
3537
int width = 2 + ((unsigned int)rng)%98; //!< Mat will be 2 to 100 in width and height
3638
int height = 2 + ((unsigned int)rng)%98;

modules/cudaarithm/src/cuda/polar_cart.cu

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -133,23 +133,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
133133
GpuMat_<float> anglec(angle.reshape(1));
134134

135135
if (angleInDegrees)
136-
{
137-
gridTransformTuple(zipPtr(xc, yc),
138-
tie(magc, anglec),
139-
make_tuple(
140-
binaryTupleAdapter<0, 1>(magnitude_func<float>()),
141-
binaryTupleAdapter<0, 1>(direction_func<float, true>())),
142-
stream);
143-
}
136+
gridTransformBinary(xc, yc, magc, anglec, magnitude_func<float>(), direction_func<float, true>(), stream);
144137
else
145-
{
146-
gridTransformTuple(zipPtr(xc, yc),
147-
tie(magc, anglec),
148-
make_tuple(
149-
binaryTupleAdapter<0, 1>(magnitude_func<float>()),
150-
binaryTupleAdapter<0, 1>(direction_func<float, false>())),
151-
stream);
152-
}
138+
gridTransformBinary(xc, yc, magc, anglec, magnitude_func<float>(), direction_func<float, false>(), stream);
153139

154140
syncOutput(mag, _mag, stream);
155141
syncOutput(angle, _angle, stream);

modules/cudaarithm/src/cuda/split_merge.cu

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ namespace
6767
{
6868
static void call(const GpuMat* src, GpuMat& dst, Stream& stream)
6969
{
70-
gridMerge(zipPtr(globPtr<T>(src[0]), globPtr<T>(src[1])),
70+
const std::array<GlobPtrSz<T>, 2> d_src = {globPtr<T>(src[0]), globPtr<T>(src[1])};
71+
gridMerge(d_src,
7172
globPtr<typename MakeVec<T, 2>::type>(dst),
7273
stream);
7374
}
@@ -77,7 +78,8 @@ namespace
7778
{
7879
static void call(const GpuMat* src, GpuMat& dst, Stream& stream)
7980
{
80-
gridMerge(zipPtr(globPtr<T>(src[0]), globPtr<T>(src[1]), globPtr<T>(src[2])),
81+
const std::array<GlobPtrSz<T>, 3> d_src = {globPtr<T>(src[0]), globPtr<T>(src[1]), globPtr<T>(src[2])};
82+
gridMerge(d_src,
8183
globPtr<typename MakeVec<T, 3>::type>(dst),
8284
stream);
8385
}
@@ -87,7 +89,8 @@ namespace
8789
{
8890
static void call(const GpuMat* src, GpuMat& dst, Stream& stream)
8991
{
90-
gridMerge(zipPtr(globPtr<T>(src[0]), globPtr<T>(src[1]), globPtr<T>(src[2]), globPtr<T>(src[3])),
92+
const std::array<GlobPtrSz<T>, 4 > d_src = {globPtr<T>(src[0]), globPtr<T>(src[1]), globPtr<T>(src[2]), globPtr<T>(src[3])};
93+
gridMerge(d_src,
9194
globPtr<typename MakeVec<T, 4>::type>(dst),
9295
stream);
9396
}

modules/cudaarithm/src/reductions.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,12 @@ void cv::cuda::meanStdDev(InputArray src, OutputArray dst, Stream& stream)
151151
sz.width = gsrc.cols;
152152
sz.height = gsrc.rows;
153153

154+
#if (CUDA_VERSION >= 12040)
155+
size_t bufSize;
156+
#else
154157
int bufSize;
158+
#endif
159+
155160
#if (CUDA_VERSION <= 4020)
156161
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
157162
#else
@@ -162,7 +167,8 @@ void cv::cuda::meanStdDev(InputArray src, OutputArray dst, Stream& stream)
162167
#endif
163168

164169
BufferPool pool(stream);
165-
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());
170+
CV_Assert(bufSize <= std::numeric_limits<int>::max());
171+
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), gsrc.type());
166172

167173
// detail: https://github.com/opencv/opencv/issues/11063
168174
//NppStreamHandler h(StreamAccessor::getStream(stream));
@@ -227,7 +233,12 @@ void cv::cuda::meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stre
227233
sz.width = gsrc.cols;
228234
sz.height = gsrc.rows;
229235

236+
#if (CUDA_VERSION >= 12040)
237+
size_t bufSize;
238+
#else
230239
int bufSize;
240+
#endif
241+
231242
#if (CUDA_VERSION <= 4020)
232243
nppSafeCall( nppiMeanStdDev8uC1MRGetBufferHostSize(sz, &bufSize) );
233244
#else
@@ -238,7 +249,8 @@ void cv::cuda::meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stre
238249
#endif
239250

240251
BufferPool pool(stream);
241-
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());
252+
CV_Assert(bufSize <= std::numeric_limits<int>::max());
253+
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), gsrc.type());
242254

243255
if(gsrc.type() == CV_8UC1)
244256
nppSafeCall( nppiMean_StdDev_8u_C1MR(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),

modules/cudafilters/src/cuda/median_filter.cu

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@
5353
#include "opencv2/core/cuda/saturate_cast.hpp"
5454
#include "opencv2/core/cuda/border_interpolate.hpp"
5555

56+
57+
// The wavelet-matrix median filter depends on the CUB library,
58+
// which has shipped as a standard part of the CUDA Toolkit since CUDA 11.
59+
#include "wavelet_matrix_feature_support_checks.h"
60+
#ifdef __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__
61+
#include "wavelet_matrix_multi.cuh"
62+
#include "wavelet_matrix_2d.cuh"
63+
#include "wavelet_matrix_float_supporter.cuh"
64+
#endif
65+
66+
5667
namespace cv { namespace cuda { namespace device
5768
{
5869
__device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
@@ -334,4 +345,72 @@ namespace cv { namespace cuda { namespace device
334345

335346
}}}
336347

348+
349+
#ifdef __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__
350+
namespace cv { namespace cuda { namespace device
351+
{
352+
using namespace wavelet_matrix_median;
353+
354+
// Copies `rows` rows of `row_bytes` bytes each from a pitched device image into a
// densely packed device buffer (destination pitch == row_bytes) with one checked call.
static void wmCopyRowsToDense(void* dense_dst, const void* pitched_src, size_t src_pitch, size_t row_bytes, int rows)
{
    // Nothing to copy: mirrors the zero-iteration behaviour of a per-row loop.
    if (rows <= 0 || row_bytes == 0)
        return;
    cudaSafeCall( cudaMemcpy2D(dense_dst, row_bytes, pitched_src, src_pitch, row_bytes, rows, cudaMemcpyDeviceToDevice) );
}

// Runs the wavelet-matrix median filter on `src` and writes the result into `dst`.
//
// Template parameters:
//   CH_NUM - channel count forwarded to the wavelet-matrix implementation; every
//            row is handled as src.cols * CH_NUM elements of T.
//   T      - element type. float input goes through WMMedianFloatSupporter;
//            any other type is copied into the wavelet matrix source buffer directly.
// Parameters:
//   src    - input image in device memory.
//   dst    - output image; must have the same `cols` as src and a step that is a
//            multiple of sizeof(T).
//   radius - filter radius, forwarded unchanged to median2d().
//   stream - CUDA stream passed to construct() and median2d(). When it is the
//            null stream, cudaDeviceSynchronize() is called before returning.
// Errors:
//   CV_Assert fires on the geometry preconditions above; any failing CUDA runtime
//   call in this function is reported through cudaSafeCall instead of being ignored.
template<int CH_NUM, typename T>
void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<T> src, PtrStepSz<T> dst, int radius,cudaStream_t stream){

    constexpr bool is_float = std::is_same<T, float>::value;
    constexpr static int WORD_SIZE = 32;
    constexpr static int ThW = (std::is_same<T, uint8_t>::value ? 8 : 4);
    constexpr static int ThH = (std::is_same<T, uint8_t>::value ? 64 : 256);
    using XYIdxT = uint32_t;
    using XIdxT = uint16_t;
    // float samples are represented as uint32_t inside the wavelet matrix.
    using WM_T = typename std::conditional<is_float, uint32_t, T>::type;
    using MedianResT = typename std::conditional<is_float, T, std::nullptr_t>::type;
    using WM2D_IMPL = WaveletMatrix2dCu5C<WM_T, CH_NUM, WaveletMatrixMultiCu4G<XIdxT, 512>, 512, WORD_SIZE>;

    CV_Assert(src.cols == dst.cols);
    CV_Assert(dst.step % sizeof(T) == 0);

    WM2D_IMPL WM_cuda(src.rows, src.cols, is_float, false);
    // Point the result buffer at dst for the duration of the call; it is reset to
    // nullptr below (presumably so WM_cuda never treats dst as its own storage -- confirm).
    WM_cuda.res_cu = reinterpret_cast<WM_T*>(dst.ptr());

    const size_t line_num = src.cols * CH_NUM;
    const size_t line_bytes = line_num * sizeof(T);
    if (is_float) {
        WMMedianFloatSupporter::WMMedianFloatSupporter<float, CH_NUM, XYIdxT> float_supporter(src.rows, src.cols);
        float_supporter.alloc();
        // Pack the pitched source rows into the supporter's dense input buffer.
        wmCopyRowsToDense(float_supporter.val_in_cu, src.ptr(), src.step, line_bytes, src.rows);
        const auto p = WM_cuda.get_nowcu_and_buf_byte_div32();
        float_supporter.sort_and_set((XYIdxT*)p.first, p.second);
        WM_cuda.construct(nullptr, stream, true);
        WM_cuda.template median2d<ThW, ThH, MedianResT, false>(radius, dst.step / sizeof(T), (MedianResT*)float_supporter.get_res_table(), stream);
    } else {
        // Pack the pitched source rows into the wavelet matrix's dense source buffer.
        wmCopyRowsToDense(WM_cuda.src_cu, src.ptr(), src.step, line_bytes, src.rows);
        WM_cuda.construct(nullptr, stream);
        WM_cuda.template median2d<ThW, ThH, MedianResT, false>(radius, dst.step / sizeof(T), nullptr, stream);
    }
    WM_cuda.res_cu = nullptr;
    if (!stream) {
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
396+
397+
// Runtime-to-compile-time channel dispatch for the wavelet-matrix median filter.
// Forwards to the CH_NUM-templated overload for 1, 3 or 4 channels; any other
// channel count trips the CV_Assert in the default branch.
template<typename T>
void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<T> src, PtrStepSz<T> dst, int radius, const int num_channels, cudaStream_t stream){
    switch (num_channels) {
    case 1:
        medianFiltering_wavelet_matrix_gpu<1>(src, dst, radius, stream);
        break;
    case 3:
        medianFiltering_wavelet_matrix_gpu<3>(src, dst, radius, stream);
        break;
    case 4:
        medianFiltering_wavelet_matrix_gpu<4>(src, dst, radius, stream);
        break;
    default:
        // Unsupported channel count: the condition is false here, so this always raises.
        CV_Assert(num_channels == 1 || num_channels == 3 || num_channels == 4);
        break;
    }
}
409+
410+
// Explicit instantiations of the channel-dispatching overload for the three
// element types handled here: uint8_t, uint16_t and float.
template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<uint8_t> src, PtrStepSz<uint8_t> dst, int radius, const int num_channels, cudaStream_t stream);
411+
template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<uint16_t> src, PtrStepSz<uint16_t> dst, int radius, const int num_channels, cudaStream_t stream);
412+
template void medianFiltering_wavelet_matrix_gpu(const PtrStepSz<float> src, PtrStepSz<float> dst, int radius, const int num_channels, cudaStream_t stream);
413+
}}}
414+
#endif // __OPENCV_USE_WAVELET_MATRIX_FOR_MEDIAN_FILTER_CUDA__
415+
337416
#endif

0 commit comments

Comments
 (0)