Skip to content

(5.x) Merge 4.x #3769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion modules/cudaarithm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-d
set(extra_dependencies "")
set(optional_dependencies "")
if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
if(UNIX AND NOT BUILD_SHARED_LIBS AND CUDA_VERSION_STRING VERSION_GREATER_EQUAL 9.2 AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.23)
set(CUDA_FFT_LIB_EXT "_static_nocallback")
endif()
list(APPEND extra_dependencies CUDA::cudart_static CUDA::nppial${CUDA_LIB_EXT} CUDA::nppc${CUDA_LIB_EXT} CUDA::nppitc${CUDA_LIB_EXT} CUDA::nppig${CUDA_LIB_EXT} CUDA::nppist${CUDA_LIB_EXT} CUDA::nppidei${CUDA_LIB_EXT})
if(HAVE_CUBLAS)
list(APPEND optional_dependencies CUDA::cublas${CUDA_LIB_EXT})
Expand All @@ -18,7 +21,8 @@ if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
endif()
if(HAVE_CUFFT)
# static version requires separable compilation which is incompatible with opencv's current library structure
list(APPEND optional_dependencies CUDA::cufft)
# the cufft_static_nocallback variant does not require separable compilation. callbacks are currently not used.
list(APPEND optional_dependencies CUDA::cufft${CUDA_FFT_LIB_EXT})
endif()
else()
if(HAVE_CUBLAS)
Expand Down
4 changes: 3 additions & 1 deletion modules/cudafilters/include/opencv2/cudafilters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,14 @@ CV_EXPORTS_W Ptr<Filter> createLaplacianFilter(int srcType, int dstType, int ksi
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter

/** @brief Creates a separable linear filter.
/** @brief Creates a separable linear filter. In-place processing is supported.

@param srcType Source array type.
@param dstType Destination array type.
@param rowKernel Horizontal filter coefficients. Kernels with size \<= 32 are supported.
Pass noArray() to skip the row filtering.
@param columnKernel Vertical filter coefficients. Kernels with size \<= 32 are supported.
Pass noArray() to skip the column filtering.
@param anchor Anchor position within the kernel. Negative values mean that anchor is positioned at
the aperture center.
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
Expand Down
70 changes: 55 additions & 15 deletions modules/cudafilters/src/filtering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,28 +386,38 @@ namespace
const int cn = CV_MAT_CN(srcType);
const int ddepth = CV_MAT_DEPTH(dstType);

Mat rowKernel = _rowKernel.getMat();
Mat columnKernel = _columnKernel.getMat();
CV_Assert( _rowKernel.empty() || _rowKernel.isMat() );
CV_Assert( _columnKernel.empty() || _columnKernel.isMat() );
Mat rowKernel = _rowKernel.empty() ? cv::Mat() : _rowKernel.getMat();
Mat columnKernel = _columnKernel.empty() ? cv::Mat() : _columnKernel.getMat();

CV_Assert( sdepth <= CV_64F && cn <= 4 );
CV_Assert( rowKernel.channels() == 1 );
CV_Assert( columnKernel.channels() == 1 );
CV_Assert( rowKernel.empty() || rowKernel.channels() == 1 );
CV_Assert( columnKernel.empty() || columnKernel.channels() == 1 );
CV_Assert( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP );
CV_Assert( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP );

Mat kernel32F;

rowKernel.convertTo(kernel32F, CV_32F);
rowKernel_.upload(kernel32F.reshape(1, 1));
if (!rowKernel.empty())
{
rowKernel.convertTo(kernel32F, CV_32F);
rowKernel_.upload(kernel32F.reshape(1, 1));
}

columnKernel.convertTo(kernel32F, CV_32F);
columnKernel_.upload(kernel32F.reshape(1, 1));
if (!columnKernel.empty())
{
columnKernel.convertTo(kernel32F, CV_32F);
columnKernel_.upload(kernel32F.reshape(1, 1));
}

CV_Assert( rowKernel_.cols > 0 && rowKernel_.cols <= 32 );
CV_Assert( columnKernel_.cols > 0 && columnKernel_.cols <= 32 );
CV_Assert( rowKernel_.empty() || (rowKernel_.cols > 0 && rowKernel_.cols <= 32 ));
CV_Assert( columnKernel_.empty() || (columnKernel_.cols > 0 && columnKernel_.cols <= 32 ));

normalizeAnchor(anchor_.x, rowKernel_.cols);
normalizeAnchor(anchor_.y, columnKernel_.cols);
if (!rowKernel_.empty())
normalizeAnchor(anchor_.x, rowKernel_.cols);
if (!columnKernel_.empty())
normalizeAnchor(anchor_.y, columnKernel_.cols);

bufType_ = CV_MAKE_TYPE(CV_32F, cn);

Expand All @@ -426,15 +436,45 @@ namespace
_dst.create(src.size(), dstType_);
GpuMat dst = _dst.getGpuMat();

ensureSizeIsEnough(src.size(), bufType_, buf_);
const bool isInPlace = (src.data == dst.data);
const bool hasRowKernel = !rowKernel_.empty();
const bool hasColKernel = !columnKernel_.empty();
const bool hasSingleKernel = (hasRowKernel ^ hasColKernel);
const bool needsSrcAdaptation = !hasRowKernel && hasColKernel && (srcType_ != bufType_);
const bool needsDstAdaptation = hasRowKernel && !hasColKernel && (dstType_ != bufType_);
const bool needsBufForIntermediateStorage = (hasRowKernel && hasColKernel) || (hasSingleKernel && isInPlace);
const bool needsBuf = needsSrcAdaptation || needsDstAdaptation || needsBufForIntermediateStorage;
if (needsBuf)
ensureSizeIsEnough(src.size(), bufType_, buf_);

if (needsSrcAdaptation)
src.convertTo(buf_, bufType_, _stream);
GpuMat& srcAdapted = needsSrcAdaptation ? buf_ : src;

DeviceInfo devInfo;
const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();

cudaStream_t stream = StreamAccessor::getStream(_stream);

rowFilter_(src, buf_, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
columnFilter_(buf_, dst, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
if (!hasRowKernel && !hasColKernel && !isInPlace)
srcAdapted.convertTo(dst, dstType_, _stream);
else if (hasRowKernel || hasColKernel)
{
GpuMat& rowFilterSrc = srcAdapted;
GpuMat& rowFilterDst = !hasRowKernel ? srcAdapted : needsBuf ? buf_ : dst;
GpuMat& colFilterSrc = hasColKernel && needsBuf ? buf_ : srcAdapted;
GpuMat& colFilterTo = dst;

if (hasRowKernel)
rowFilter_(rowFilterSrc, rowFilterDst, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
else if (hasColKernel && (needsBufForIntermediateStorage && !needsSrcAdaptation))
rowFilterSrc.convertTo(buf_, bufType_, _stream);

if (hasColKernel)
columnFilter_(colFilterSrc, colFilterTo, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
else if (needsBuf)
buf_.convertTo(dst, dstType_, _stream);
}
}
}

Expand Down
80 changes: 80 additions & 0 deletions modules/cudafilters/test/test_filters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,86 @@ INSTANTIATE_TEST_CASE_P(CUDA_Filters, SeparableLinearFilter, testing::Combine(
BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));

// Fixture for the separable-linear-filter tests that exercise empty kernels.
// Test parameters, in order: device, source depth, channel count,
// destination depth, in-place flag, use-row-kernel flag, use-column-kernel flag.
PARAM_TEST_CASE(SeparableLinearFilterWithEmptyKernels, cv::cuda::DeviceInfo, MatDepth, Channels, MatDepth, bool, bool, bool)
{
    // Values read from the test parameters in SetUp().
    cv::cuda::DeviceInfo devInfo;
    int srcDepth;
    int cn;
    int dstDepth;
    bool inPlace;
    bool useRowKernel;
    bool useColKernel;

    // Fixed configuration assigned in SetUp().
    cv::Size size;
    cv::Size ksize;
    cv::Point anchor;
    int borderType;

    // Full matrix types built from the depths and the channel count.
    int srcType;
    int dstType;

    virtual void SetUp()
    {
        // Select the device under test.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        // Source / destination element types.
        srcDepth = GET_PARAM(1);
        cn = GET_PARAM(2);
        dstDepth = GET_PARAM(3);
        srcType = CV_MAKE_TYPE(srcDepth, cn);
        dstType = CV_MAKE_TYPE(dstDepth, cn);

        // Scenario switches.
        inPlace = GET_PARAM(4);
        useRowKernel = GET_PARAM(5);
        useColKernel = GET_PARAM(6);

        // Constants shared by every instantiation.
        size = cv::Size(640, 480);
        ksize = cv::Size(3, 1);
        anchor = cv::Point(-1, -1);
        borderType = cv::BORDER_REPLICATE;
    }
};

// Checks that leaving a kernel empty produces the same output as passing a
// 1x1 kernel holding 1.0 in its place. Two filters are built from the same
// parameters: one substitutes the 1x1 "dummy" kernel for each disabled
// direction, the other substitutes an empty cv::Mat. Both are applied to
// identical uploads of the same random source and their outputs are compared.
CUDA_TEST_P(SeparableLinearFilterWithEmptyKernels, Accuracy)
{
    const cv::Mat src = randomMat(size, srcType);

    // ksize is a single-row size, so the column kernel is the transposed row kernel.
    const cv::Mat rowKernel = (cv::Mat_<float>(ksize) << -1, 0, 1);
    const cv::Mat colKernel = rowKernel.t();
    const cv::Mat oneKernel = cv::Mat::ones(cv::Size(1, 1), CV_32FC1);
    const cv::Mat noKernel;

    // Use the fixture's anchor / borderType instead of repeating the literals,
    // so the configuration set in SetUp() is the single source of truth.
    cv::Ptr<cv::cuda::Filter> sepFilterDummyKernels =
        cv::cuda::createSeparableLinearFilter(srcType, dstType,
                                              useRowKernel ? rowKernel : oneKernel,
                                              useColKernel ? colKernel : oneKernel,
                                              anchor, borderType, borderType);

    cv::Ptr<cv::cuda::Filter> sepFilterEmptyKernels =
        cv::cuda::createSeparableLinearFilter(srcType, dstType,
                                              useRowKernel ? rowKernel : noKernel,
                                              useColKernel ? colKernel : noKernel,
                                              anchor, borderType, borderType);

    // Each filter gets its own upload of the source. When inPlace is set, the
    // destination is initialised from the source GpuMat header.
    cv::cuda::GpuMat src_sep_dummyK = loadMat(src);
    cv::cuda::GpuMat dst_sep_dummyK = inPlace ? src_sep_dummyK : cv::cuda::GpuMat();
    cv::cuda::GpuMat src_sep_emptyK = loadMat(src);
    cv::cuda::GpuMat dst_sep_emptyK = inPlace ? src_sep_emptyK : cv::cuda::GpuMat();

    sepFilterDummyKernels->apply(src_sep_dummyK, dst_sep_dummyK);
    sepFilterEmptyKernels->apply(src_sep_emptyK, dst_sep_emptyK);

    // Integer sources tolerate a difference of 1; CV_32F sources use 1e-2.
    EXPECT_MAT_NEAR(dst_sep_dummyK, dst_sep_emptyK, src.depth() < CV_32F ? 1.0 : 1e-2);
}

// Instantiates SeparableLinearFilterWithEmptyKernels over the combination of
// all devices, four source depths, the image channel counts, four destination
// depths and the three boolean switches (in-place, row kernel, column kernel).
INSTANTIATE_TEST_CASE_P(CUDA_Filters, SeparableLinearFilterWithEmptyKernels, testing::Combine(
ALL_DEVICES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),// source depth
IMAGE_CHANNELS,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),// destination depth
testing::Values(false, true),//in-place
testing::Values(false, true),//use row kernel
testing::Values(false, true)//use col kernel
));

/////////////////////////////////////////////////////////////////////////////////////////////////
// Sobel

Expand Down
2 changes: 1 addition & 1 deletion modules/wechat_qrcode/samples/qrcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
except:
print("---------------------------------------------------------------")
print("Failed to initialize WeChatQRCode.")
print("Please, download 'detector.*' and 'sr.*' from")
print("Please, download 'detect.*' and 'sr.*' from")
print("https://github.com/WeChatCV/opencv_3rdparty/tree/wechat_qrcode")
print("and put them into the current directory.")
print("---------------------------------------------------------------")
Expand Down
Loading