From b893c398da3fb1db863fa5ee942f9b0b59e56716 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Sat, 30 Dec 2023 20:53:45 +0200 Subject: [PATCH 1/6] cudacodec: Enable disabled windows features relying on ffmpeg dll --- modules/cudacodec/src/ffmpeg_video_source.cpp | 46 ------------------- modules/cudacodec/src/video_writer.cpp | 3 -- modules/cudacodec/test/test_video.cpp | 21 --------- 3 files changed, 70 deletions(-) diff --git a/modules/cudacodec/src/ffmpeg_video_source.cpp b/modules/cudacodec/src/ffmpeg_video_source.cpp index 87b7ef149e2..6296383db63 100644 --- a/modules/cudacodec/src/ffmpeg_video_source.cpp +++ b/modules/cudacodec/src/ffmpeg_video_source.cpp @@ -66,55 +66,9 @@ static std::string fourccToString(int fourcc) (i32_c.c[3] >= ' ' && i32_c.c[3] < 128) ? i32_c.c[3] : '?'); } -// handle old FFmpeg backend - remove when windows shared library is updated -#ifdef _WIN32 -static -Codec FourccToCodecWin32Old(int codec) -{ - switch (codec) - { - case CV_FOURCC_MACRO('m', 'p', 'e', 'g'): // fallthru - case CV_FOURCC_MACRO('m', 'p', 'g', '1'): // fallthru - case CV_FOURCC_MACRO('M', 'P', 'G', '1'): return MPEG1; - case CV_FOURCC_MACRO('m', 'p', 'g', '2'): // fallthru - case CV_FOURCC_MACRO('M', 'P', 'G', '2'): return MPEG2; - case CV_FOURCC_MACRO('X', 'V', 'I', 'D'): // fallthru - case CV_FOURCC_MACRO('m', 'p', '4', 'v'): // fallthru - case CV_FOURCC_MACRO('D', 'I', 'V', 'X'): return MPEG4; - case CV_FOURCC_MACRO('W', 'V', 'C', '1'): return VC1; - case CV_FOURCC_MACRO('H', '2', '6', '4'): // fallthru - case CV_FOURCC_MACRO('h', '2', '6', '4'): // fallthru - case CV_FOURCC_MACRO('a', 'v', 'c', '1'): return H264; - case CV_FOURCC_MACRO('H', '2', '6', '5'): // fallthru - case CV_FOURCC_MACRO('h', '2', '6', '5'): // fallthru - case CV_FOURCC_MACRO('h', 'e', 'v', 'c'): return HEVC; - case CV_FOURCC_MACRO('M', 'J', 'P', 'G'): return JPEG; - case CV_FOURCC_MACRO('v', 'p', '8', '0'): // fallthru - case CV_FOURCC_MACRO('V', 
'P', '8', '0'): // fallthru - case CV_FOURCC_MACRO('v', 'p', '0', '8'): // fallthru - case CV_FOURCC_MACRO('V', 'P', '0', '8'): return VP8; - case CV_FOURCC_MACRO('v', 'p', '9', '0'): // fallthru - case CV_FOURCC_MACRO('V', 'P', '9', '0'): // fallthru - case CV_FOURCC_MACRO('V', 'P', '0', '9'): // fallthru - case CV_FOURCC_MACRO('v', 'p', '0', '9'): return VP9; - case CV_FOURCC_MACRO('a', 'v', '1', '0'): // fallthru - case CV_FOURCC_MACRO('A', 'V', '1', '0'): // fallthru - case CV_FOURCC_MACRO('a', 'v', '0', '1'): // fallthru - case CV_FOURCC_MACRO('A', 'V', '0', '1'): return AV1; - default: - return NumCodecs; - } -} -#endif - static Codec FourccToCodec(int codec) { -#ifdef _WIN32 // handle old FFmpeg backend - remove when windows shared library is updated - Codec win32OldCodec = FourccToCodecWin32Old(codec); - if(win32OldCodec != NumCodecs) - return win32OldCodec; -#endif switch (codec) { case CV_FOURCC_MACRO('m', 'p', 'g', '1'): return MPEG1; diff --git a/modules/cudacodec/src/video_writer.cpp b/modules/cudacodec/src/video_writer.cpp index 8b5c703f759..2754ac58443 100644 --- a/modules/cudacodec/src/video_writer.cpp +++ b/modules/cudacodec/src/video_writer.cpp @@ -402,13 +402,10 @@ Ptr createVideoWriter(const String& fileName, const Size frameSize, { CV_Assert(params.idrPeriod >= params.gopLength); if (!encoderCallback) { - // required until PR for raw video encapsulation is merged and windows dll is updated -#ifndef WIN32 // remove #define and keep code once merged try { encoderCallback = new FFmpegVideoWriter(fileName, codec, fps, frameSize, params.idrPeriod); } catch (...) 
-#endif { encoderCallback = new RawVideoWriter(fileName); } diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp index 88df2fb1afb..16782a39088 100644 --- a/modules/cudacodec/test/test_video.cpp +++ b/modules/cudacodec/test/test_video.cpp @@ -611,9 +611,6 @@ CUDA_TEST_P(CheckInitParams, Reader) CUDA_TEST_P(Seek, Reader) { -#if defined(WIN32) - throw SkipTestException("Test disabled on Windows until the FFMpeg wrapper is updated to include PR24012."); -#endif std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4"; // seek to a non key frame const int firstFrameIdx = 18; @@ -660,13 +657,7 @@ CUDA_TEST_P(TransCode, H264ToH265) constexpr cv::cudacodec::ColorFormat colorFormat = cv::cudacodec::ColorFormat::NV_NV12; constexpr double fps = 25; const cudacodec::Codec codec = cudacodec::Codec::HEVC; - // required until PR for raw video encapsulation is merged and windows dll is updated -#ifdef WIN32 - const std::string ext = ".hevc"; -#else - // use this after update const std::string ext = ".mp4"; -#endif const std::string outputFile = cv::tempfile(ext.c_str()); constexpr int nFrames = 5; Size frameSz; @@ -743,13 +734,7 @@ CUDA_TEST_P(Write, Writer) const cudacodec::Codec codec = GET_PARAM(2); const double fps = GET_PARAM(3); const cv::cudacodec::ColorFormat colorFormat = GET_PARAM(4); - // required until PR for raw video encapsulation is merged and windows dll is updated -#ifdef WIN32 - const std::string ext = codec == cudacodec::Codec::H264 ? 
".h264" : ".hevc"; -#else - // use this after update const std::string ext = ".mp4"; -#endif const std::string outputFile = cv::tempfile(ext.c_str()); constexpr int nFrames = 5; Size frameSz; @@ -827,13 +812,7 @@ CUDA_TEST_P(EncoderParams, Writer) const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4"; constexpr double fps = 25.0; constexpr cudacodec::Codec codec = cudacodec::Codec::H264; - // required until PR for raw video encapsulation is merged and windows dll is updated -#ifdef WIN32 - const std::string ext = ".h264"; -#else - // use this after update const std::string ext = ".mp4"; -#endif const std::string outputFile = cv::tempfile(ext.c_str()); Size frameSz; const int nFrames = max(params.gopLength, params.idrPeriod) + 1; From 8107d082abe522f7fb07251f1226f803a391cc30 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 2 Jan 2024 22:32:15 +0100 Subject: [PATCH 2/6] Add imgcodecs dependency. This is useful if highgui is built without the imgcodecs dependency. 
--- modules/ccalib/CMakeLists.txt | 2 +- modules/ccalib/src/precomp.hpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ccalib/CMakeLists.txt b/modules/ccalib/CMakeLists.txt index f803322ba9b..d7763aa909a 100644 --- a/modules/ccalib/CMakeLists.txt +++ b/modules/ccalib/CMakeLists.txt @@ -1,2 +1,2 @@ set(the_description "Custom Calibration Pattern") -ocv_define_module(ccalib opencv_core opencv_imgproc opencv_calib3d opencv_features2d opencv_highgui WRAP python) +ocv_define_module(ccalib opencv_core opencv_imgproc opencv_calib3d opencv_features2d opencv_highgui opencv_imgcodecs WRAP python) diff --git a/modules/ccalib/src/precomp.hpp b/modules/ccalib/src/precomp.hpp index b2ced45da23..e5bcf76dfb5 100644 --- a/modules/ccalib/src/precomp.hpp +++ b/modules/ccalib/src/precomp.hpp @@ -46,6 +46,7 @@ #include #include #include +#include "opencv2/imgcodecs.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/highgui.hpp" #include From 105e514302cba1a20f300657cde060c25c013ecc Mon Sep 17 00:00:00 2001 From: amishutin Date: Thu, 11 Jan 2024 14:43:22 +0300 Subject: [PATCH 3/6] added signal module --- modules/README.md | 2 + modules/signal/CMakeLists.txt | 2 + modules/signal/README.md | 4 + modules/signal/include/opencv2/signal.hpp | 17 + .../opencv2/signal/signal_resample.hpp | 32 ++ modules/signal/perf/perf_main.cpp | 6 + modules/signal/perf/perf_precomp.hpp | 15 + modules/signal/perf/perf_resample.cpp | 37 ++ modules/signal/src/precomp.hpp | 10 + modules/signal/src/signal_resample.cpp | 375 ++++++++++++++++++ modules/signal/test/test_main.cpp | 6 + modules/signal/test/test_precomp.hpp | 10 + modules/signal/test/test_signal.cpp | 180 +++++++++ 13 files changed, 696 insertions(+) create mode 100644 modules/signal/CMakeLists.txt create mode 100644 modules/signal/README.md create mode 100644 modules/signal/include/opencv2/signal.hpp create mode 100644 modules/signal/include/opencv2/signal/signal_resample.hpp create mode 100644 
modules/signal/perf/perf_main.cpp create mode 100644 modules/signal/perf/perf_precomp.hpp create mode 100644 modules/signal/perf/perf_resample.cpp create mode 100644 modules/signal/src/precomp.hpp create mode 100644 modules/signal/src/signal_resample.cpp create mode 100644 modules/signal/test/test_main.cpp create mode 100644 modules/signal/test/test_precomp.hpp create mode 100644 modules/signal/test/test_signal.cpp diff --git a/modules/README.md b/modules/README.md index cd8ea0fbe77..413523f7d02 100644 --- a/modules/README.md +++ b/modules/README.md @@ -72,6 +72,8 @@ $ cmake -D OPENCV_EXTRA_MODULES_PATH=/modules -D BUILD_opencv_ + +namespace cv { +namespace signal { + +//! @addtogroup signal +//! @{ + +/** @brief Signal resampling + * + * @param[in] inputSignal Array with input signal. + * @param[out] outSignal Array with output signal. + * @param[in] inFreq Input signal frequency. + * @param[in] outFreq Output signal frequency. + * Signal resampling is implemented as cubic interpolation followed by a FIR filter built from a sinc function windowed with a Kaiser window (computed using the modified Bessel function). + * Result is similar to `scipy.signal.resample`. + +Detail: https://en.wikipedia.org/wiki/Sample-rate_conversion +*/ +CV_EXPORTS_W void resampleSignal(InputArray inputSignal, OutputArray outSignal, const int inFreq, const int outFreq); + +//! @} + +} +} +#endif diff --git a/modules/signal/perf/perf_main.cpp b/modules/signal/perf/perf_main.cpp new file mode 100644 index 00000000000..442e2e43076 --- /dev/null +++ b/modules/signal/perf/perf_main.cpp @@ -0,0 +1,6 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#include "perf_precomp.hpp" + +CV_PERF_TEST_MAIN(signal) diff --git a/modules/signal/perf/perf_precomp.hpp b/modules/signal/perf/perf_precomp.hpp new file mode 100644 index 00000000000..2dc91dc2140 --- /dev/null +++ b/modules/signal/perf/perf_precomp.hpp @@ -0,0 +1,15 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/signal.hpp" + +namespace opencv_test { +using namespace perf; +using namespace cv::signal; +} + +#endif diff --git a/modules/signal/perf/perf_resample.cpp b/modules/signal/perf/perf_resample.cpp new file mode 100644 index 00000000000..b79b7c420c6 --- /dev/null +++ b/modules/signal/perf/perf_resample.cpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "perf_precomp.hpp" + +using namespace std; +using namespace cv; +using namespace perf; + +namespace opencv_test { namespace { + +typedef TestBaseWithParam< tuple > TestResampleFunc; + +PERF_TEST_P( TestResampleFunc, resample_sin_signal, + testing::Combine( + testing::Values(1234U, 12345U, 123456U, 1234567U, 12345678U), + testing::Values(16000U, 32000U, 44100U, 48000U), + testing::Values(48000U, 44100U, 32000U, 16000U)) +) +{ + uint32_t sample_signal_size = GET_PARAM(0); + uint32_t inFreq = GET_PARAM(1); + uint32_t outFreq = GET_PARAM(2); + + Mat1f sample_signal(Size(sample_signal_size,1U)); + Mat1f outSignal(Size(1U, 1U)); + for (uint32_t i = 0U; i < (uint32_t)sample_signal.cols; ++i) + { + sample_signal.at(0, i) = sinf(float(i)); + } + declare.in(sample_signal).out(outSignal); + TEST_CYCLE() resampleSignal(sample_signal, outSignal, inFreq, outFreq); + SANITY_CHECK_NOTHING(); +} + +}} diff --git a/modules/signal/src/precomp.hpp b/modules/signal/src/precomp.hpp new file mode 100644 index 00000000000..6c2978318ed --- /dev/null +++ b/modules/signal/src/precomp.hpp @@ -0,0 +1,10 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef __OPENCV_SIGNAL_PRECOMP__ +#define __OPENCV_SIGNAL_PRECOMP__ + +#include + +#endif diff --git a/modules/signal/src/signal_resample.cpp b/modules/signal/src/signal_resample.cpp new file mode 100644 index 00000000000..03fc5cad3f8 --- /dev/null +++ b/modules/signal/src/signal_resample.cpp @@ -0,0 +1,375 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + + +#include "precomp.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +namespace cv { +namespace signal { + +#if (CV_SIMD || CV_SIMD_SCALABLE) +#define v_float32_width (uint32_t)VTraits::vlanes() +const uint32_t v_float32_max_width = (uint32_t)VTraits::max_nlanes; +#endif + +// Modified Bessel function 1st kind 0th order +static float Bessel(float x) +{ + int k = 12; // approximation parameter + float defmul = x * x * 0.25f; + float mul = defmul; + float acc = 0.f; + for(int i = 0 ; i < k; ++i) + { + mul = powf(defmul, static_cast(i)); + mul = mul / powf(tgammaf(static_cast(i + 1)), 2.f); // tgamma(i+1) equals i! + acc +=mul; + } + return acc; +} + +static void init_filter(float beta, int ntabs, float* tabs) +{ + float fc = 0.25f; + // build sinc filter + for (int i = 0; i < ntabs; ++i) + { + tabs[i] = 2 * fc * (i - (ntabs - 1) / 2); + } + std::vector tmparr(ntabs); + for (int i = 0 ; i < ntabs; ++i) + { + if (tabs[i] == 0.f) + { + tmparr[i] = 1.f; + continue; + } + tmparr[i] = (float)(CV_PI * tabs[i]); + } + float mult = 2.f / (float)(ntabs - 1); + // multiply by Kaiser window + for (int i = 0; i < ntabs; ++i) + { + tabs[i] = std::sin(tmparr[i]) / tmparr[i]; + tabs[i] *= Bessel(beta * sqrtf((float)1 - powf((i * mult - 1), 2))) / Bessel(beta); + } + float sum = 0.f; + for (int i = 0 ; i < ntabs; ++i) + { + sum += tabs[i]; + } + sum = 1.f/sum; + // normalize tabs to get unity gain + for (int i = 0; i < ntabs; ++i) + { + tabs[i] *= sum; + } +} + +/////////////// cubic Hermite spline (tail of execIntrinLoop or scalar version) /////////////// +static float scal_cubicHermite(float A, float B, float C, float D, float t) +{ + float a = (-A + (3.0f * B) - (3.0f * C) + D) * 0.5f; + float b = A + C + C - (5.0f * B + D) * 0.5f; + float c = (-A + C) * 0.5f; + return a * t * t * t + b * t * t 
+ c * t + B; +} + +/////////////// cubic Hermite spline (OpenCV's Universal Intrinsics) /////////////// +#if (CV_SIMD || CV_SIMD_SCALABLE) +static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32 &v_B, const v_float32 &v_C, + const v_float32 &v_D, const v_float32 &v_t) +{ + v_float32 v_zero = vx_setzero_f32(); + v_float32 v_three= vx_setall_f32(3.0f); + v_float32 v_half = vx_setall_f32(0.5f); + v_float32 v_five = vx_setall_f32(5.0f); + + v_float32 v_inv_A = v_sub(v_zero, v_A); + + v_float32 v_a = v_mul(v_sub(v_fma(v_three, v_B, v_add(v_inv_A, v_D)), v_mul(v_three, v_C)), v_half); + v_float32 v_b = v_sub(v_add(v_A, v_C, v_C), v_mul(v_fma(v_five, v_B, v_D), v_half)); + v_float32 v_c = v_mul(v_add(v_inv_A, v_C), v_half); + + return v_add(v_mul(v_a, v_t, v_t, v_t), v_mul(v_b, v_t, v_t), v_fma(v_c, v_t, v_B)); +} +#endif + +static void cubicInterpolate(const Mat1f &src, uint32_t dstlen, Mat1f &dst, uint32_t srclen) +{ + Mat1f tmp(Size(srclen + 3U, 1U)); + tmp.at(0) = src.at(0); + +#if (CV_SIMD || CV_SIMD_SCALABLE) + v_float32 v_reg = vx_setall_f32(src.at(srclen - 1U)); + vx_store(tmp.ptr(0) + (srclen - 1U), v_reg); +#else // scalar version + tmp.at(srclen + 1U) = src.at(srclen - 1U); + tmp.at(srclen + 2U) = src.at(srclen - 1U); +#endif + + uint32_t i = 0U; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + uint32_t len_sub_vfloatStep = (uint32_t)std::max((int64_t)srclen - (int64_t)v_float32_width, (int64_t)0); + for (; i < len_sub_vfloatStep; i+= v_float32_width) + { + v_float32 v_copy = vx_load(src.ptr(0) + i); + vx_store(tmp.ptr(0) + (i + 1U), v_copy); + } +#endif + + // if the tail exists or scalar version + for (; i < srclen; ++i) + { + tmp.at(i + 1U) = src.at(i); + } + + i = 0U; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + int ptr_x_int[v_float32_max_width]; + uint32_t j; + + v_float32 v_dstlen_sub_1 = vx_setall_f32((float)(dstlen - 1U)); + v_float32 v_one = vx_setall_f32(1.0f); + v_float32 v_x_start = v_div(v_one, v_dstlen_sub_1); + v_float32 v_u = 
vx_setall_f32((float)srclen); + v_float32 v_half = vx_setall_f32(0.5f); + + len_sub_vfloatStep = (uint32_t)std::max((int64_t)dstlen - (int64_t)v_float32_width, (int64_t)0); + for (; i < v_float32_width; ++i) + { + ptr_x_int[i] = (int)i; + } + + float ptr_for_cubicHermite[v_float32_max_width]; + v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int)); + for (i = 0U; i < len_sub_vfloatStep; i+= v_float32_width) + { + v_float32 v_reg_i = v_add(vx_setall_f32((float)i), v_sequence); + + v_float32 v_x = v_sub(v_mul(v_x_start, v_reg_i, v_u), v_half); + + v_int32 v_x_int = v_trunc(v_x); + v_float32 v_x_fract = v_sub(v_x, v_cvt_f32(v_floor(v_x))); + + vx_store(ptr_x_int, v_x_int); + + for(j = 0U; j < v_float32_width; ++j) + ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] - 1)); + v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermite); + + for(j = 0U; j < v_float32_width; ++j) + ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j])); + v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermite); + + for(j = 0U; j < v_float32_width; ++j) + ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 1)); + v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermite); + + for(j = 0U; j < v_float32_width; ++j) + ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 2)); + v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermite); + + + vx_store(dst.ptr(0) + i, simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract)); + } +#endif + + // if the tail exists or scalar version + float *ptr = tmp.ptr(0) + 1U; + float lenScale = 1.0f / (float)(dstlen - 1U); + float U, X, xfract; + int xint; + for(; i < dstlen; ++i) + { + U = (float)i * lenScale; + X = (U * (float)srclen) - 0.5f; + xfract = X - floor(X); + xint = (int)X; + dst.at(i) = scal_cubicHermite(ptr[xint - 1], ptr[xint], ptr[xint + 1], ptr[xint + 2], xfract); + } + +} + +static void fir_f32(const float *pSrc, float *pDst, + const float *pCoeffs, float *pBuffer, + uint32_t numTaps, uint32_t blockSize) 
+{ + uint32_t copyLen = std::min(blockSize, numTaps); + + /////////////// delay line to the left /////////////// + uint32_t i = numTaps - 1U, k = 0U, j = 0U; + uint32_t value_i; + const float* ptr = pSrc + 1U - numTaps; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + v_float32 v_pDst; + value_i = (uint32_t)std::max((int64_t)(numTaps + numTaps - 2U) - (int64_t)v_float32_width, (int64_t)0); + uint32_t value_k = (uint32_t)std::max((int64_t)copyLen - (int64_t)v_float32_width, (int64_t)0); + + + uint32_t value_j = (uint32_t)std::max((int64_t)(numTaps) - (int64_t)v_float32_width, (int64_t)0); + + for (; i < value_i && k < value_k; i += v_float32_width, k += v_float32_width) + { + v_float32 pSrc_data = vx_load(ptr + i); //vx_load(pSrc + (i + 1U - numTaps)); + vx_store(pBuffer + i, pSrc_data); + } +#endif + + // if the tail exists or scalar version + value_i = numTaps + numTaps - 2U; + for (; i < value_i && k < copyLen; ++i, ++k) + { + *(pBuffer + i) = *(ptr + i); // pBuffer[i] = pSrc[i + 1U - numTaps] + } + + + /////////////// process delay line /////////////// + i = 0U; k = 0U; + value_i = numTaps - 1U; + float *ptr_Buf; + + for(; i < value_i && k < copyLen; ++i, ++k) + { + ptr_Buf = pBuffer + i; + j = 0U; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + + v_pDst = vx_setzero_f32(); + for (; j < value_j; j += v_float32_width) + { + v_float32 v_pBuffer = vx_load(ptr_Buf + j); //vx_load(pBuffer[i + j]) + v_float32 v_pCoeffs = vx_load(pCoeffs + j); //vx_load(pCoeffs[j]) + + v_pDst = v_fma(v_pBuffer, v_pCoeffs, v_pDst); // v_pDst = v_pBuffer * v_pCoeffs + v_pDst + } + pDst[i] = v_reduce_sum(v_pDst); +#endif + + // if the tail exists or scalar version + for (; j < numTaps; ++j) + pDst[i] += pCoeffs[j] * *(ptr_Buf + j); // pDst[i] += pCoeffs[j] * pBuffer[i + j]; + } + + + /////////////// process main block /////////////// + i = numTaps - 1U; + + for(; i < blockSize; ++i) + { + const float *ptr_Src = pSrc + (i + 1U - numTaps); + j = 0U; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + v_pDst = 
vx_setzero_f32(); + for (; j < value_j; j += v_float32_width) + { + v_float32 v_pSrc = vx_load(ptr_Src + j); // vx_load(pSrc[i + j - (numTaps - 1)]) + v_float32 v_pCoeffs = vx_load(pCoeffs + j); //vx_load(pCoeffs[j]) + v_pDst = v_fma(v_pSrc, v_pCoeffs, v_pDst); + } + pDst[i] = v_reduce_sum(v_pDst); +#endif + + // if the tail exists or scalar version + for (; j < numTaps; ++j) + pDst[i] += pCoeffs[j] * *(ptr_Src + j); // pDst[i] += pCoeffs[j] * pSrc[i + j + 1U - numTaps]; + } + + + /////////////// move delay line left by copyLen elements /////////////// +#if (CV_SIMD || CV_SIMD_SCALABLE) + value_i = (uint32_t)std::max((int64_t)(numTaps - 1U) - (int64_t)v_float32_width, (int64_t)0); + ptr_Buf = pBuffer + copyLen; + + for(i = 0U; i < value_i; i += v_float32_width) + { + v_float32 v_pBuffer = vx_load(ptr_Buf + i); //vx_load(pBuffer[copyLen + i]) + vx_store(pBuffer + i, v_pBuffer); + } +#endif + + // if the tail exists or scalar version + value_i = numTaps - 1U; + for (; i < value_i; ++i) + { + pBuffer[i] = pBuffer[i + copyLen]; + } + + + /////////////// copy new elements /////////////// + /////////////// post-process delay line /////////////// + int l = (int)(numTaps - 2U); k = 0U; + +#if (CV_SIMD || CV_SIMD_SCALABLE) + int value_l = (int)v_float32_width; + const float* ptr_part = pSrc + (blockSize + 1U - numTaps - v_float32_width); + for(; l >= value_l && k < value_k; l -= value_l, k += v_float32_width) + { + v_float32 v_pSrc = vx_load(ptr_part + l); // vx_load(pSrc[blockSize - (numTaps - 1) + l - v_float32_width]) + vx_store(pBuffer + (l - value_l), v_pSrc); + } +#endif + const float* ptr_Src = pSrc + (blockSize + 1U - numTaps); + for(; l >= 0 && k < copyLen; --l, ++k) + { + pBuffer[l] = *(ptr_Src + l); // pBuffer[l] = pSrc[blockSize + 1U - numTaps + l]; + } +} + +void resampleSignal(InputArray inputSignal, OutputArray outputSignal, + const int inFreq, const int outFreq) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!inputSignal.empty()); + CV_CheckGE(inFreq, 1000, ""); + 
CV_CheckGE(outFreq, 1000, ""); + if (inFreq == outFreq) + { + inputSignal.copyTo(outputSignal); + return; + } + uint32_t filtLen = 33U; + float beta = 3.395f; + std::vector filt_window(filtLen, 0.f); + init_filter(beta, filtLen, filt_window.data()); + float ratio = (float)outFreq / float(inFreq); + Mat1f inMat = inputSignal.getMat(); + Mat1f outMat = Mat1f(Size(cvFloor(inMat.cols * ratio), 1)); + cubicInterpolate(inMat, outMat.cols, outMat, inMat.cols); + if (inFreq < 2 * outFreq) + { + std::vector dlyl(filtLen * 2 - 1, 0.f); + std::vector ptmp(outMat.cols + 2 * filtLen, 0.); + + for (auto i = filtLen; i < outMat.cols + filtLen; ++i) + { + ptmp[i] = outMat.at(i - filtLen); + } + std::vector ptmp2(outMat.cols + 2 * filtLen, 0.f); + fir_f32(ptmp.data(), ptmp2.data(), filt_window.data(), dlyl.data(), filtLen, (uint32_t)(ptmp.size())); + for (auto i = filtLen; i < outMat.cols + filtLen; ++i) + { + outMat.at(i - filtLen) = ptmp2[i + cvFloor((float)filtLen / 2.f)]; + } + } + outputSignal.assign(std::move(outMat)); +} + + +} +} diff --git a/modules/signal/test/test_main.cpp b/modules/signal/test/test_main.cpp new file mode 100644 index 00000000000..a6fc332d4ae --- /dev/null +++ b/modules/signal/test/test_main.cpp @@ -0,0 +1,6 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "test_precomp.hpp" + +CV_TEST_MAIN("") diff --git a/modules/signal/test/test_precomp.hpp b/modules/signal/test/test_precomp.hpp new file mode 100644 index 00000000000..c398e080f88 --- /dev/null +++ b/modules/signal/test/test_precomp.hpp @@ -0,0 +1,10 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/signal.hpp" + +#endif diff --git a/modules/signal/test/test_signal.cpp b/modules/signal/test/test_signal.cpp new file mode 100644 index 00000000000..377ac5734da --- /dev/null +++ b/modules/signal/test/test_signal.cpp @@ -0,0 +1,180 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" +#include "opencv2/core/mat.hpp" +#include "opencv2/signal.hpp" + +#include +#include +#include +#include +#include +#include +#include + + +namespace opencv_test { namespace { + +using namespace cv; +using namespace cv::signal; + +float MSE(const Mat1f &outSignal, const Mat1f &refSignal) +{ + float mse = 0.f; + for (int i = 0; i < refSignal.cols; ++i) + { + mse += powf(outSignal.at(0,i) - refSignal.at(0,i), 2.f); + } + mse /= refSignal.cols; + return mse; +} + +// RRMSE = sqrt( MSE / SUM(sqr(refSignal(i))) ) * 100% +float RRMSE(const Mat1f &outSignal, const Mat1f &refSignal) +{ + float rrmse = 0.f; + float div = 0.f; + rrmse = MSE(outSignal, refSignal); + for (int i = 0; i < refSignal.cols; ++i) + { + div += powf(refSignal.at(0,i), 2.f); + } + rrmse /= div; + rrmse = sqrt(rrmse) * 100; + return rrmse; +} + +TEST(ResampleTest, simple_resample_test_up) +{ + Mat1f sample_signal(Size(1000U,1U)); + Mat1f outSignal; + std::iota(sample_signal.begin(), sample_signal.end(), 1.f); + resampleSignal(sample_signal, outSignal, 16000U, 32000U); + vector ref(outSignal.cols, 0.f); + for (uint32_t i = 0U; i < 2000U; ++i) + { + ref[i] = static_cast(i) / 2.f; + } + EXPECT_NEAR(cvtest::norm(ref, NORM_L2) / cvtest::norm(outSignal, NORM_L2), 1.0f, 0.05f) + << "\nL2_norm(refSignal) = " << cvtest::norm(ref, NORM_L2) + << "\nL2_norm(outSignal) = " << cvtest::norm(outSignal, NORM_L2); +} + +TEST(ResampleTest, 
resample_sin_signal_up_2) +{ + Mat1f sample_signal(Size(1000U,1U)); + Mat1f outSignal; + for (uint32_t i = 0U; i < (uint32_t)sample_signal.cols; ++i) + { + sample_signal.at(0, i) = sinf(float(i)); + } + resampleSignal(sample_signal, outSignal, 16000U, 32000U); + vector ref(outSignal.cols, 0.f); + for (uint32_t i = 0U; i < 2000U; ++i) + { + ref[i] = sin(static_cast(i) / 2.f); + } + EXPECT_NEAR(cvtest::norm(ref, NORM_L2) / cvtest::norm(outSignal, NORM_L2), 1.0f, 0.05f) + << "\nL2_norm(refSignal) = " << cvtest::norm(ref, NORM_L2) + << "\nL2_norm(outSignal) = " << cvtest::norm(outSignal, NORM_L2); +} + +TEST(ResampleTest, simple_resample_test_dn) +{ + Mat1f sample_signal(Size(1000U,1U)); + Mat1f outSignal; + std::iota(sample_signal.begin(), sample_signal.end(), 1.f); + resampleSignal(sample_signal, outSignal, 32000U, 16000U); + vector ref(outSignal.cols, 0.f); + for (uint32_t i = 0U; i < 500U; ++i) + { + ref[i] = static_cast(i) * 2.f; + } + EXPECT_NEAR(cvtest::norm(ref, NORM_L2) / cvtest::norm(outSignal, NORM_L2), 1.0f, 0.05f) + << "\nL2_norm(refSignal) = " << cvtest::norm(ref, NORM_L2) + << "\nL2_norm(outSignal) = " << cvtest::norm(outSignal, NORM_L2); +} + +TEST(ResampleTest, resample_sin_signal_dn_2) +{ + Mat1f sample_signal(Size(1000U,1U)); + Mat1f outSignal; + for (uint32_t i = 0U; i < (uint32_t)sample_signal.cols; ++i) + { + sample_signal.at(0, i) = sinf(float(i)); + } + resampleSignal(sample_signal, outSignal, 32000U, 16000U); + std::vector ref(outSignal.cols, 0.f); + for (uint32_t i = 0U; i < 500U; ++i) + { + ref[i] = sin(static_cast(i) * 2.f); + } + EXPECT_NEAR(cvtest::norm(ref, NORM_L2) / cvtest::norm(outSignal, NORM_L2), 1.0f, 0.05f) + << "\nL2_norm(refSignal) = " << cvtest::norm(ref, NORM_L2) + << "\nL2_norm(outSignal) = " << cvtest::norm(outSignal, NORM_L2); +} + +// produce 1s of signal @ freq hz +void fillSignal(uint32_t freq, Mat1f &inSignal) +{ + static std::default_random_engine e((unsigned int)(time(NULL))); + static std::uniform_real_distribution<> 
dis(0, 1); // range [0, 1) + static auto a = dis(e), b = dis(e), c = dis(e); + uint32_t idx = 0; + std::generate(inSignal.begin(), inSignal.end(), [&]() + { + float ret = static_cast(sin(idx/(float)freq + a) + 3 * sin(CV_PI / 4 * (idx/(float)freq + b)) + + 5 * sin(CV_PI/12 * idx/(float)freq + c) + 20*cos(idx/(float)freq*4000)); + idx++; + return ret; + }); +} + +class ResampleTestClass : public testing::TestWithParam> +{ +}; + +TEST_P(ResampleTestClass, func_test) { + auto params1 = GetParam(); + uint32_t inFreq = std::get<0>(params1); + uint32_t outFreq = std::get<1>(params1); + // 1 second @ inFreq hz + Mat1f inSignal(Size(inFreq, 1U)); + Mat1f outSignal; + // generating testing function as a sum of different sinusoids + fillSignal(inFreq, inSignal); + resampleSignal(inSignal, outSignal, inFreq, outFreq); + // reference signal + // 1 second @ outFreq hz + Mat1f refSignal(Size(outFreq, 1U)); + fillSignal(outFreq, refSignal); + // calculating maxDiff + float maxDiff = 0.f; + // exclude 2 elements and last 2 elements from testing + for (uint32_t i = 2; i < (uint32_t)refSignal.cols - 2; ++i) + { + if(maxDiff < abs(refSignal.at(0,i) - outSignal.at(0,i))) + { + maxDiff = abs(refSignal.at(0,i) - outSignal.at(0,i)); + } + } + auto max = std::max_element(outSignal.begin(), outSignal.end()); + float maxDiffRel = maxDiff / (*max); + EXPECT_LE(maxDiffRel, 0.35f); + // calculating relative error of L2 norms + EXPECT_NEAR(abs(cvtest::norm(outSignal, NORM_L2) - cvtest::norm(refSignal, NORM_L2)) / + cvtest::norm(refSignal, NORM_L2), 0.0f, 0.05f); + // calculating relative mean squared error + float rrmse = RRMSE(outSignal, refSignal); + // 1% error + EXPECT_LE(rrmse, 1.f); +} + +INSTANTIATE_TEST_CASE_P(RefSignalTestingCase, + ResampleTestClass, + ::testing::Combine(testing::Values(16000, 32000, 44100, 48000), + testing::Values(16000, 32000, 44100, 48000))); + +}} // namespace From 504f15cad1462b1b501d357043e935da386edada Mon Sep 17 00:00:00 2001 From: cudawarped 
<12133430+cudawarped@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:24:15 +0200 Subject: [PATCH 4/6] Merge pull request #3618 from cudawarped:cudaimgproc_moments_fix cudaimgproc: update for CUDA 8.0 and fix out of bounds memory error in cuda::moments #3618 Fix https://github.com/opencv/opencv_contrib/issues/3612 and address out of bounds memory error when not calculating all image moments. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- .../include/opencv2/cudaimgproc.hpp | 34 +++++++++++++------ modules/cudaimgproc/src/cuda/moments.cu | 10 +++++- modules/cudaimgproc/src/moments.cpp | 26 +++++++++++--- modules/cudaimgproc/test/test_moments.cpp | 5 +-- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp index 4c9ee0f48e8..0bc4e97d76a 100644 --- a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp +++ b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp @@ -800,10 +800,21 @@ enum MomentsOrder { @param order Order of largest moments to calculate with lower order moments requiring less computation. @returns number of image moments. 
-@sa cuda::moments, cuda::spatialMoments, cuda::MomentsOrder +@sa cuda::spatialMoments, cuda::moments, cuda::MomentsOrder */ CV_EXPORTS_W int numMoments(const MomentsOrder order); +/** @brief Converts the spatial image moments returned from cuda::spatialMoments to cv::Moments. +@param spatialMoments Spatial moments returned from cuda::spatialMoments. +@param order Order used when calculating image moments with cuda::spatialMoments. +@param momentsType Precision used when calculating image moments with cuda::spatialMoments. + +@returns cv::Moments. + +@sa cuda::spatialMoments, cuda::moments, cuda::convertSpatialMoments, cuda::numMoments, cuda::MomentsOrder + */ +CV_EXPORTS_W Moments convertSpatialMoments(Mat spatialMoments, const MomentsOrder order, const int momentsType); + /** @brief Calculates all of the spatial moments up to the 3rd order of a rasterized shape. Asynchronous version of cuda::moments() which only calculates the spatial (not centralized or normalized) moments, up to the 3rd order, of a rasterized shape. @@ -813,24 +824,25 @@ Each moment is returned as a column entry in the 1D \a moments array. @param [out] moments 1D array with each column entry containing a spatial image moment. @param binaryImage If it is true, all non-zero image pixels are treated as 1's. @param order Order of largest moments to calculate with lower order moments requiring less computation. -@param momentsType Precision to use when calculating moments. Available types are `CV_32F` and `CV_64F` with the performance of `CV_32F` an order of magnitude greater than `CV_64F`. If the image is small the accuracy from `CV_32F` can be equal or very close to `CV_64F`. +@param momentsType Precision to use when calculating moments. Available types are \ref CV_32F and \ref CV_64F with the performance of \ref CV_32F an order of magnitude greater than \ref CV_64F. If the image is small the accuracy from \ref CV_32F can be equal or very close to \ref CV_64F. 
@param stream Stream for the asynchronous version. -@note For maximum performance pre-allocate a 1D GpuMat for \a moments of the correct type and size large enough to store the all the image moments of up to the desired \a order. e.g. With \a order === MomentsOrder::SECOND_ORDER_MOMENTS and \a momentsType == `CV_32F` \a moments can be allocated as +@note For maximum performance pre-allocate a 1D GpuMat for \a moments of the correct type and size large enough to store all the image moments of up to the desired \a order. e.g. With \a order == MomentsOrder::SECOND_ORDER_MOMENTS and \a momentsType == \ref CV_32F \a moments can be allocated as ``` GpuMat momentsDevice(1,numMoments(MomentsOrder::SECOND_ORDER_MOMENTS),CV_32F) ``` -The central and normalized moments can easily be calculated on the host by downloading the \a moments array and using the cv::Moments constructor. e.g. +The central and normalized moments can easily be calculated on the host by downloading the \a moments array and using the cuda::convertSpatialMoments helper function. e.g. ``` -HostMem momentsHostMem(1, numMoments(MomentsOrder::SECOND_ORDER_MOMENTS), CV_32F); -momentsDevice.download(momentsHostMem, stream); +HostMem spatialMomentsHostMem(1, numMoments(MomentsOrder::SECOND_ORDER_MOMENTS), CV_32F); +momentsDevice.download(spatialMomentsHostMem, stream); stream.waitForCompletion(); -Mat momentsMat = momentsHostMem.createMatHeader(); -cv::Moments cvMoments(momentsMat.at<float>(0), momentsMat.at<float>(1), momentsMat.at<float>(2), momentsMat.at<float>(3), momentsMat.at<float>(4), momentsMat.at<float>(5), momentsMat.at<float>(6), momentsMat.at<float>(7), momentsMat.at<float>(8), momentsMat.at<float>(9)); +Mat spatialMoments = spatialMomentsHostMem.createMatHeader(); +cv::Moments cvMoments = convertSpatialMoments(spatialMoments, MomentsOrder::SECOND_ORDER_MOMENTS, CV_32F); ``` + see the \a CUDA_TEST_P(Moments, Async) test inside opencv_contrib_source_code/modules/cudaimgproc/test/test_moments.cpp for an example. @returns cv::Moments. 
-@sa cuda::moments +@sa cuda::moments, cuda::convertSpatialMoments, cuda::numMoments, cuda::MomentsOrder */ CV_EXPORTS_W void spatialMoments(InputArray src, OutputArray moments, const bool binaryImage = false, const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS, const int momentsType = CV_64F, Stream& stream = Stream::Null()); @@ -842,7 +854,7 @@ results are returned in the structure cv::Moments. @param src Raster image (single-channel 2D array). @param binaryImage If it is true, all non-zero image pixels are treated as 1's. @param order Order of largest moments to calculate with lower order moments requiring less computation. - @param momentsType Precision to use when calculating moments. Available types are `CV_32F` and `CV_64F` with the performance of `CV_32F` an order of magnitude greater than `CV_64F`. If the image is small the accuracy from `CV_32F` can be equal or very close to `CV_64F`. + @param momentsType Precision to use when calculating moments. Available types are \ref CV_32F and \ref CV_64F with the performance of \ref CV_32F an order of magnitude greater than \ref CV_64F. If the image is small the accuracy from \ref CV_32F can be equal or very close to \ref CV_64F. @note For maximum performance use the asynchronous version cuda::spatialMoments() as this version interally allocates and deallocates both GpuMat and HostMem to respectively perform the calculation on the device and download the result to the host. The costly HostMem allocation cannot be avoided however the GpuMat device allocation can be by using BufferPool, e.g. @@ -852,7 +864,7 @@ The costly HostMem allocation cannot be avoided however the GpuMat device alloca ``` see the \a CUDA_TEST_P(Moments, Accuracy) test inside opencv_contrib_source_code/modules/cudaimgproc/test/test_moments.cpp for an example. @returns cv::Moments. 
@sa cuda::spatialMoments, cuda::convertSpatialMoments, cuda::numMoments, cuda::MomentsOrder */ CV_EXPORTS_W Moments moments(InputArray src, const bool binaryImage = false, const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS, const int momentsType = CV_64F); diff --git a/modules/cudaimgproc/src/cuda/moments.cu b/modules/cudaimgproc/src/cuda/moments.cu index 9828c5614b2..daf479a75f7 100644 --- a/modules/cudaimgproc/src/cuda/moments.cu +++ b/modules/cudaimgproc/src/cuda/moments.cu @@ -16,14 +16,22 @@ constexpr int blockSizeY = 16; template <typename T> __device__ T butterflyWarpReduction(T value) { for (int i = 16; i >= 1; i /= 2) +#if (CUDART_VERSION >= 9000) value += __shfl_xor_sync(0xffffffff, value, i, 32); +#else + value += __shfl_xor(value, i, 32); +#endif return value; } template <typename T> __device__ T butterflyHalfWarpReduction(T value) { for (int i = 8; i >= 1; i /= 2) - value += __shfl_xor_sync(0xffff, value, i, 32); +#if (CUDART_VERSION >= 9000) + value += __shfl_xor_sync(0xffff, value, i, 16); +#else + value += __shfl_xor(value, i, 16); +#endif return value; } diff --git a/modules/cudaimgproc/src/moments.cpp b/modules/cudaimgproc/src/moments.cpp index ced5b2f8c66..3c2e62c4b90 100644 --- a/modules/cudaimgproc/src/moments.cpp +++ b/modules/cudaimgproc/src/moments.cpp @@ -12,6 +12,25 @@ int cv::cuda::numMoments(const MomentsOrder order) { return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? 
device::imgproc::n12 : device::imgproc::n123; } +template <typename T> +cv::Moments convertSpatialMomentsT(Mat spatialMoments, const MomentsOrder order) { + switch (order) { + case MomentsOrder::FIRST_ORDER_MOMENTS: + return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), 0, 0, 0, 0, 0, 0, 0); + case MomentsOrder::SECOND_ORDER_MOMENTS: + return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), spatialMoments.at<T>(3), spatialMoments.at<T>(4), spatialMoments.at<T>(5), 0, 0, 0, 0); + default: + return Moments(spatialMoments.at<T>(0), spatialMoments.at<T>(1), spatialMoments.at<T>(2), spatialMoments.at<T>(3), spatialMoments.at<T>(4), spatialMoments.at<T>(5), spatialMoments.at<T>(6), spatialMoments.at<T>(7), spatialMoments.at<T>(8), spatialMoments.at<T>(9)); + } +} + +cv::Moments cv::cuda::convertSpatialMoments(Mat spatialMoments, const MomentsOrder order, const int momentsType) { + if (momentsType == CV_32F) + return convertSpatialMomentsT<float>(spatialMoments, order); + else + return convertSpatialMomentsT<double>(spatialMoments, order); +} + #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); } void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); } @@ -53,15 +72,12 @@ void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool bi } Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { - Stream& stream = Stream::Null(); + Stream stream; HostMem dst; spatialMoments(src, dst, binary, order, momentsType, stream); stream.waitForCompletion(); Mat moments = dst.createMatHeader(); - if(momentsType == CV_32F) - return Moments(moments.at<float>(0), moments.at<float>(1), moments.at<float>(2), moments.at<float>(3), moments.at<float>(4), moments.at<float>(5), moments.at<float>(6), moments.at<float>(7), moments.at<float>(8), moments.at<float>(9)); - else - 
return Moments(moments.at(0), moments.at(1), moments.at(2), moments.at(3), moments.at(4), moments.at(5), moments.at(6), moments.at(7), moments.at(8), moments.at(9)); + return convertSpatialMoments(moments, order, momentsType); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaimgproc/test/test_moments.cpp b/modules/cudaimgproc/test/test_moments.cpp index c5dd889f095..6c5800c291e 100644 --- a/modules/cudaimgproc/test/test_moments.cpp +++ b/modules/cudaimgproc/test/test_moments.cpp @@ -101,10 +101,7 @@ CUDA_TEST_P(Moments, Async) HostMem momentsHost(1, nMoments, momentsType); momentsDevice.download(momentsHost, stream); stream.waitForCompletion(); - Mat momentsHost64F = momentsHost.createMatHeader(); - if (momentsType == CV_32F) - momentsHost.createMatHeader().convertTo(momentsHost64F, CV_64F); - const cv::Moments moments = cv::Moments(momentsHost64F.at(0), momentsHost64F.at(1), momentsHost64F.at(2), momentsHost64F.at(3), momentsHost64F.at(4), momentsHost64F.at(5), momentsHost64F.at(6), momentsHost64F.at(7), momentsHost64F.at(8), momentsHost64F.at(9)); + const cv::Moments moments = convertSpatialMoments(momentsHost.createMatHeader(), order, momentsType); Mat imgHostAdjustedType = imgHost(roi); if (imgType != CV_8U && imgType != CV_32F) imgHost(roi).convertTo(imgHostAdjustedType, CV_32F); From ee5a2c31792ae5f2efe9a87b6634149ce53bb3f1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 15 Jan 2024 11:09:10 +0000 Subject: [PATCH 5/6] signal: fix memory access --- modules/signal/src/signal_resample.cpp | 120 +++++++++---------------- 1 file changed, 44 insertions(+), 76 deletions(-) diff --git a/modules/signal/src/signal_resample.cpp b/modules/signal/src/signal_resample.cpp index 03fc5cad3f8..78911234679 100644 --- a/modules/signal/src/signal_resample.cpp +++ b/modules/signal/src/signal_resample.cpp @@ -105,102 +105,69 @@ static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32 } #endif -static void cubicInterpolate(const Mat1f 
&src, uint32_t dstlen, Mat1f &dst, uint32_t srclen) +static void cubicInterpolate(const float* src/*[srclen]*/, uint32_t srclen, float* dst/*[dstlen]*/, uint32_t dstlen) { - Mat1f tmp(Size(srclen + 3U, 1U)); - tmp.at(0) = src.at(0); - -#if (CV_SIMD || CV_SIMD_SCALABLE) - v_float32 v_reg = vx_setall_f32(src.at(srclen - 1U)); - vx_store(tmp.ptr(0) + (srclen - 1U), v_reg); -#else // scalar version - tmp.at(srclen + 1U) = src.at(srclen - 1U); - tmp.at(srclen + 2U) = src.at(srclen - 1U); -#endif + const int srclen_1 = (int)srclen - 1; uint32_t i = 0U; + const float dstToSrcScale = 1.0f / (float)(dstlen - 1U) * (float)srclen; #if (CV_SIMD || CV_SIMD_SCALABLE) - uint32_t len_sub_vfloatStep = (uint32_t)std::max((int64_t)srclen - (int64_t)v_float32_width, (int64_t)0); - for (; i < len_sub_vfloatStep; i+= v_float32_width) - { - v_float32 v_copy = vx_load(src.ptr(0) + i); - vx_store(tmp.ptr(0) + (i + 1U), v_copy); - } -#endif - - // if the tail exists or scalar version - for (; i < srclen; ++i) - { - tmp.at(i + 1U) = src.at(i); - } - - i = 0U; + const v_float32 v_dst2src_scale = vx_setall_f32(dstToSrcScale); + const v_float32 v_half = vx_setall_f32(0.5f); -#if (CV_SIMD || CV_SIMD_SCALABLE) int ptr_x_int[v_float32_max_width]; - uint32_t j; - - v_float32 v_dstlen_sub_1 = vx_setall_f32((float)(dstlen - 1U)); - v_float32 v_one = vx_setall_f32(1.0f); - v_float32 v_x_start = v_div(v_one, v_dstlen_sub_1); - v_float32 v_u = vx_setall_f32((float)srclen); - v_float32 v_half = vx_setall_f32(0.5f); - - len_sub_vfloatStep = (uint32_t)std::max((int64_t)dstlen - (int64_t)v_float32_width, (int64_t)0); - for (; i < v_float32_width; ++i) + for (unsigned j = 0; j < v_float32_width; ++j) { - ptr_x_int[i] = (int)i; + ptr_x_int[j] = (int)j; } + const v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int)); - float ptr_for_cubicHermite[v_float32_max_width]; - v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int)); - for (i = 0U; i < len_sub_vfloatStep; i+= v_float32_width) + for (i = 0U; i <= dstlen - 
v_float32_width; i+= v_float32_width) { v_float32 v_reg_i = v_add(vx_setall_f32((float)i), v_sequence); - v_float32 v_x = v_sub(v_mul(v_x_start, v_reg_i, v_u), v_half); + v_float32 v_x = v_sub(v_mul(v_reg_i, v_dst2src_scale), v_half); v_int32 v_x_int = v_trunc(v_x); v_float32 v_x_fract = v_sub(v_x, v_cvt_f32(v_floor(v_x))); vx_store(ptr_x_int, v_x_int); - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] - 1)); - v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j])); - v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 1)); - v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 2)); - v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermite); + float ptr_for_cubicHermiteA[v_float32_max_width]; + float ptr_for_cubicHermiteB[v_float32_max_width]; + float ptr_for_cubicHermiteC[v_float32_max_width]; + float ptr_for_cubicHermiteD[v_float32_max_width]; + for (unsigned j = 0U; j < v_float32_width; ++j) + { + int src_offset = ptr_x_int[j]; + ptr_for_cubicHermiteA[j] = src[std::min(std::max(0, src_offset - 1), srclen_1)]; + ptr_for_cubicHermiteB[j] = src[std::min(std::max(0, src_offset + 0), srclen_1)]; + ptr_for_cubicHermiteC[j] = src[std::min(std::max(0, src_offset + 1), srclen_1)]; + ptr_for_cubicHermiteD[j] = src[std::min(std::max(0, src_offset + 2), srclen_1)]; + } + v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermiteA); + v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermiteB); + v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermiteC); + v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermiteD); - vx_store(dst.ptr(0) + i, simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract)); 
+ vx_store(&dst[i], simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract)); } #endif // if the tail exists or scalar version - float *ptr = tmp.ptr<float>(0) + 1U; - float lenScale = 1.0f / (float)(dstlen - 1U); - float U, X, xfract; - int xint; for(; i < dstlen; ++i) { - U = (float)i * lenScale; - X = (U * (float)srclen) - 0.5f; - xfract = X - floor(X); - xint = (int)X; - dst.at<float>(i) = scal_cubicHermite(ptr[xint - 1], ptr[xint], ptr[xint + 1], ptr[xint + 2], xfract); + float X = (float)i * dstToSrcScale - 0.5f; + float xfract = X - floor(X); + int xint = (int)X; + float cubicHermiteA = src[std::min(std::max(0, xint - 1), srclen_1)]; + float cubicHermiteB = src[std::min(std::max(0, xint + 0), srclen_1)]; + float cubicHermiteC = src[std::min(std::max(0, xint + 1), srclen_1)]; + float cubicHermiteD = src[std::min(std::max(0, xint + 2), srclen_1)]; + dst[i] = scal_cubicHermite(cubicHermiteA, cubicHermiteB, cubicHermiteC, cubicHermiteD, xfract); } - } static void fir_f32(const float *pSrc, float *pDst, @@ -332,7 +299,7 @@ static void fir_f32(const float *pSrc, float *pDst, } void resampleSignal(InputArray inputSignal, OutputArray outputSignal, - const int inFreq, const int outFreq) + const int inFreq, const int outFreq) { CV_TRACE_FUNCTION(); CV_Assert(!inputSignal.empty()); @@ -343,16 +310,18 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal, inputSignal.copyTo(outputSignal); return; } - uint32_t filtLen = 33U; - float beta = 3.395f; - std::vector<float> filt_window(filtLen, 0.f); - init_filter(beta, filtLen, filt_window.data()); float ratio = (float)outFreq / float(inFreq); Mat1f inMat = inputSignal.getMat(); - Mat1f outMat = Mat1f(Size(cvFloor(inMat.cols * ratio), 1)); - cubicInterpolate(inMat, outMat.cols, outMat, inMat.cols); + outputSignal.create(Size(cvFloor(inMat.cols * ratio), 1), CV_32FC1); + Mat1f outMat = outputSignal.getMat(); + cubicInterpolate(inMat.ptr<float>(0), inMat.cols, outMat.ptr<float>(0), outMat.cols); if (inFreq < 2 * 
outFreq) { + uint32_t filtLen = 33U; + float beta = 3.395f; + std::vector<float> filt_window(filtLen, 0.f); + init_filter(beta, filtLen, filt_window.data()); + std::vector<float> dlyl(filtLen * 2 - 1, 0.f); std::vector<float> ptmp(outMat.cols + 2 * filtLen, 0.); @@ -367,7 +336,6 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal, outMat.at<float>(i - filtLen) = ptmp2[i + cvFloor((float)filtLen / 2.f)]; } } - outputSignal.assign(std::move(outMat)); } From 46fb893f9a632012990713c4003d7d3cab4f2f25 Mon Sep 17 00:00:00 2001 From: Kumataro Date: Fri, 19 Jan 2024 15:41:00 +0000 Subject: [PATCH 6/6] sfm: add opencv.sfm prefix for static libraries --- modules/sfm/CMakeLists.txt | 8 ++++---- .../libmv_light/libmv/correspondence/CMakeLists.txt | 8 ++++---- .../sfm/src/libmv_light/libmv/multiview/CMakeLists.txt | 10 +++++----- .../sfm/src/libmv_light/libmv/numeric/CMakeLists.txt | 6 +++--- .../libmv_light/libmv/simple_pipeline/CMakeLists.txt | 6 +++--- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/sfm/CMakeLists.txt b/modules/sfm/CMakeLists.txt index 4377a61651e..46da72e309a 100644 --- a/modules/sfm/CMakeLists.txt +++ b/modules/sfm/CMakeLists.txt @@ -88,16 +88,16 @@ set(LIBMV_LIGHT_INCLUDES ) set(LIBMV_LIGHT_LIBS - correspondence - multiview - numeric + opencv.sfm.correspondence + opencv.sfm.multiview + opencv.sfm.numeric ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ) if(Ceres_FOUND) add_definitions("-DCERES_FOUND=1") - list(APPEND LIBMV_LIGHT_LIBS simple_pipeline) + list(APPEND LIBMV_LIGHT_LIBS opencv.sfm.simple_pipeline) if(Ceres_VERSION VERSION_LESS 2.0.0) list(APPEND LIBMV_LIGHT_INCLUDES "${CERES_INCLUDE_DIRS}") endif() diff --git a/modules/sfm/src/libmv_light/libmv/correspondence/CMakeLists.txt b/modules/sfm/src/libmv_light/libmv/correspondence/CMakeLists.txt index b950665325a..ddc6686ce28 100644 --- a/modules/sfm/src/libmv_light/libmv/correspondence/CMakeLists.txt +++ b/modules/sfm/src/libmv_light/libmv/correspondence/CMakeLists.txt @@ -6,12 +6,12 @@ 
SET(CORRESPONDENCE_SRC feature_matching.cc # define the header files (make the headers appear in IDEs.) FILE(GLOB CORRESPONDENCE_HDRS *.h) -ADD_LIBRARY(correspondence STATIC ${CORRESPONDENCE_SRC} ${CORRESPONDENCE_HDRS}) +ADD_LIBRARY(opencv.sfm.correspondence STATIC ${CORRESPONDENCE_SRC} ${CORRESPONDENCE_HDRS}) -ocv_target_link_libraries(correspondence LINK_PRIVATE ${GLOG_LIBRARIES} multiview opencv_imgcodecs) +ocv_target_link_libraries(opencv.sfm.correspondence LINK_PRIVATE ${GLOG_LIBRARIES} opencv.sfm.multiview opencv_imgcodecs) IF(TARGET Eigen3::Eigen) - TARGET_LINK_LIBRARIES(correspondence LINK_PUBLIC Eigen3::Eigen) + TARGET_LINK_LIBRARIES(opencv.sfm.correspondence LINK_PUBLIC Eigen3::Eigen) ENDIF() -LIBMV_INSTALL_LIB(correspondence) +LIBMV_INSTALL_LIB(opencv.sfm.correspondence) diff --git a/modules/sfm/src/libmv_light/libmv/multiview/CMakeLists.txt b/modules/sfm/src/libmv_light/libmv/multiview/CMakeLists.txt index ece11759079..806a01d1ef4 100644 --- a/modules/sfm/src/libmv_light/libmv/multiview/CMakeLists.txt +++ b/modules/sfm/src/libmv_light/libmv/multiview/CMakeLists.txt @@ -16,13 +16,13 @@ SET(MULTIVIEW_SRC conditioning.cc # define the header files (make the headers appear in IDEs.) 
FILE(GLOB MULTIVIEW_HDRS *.h) -ADD_LIBRARY(multiview STATIC ${MULTIVIEW_SRC} ${MULTIVIEW_HDRS}) -TARGET_LINK_LIBRARIES(multiview LINK_PRIVATE ${GLOG_LIBRARIES} numeric) +ADD_LIBRARY(opencv.sfm.multiview STATIC ${MULTIVIEW_SRC} ${MULTIVIEW_HDRS}) +TARGET_LINK_LIBRARIES(opencv.sfm.multiview LINK_PRIVATE ${GLOG_LIBRARIES} opencv.sfm.numeric) IF(TARGET Eigen3::Eigen) - TARGET_LINK_LIBRARIES(multiview LINK_PUBLIC Eigen3::Eigen) + TARGET_LINK_LIBRARIES(opencv.sfm.multiview LINK_PUBLIC Eigen3::Eigen) ENDIF() IF(CERES_LIBRARIES) - TARGET_LINK_LIBRARIES(multiview LINK_PRIVATE ${CERES_LIBRARIES}) + TARGET_LINK_LIBRARIES(opencv.sfm.multiview LINK_PRIVATE ${CERES_LIBRARIES}) ENDIF() -LIBMV_INSTALL_LIB(multiview) +LIBMV_INSTALL_LIB(opencv.sfm.multiview) diff --git a/modules/sfm/src/libmv_light/libmv/numeric/CMakeLists.txt b/modules/sfm/src/libmv_light/libmv/numeric/CMakeLists.txt index 3de2f2fda4b..002a00ff25f 100644 --- a/modules/sfm/src/libmv_light/libmv/numeric/CMakeLists.txt +++ b/modules/sfm/src/libmv_light/libmv/numeric/CMakeLists.txt @@ -5,10 +5,10 @@ SET(NUMERIC_SRC numeric.cc # define the header files (make the headers appear in IDEs.) FILE(GLOB NUMERIC_HDRS *.h) -ADD_LIBRARY(numeric STATIC ${NUMERIC_SRC} ${NUMERIC_HDRS}) +ADD_LIBRARY(opencv.sfm.numeric STATIC ${NUMERIC_SRC} ${NUMERIC_HDRS}) IF(TARGET Eigen3::Eigen) - TARGET_LINK_LIBRARIES(numeric LINK_PUBLIC Eigen3::Eigen) + TARGET_LINK_LIBRARIES(opencv.sfm.numeric LINK_PUBLIC Eigen3::Eigen) ENDIF() -LIBMV_INSTALL_LIB(numeric) +LIBMV_INSTALL_LIB(opencv.sfm.numeric) diff --git a/modules/sfm/src/libmv_light/libmv/simple_pipeline/CMakeLists.txt b/modules/sfm/src/libmv_light/libmv/simple_pipeline/CMakeLists.txt index 9426e290144..4ea75ea63c5 100644 --- a/modules/sfm/src/libmv_light/libmv/simple_pipeline/CMakeLists.txt +++ b/modules/sfm/src/libmv_light/libmv/simple_pipeline/CMakeLists.txt @@ -15,8 +15,8 @@ SET(SIMPLE_PIPELINE_SRC # Define the header files so that they appear in IDEs. 
FILE(GLOB SIMPLE_PIPELINE_HDRS *.h) -ADD_LIBRARY(simple_pipeline STATIC ${SIMPLE_PIPELINE_SRC} ${SIMPLE_PIPELINE_HDRS}) +ADD_LIBRARY(opencv.sfm.simple_pipeline STATIC ${SIMPLE_PIPELINE_SRC} ${SIMPLE_PIPELINE_HDRS}) -TARGET_LINK_LIBRARIES(simple_pipeline LINK_PRIVATE multiview ${CERES_LIBRARIES}) +TARGET_LINK_LIBRARIES(opencv.sfm.simple_pipeline LINK_PRIVATE opencv.sfm.multiview ${CERES_LIBRARIES}) -LIBMV_INSTALL_LIB(simple_pipeline) +LIBMV_INSTALL_LIB(opencv.sfm.simple_pipeline)