Skip to content

Commit 67f07b1

Browse files
authored
Merge pull request opencv#25624 from savuor:rv/hal_addscalar
HAL added for add(array, scalar) opencv#25624 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under the Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1 parent 8fe70a1 commit 67f07b1

File tree

2 files changed

+95
-25
lines changed

2 files changed

+95
-25
lines changed

modules/core/src/arithm.cpp

Lines changed: 79 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -585,14 +585,19 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
585585

586586
#endif
587587

588+
// Kernel signature for the "array op scalar" fast path of arithm_op().
//   src, step_src   - input buffer and its step
//   dst, step_dst   - output buffer and its step
//   width, height   - size of the block to process
//   scalar          - untyped pointer to the scalar operand
//   scalarIsFirst   - arithm_op() forwards its swapped12 flag here
// arithm_op() treats a return value of 0 as success; any other value makes it
// fall back to the extendedFunc / generic conversion path.
typedef int (*ScalarFunc)(const uchar* src, size_t step_src,
589+
uchar* dst, size_t step_dst, int width, int height,
590+
void* scalar, bool scalarIsFirst);
591+
588592
// Kernel signature for performing the binary operation together with the type
// conversion in a single call (two inputs, one output, each with its own step).
// arithm_op() passes its usrdata pointer as the trailing void* argument and
// treats a non-zero return value as "failed", in which case it falls back to
// the separate converter functions.
typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1,
589593
const uchar* src2, size_t step2,
590594
uchar* dst, size_t step, int width, int height,
591595
void*);
592596

593597
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
594598
InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
595-
void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr )
599+
void* usrdata=0, int oclop=-1, ExtendedTypeFunc extendedFunc = nullptr,
600+
ScalarFunc scalarFunc = nullptr)
596601
{
597602
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
598603
_InputArray::KindFlag kind1 = psrc1->kind(), kind2 = psrc2->kind();
@@ -638,8 +643,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
638643
(kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
639644
(kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
640645
{
641-
if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4)) &&
642-
checkScalar(*psrc1, type2, kind1, kind2))
646+
if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4)) && src1Scalar)
643647
{
644648
// src1 is a scalar; swap it with src2
645649
swap(psrc1, psrc2);
@@ -654,7 +658,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
654658
if ( oclop == OCL_OP_DIV_SCALE )
655659
oclop = OCL_OP_RDIV_SCALE;
656660
}
657-
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
661+
else if( !src2Scalar )
658662
CV_Error( cv::Error::StsUnmatchedSizes,
659663
"The operation is neither 'array op array' "
660664
"(where arrays have the same size and the same number of channels), "
@@ -866,32 +870,38 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
866870
const uchar* extSptr1 = sptr1;
867871
const uchar* extSptr2 = sptr2;
868872
if( swapped12 )
869-
std::swap(extSptr1, extSptr1);
873+
std::swap(extSptr1, extSptr2);
870874

871-
// try to perform operation with conversion in one call
872-
// if fail, use converter functions
875+
// try to perform operation in 1 call, fallback to classic way if fail
873876
uchar* opconverted = haveMask ? maskbuf : dptr;
874-
if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1,
875-
bszn.width, bszn.height, usrdata) != 0)
877+
if (!scalarFunc || src2.total() != 1 ||
878+
scalarFunc(extSptr1, 1, opconverted, 1, bszn.width, bszn.height, (void*)extSptr2, swapped12) != 0)
876879
{
877-
if( cvtsrc1 )
878-
{
879-
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
880-
sptr1 = buf1;
881-
}
882-
883-
if( swapped12 )
884-
std::swap(sptr1, sptr2);
885-
886-
uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
887-
func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata );
880+
// try to perform operation with conversion in one call
881+
// if fail, use converter functions
888882

889-
if (cvtdst)
883+
if (!extendedFunc || extendedFunc(extSptr1, 1, extSptr2, 1, opconverted, 1,
884+
bszn.width, bszn.height, usrdata) != 0)
890885
{
891-
uchar* cdst = haveMask ? maskbuf : dptr;
892-
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
886+
if( cvtsrc1 )
887+
{
888+
cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
889+
sptr1 = buf1;
890+
}
891+
892+
if( swapped12 )
893+
std::swap(sptr1, sptr2);
894+
895+
uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
896+
func( sptr1, 1, sptr2, 1, fdst, 1, bszn.width, bszn.height, usrdata );
897+
898+
if (cvtdst)
899+
{
900+
uchar* cdst = haveMask ? maskbuf : dptr;
901+
cvtdst(wbuf, 1, 0, 1, cdst, 1, bszn, 0);
902+
}
903+
opconverted = cvtdst ? maskbuf : wbuf;
893904
}
894-
opconverted = cvtdst ? maskbuf : wbuf;
895905
}
896906

897907
if (haveMask)
@@ -920,6 +930,48 @@ static BinaryFuncC* getAddTab()
920930
return addTab;
921931
}
922932

933+
// ScalarFunc adapter around the cv_hal_addScalar32f32f HAL entry point.
// Returns the HAL status when it is CV_HAL_ERROR_OK or
// CV_HAL_ERROR_NOT_IMPLEMENTED; any other status is reported through
// CV_Error_ as cv::Error::StsInternal.
// The scalarIsFirst argument is unused (its name is commented out) --
// presumably because operand order does not matter for addition.
static int addScalar32f32fWrapper(const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
934+
void* scalar, bool /*scalarIsFirst*/)
935+
{
936+
// Reinterpret the untyped buffers and the scalar pointer as float and delegate to the HAL.
int res = cv_hal_addScalar32f32f((const float*)src, step_src, (float *)dst, step_dst, width, height, (const float*)scalar);
937+
// Both "done" and "not implemented" are passed back for the caller to act on.
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
938+
return res;
939+
else
940+
{
941+
// No return statement follows: CV_Error_ is expected not to return here (TODO confirm).
CV_Error_(cv::Error::StsInternal, ("HAL implementation addScalar32f32f ==> " CVAUX_STR(cv_hal_addScalar32f32f)
942+
" returned %d (0x%08x)", res, res));
943+
}
944+
}
945+
946+
// ScalarFunc adapter around the cv_hal_addScalar16s16s HAL entry point.
// Returns the HAL status when it is CV_HAL_ERROR_OK or
// CV_HAL_ERROR_NOT_IMPLEMENTED; any other status is reported through
// CV_Error_ as cv::Error::StsInternal.
// The scalarIsFirst argument is unused (its name is commented out) --
// presumably because operand order does not matter for addition.
static int addScalar16s16sWrapper(const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
947+
void* scalar, bool /*scalarIsFirst*/)
948+
{
949+
// Reinterpret the untyped buffers and the scalar pointer as int16_t and delegate to the HAL.
int res = cv_hal_addScalar16s16s((const int16_t*)src, step_src, (int16_t *)dst, step_dst, width, height, (const int16_t*)scalar);
950+
// Both "done" and "not implemented" are passed back for the caller to act on.
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
951+
return res;
952+
else
953+
{
954+
// No return statement follows: CV_Error_ is expected not to return here (TODO confirm).
CV_Error_(cv::Error::StsInternal, ("HAL implementation addScalar16s16s ==> " CVAUX_STR(cv_hal_addScalar16s16s)
955+
" returned %d (0x%08x)", res, res));
956+
}
957+
}
958+
959+
// Picks the add-scalar HAL wrapper for a source/destination type pair.
// Only the exact pairs (CV_32F, CV_32F) and (CV_16S, CV_16S) have a wrapper;
// every other combination yields nullptr, which arithm_op() treats as
// "no scalar fast path".
// NOTE(review): cv::add() passes src1.depth() and either dst.depth() or the
// caller's dtype unchanged. If dtype can carry channel bits, the equality
// tests below would not match and the fast path would be skipped silently --
// confirm whether dtype should be reduced to a depth first.
static ScalarFunc getAddScalarFunc(int srcType, int dstType)
960+
{
961+
// float -> float
if (srcType == CV_32F && dstType == CV_32F)
962+
{
963+
return addScalar32f32fWrapper;
964+
}
965+
// int16 -> int16
else if (srcType == CV_16S && dstType == CV_16S)
966+
{
967+
return addScalar16s16sWrapper;
968+
}
969+
// no HAL hook for this type combination
else
970+
{
971+
return nullptr;
972+
}
973+
}
974+
923975
static int sub8u32fWrapper(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
924976
uchar* dst, size_t step, int width, int height, void* )
925977
{
@@ -1004,7 +1056,9 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
10041056
return;
10051057
}
10061058

1007-
arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
1059+
ScalarFunc scalarFunc = getAddScalarFunc(src1.depth(), dtype < 0 ? dst.depth() : dtype);
1060+
arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD, nullptr,
1061+
/* scalarFunc */ scalarFunc );
10081062
}
10091063

10101064
void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,

modules/core/src/hal_replacement.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ inline int hal_ni_sub64f(const double *src1_data, size_t src1_step, const double
9898

9999
inline int hal_ni_sub8u32f(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
100100
inline int hal_ni_sub8s32f(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
101+
102+
/**
103+
Add scalar: dst[i] = src[i] + scalar
104+
105+
@param src_data source image data
106+
@param src_step source image step
107+
@param dst_data destination image data
108+
@param dst_step destination image step
109+
@param width width of the images
110+
@param height height of the images
111+
@param scalar_data pointer to scalar value
112+
*/
113+
// Default stub for the float add-scalar hook: ignores its arguments and always
// reports CV_HAL_ERROR_NOT_IMPLEMENTED. cv_hal_addScalar32f32f is #defined to
// this function in this header.
inline int hal_ni_addScalar32f32f(const float *src_data, size_t src_step, float *dst_data, size_t dst_step, int width, int height, const float* scalar_data) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
114+
// Default stub for the int16_t add-scalar hook: ignores its arguments and always
// reports CV_HAL_ERROR_NOT_IMPLEMENTED. cv_hal_addScalar16s16s is #defined to
// this function in this header.
inline int hal_ni_addScalar16s16s(const int16_t *src_data, size_t src_step, int16_t *dst_data, size_t dst_step, int width, int height, const int16_t* scalar_data) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
101115
//! @}
102116

103117
/**
@@ -192,6 +206,8 @@ inline int hal_ni_not8u(const uchar *src_data, size_t src_step, uchar *dst_data,
192206
#define cv_hal_sub64f hal_ni_sub64f
193207
#define cv_hal_sub8u32f hal_ni_sub8u32f
194208
#define cv_hal_sub8s32f hal_ni_sub8s32f
209+
#define cv_hal_addScalar32f32f hal_ni_addScalar32f32f
210+
#define cv_hal_addScalar16s16s hal_ni_addScalar16s16s
195211
#define cv_hal_max8u hal_ni_max8u
196212
#define cv_hal_max8s hal_ni_max8s
197213
#define cv_hal_max16u hal_ni_max16u

0 commit comments

Comments
 (0)