@@ -587,7 +587,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
587
587
588
588
typedef int (*ScalarFunc)(const uchar* src, size_t step_src,
589
589
uchar* dst, size_t step_dst, int width, int height,
590
- void * scalar, bool scalarIsFirst);
590
+ void * scalar, bool scalarIsFirst, int nChannels );
591
591
592
592
typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1,
593
593
const uchar* src2, size_t step2,
@@ -862,7 +862,6 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
862
862
for ( size_t j = 0 ; j < total; j += blocksize )
863
863
{
864
864
int bsz = (int )MIN (total - j, blocksize);
865
- Size bszn (bsz*cn, 1 );
866
865
const uchar *sptr1 = ptrs[0 ];
867
866
const uchar* sptr2 = buf2;
868
867
uchar* dptr = ptrs[1 ];
@@ -875,30 +874,30 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
875
874
// try to perform operation in 1 call, fallback to classic way if fail
876
875
uchar* opconverted = haveMask ? maskbuf : dptr;
877
876
if (!scalarFunc || src2.total () != 1 ||
878
- scalarFunc (extSptr1, 1 , opconverted, 1 , bszn. width , bszn. height , (void *)extSptr2, swapped12) != 0 )
877
+ scalarFunc (extSptr1, 1 , opconverted, 1 , bsz, 1 , (void *)extSptr2, swapped12, cn ) != 0 )
879
878
{
880
879
// try to perform operation with conversion in one call
881
880
// if fail, use converter functions
882
881
883
882
if (!extendedFunc || extendedFunc (extSptr1, 1 , extSptr2, 1 , opconverted, 1 ,
884
- bszn. width , bszn. height , usrdata) != 0 )
883
+ bsz*cn, 1 , usrdata) != 0 )
885
884
{
886
885
if ( cvtsrc1 )
887
886
{
888
- cvtsrc1 ( sptr1, 1 , 0 , 1 , buf1, 1 , bszn , 0 );
887
+ cvtsrc1 ( sptr1, 1 , 0 , 1 , buf1, 1 , Size (bsz*cn, 1 ) , 0 );
889
888
sptr1 = buf1;
890
889
}
891
890
892
891
if ( swapped12 )
893
892
std::swap (sptr1, sptr2);
894
893
895
894
uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
896
- func ( sptr1, 1 , sptr2, 1 , fdst, 1 , bszn. width , bszn. height , usrdata );
895
+ func ( sptr1, 1 , sptr2, 1 , fdst, 1 , bsz*cn, 1 , usrdata );
897
896
898
897
if (cvtdst)
899
898
{
900
899
uchar* cdst = haveMask ? maskbuf : dptr;
901
- cvtdst (wbuf, 1 , 0 , 1 , cdst, 1 , bszn , 0 );
900
+ cvtdst (wbuf, 1 , 0 , 1 , cdst, 1 , Size (bsz*cn, 1 ) , 0 );
902
901
}
903
902
opconverted = cvtdst ? maskbuf : wbuf;
904
903
}
@@ -931,9 +930,9 @@ static BinaryFuncC* getAddTab()
931
930
}
932
931
933
932
static int addScalar32f32fWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
934
- void * scalar, bool /* scalarIsFirst*/ )
933
+ void * scalar, bool /* scalarIsFirst*/ , int nChannels )
935
934
{
936
- int res = cv_hal_addScalar32f32f ((const float *)src, step_src, (float *)dst, step_dst, width, height, (const float *)scalar);
935
+ int res = cv_hal_addScalar32f32f ((const float *)src, step_src, (float *)dst, step_dst, width, height, (const float *)scalar, nChannels );
937
936
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
938
937
return res;
939
938
else
@@ -944,9 +943,9 @@ static int addScalar32f32fWrapper(const uchar* src, size_t step_src, uchar* dst,
944
943
}
945
944
946
945
static int addScalar16s16sWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
947
- void * scalar, bool /* scalarIsFirst*/ )
946
+ void * scalar, bool /* scalarIsFirst*/ , int nChannels )
948
947
{
949
- int res = cv_hal_addScalar16s16s ((const int16_t *)src, step_src, (int16_t *)dst, step_dst, width, height, (const int16_t *)scalar);
948
+ int res = cv_hal_addScalar16s16s ((const int16_t *)src, step_src, (int16_t *)dst, step_dst, width, height, (const int16_t *)scalar, nChannels );
950
949
if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
951
950
return res;
952
951
else
@@ -1042,6 +1041,67 @@ static BinaryFuncC* getAbsDiffTab()
1042
1041
return absDiffTab;
1043
1042
}
1044
1043
1044
+
1045
+ static int absDiffScalar32f32fWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
1046
+ void * scalar, bool /* scalarIsFirst*/ , int nChannels)
1047
+ {
1048
+ int res = cv_hal_absDiffScalar32f32f ((const float *)src, step_src, (float *)dst, step_dst, width, height, (const float *)scalar, nChannels);
1049
+ if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
1050
+ return res;
1051
+ else
1052
+ {
1053
+ CV_Error_ (cv::Error::StsInternal, (" HAL implementation addScalar32f32f ==> " CVAUX_STR (cv_hal_addScalar32f32f)
1054
+ " returned %d (0x%08x)" , res, res));
1055
+ }
1056
+ }
1057
+
1058
+ static int absDiffScalar32s32uWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
1059
+ void * scalar, bool /* scalarIsFirst*/ , int nChannels)
1060
+ {
1061
+ int res = cv_hal_absDiffScalar32s32u ((const int *)src, step_src, (uint32_t *)dst, step_dst, width, height, (const int *)scalar, nChannels);
1062
+ if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
1063
+ return res;
1064
+ else
1065
+ {
1066
+ CV_Error_ (cv::Error::StsInternal, (" HAL implementation addScalar32f32f ==> " CVAUX_STR (cv_hal_addScalar32f32f)
1067
+ " returned %d (0x%08x)" , res, res));
1068
+ }
1069
+ }
1070
+
1071
+ static int absDiffScalar8u8uWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
1072
+ void * scalar, bool /* scalarIsFirst*/ , int nChannels)
1073
+ {
1074
+ int res = cv_hal_absDiffScalar8u8u ((const uchar*)src, step_src, (uchar*)dst, step_dst, width, height, (const uchar*)scalar, nChannels);
1075
+ if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
1076
+ return res;
1077
+ else
1078
+ {
1079
+ CV_Error_ (cv::Error::StsInternal, (" HAL implementation addScalar32f32f ==> " CVAUX_STR (cv_hal_addScalar32f32f)
1080
+ " returned %d (0x%08x)" , res, res));
1081
+ }
1082
+ }
1083
+
1084
+ static ScalarFunc getAbsDiffScalarFunc (int srcType, int dstType)
1085
+ {
1086
+ if (srcType == CV_32F && dstType == CV_32F)
1087
+ {
1088
+ return absDiffScalar32f32fWrapper;
1089
+ }
1090
+ // resulting type is 32U in fact
1091
+ else if (srcType == CV_32S && dstType == CV_32S)
1092
+ {
1093
+ return absDiffScalar32s32uWrapper;
1094
+ }
1095
+ else if (srcType == CV_8U && dstType == CV_8U)
1096
+ {
1097
+ return absDiffScalar8u8uWrapper;
1098
+ }
1099
+ else
1100
+ {
1101
+ return nullptr ;
1102
+ }
1103
+ }
1104
+
1045
1105
}
1046
1106
1047
1107
void cv::add ( InputArray src1, InputArray src2, OutputArray dst,
@@ -1056,7 +1116,17 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
1056
1116
return ;
1057
1117
}
1058
1118
1059
- ScalarFunc scalarFunc = getAddScalarFunc (src1.depth (), dtype < 0 ? dst.depth () : dtype);
1119
+ int sdepth = src1.depth ();
1120
+ if (checkScalar (src1, src1.type (), src1.kind (), _InputArray::MATX))
1121
+ {
1122
+ sdepth = src2.depth ();
1123
+ }
1124
+ if (checkScalar (src2, src2.type (), src2.kind (), _InputArray::MATX))
1125
+ {
1126
+ sdepth = src1.depth ();
1127
+ }
1128
+
1129
+ ScalarFunc scalarFunc = getAddScalarFunc (sdepth, dtype < 0 ? dst.depth () : dtype);
1060
1130
arithm_op (src1, src2, dst, mask, dtype, getAddTab (), false , 0 , OCL_OP_ADD, nullptr ,
1061
1131
/* scalarFunc */ scalarFunc );
1062
1132
}
@@ -1089,7 +1159,18 @@ void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
1089
1159
return ;
1090
1160
}
1091
1161
1092
- arithm_op (src1, src2, dst, noArray (), -1 , getAbsDiffTab (), false , 0 , OCL_OP_ABSDIFF);
1162
+ int sdepth = src1.depth ();
1163
+ if (checkScalar (src1, src1.type (), src1.kind (), _InputArray::MATX))
1164
+ {
1165
+ sdepth = src2.depth ();
1166
+ }
1167
+ if (checkScalar (src2, src2.type (), src2.kind (), _InputArray::MATX))
1168
+ {
1169
+ sdepth = src1.depth ();
1170
+ }
1171
+ ScalarFunc scalarFunc = getAbsDiffScalarFunc (sdepth, dst.depth ());
1172
+ arithm_op (src1, src2, dst, noArray (), -1 , getAbsDiffTab (), false , 0 , OCL_OP_ABSDIFF,
1173
+ /* extendedFunc */ nullptr , scalarFunc);
1093
1174
}
1094
1175
1095
1176
void cv::copyTo (InputArray _src, OutputArray _dst, InputArray _mask)
0 commit comments