@@ -585,14 +585,19 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
585
585
586
586
#endif
587
587
588
+ typedef int (*ScalarFunc)(const uchar* src, size_t step_src,
589
+ uchar* dst, size_t step_dst, int width, int height,
590
+ void * scalar, bool scalarIsFirst);
591
+
588
592
typedef int (*ExtendedTypeFunc)(const uchar* src1, size_t step1,
589
593
const uchar* src2, size_t step2,
590
594
uchar* dst, size_t step, int width, int height,
591
595
void *);
592
596
593
597
static void arithm_op (InputArray _src1, InputArray _src2, OutputArray _dst,
594
598
InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false ,
595
- void * usrdata=0 , int oclop=-1 , ExtendedTypeFunc extendedFunc = nullptr )
599
+ void * usrdata=0 , int oclop=-1 , ExtendedTypeFunc extendedFunc = nullptr ,
600
+ ScalarFunc scalarFunc = nullptr )
596
601
{
597
602
const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
598
603
_InputArray::KindFlag kind1 = psrc1->kind (), kind2 = psrc2->kind ();
@@ -638,8 +643,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
638
643
(kind1 == _InputArray::MATX && (sz1 == Size (1 ,4 ) || sz1 == Size (1 ,1 ))) ||
639
644
(kind2 == _InputArray::MATX && (sz2 == Size (1 ,4 ) || sz2 == Size (1 ,1 ))) )
640
645
{
641
- if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4 )) &&
642
- checkScalar (*psrc1, type2, kind1, kind2))
646
+ if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4 )) && src1Scalar)
643
647
{
644
648
// src1 is a scalar; swap it with src2
645
649
swap (psrc1, psrc2);
@@ -654,7 +658,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
654
658
if ( oclop == OCL_OP_DIV_SCALE )
655
659
oclop = OCL_OP_RDIV_SCALE;
656
660
}
657
- else if ( !checkScalar (*psrc2, type1, kind2, kind1) )
661
+ else if ( !src2Scalar )
658
662
CV_Error ( cv::Error::StsUnmatchedSizes,
659
663
" The operation is neither 'array op array' "
660
664
" (where arrays have the same size and the same number of channels), "
@@ -866,32 +870,38 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
866
870
const uchar* extSptr1 = sptr1;
867
871
const uchar* extSptr2 = sptr2;
868
872
if ( swapped12 )
869
- std::swap (extSptr1, extSptr1 );
873
+ std::swap (extSptr1, extSptr2 );
870
874
871
- // try to perform operation with conversion in one call
872
- // if fail, use converter functions
875
+ // try to perform operation in 1 call, fallback to classic way if fail
873
876
uchar* opconverted = haveMask ? maskbuf : dptr;
874
- if (!extendedFunc || extendedFunc (extSptr1, 1 , extSptr2, 1 , opconverted, 1 ,
875
- bszn.width , bszn.height , usrdata ) != 0 )
877
+ if (!scalarFunc || src2. total () != 1 ||
878
+ scalarFunc (extSptr1, 1 , opconverted, 1 , bszn.width , bszn.height , ( void *)extSptr2, swapped12 ) != 0 )
876
879
{
877
- if ( cvtsrc1 )
878
- {
879
- cvtsrc1 ( sptr1, 1 , 0 , 1 , buf1, 1 , bszn, 0 );
880
- sptr1 = buf1;
881
- }
882
-
883
- if ( swapped12 )
884
- std::swap (sptr1, sptr2);
885
-
886
- uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
887
- func ( sptr1, 1 , sptr2, 1 , fdst, 1 , bszn.width , bszn.height , usrdata );
880
+ // try to perform operation with conversion in one call
881
+ // if fail, use converter functions
888
882
889
- if (cvtdst)
883
+ if (!extendedFunc || extendedFunc (extSptr1, 1 , extSptr2, 1 , opconverted, 1 ,
884
+ bszn.width , bszn.height , usrdata) != 0 )
890
885
{
891
- uchar* cdst = haveMask ? maskbuf : dptr;
892
- cvtdst (wbuf, 1 , 0 , 1 , cdst, 1 , bszn, 0 );
886
+ if ( cvtsrc1 )
887
+ {
888
+ cvtsrc1 ( sptr1, 1 , 0 , 1 , buf1, 1 , bszn, 0 );
889
+ sptr1 = buf1;
890
+ }
891
+
892
+ if ( swapped12 )
893
+ std::swap (sptr1, sptr2);
894
+
895
+ uchar* fdst = ( haveMask || cvtdst ) ? wbuf : dptr;
896
+ func ( sptr1, 1 , sptr2, 1 , fdst, 1 , bszn.width , bszn.height , usrdata );
897
+
898
+ if (cvtdst)
899
+ {
900
+ uchar* cdst = haveMask ? maskbuf : dptr;
901
+ cvtdst (wbuf, 1 , 0 , 1 , cdst, 1 , bszn, 0 );
902
+ }
903
+ opconverted = cvtdst ? maskbuf : wbuf;
893
904
}
894
- opconverted = cvtdst ? maskbuf : wbuf;
895
905
}
896
906
897
907
if (haveMask)
@@ -920,6 +930,48 @@ static BinaryFuncC* getAddTab()
920
930
return addTab;
921
931
}
922
932
933
+ static int addScalar32f32fWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
934
+ void * scalar, bool /* scalarIsFirst*/ )
935
+ {
936
+ int res = cv_hal_addScalar32f32f ((const float *)src, step_src, (float *)dst, step_dst, width, height, (const float *)scalar);
937
+ if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
938
+ return res;
939
+ else
940
+ {
941
+ CV_Error_ (cv::Error::StsInternal, (" HAL implementation addScalar32f32f ==> " CVAUX_STR (cv_hal_addScalar32f32f)
942
+ " returned %d (0x%08x)" , res, res));
943
+ }
944
+ }
945
+
946
+ static int addScalar16s16sWrapper (const uchar* src, size_t step_src, uchar* dst, size_t step_dst, int width, int height,
947
+ void * scalar, bool /* scalarIsFirst*/ )
948
+ {
949
+ int res = cv_hal_addScalar16s16s ((const int16_t *)src, step_src, (int16_t *)dst, step_dst, width, height, (const int16_t *)scalar);
950
+ if (res == CV_HAL_ERROR_OK || res == CV_HAL_ERROR_NOT_IMPLEMENTED)
951
+ return res;
952
+ else
953
+ {
954
+ CV_Error_ (cv::Error::StsInternal, (" HAL implementation addScalar16s16s ==> " CVAUX_STR (cv_hal_addScalar16s16s)
955
+ " returned %d (0x%08x)" , res, res));
956
+ }
957
+ }
958
+
959
+ static ScalarFunc getAddScalarFunc (int srcType, int dstType)
960
+ {
961
+ if (srcType == CV_32F && dstType == CV_32F)
962
+ {
963
+ return addScalar32f32fWrapper;
964
+ }
965
+ else if (srcType == CV_16S && dstType == CV_16S)
966
+ {
967
+ return addScalar16s16sWrapper;
968
+ }
969
+ else
970
+ {
971
+ return nullptr ;
972
+ }
973
+ }
974
+
923
975
static int sub8u32fWrapper (const uchar* src1, size_t step1, const uchar* src2, size_t step2,
924
976
uchar* dst, size_t step, int width, int height, void * )
925
977
{
@@ -1004,7 +1056,9 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
1004
1056
return ;
1005
1057
}
1006
1058
1007
- arithm_op (src1, src2, dst, mask, dtype, getAddTab (), false , 0 , OCL_OP_ADD );
1059
+ ScalarFunc scalarFunc = getAddScalarFunc (src1.depth (), dtype < 0 ? dst.depth () : dtype);
1060
+ arithm_op (src1, src2, dst, mask, dtype, getAddTab (), false , 0 , OCL_OP_ADD, nullptr ,
1061
+ /* scalarFunc */ scalarFunc );
1008
1062
}
1009
1063
1010
1064
void cv::subtract ( InputArray _src1, InputArray _src2, OutputArray _dst,
0 commit comments