Skip to content

Commit cbfd38b

Browse files
committed
core: rework code locality
- to reduce the binary size of the FFmpeg Windows wrapper - MinGW linker doesn't support -ffunction-sections (used for FFmpeg Windows wrapper) - move code to improve locality with its used dependencies - move UMat::dot() to matmul.dispatch.cpp (Mat::dot() is already there) - move UMat::inv() to lapack.cpp - move UMat::mul() to arithm.cpp - move UMat::eye() to matrix_operations.cpp (near setIdentity() implementation) - move normalize(): convert_scale.dispatch.cpp => norm.cpp - move convertAndUnrollScalar(): arithm.cpp => copy.cpp - move scalarToRawData(): array.cpp => copy.cpp - move transpose(): matrix_operations.cpp => matrix_transform.cpp - move flip(), rotate(): copy.cpp => matrix_transform.cpp (rotate90 uses flip and transpose) - add 'OPENCV_CORE_EXCLUDE_C_API' CMake variable to exclude compilation of C-API functions from the core module - matrix_wrap.cpp: add compile-time checks for CUDA/OpenGL calls - the steps above reduce the FFmpeg wrapper size by ~1.5 MB (initial size of the OpenCV part is about 3 MB). Backport is done to improve merge experience (fewer conflicts). Backport of commit: 65eb946
1 parent 2ab1f3f commit cbfd38b

20 files changed

+1245
-1139
lines changed

modules/core/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ ocv_target_link_libraries(${the_module} PRIVATE
112112
"${OPENCV_HAL_LINKER_LIBS}"
113113
)
114114

115+
# Opt-in switch: when OPENCV_CORE_EXCLUDE_C_API is set, the module's sources are
# compiled with OPENCV_EXCLUDE_C_API=1 (PRIVATE, so only this target sees it).
if(OPENCV_CORE_EXCLUDE_C_API)
116+
ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1")
117+
endif()
118+
115119
ocv_add_accuracy_tests()
116120
ocv_add_perf_tests()
117121

modules/core/src/arithm.cpp

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -57,24 +57,6 @@ namespace cv
5757
* logical operations *
5858
\****************************************************************************************/
5959

60-
// Converts the scalar stored in `sc` to the element type `buftype` and repeats
// it so that `scbuf` holds `blocksize` consecutive elements of that type.
//
// sc        - matrix holding the scalar; sc.total() values are available.
// buftype   - target element type; its channel count and element size drive
//             the replication below.
// scbuf     - destination buffer; the final loop writes up to
//             blocksize * CV_ELEM_SIZE(buftype) bytes into it.
// blocksize - number of elements to produce in scbuf.
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
61-
{
62-
int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
63-
size_t esz = CV_ELEM_SIZE(buftype);
64-
// Convert only the first min(cn, scn) values; the rest is filled by copying.
getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
65-
// unroll the scalar
66-
if( scn < cn )
67-
{
68-
// Fewer scalar values than channels is only accepted for a single value,
// which is then copied byte-by-byte into every remaining channel.
CV_Assert( scn == 1 );
69-
size_t esz1 = CV_ELEM_SIZE1(buftype);
70-
for( size_t i = esz1; i < esz; i++ )
71-
scbuf[i] = scbuf[i - esz1];
72-
}
73-
// Repeat the first complete element (esz bytes) across the rest of the block.
for( size_t i = esz; i < blocksize*esz; i++ )
74-
scbuf[i] = scbuf[i - esz];
75-
}
76-
77-
7860
enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
7961
OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
8062
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
@@ -1041,9 +1023,7 @@ static BinaryFuncC* getRecipTab()
10411023
return recipTab;
10421024
}
10431025

1044-
}
1045-
1046-
void cv::multiply(InputArray src1, InputArray src2,
1026+
void multiply(InputArray src1, InputArray src2,
10471027
OutputArray dst, double scale, int dtype)
10481028
{
10491029
CV_INSTRUMENT_REGION();
@@ -1052,29 +1032,33 @@ void cv::multiply(InputArray src1, InputArray src2,
10521032
true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
10531033
}
10541034

1055-
void cv::divide(InputArray src1, InputArray src2,
1035+
// divide() overload taking two input arrays.
// Forwards src1, src2, dst and dtype to arithm_op() together with the function
// table from getDivTab(), a pointer to `scale`, and the OCL_OP_DIV_SCALE
// operation id; noArray() is passed in the fourth argument.
void divide(InputArray src1, InputArray src2,
10561036
OutputArray dst, double scale, int dtype)
10571037
{
10581038
CV_INSTRUMENT_REGION();
10591039

10601040
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
10611041
}
10621042

1063-
void cv::divide(double scale, InputArray src2,
1043+
// divide() overload taking a scalar `scale` and one input array.
// Forwards to arithm_op() with the function table from getRecipTab(), a pointer
// to `scale`, and the OCL_OP_RECIP_SCALE operation id.
void divide(double scale, InputArray src2,
10641044
OutputArray dst, int dtype)
10651045
{
10661046
CV_INSTRUMENT_REGION();
10671047

10681048
// src2 is deliberately passed for both array operands: this overload has only
// one input array. NOTE(review): presumably the reciprocal functions read only
// the second operand - confirm against getRecipTab().
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
10691049
}
10701050

1051+
// Returns a new UMat produced by calling multiply(*this, m, dst, scale).
// The object itself is not modified (const member); the result is written to a
// local UMat that is returned by value.
UMat UMat::mul(InputArray m, double scale) const
1052+
{
1053+
UMat dst;
1054+
multiply(*this, m, dst, scale);
1055+
return dst;
1056+
}
1057+
10711058
/****************************************************************************************\
10721059
* addWeighted *
10731060
\****************************************************************************************/
10741061

1075-
namespace cv
1076-
{
1077-
10781062
static BinaryFuncC* getAddWeightedTab()
10791063
{
10801064
static BinaryFuncC addWeightedTab[] =
@@ -1879,6 +1863,9 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
18791863
}
18801864
}
18811865

1866+
1867+
#ifndef OPENCV_EXCLUDE_C_API
1868+
18821869
/****************************************************************************************\
18831870
* Earlier API: cvAdd etc. *
18841871
\****************************************************************************************/
@@ -2141,4 +2128,5 @@ cvMaxS( const void* srcarr1, double value, void* dstarr )
21412128
cv::max( src1, value, dst );
21422129
}
21432130

2131+
#endif // OPENCV_EXCLUDE_C_API
21442132
/* End of file. */

modules/core/src/array.cpp

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848

4949
#include "precomp.hpp"
5050

51+
#ifndef OPENCV_EXCLUDE_C_API
52+
5153
#define CV_ORIGIN_TL 0
5254
#define CV_ORIGIN_BL 1
5355

@@ -3223,51 +3225,50 @@ template<> void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) con
32233225
template<> void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const
32243226
{ cvReleaseFileStorage(&obj); }
32253227

3226-
// Writes the first `cn` components of `s` into `buf` as values of type T
// (via saturate_cast<T>), then fills buf[cn .. unroll_to) by repeating what
// was already written, i.e. buf[i] = buf[i - cn].
//
// s         - source scalar; components s.val[0 .. cn) are read.
// buf       - destination; must have room for max(cn, unroll_to) values of T.
// cn        - number of scalar components to convert.
// unroll_to - total number of T values to produce; nothing extra is written
//             when it does not exceed cn.
template <typename T> static inline
3227-
void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to)
3228-
{
3229-
int i = 0;
3230-
for(; i < cn; i++)
3231-
buf[i] = saturate_cast<T>(s.val[i]);
3232-
// `i` continues from cn, so this loop only copies already-converted values.
for(; i < unroll_to; i++)
3233-
buf[i] = buf[i-cn];
3234-
}
3228+
} // cv::
32353229

3236-
void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
3230+
3231+
/* universal functions */
3232+
CV_IMPL void
3233+
cvRelease( void** struct_ptr )
32373234
{
3238-
CV_INSTRUMENT_REGION();
3235+
CvTypeInfo* info;
32393236

3240-
const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
3241-
CV_Assert(cn <= 4);
3242-
switch(depth)
3237+
if( !struct_ptr )
3238+
CV_Error( CV_StsNullPtr, "NULL double pointer" );
3239+
3240+
if( *struct_ptr )
32433241
{
3244-
case CV_8U:
3245-
scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
3246-
break;
3247-
case CV_8S:
3248-
scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
3249-
break;
3250-
case CV_16U:
3251-
scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
3252-
break;
3253-
case CV_16S:
3254-
scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
3255-
break;
3256-
case CV_32S:
3257-
scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
3258-
break;
3259-
case CV_32F:
3260-
scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
3261-
break;
3262-
case CV_64F:
3263-
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
3264-
break;
3265-
default:
3266-
CV_Error(CV_StsUnsupportedFormat,"");
3242+
info = cvTypeOf( *struct_ptr );
3243+
if( !info )
3244+
CV_Error( CV_StsError, "Unknown object type" );
3245+
if( !info->release )
3246+
CV_Error( CV_StsError, "release function pointer is NULL" );
3247+
3248+
info->release( struct_ptr );
3249+
*struct_ptr = 0;
32673250
}
32683251
}
32693252

3270-
} // cv::
3253+
3254+
// Clones the object behind `struct_ptr` using the clone callback of the type
// info that cvTypeOf() reports for it, and returns that callback's result.
//
// Raises (through CV_Error):
//   CV_StsNullPtr - struct_ptr is NULL.
//   CV_StsError   - cvTypeOf() returned no type info, or the type info has no
//                   clone callback.
void* cvClone( const void* struct_ptr )
3255+
{
3256+
void* struct_copy = 0;
3257+
CvTypeInfo* info;
3258+
3259+
if( !struct_ptr )
3260+
CV_Error( CV_StsNullPtr, "NULL structure pointer" );
3261+
3262+
// Look up the type descriptor for the object.
info = cvTypeOf( struct_ptr );
3263+
if( !info )
3264+
CV_Error( CV_StsError, "Unknown object type" );
3265+
if( !info->clone )
3266+
CV_Error( CV_StsError, "clone function pointer is NULL" );
3267+
3268+
struct_copy = info->clone( struct_ptr );
3269+
return struct_copy;
3270+
}
32713271

32723272

3273+
#endif // OPENCV_EXCLUDE_C_API
32733274
/* End of file. */

modules/core/src/convert_c.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "precomp.hpp"
77

8+
#ifndef OPENCV_EXCLUDE_C_API
89

910
CV_IMPL void
1011
cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 )
@@ -132,3 +133,5 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr,
132133
CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() );
133134
cv::normalize( src, dst, a, b, norm_type, dst.type(), mask );
134135
}
136+
137+
#endif // OPENCV_EXCLUDE_C_API

modules/core/src/convert_scale.dispatch.cpp

Lines changed: 0 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#include "convert_scale.simd.hpp"
1010
#include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
1111

12-
1312
namespace cv
1413
{
1514

@@ -117,143 +116,4 @@ void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double bet
117116
}
118117
}
119118

120-
//==================================================================================================
121-
122-
#ifdef HAVE_OPENCL
123-
124-
// OpenCL helper used by normalize(): writes `_src` converted to `dtype` with the
// given `scale` and `delta` into `_dst`, restricted by `_mask` when it is not empty.
//
// Three paths:
//   * empty mask          - a plain UMat::convertTo() into _dst;
//   * mask, <= 4 channels - shortcuts for the trivial cases, otherwise the
//                           "normalizek" OpenCL kernel;
//   * mask, > 4 channels  - convertTo() into a temporary, then a masked copy.
//
// Returns false when the kernel path cannot be used (64-bit float depth without
// device double support, or the kernel object is empty) or when k.run() returns
// false; returns true otherwise.
static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
125-
double scale, double delta )
126-
{
127-
UMat src = _src.getUMat();
128-
129-
if( _mask.empty() )
130-
src.convertTo( _dst, dtype, scale, delta );
131-
else if (src.channels() <= 4)
132-
{
133-
const ocl::Device & dev = ocl::Device::getDefault();
134-
135-
// wdepth is the working depth passed to the kernel build options: never
// below CV_32F and at least as large as both the source and destination depth.
int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
136-
ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
137-
rowsPerWI = dev.isIntel() ? 4 : 1;
138-
139-
// The kernel receives scale/delta as float; the flags below are computed
// from the original double values using DBL_EPSILON as the tolerance.
float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
140-
bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
141-
haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
142-
haveDelta = std::fabs(delta) > DBL_EPSILON,
143-
doubleSupport = dev.doubleFPConfig() > 0;
144-
145-
// Shortcut: scale ~ 1, delta ~ 0 and identical types - a masked copy is enough.
if (!haveScale && !haveDelta && stype == dtype)
146-
{
147-
_src.copyTo(_dst, _mask);
148-
return true;
149-
}
150-
// Shortcut: scale ~ 0 - the masked elements are simply set to delta.
if (haveZeroScale)
151-
{
152-
_dst.setTo(Scalar(delta), _mask);
153-
return true;
154-
}
155-
156-
// 64-bit float data cannot be handled when the device reports no double support.
if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
157-
return false;
158-
159-
// Scratch buffers handed to ocl::convertTypeStr() for the two conversions.
char cvt[2][40];
160-
String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
161-
" -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
162-
ocl::typeToStr(stype), ocl::typeToStr(dtype),
163-
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
164-
rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
165-
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
166-
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
167-
haveScale ? " -D HAVE_SCALE" : "",
168-
haveDelta ? " -D HAVE_DELTA" : "",
169-
ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));
170-
171-
ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
172-
if (k.empty())
173-
return false;
174-
175-
UMat mask = _mask.getUMat(), dst = _dst.getUMat();
176-
177-
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
178-
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
179-
dstarg = ocl::KernelArg::ReadWrite(dst);
180-
181-
// The argument list mirrors the HAVE_SCALE / HAVE_DELTA build options:
// fscale and fdelta are passed only when the matching option was defined.
if (haveScale)
182-
{
183-
if (haveDelta)
184-
k.args(srcarg, maskarg, dstarg, fscale, fdelta);
185-
else
186-
k.args(srcarg, maskarg, dstarg, fscale);
187-
}
188-
else
189-
{
190-
if (haveDelta)
191-
k.args(srcarg, maskarg, dstarg, fdelta);
192-
else
193-
k.args(srcarg, maskarg, dstarg);
194-
}
195-
196-
// Second dimension is src.rows divided by rowsPerWI, rounded up.
size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
197-
return k.run(2, globalsize, NULL, false);
198-
}
199-
else
200-
{
201-
// More than 4 channels with a mask: convert everything, then copy under the mask.
UMat temp;
202-
src.convertTo( temp, dtype, scale, delta );
203-
temp.copyTo( _dst, _mask );
204-
}
205-
206-
// Reached from the no-mask and the > 4 channel branches.
return true;
207-
}
208-
209-
#endif
210-
211-
// Normalizes `_src` into `_dst` by applying a linear transform
// dst = src * scale + shift (via convertTo), where scale and shift are derived
// from `norm_type`:
//   * CV_MINMAX            - the source min/max (found under `_mask`) are mapped
//                            onto [min(a, b), max(a, b)];
//   * CV_L2 / CV_L1 / CV_C - scale = a / norm(_src, norm_type, _mask), shift = 0;
//                            `b` is not used in this mode;
//   * anything else        - CV_Error(CV_StsBadArg).
//
// rtype - output depth handed to convertTo(); when negative it is taken from
//         _dst if _dst has a fixed type, otherwise from the source depth.
// _mask - when non-empty, only masked elements of _dst are written (the result
//         is computed into a temporary and copied with the mask).
void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
212-
int norm_type, int rtype, InputArray _mask)
213-
{
214-
CV_INSTRUMENT_REGION();
215-
216-
double scale = 1, shift = 0;
217-
int type = _src.type(), depth = CV_MAT_DEPTH(type);
218-
219-
if( rtype < 0 )
220-
rtype = _dst.fixedType() ? _dst.depth() : depth;
221-
222-
if( norm_type == CV_MINMAX )
223-
{
224-
double smin = 0, smax = 0;
225-
double dmin = MIN( a, b ), dmax = MAX( a, b );
226-
minMaxIdx( _src, &smin, &smax, 0, 0, _mask );
227-
// A source range not larger than DBL_EPSILON yields scale = 0, so every
// element ends up equal to the shift computed below.
scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
228-
if( rtype == CV_32F )
229-
{
230-
// For float output the coefficients are rounded to float precision first.
scale = (float)scale;
231-
shift = (float)dmin - (float)(smin*scale);
232-
}
233-
else
234-
shift = dmin - smin*scale;
235-
}
236-
else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
237-
{
238-
scale = norm( _src, norm_type, _mask );
239-
// A norm not larger than DBL_EPSILON yields scale = 0 instead of dividing by it.
scale = scale > DBL_EPSILON ? a/scale : 0.;
240-
shift = 0;
241-
}
242-
else
243-
CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
244-
245-
// NOTE(review): CV_OCL_RUN is a macro defined elsewhere; presumably it returns
// early when the condition holds and ocl_normalize() succeeds - confirm.
CV_OCL_RUN(_dst.isUMat(),
246-
ocl_normalize(_src, _dst, _mask, rtype, scale, shift))
247-
248-
Mat src = _src.getMat();
249-
if( _mask.empty() )
250-
src.convertTo( _dst, rtype, scale, shift );
251-
else
252-
{
253-
Mat temp;
254-
src.convertTo( temp, rtype, scale, shift );
255-
temp.copyTo( _dst, _mask );
256-
}
257-
}
258-
259119
} // namespace

0 commit comments

Comments
 (0)