Skip to content

Commit 667a66e

Browse files
authored
Merge pull request #3607 from chacha21:cuda_phase_interleaved
Add interleaved versions of phase/cartToPolar/polarToCart #3607 This PR is for performance only (at the cost of more template code and increased GPU code size) The additional variants can help the caller skip the creation of temporary GPU mats (where memory is more likely to be a critical resource), and can even allow in-place processing. magnitude/angles/x/y are often already interleaved when dealing with DFTs. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [X] The feature is well documented and sample code can be built with the project CMake
1 parent b2c0ce0 commit 667a66e

File tree

4 files changed

+545
-31
lines changed

4 files changed

+545
-31
lines changed

modules/cudaarithm/include/opencv2/cudaarithm.hpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,17 @@ CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude
433433
*/
434434
CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
435435

436+
/** @brief Computes polar angles of complex matrix elements.
437+
438+
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
439+
@param angle Destination matrix of angles ( CV_32FC1 ).
440+
@param angleInDegrees Flag for angles that must be evaluated in degrees.
441+
@param stream Stream for the asynchronous version.
442+
443+
@sa phase
444+
*/
445+
CV_EXPORTS_W void phase(InputArray xy, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
446+
436447
/** @brief Converts Cartesian coordinates into polar.
437448
438449
@param x Source matrix containing real components ( CV_32FC1 ).
@@ -446,6 +457,29 @@ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angl
446457
*/
447458
CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
448459

460+
/** @brief Converts Cartesian coordinates into polar.
461+
462+
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
463+
@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ).
464+
@param angle Destination matrix of angles ( CV_32FC1 ).
465+
@param angleInDegrees Flag for angles that must be evaluated in degrees.
466+
@param stream Stream for the asynchronous version.
467+
468+
@sa cartToPolar
469+
*/
470+
CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
471+
472+
/** @brief Converts Cartesian coordinates into polar.
473+
474+
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
475+
@param magnitudeAngle Destination matrix of float magnitudes and angles ( CV_32FC2 ).
476+
@param angleInDegrees Flag for angles that must be evaluated in degrees.
477+
@param stream Stream for the asynchronous version.
478+
479+
@sa cartToPolar
480+
*/
481+
CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitudeAngle, bool angleInDegrees = false, Stream& stream = Stream::Null());
482+
449483
/** @brief Converts polar coordinates into Cartesian.
450484
451485
@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ).
@@ -457,6 +491,25 @@ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude,
457491
*/
458492
CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null());
459493

494+
/** @brief Converts polar coordinates into Cartesian.
495+
496+
@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ).
497+
@param angle Source matrix containing angles ( same type as magnitude ).
498+
@param xy Destination matrix of real and imaginary components ( same depth as magnitude, i.e. CV_32FC2 or CV_64FC2 ).
499+
@param angleInDegrees Flag that indicates angles in degrees.
500+
@param stream Stream for the asynchronous version.
501+
*/
502+
CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null());
503+
504+
/** @brief Converts polar coordinates into Cartesian.
505+
506+
@param magnitudeAngle Source matrix containing magnitudes and angles ( CV_32FC2 or CV_64FC2 ).
507+
@param xy Destination matrix of real and imaginary components ( same depth as source ).
508+
@param angleInDegrees Flag that indicates angles in degrees.
509+
@param stream Stream for the asynchronous version.
510+
*/
511+
CV_EXPORTS_W void polarToCart(InputArray magnitudeAngle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null());
512+
460513
//! @} cudaarithm_elem
461514

462515
//! @addtogroup cudaarithm_core

modules/cudaarithm/src/cuda/polar_cart.cu

Lines changed: 211 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@
5252
#include "opencv2/cudev.hpp"
5353
#include "opencv2/core/private.cuda.hpp"
5454

55-
using namespace cv;
56-
using namespace cv::cuda;
55+
//do not use implicit cv::cuda to avoid clash of tuples from ::cuda::std
56+
/*using namespace cv;
57+
using namespace cv::cuda;*/
58+
5759
using namespace cv::cudev;
5860

5961
void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
@@ -66,11 +68,7 @@ void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream&
6668

6769
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
6870

69-
GpuMat_<float> xc(x.reshape(1));
70-
GpuMat_<float> yc(y.reshape(1));
71-
GpuMat_<float> magc(dst.reshape(1));
72-
73-
gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
71+
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), magnitude_func<float>(), stream);
7472

7573
syncOutput(dst, _dst, stream);
7674
}
@@ -85,11 +83,7 @@ void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stre
8583

8684
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
8785

88-
GpuMat_<float> xc(x.reshape(1));
89-
GpuMat_<float> yc(y.reshape(1));
90-
GpuMat_<float> magc(dst.reshape(1));
91-
92-
gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
86+
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), magnitude_sqr_func<float>(), stream);
9387

9488
syncOutput(dst, _dst, stream);
9589
}
@@ -104,14 +98,26 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
10498

10599
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
106100

107-
GpuMat_<float> xc(x.reshape(1));
108-
GpuMat_<float> yc(y.reshape(1));
109-
GpuMat_<float> anglec(dst.reshape(1));
101+
if (angleInDegrees)
102+
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), direction_func<float, true>(), stream);
103+
else
104+
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), direction_func<float, false>(), stream);
105+
106+
syncOutput(dst, _dst, stream);
107+
}
108+
109+
void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stream& stream)
110+
{
111+
GpuMat xy = getInputMat(_xy, stream);
112+
113+
CV_Assert( xy.type() == CV_32FC2 );
114+
115+
GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream);
110116

111117
if (angleInDegrees)
112-
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
118+
gridTransformUnary(globPtr<float2>(xy), globPtr<float>(dst), direction_interleaved_func<float2, true>(), stream);
113119
else
114-
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
120+
gridTransformUnary(globPtr<float2>(xy), globPtr<float>(dst), direction_interleaved_func<float2, false>(), stream);
115121

116122
syncOutput(dst, _dst, stream);
117123
}
@@ -127,10 +133,10 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
127133
GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream);
128134
GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream);
129135

130-
GpuMat_<float> xc(x.reshape(1));
131-
GpuMat_<float> yc(y.reshape(1));
132-
GpuMat_<float> magc(mag.reshape(1));
133-
GpuMat_<float> anglec(angle.reshape(1));
136+
GpuMat_<float> xc(x);
137+
GpuMat_<float> yc(y);
138+
GpuMat_<float> magc(mag);
139+
GpuMat_<float> anglec(angle);
134140

135141
if (angleInDegrees)
136142
gridTransformBinary(xc, yc, magc, anglec, magnitude_func<float>(), direction_func<float, true>(), stream);
@@ -141,6 +147,69 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
141147
syncOutput(angle, _angle, stream);
142148
}
143149

150+
void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
151+
{
152+
GpuMat xy = getInputMat(_xy, stream);
153+
154+
CV_Assert( xy.type() == CV_32FC2 );
155+
156+
GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream);
157+
GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream);
158+
159+
GpuMat_<float> magc(mag);
160+
GpuMat_<float> anglec(angle);
161+
162+
if (angleInDegrees)
163+
{
164+
auto f1 = magnitude_interleaved_func<float2>();
165+
auto f2 = direction_interleaved_func<float2, true>();
166+
cv::cudev::tuple<decltype(f1), decltype(f2)> f12 = cv::cudev::make_tuple(f1, f2);
167+
gridTransformTuple(globPtr<float2>(xy),
168+
tie(magc, anglec),
169+
f12,
170+
stream);
171+
}
172+
else
173+
{
174+
auto f1 = magnitude_interleaved_func<float2>();
175+
auto f2 = direction_interleaved_func<float2, false>();
176+
cv::cudev::tuple<decltype(f1), decltype(f2)> f12 = cv::cudev::make_tuple(f1, f2);
177+
gridTransformTuple(globPtr<float2>(xy),
178+
tie(magc, anglec),
179+
f12,
180+
stream);
181+
}
182+
183+
syncOutput(mag, _mag, stream);
184+
syncOutput(angle, _angle, stream);
185+
}
186+
187+
void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDegrees, Stream& stream)
188+
{
189+
GpuMat xy = getInputMat(_xy, stream);
190+
191+
CV_Assert( xy.type() == CV_32FC2 );
192+
193+
GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream);
194+
195+
if (angleInDegrees)
196+
{
197+
gridTransformUnary(globPtr<float2>(xy),
198+
globPtr<float2>(magAngle),
199+
magnitude_direction_interleaved_func<float2, true>(),
200+
stream);
201+
}
202+
else
203+
{
204+
gridTransformUnary(globPtr<float2>(xy),
205+
globPtr<float2>(magAngle),
206+
magnitude_direction_interleaved_func<float2, false>(),
207+
stream);
208+
}
209+
210+
syncOutput(magAngle, _magAngle, stream);
211+
}
212+
144213
namespace
145214
{
146215
template <typename T> struct sincos_op
@@ -159,12 +228,12 @@ namespace
159228
};
160229

161230
template <typename T, bool useMag>
162-
__global__ void polarToCartImpl_(const GlobPtr<T> mag, const GlobPtr<T> angle, GlobPtr<T> xmat, GlobPtr<T> ymat, const T scale, const int rows, const int cols)
231+
__global__ void polarToCartImpl_(const PtrStep<T> mag, const PtrStepSz<T> angle, PtrStep<T> xmat, PtrStep<T> ymat, const T scale)
163232
{
164233
const int x = blockDim.x * blockIdx.x + threadIdx.x;
165234
const int y = blockDim.y * blockIdx.y + threadIdx.y;
166235

167-
if (x >= cols || y >= rows)
236+
if (x >= angle.cols || y >= angle.rows)
168237
return;
169238

170239
const T mag_val = useMag ? mag(y, x) : static_cast<T>(1.0);
@@ -178,23 +247,90 @@ namespace
178247
ymat(y, x) = mag_val * sin_a;
179248
}
180249

250+
template <typename T, bool useMag>
251+
__global__ void polarToCartDstInterleavedImpl_(const PtrStep<T> mag, const PtrStepSz<T> angle, PtrStep<typename MakeVec<T, 2>::type > xymat, const T scale)
252+
{
253+
typedef typename MakeVec<T, 2>::type T2;
254+
const int x = blockDim.x * blockIdx.x + threadIdx.x;
255+
const int y = blockDim.y * blockIdx.y + threadIdx.y;
256+
257+
if (x >= angle.cols || y >= angle.rows)
258+
return;
259+
260+
const T mag_val = useMag ? mag(y, x) : static_cast<T>(1.0);
261+
const T angle_val = angle(y, x);
262+
263+
T sin_a, cos_a;
264+
sincos_op<T> op;
265+
op(scale * angle_val, &sin_a, &cos_a);
266+
267+
const T2 xy = {mag_val * cos_a, mag_val * sin_a};
268+
xymat(y, x) = xy;
269+
}
270+
271+
template <typename T>
272+
__global__ void polarToCartInterleavedImpl_(const PtrStepSz<typename MakeVec<T, 2>::type > magAngle, PtrStep<typename MakeVec<T, 2>::type > xymat, const T scale)
273+
{
274+
typedef typename MakeVec<T, 2>::type T2;
275+
const int x = blockDim.x * blockIdx.x + threadIdx.x;
276+
const int y = blockDim.y * blockIdx.y + threadIdx.y;
277+
278+
if (x >= magAngle.cols || y >= magAngle.rows)
279+
return;
280+
281+
const T2 magAngle_val = magAngle(y, x);
282+
const T mag_val = magAngle_val.x;
283+
const T angle_val = magAngle_val.y;
284+
285+
T sin_a, cos_a;
286+
sincos_op<T> op;
287+
op(scale * angle_val, &sin_a, &cos_a);
288+
289+
const T2 xy = {mag_val * cos_a, mag_val * sin_a};
290+
xymat(y, x) = xy;
291+
}
292+
181293
template <typename T>
182294
void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream)
183295
{
184-
GpuMat_<T> xc(x.reshape(1));
185-
GpuMat_<T> yc(y.reshape(1));
186-
GpuMat_<T> magc(mag.reshape(1));
187-
GpuMat_<T> anglec(angle.reshape(1));
296+
const dim3 block(32, 8);
297+
const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y));
298+
299+
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
300+
301+
if (mag.empty())
302+
polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
303+
else
304+
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, x, y, scale);
305+
}
306+
307+
template <typename T>
308+
void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream)
309+
{
310+
typedef typename MakeVec<T, 2>::type T2;
188311

189312
const dim3 block(32, 8);
190-
const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y));
313+
const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y));
191314

192315
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
193316

194-
if (magc.empty())
195-
polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols);
317+
if (mag.empty())
318+
polarToCartDstInterleavedImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
196319
else
197-
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols);
320+
polarToCartDstInterleavedImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, xy, scale);
321+
}
322+
323+
template <typename T>
324+
void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream)
325+
{
326+
typedef typename MakeVec<T, 2>::type T2;
327+
328+
const dim3 block(32, 8);
329+
const dim3 grid(divUp(magAngle.cols, block.x), divUp(magAngle.rows, block.y));
330+
331+
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);
332+
333+
polarToCartInterleavedImpl_<T> << <grid, block, 0, stream >> >(magAngle, xy, scale);
198334
}
199335
}
200336

@@ -223,4 +359,48 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
223359
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
224360
}
225361

362+
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _xy, bool angleInDegrees, Stream& _stream)
363+
{
364+
typedef void(*func_t)(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream);
365+
static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartDstInterleavedImpl<float>, polarToCartDstInterleavedImpl<double> };
366+
367+
GpuMat mag = getInputMat(_mag, _stream);
368+
GpuMat angle = getInputMat(_angle, _stream);
369+
370+
CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F);
371+
CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
372+
373+
GpuMat xy = getOutputMat(_xy, angle.size(), CV_MAKETYPE(angle.depth(), 2), _stream);
374+
375+
cudaStream_t stream = StreamAccessor::getStream(_stream);
376+
funcs[angle.depth()](mag, angle, xy, angleInDegrees, stream);
377+
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
378+
379+
syncOutput(xy, _xy, _stream);
380+
381+
if (stream == 0)
382+
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
383+
}
384+
385+
void cv::cuda::polarToCart(InputArray _magAngle, OutputArray _xy, bool angleInDegrees, Stream& _stream)
386+
{
387+
typedef void(*func_t)(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream);
388+
static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartInterleavedImpl<float>, polarToCartInterleavedImpl<double> };
389+
390+
GpuMat magAngle = getInputMat(_magAngle, _stream);
391+
392+
CV_Assert(magAngle.type() == CV_32FC2 || magAngle.type() == CV_64FC2);
393+
394+
GpuMat xy = getOutputMat(_xy, magAngle.size(), magAngle.type(), _stream);
395+
396+
cudaStream_t stream = StreamAccessor::getStream(_stream);
397+
funcs[magAngle.depth()](magAngle, xy, angleInDegrees, stream);
398+
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
399+
400+
syncOutput(xy, _xy, _stream);
401+
402+
if (stream == 0)
403+
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
404+
}
405+
226406
#endif

0 commit comments

Comments
 (0)