50
50
51
51
namespace cv { namespace cuda { namespace device
52
52
{
53
- // // namespace imgproc
54
- // {
55
-
56
53
__device__ void histogramAddAndSub8 (int * H, const int * hist_colAdd,const int * hist_colSub){
57
54
int tx = threadIdx .x ;
58
55
if (tx<8 ){
@@ -120,35 +117,40 @@ namespace cv { namespace cuda { namespace device
120
117
luc[tx]=0 ;
121
118
}
122
119
120
+ #define scanNeighbor (array, range, index, threadIndex ) \
121
+ { \
122
+ int v = 0 ; \
123
+ if (index <= threadIndex && threadIndex < range) \
124
+ v = array[threadIndex] + array[threadIndex-index]; \
125
+ __syncthreads (); \
126
+ if (index <= threadIndex && threadIndex < range) \
127
+ array[threadIndex] = v; \
128
+ }
129
+ #define findMedian (array, range, threadIndex, result, count, position ) \
130
+ if (threadIndex < range) \
131
+ { \
132
+ if (array[threadIndex+1 ] > position && array[threadIndex] <= position) \
133
+ { \
134
+ *result = threadIndex+1 ; \
135
+ *count = array[threadIndex]; \
136
+ } \
137
+ }
138
+
123
139
__device__ void histogramMedianPar8LookupOnly (int * H,int * Hscan, const int medPos,int * retval, int * countAtMed){
124
140
int tx=threadIdx .x ;
125
141
*retval=*countAtMed=0 ;
126
142
if (tx<8 ){
127
143
Hscan[tx]=H[tx];
128
144
}
129
145
__syncthreads ();
130
- if (1 <= tx && tx < 8 )
131
- Hscan[tx]+=Hscan[tx-1 ];
146
+ scanNeighbor (Hscan, 8 , 1 , tx);
132
147
__syncthreads ();
133
- if (2 <= tx && tx < 8 )
134
- Hscan[tx]+=Hscan[tx-2 ];
148
+ scanNeighbor (Hscan, 8 , 2 , tx);
135
149
__syncthreads ();
136
- if (4 <= tx && tx < 8 )
137
- Hscan[tx]+=Hscan[tx-4 ];
150
+ scanNeighbor (Hscan, 8 , 4 , tx);
138
151
__syncthreads ();
139
152
140
- if (tx<7 ){
141
- if (Hscan[tx+1 ] > medPos && Hscan[tx] < medPos){
142
- *retval=tx+1 ;
143
- *countAtMed=Hscan[tx];
144
- }
145
- else if (Hscan[tx]==medPos){
146
- if (Hscan[tx+1 ]>medPos){
147
- *retval=tx+1 ;
148
- *countAtMed=Hscan[tx];
149
- }
150
- }
151
- }
153
+ findMedian (Hscan, 7 , tx, retval, countAtMed, medPos);
152
154
}
153
155
154
156
__device__ void histogramMedianPar32LookupOnly (int * H,int * Hscan, const int medPos,int * retval, int * countAtMed){
@@ -158,33 +160,18 @@ namespace cv { namespace cuda { namespace device
158
160
Hscan[tx]=H[tx];
159
161
}
160
162
__syncthreads ();
161
- if ( 1 <= tx && tx < 32 )
162
- Hscan[tx]+=Hscan[tx-1 ];
163
+ scanNeighbor (Hscan, 32 , 1 , tx);
163
164
__syncthreads ();
164
- if ( 2 <= tx && tx < 32 )
165
- Hscan[tx]+=Hscan[tx-2 ];
165
+ scanNeighbor (Hscan, 32 , 2 , tx);
166
166
__syncthreads ();
167
- if ( 4 <= tx && tx < 32 )
168
- Hscan[tx]+=Hscan[tx-4 ];
167
+ scanNeighbor (Hscan, 32 , 4 , tx);
169
168
__syncthreads ();
170
- if ( 8 <= tx && tx < 32 )
171
- Hscan[tx]+=Hscan[tx-8 ];
169
+ scanNeighbor (Hscan, 32 , 8 , tx);
172
170
__syncthreads ();
173
- if ( 16 <= tx && tx < 32 )
174
- Hscan[tx]+=Hscan[tx-16 ];
171
+ scanNeighbor (Hscan, 32 , 16 , tx);
175
172
__syncthreads ();
176
- if (tx<31 ){
177
- if (Hscan[tx+1 ] > medPos && Hscan[tx] < medPos){
178
- *retval=tx+1 ;
179
- *countAtMed=Hscan[tx];
180
- }
181
- else if (Hscan[tx]==medPos){
182
- if (Hscan[tx+1 ]>medPos){
183
- *retval=tx+1 ;
184
- *countAtMed=Hscan[tx];
185
- }
186
- }
187
- }
173
+
174
+ findMedian (Hscan, 31 , tx, retval, countAtMed, medPos);
188
175
}
189
176
190
177
__global__ void cuMedianFilterMultiBlock (PtrStepSzb src, PtrStepSzb dest, PtrStepSzi histPar, PtrStepSzi coarseHistGrid,int r, int medPos_)
@@ -283,7 +270,6 @@ namespace cv { namespace cuda { namespace device
283
270
__syncthreads ();
284
271
285
272
histogramMultipleAdd8 (HCoarse,histCoarse, 2 *r+1 );
286
- // __syncthreads();
287
273
int cols_m_1=cols-1 ;
288
274
289
275
for (int j=r;j<cols-r;j++){
@@ -295,23 +281,24 @@ namespace cv { namespace cuda { namespace device
295
281
histogramMedianPar8LookupOnly (HCoarse,HCoarseScan,medPos, &firstBin,&countAtMed);
296
282
__syncthreads ();
297
283
298
- if ( luc[firstBin] <= (j-r))
284
+ int loopIndex = luc[firstBin];
285
+ if (loopIndex <= (j-r))
299
286
{
300
287
histogramClear32 (HFine[firstBin]);
301
- for ( luc[firstBin] = j-r; luc[firstBin] < ::min (j+r+1 ,cols); luc[firstBin] ++ ){
302
- histogramAdd32 (HFine[firstBin], hist+(luc[firstBin] *256 +(firstBin<<5 ) ) );
288
+ for ( loopIndex = j-r; loopIndex < ::min (j+r+1 ,cols); loopIndex ++ ){
289
+ histogramAdd32 (HFine[firstBin], hist+(loopIndex *256 +(firstBin<<5 ) ) );
303
290
}
304
291
}
305
292
else {
306
- for ( ; luc[firstBin] < (j+r+1 );luc[firstBin] ++ ) {
293
+ for ( ; loopIndex < (j+r+1 );loopIndex ++ ) {
307
294
histogramAddAndSub32 (HFine[firstBin],
308
- hist+(::min (luc[firstBin] ,cols_m_1)*256 +(firstBin<<5 ) ),
309
- hist+(::max (luc[firstBin] -2 *r-1 ,0 )*256 +(firstBin<<5 ) ) );
295
+ hist+(::min (loopIndex ,cols_m_1)*256 +(firstBin<<5 ) ),
296
+ hist+(::max (loopIndex -2 *r-1 ,0 )*256 +(firstBin<<5 ) ) );
310
297
__syncthreads ();
311
-
312
298
}
313
299
}
314
300
__syncthreads ();
301
+ luc[firstBin] = loopIndex;
315
302
316
303
int leftOver=medPos-countAtMed;
317
304
if (leftOver>=0 ){
0 commit comments