Skip to content

Commit 7de3a8e

Browse files
authored
Merge pull request opencv#26088 from plctlab:rvp_pt2
3rdparty: NDSRVP - Part 2: Filter
2 parents fcfdd31 + e087cc8 commit 7de3a8e

File tree

4 files changed

+545
-0
lines changed

4 files changed

+545
-0
lines changed

3rdparty/ndsrvp/include/imgproc.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#ifndef OPENCV_NDSRVP_IMGPROC_HPP
66
#define OPENCV_NDSRVP_IMGPROC_HPP
77

8+
struct cvhalFilter2D;
9+
810
namespace cv {
911

1012
namespace ndsrvp {
@@ -71,6 +73,34 @@ int threshold(const uchar* src_data, size_t src_step,
7173
#undef cv_hal_threshold
7274
#define cv_hal_threshold (cv::ndsrvp::threshold)
7375

76+
// ################ filter ################
77+
78+
// Filter HAL entry point, set-up step. The #undef/#define pair that follows the
// declaration makes cv_hal_filterInit resolve to cv::ndsrvp::filterInit.
// NOTE(review): the parameter meanings presumably follow OpenCV's cv_hal_filterInit
// contract (kernel buffer/step/type/size, maximum image size, source and destination
// types, border mode, delta, anchor, sub-matrix and in-place flags) - confirm against
// hal_replacement.hpp. *context is presumably set to the state object later handed to
// filter() and filterFree(); the int result is presumably a CV_HAL_ERROR_* code.
int filterInit(cvhalFilter2D **context,
79+
uchar *kernel_data, size_t kernel_step,
80+
int kernel_type, int kernel_width,
81+
int kernel_height, int max_width, int max_height,
82+
int src_type, int dst_type, int borderType,
83+
double delta, int anchor_x, int anchor_y,
84+
bool allowSubmatrix, bool allowInplace);
85+
86+
#undef cv_hal_filterInit
87+
#define cv_hal_filterInit (cv::ndsrvp::filterInit)
88+
89+
// Filter HAL entry point, processing step. The #undef/#define pair that follows the
// declaration makes cv_hal_filter resolve to cv::ndsrvp::filter.
// NOTE(review): presumably context is the object produced by filterInit(), src/dst
// describe the input and output buffers with their row steps, width/height give the
// processed region, and full_width/full_height/offset_x/offset_y locate that region
// inside the full image - confirm against OpenCV's cv_hal_filter contract.
int filter(cvhalFilter2D *context,
90+
const uchar *src_data, size_t src_step,
91+
uchar *dst_data, size_t dst_step,
92+
int width, int height,
93+
int full_width, int full_height,
94+
int offset_x, int offset_y);
95+
96+
#undef cv_hal_filter
97+
#define cv_hal_filter (cv::ndsrvp::filter)
98+
99+
// Filter HAL entry point, tear-down step. The #undef/#define pair that follows the
// declaration makes cv_hal_filterFree resolve to cv::ndsrvp::filterFree.
// NOTE(review): presumably releases the context created by filterInit() - confirm
// against the implementation.
int filterFree(cvhalFilter2D *context);
100+
101+
#undef cv_hal_filterFree
102+
#define cv_hal_filterFree (cv::ndsrvp::filterFree)
103+
74104
} // namespace ndsrvp
75105

76106
} // namespace cv

3rdparty/ndsrvp/src/cvutils.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,40 @@ int borderInterpolate(int p, int len, int borderType)
7373
return p;
7474
}
7575

76+
// Four-lane version of borderInterpolate(): applies the border rule chosen by
// borderType to the four 16-bit coordinates held in vp and returns the result.
//   vp         - four coordinates to remap
//   len        - extent of the axis the coordinates refer to
//   borderType - CV_HAL_BORDER_REPLICATE, _REFLECT, _REFLECT_101, _WRAP or _CONSTANT;
//                any other value is reported through ndsrvp_error() and, should that
//                call return, vp is handed back untouched
// Per-lane choices are made by reinterpreting the lane-wise comparison results as one
// long and feeding them to __nds__bpick() as the selection mask (presumably the first
// operand is taken where mask bits are set - confirm against the intrinsic reference).
int16x4_t borderInterpolate_vector(int16x4_t vp, short len, int borderType)
{
    const int16x4_t lanes_zero = (int16x4_t){0, 0, 0, 0};
    const int16x4_t lanes_one = (int16x4_t){1, 1, 1, 1};
    const int16x4_t lanes_len = (int16x4_t){len, len, len, len};

    if(borderType == CV_HAL_BORDER_REPLICATE)
    {
        // Candidates: 0 under the (vp < 0) mask, len - 1 under the (vp >= len) mask.
        const long below = (long)(vp < 0);
        const long above = (long)(vp >= lanes_len);
        return (int16x4_t)__nds__bpick(0, __nds__bpick((long)(lanes_len - 1), (long)vp, above), below);
    }

    if(borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101)
    {
        if(len == 1)
            return lanes_zero;

        // REFLECT_101 shifts the mirror position by one compared with REFLECT.
        const int16x4_t mirror_shift = (borderType == CV_HAL_BORDER_REFLECT_101) ? lanes_one : lanes_zero;

        // Mirror at least once, then keep going while any lane is still outside [0, len).
        for(;;)
        {
            const long below = (long)(vp < 0);
            const long above = (long)(vp >= lanes_len);
            const int16x4_t mirrored_low = -vp - 1 + mirror_shift;
            const int16x4_t mirrored_high = lanes_len - 1 - (vp - lanes_len) - mirror_shift;
            vp = (int16x4_t)__nds__bpick((long)mirrored_low, __nds__bpick((long)mirrored_high, (long)vp, above), below);

            if(!( (long)(vp >= lanes_len) || (long)(vp < 0) ))
                break;
        }
        return vp;
    }

    if(borderType == CV_HAL_BORDER_WRAP)
    {
        ndsrvp_assert(len > 0);
        const long below = (long)(vp < 0);
        const long above = (long)(vp >= lanes_len);
        const int16x4_t wrapped_from_below = vp - ((vp - lanes_len + 1) / lanes_len) * lanes_len;
        const int16x4_t wrapped_from_above = vp % lanes_len;
        return (int16x4_t)__nds__bpick((long)wrapped_from_below, __nds__bpick((long)wrapped_from_above, (long)vp, above), below);
    }

    if(borderType == CV_HAL_BORDER_CONSTANT)
    {
        // Candidate for out-of-range lanes is -1.
        const long outside = (long)(vp < 0 || vp >= lanes_len);
        return (int16x4_t)__nds__bpick((long)-lanes_one, (long)vp, outside);
    }

    ndsrvp_error(Error::StsBadArg, "borderInterpolate_vector(): Unknown/unsupported border type");
    return vp;
}
109+
76110
} // namespace ndsrvp
77111

78112
} // namespace cv

3rdparty/ndsrvp/src/cvutils.hpp

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <iostream>
1515
#include <string>
1616
#include <array>
17+
#include <vector>
1718
#include <climits>
1819
#include <algorithm>
1920

@@ -26,16 +27,26 @@ namespace ndsrvp {
2627
void* fastMalloc(size_t size);
2728
void fastFree(void* ptr);
2829
int borderInterpolate(int p, int len, int borderType);
30+
int16x4_t borderInterpolate_vector(int16x4_t vp, short len, int borderType);
2931

3032
#ifndef MAX
3133
# define MAX(a,b) ((a) < (b) ? (b) : (a))
3234
#endif
3335

36+
// Fallback two-argument minimum, defined only when MIN is not already available.
// Being a function-like macro, it may evaluate each argument twice, so arguments
// should be free of side effects.
#ifndef MIN
37+
# define MIN(a,b) ((a) > (b) ? (b) : (a))
38+
#endif
39+
3440
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
3541
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
3642

43+
// CV_ELEM_SIZE1(type): picks one 4-bit entry out of the packed table 0x28442211,
// indexed by CV_MAT_DEPTH(type) (entry 0 is the least significant nibble). The
// entries, low to high, are 1, 1, 2, 2, 4, 4, 8, 2 - presumably the byte size of one
// channel for depth codes 0..7.
// CV_ELEM_SIZE(type): CV_ELEM_SIZE1(type) multiplied by CV_MAT_CN(type).
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
44+
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
45+
3746
#define CV_MALLOC_ALIGN 64
3847

48+
// Function form of CV_ELEM_SIZE(): returns the macro's value for `type` as a size_t.
inline size_t getElemSize(int type)
{
    const size_t elem_bytes = (size_t)CV_ELEM_SIZE(type);
    return elem_bytes;
}
49+
3950
// error codes
4051

4152
enum Error{
@@ -69,6 +80,135 @@ inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b)
6980
return (int32x2_t)__nds__bpick((long)a, __nds__bpick((long)(b - 1), (long)x, (long)(x < b)), (long)(x >= a));
7081
}
7182

83+
// expand
84+
85+
/*
86+
[0] [1] [2] [3] [4] [5] [6] [7]
87+
810 [ 0 ] [ 1 ] [ 4 ] [ 5 ]
88+
832 [ 2 ] [ 3 ] [ 6 ] [ 7 ]
89+
bb [ 0 ] [ 1 ] [ 2 ] [ 3 ]
90+
tt [ 4 ] [ 5 ] [ 6 ] [ 7 ]
91+
*/
92+
93+
inline void ndsrvp_u8_u16_expand8(const unsigned long vs, ushort* dst)
94+
{
95+
unsigned long vs810 = __nds__zunpkd810(vs);
96+
unsigned long vs832 = __nds__zunpkd832(vs);
97+
*(unsigned long*)dst = __nds__pkbb32(vs832, vs810);
98+
*(unsigned long*)(dst + 4) = __nds__pktt32(vs832, vs810);
99+
}
100+
101+
/*
102+
[0] [1] [2] [3] [4] [5] [6] [7]
103+
820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
104+
831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
105+
bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
106+
tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
107+
*/
108+
109+
inline void ndsrvp_u8_u16_eswap8(const unsigned long vs, ushort* dst)
110+
{
111+
unsigned long vs820 = __nds__zunpkd820(vs);
112+
unsigned long vs831 = __nds__zunpkd831(vs);
113+
*(unsigned long*)dst = __nds__pkbb32(vs831, vs820);
114+
*(unsigned long*)(dst + 4) = __nds__pktt32(vs831, vs820);
115+
}
116+
117+
/*
118+
[0] [1] [2] [3] [4] [5] [6] [7]
119+
820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
120+
831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
121+
bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
122+
tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
123+
bbbb[ 0 ] [ 1 ]
124+
bbtt[ 2 ] [ 3 ]
125+
ttbb[ 4 ] [ 5 ]
126+
tttt[ 6 ] [ 7 ]
127+
*/
128+
129+
130+
inline void ndsrvp_u8_u32_expand8(const unsigned long vs, uint* dst)
131+
{
132+
unsigned long vs820 = __nds__zunpkd820(vs);
133+
unsigned long vs831 = __nds__zunpkd831(vs);
134+
unsigned long vsbb = __nds__pkbb32(vs831, vs820);
135+
unsigned long vstt = __nds__pktt32(vs831, vs820);
136+
*(unsigned long*)dst = __nds__pkbb16(0, vsbb);
137+
*(unsigned long*)(dst + 2) = __nds__pktt16(0, vsbb);
138+
*(unsigned long*)(dst + 4) = __nds__pkbb16(0, vstt);
139+
*(unsigned long*)(dst + 6) = __nds__pktt16(0, vstt);
140+
}
141+
142+
// float replacement
143+
144+
// Element-wise sum of two groups of eight floats: c[i] = a[i] + b[i] for i = 0..7.
// Elements are processed in ascending index order, one store per element.
//   a, b - inputs, eight floats each
//   c    - output, eight floats
inline void ndsrvp_f32_add8(const float* a, const float* b, float* c)
{
    for (int lane = 0; lane < 8; ++lane)
    {
        c[lane] = a[lane] + b[lane];
    }
}
155+
156+
/*
157+
[1] [8] [23]
158+
[24] [8]
159+
*/
160+
161+
// Multiplies eight floats a[0..7] by the eight unsigned bytes packed in b and writes
// eight floats to c[0..7]. The work is done with integer SIMD intrinsics on the raw
// 32-bit patterns of the floats instead of floating-point instructions; the author
// marks it as experimental and not bit exact.
//   a - eight input floats, loaded as four uint32x2_t pairs
//   b - eight 8-bit multipliers packed in one unsigned long
//   c - eight output floats, stored as four uint32x2_t pairs
// NOTE(review): presumably byte i of b multiplies a[i]; that relies on the lane order
// produced by ndsrvp_u8_u16_eswap8() and on which 16-bit half __nds__kmmwb2_u /
// __nds__kmmwt2_u consume - confirm against the intrinsic reference.
// NOTE(review): no special handling of zero, NaN, infinity or denormal inputs, nor of
// a zero byte in b, is visible here - verify callers do not depend on those cases.
inline void ndsrvp_f32_u8_mul8(const float* a, const unsigned long b, float* c) // experimental, not bit exact
162+
{
163+
// Low 23 bits: the part of the normalised product kept as the result's fraction field.
const int mask_frac = 0x007FFFFF;
164+
// Everything except bit 31: clears the top bit of the shifted pattern.
const int mask_sign = 0x7FFFFFFF;
165+
// Bit 30: forced on so the shifted fraction carries an explicit leading one.
const int mask_lead = 0x40000000;
166+
// Shift separating the low 23 fraction bits from the bits above them.
const int ofs_exp = 23;
167+
168+
// Reinterpret the eight floats as four pairs of 32-bit integers.
uint32x2_t va01 = *(uint32x2_t*)a;
169+
uint32x2_t va23 = *(uint32x2_t*)(a + 2);
170+
uint32x2_t va45 = *(uint32x2_t*)(a + 4);
171+
uint32x2_t va67 = *(uint32x2_t*)(a + 6);
172+
173+
// Bits 23 and above of each pattern (the sign and exponent fields of a float).
uint32x2_t vaexp01 = va01 >> ofs_exp;
174+
uint32x2_t vaexp23 = va23 >> ofs_exp;
175+
uint32x2_t vaexp45 = va45 >> ofs_exp;
176+
uint32x2_t vaexp67 = va67 >> ofs_exp;
177+
178+
// Move the fraction up by 7 bits, clear bit 31 and set bit 30 as the leading one.
uint32x2_t vafrac01 = ((va01 << 7) & mask_sign) | mask_lead;
179+
uint32x2_t vafrac23 = ((va23 << 7) & mask_sign) | mask_lead;
180+
uint32x2_t vafrac45 = ((va45 << 7) & mask_sign) | mask_lead;
181+
uint32x2_t vafrac67 = ((va67 << 7) & mask_sign) | mask_lead;
182+
183+
// Widen the eight byte multipliers to 16-bit lanes (eswap8 lane order).
int16x4_t vb[2]; // fake signed for signed multiply
184+
ndsrvp_u8_u16_eswap8(b, (ushort*)vb);
185+
186+
// Multiply each 32-bit mantissa by a 16-bit multiplier; the b/t variants presumably
// take the bottom/top 16-bit half of each 32-bit word of vb - TODO confirm.
vafrac01 = (uint32x2_t)__nds__kmmwb2_u((long)vafrac01, (unsigned long)vb[0]);
187+
vafrac23 = (uint32x2_t)__nds__kmmwt2_u((long)vafrac23, (unsigned long)vb[0]);
188+
vafrac45 = (uint32x2_t)__nds__kmmwb2_u((long)vafrac45, (unsigned long)vb[1]);
189+
vafrac67 = (uint32x2_t)__nds__kmmwt2_u((long)vafrac67, (unsigned long)vb[1]);
190+
191+
// Left shift needed to bring the highest set bit of each product to bit 23
// (presumably __nds__v_clz32 counts leading zeros per 32-bit lane).
uint32x2_t vaclz01 = __nds__v_clz32(vafrac01) - 8;
192+
uint32x2_t vaclz23 = __nds__v_clz32(vafrac23) - 8;
193+
uint32x2_t vaclz45 = __nds__v_clz32(vafrac45) - 8;
194+
uint32x2_t vaclz67 = __nds__v_clz32(vafrac67) - 8;
195+
196+
// Adjust the exponent part by 8 minus that normalisation shift.
vaexp01 += 8 - vaclz01;
197+
vaexp23 += 8 - vaclz23;
198+
vaexp45 += 8 - vaclz45;
199+
vaexp67 += 8 - vaclz67;
200+
201+
// Normalise the products.
vafrac01 <<= vaclz01;
202+
vafrac23 <<= vaclz23;
203+
vafrac45 <<= vaclz45;
204+
vafrac67 <<= vaclz67;
205+
206+
// Reassemble each result: upper part shifted back above bit 22, low 23 bits of the
// normalised product as the fraction.
*(uint32x2_t*)c = (vaexp01 << ofs_exp) | (vafrac01 & mask_frac);
207+
*(uint32x2_t*)(c + 2) = (vaexp23 << ofs_exp) | (vafrac23 & mask_frac);
208+
*(uint32x2_t*)(c + 4) = (vaexp45 << ofs_exp) | (vafrac45 & mask_frac);
209+
*(uint32x2_t*)(c + 6) = (vaexp67 << ofs_exp) | (vafrac67 & mask_frac);
210+
}
211+
72212
// saturate
73213

74214
template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(v); }
@@ -94,6 +234,26 @@ template<> inline short saturate_cast<short>(double v) { return saturate_cas
94234
template<> inline int saturate_cast<int>(float v) { return (int)lrintf(v); }
95235
template<> inline int saturate_cast<int>(double v) { return (int)lrint(v); }
96236

237+
inline double cast_ptr_to_double(const uchar* v, int depth) {
238+
switch (depth) {
239+
case CV_8U: return (double)*(uchar*)v;
240+
case CV_8S: return (double)*(char*)v;
241+
case CV_16U: return (double)*(ushort*)v;
242+
case CV_16S: return (double)*(short*)v;
243+
case CV_32S: return (double)*(int*)v;
244+
case CV_32F: return (double)*(float*)v;
245+
case CV_64F: return (double)*(double*)v;
246+
case CV_16F: return (double)*(float*)v;
247+
default: return 0;
248+
}
249+
}
250+
251+
template <typename _Tp>
252+
inline _Tp data_at(const uchar* data, int step, int y, int x, int cn)
253+
{
254+
return ((_Tp*)(data + y * step))[x * cn];
255+
}
256+
97257
// align
98258

99259
inline long align(size_t v, int n)

0 commit comments

Comments
 (0)