14
14
#include < iostream>
15
15
#include < string>
16
16
#include < array>
17
+ #include < vector>
17
18
#include < climits>
18
19
#include < algorithm>
19
20
@@ -26,16 +27,26 @@ namespace ndsrvp {
26
27
void * fastMalloc (size_t size);
27
28
void fastFree (void * ptr);
28
29
int borderInterpolate (int p, int len, int borderType);
30
+ int16x4_t borderInterpolate_vector (int16x4_t vp, short len, int borderType);
29
31
30
32
// Larger of two values. The parameter list must follow the macro name with no
// intervening space, otherwise the preprocessor treats MAX as an object-like
// macro. Each argument may be evaluated twice, so avoid side effects in them.
#ifndef MAX
#  define MAX(a, b) ((a) < (b) ? (b) : (a))
#endif
33
35
36
// Smaller of two values. The parameter list must follow the macro name with no
// intervening space, otherwise the preprocessor treats MIN as an object-like
// macro. Each argument may be evaluated twice, so avoid side effects in them.
#ifndef MIN
#  define MIN(a, b) ((a) > (b) ? (b) : (a))
#endif
39
+
34
40
// Bit mask selecting the channel-count field of a type/flags word.
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
// Channel count encoded in `flags`; the field stores (channels - 1).
// Function-like macros need the '(' directly after the name.
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)

// Bytes per single channel: 0x28442211 is a table of 4-bit entries, and the
// entry is selected by shifting right by 4 * CV_MAT_DEPTH(type).
#define CV_ELEM_SIZE1(type) ((0x28442211 >> (CV_MAT_DEPTH(type) * 4)) & 15)
// Bytes per element: channel count times bytes per channel.
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type) * CV_ELEM_SIZE1(type))
45
+
37
46
#define CV_MALLOC_ALIGN 64
38
47
48
+ inline size_t getElemSize (int type) { return (size_t )CV_ELEM_SIZE (type); }
49
+
39
50
// error codes
40
51
41
52
enum Error{
@@ -69,6 +80,135 @@ inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b)
69
80
return (int32x2_t )__nds__bpick ((long )a, __nds__bpick ((long )(b - 1 ), (long )x, (long )(x < b)), (long )(x >= a));
70
81
}
71
82
83
+ // expand
84
+
85
+ /*
86
+ [0] [1] [2] [3] [4] [5] [6] [7]
87
+ 810 [ 0 ] [ 1 ] [ 4 ] [ 5 ]
88
+ 832 [ 2 ] [ 3 ] [ 6 ] [ 7 ]
89
+ bb [ 0 ] [ 1 ] [ 2 ] [ 3 ]
90
+ tt [ 4 ] [ 5 ] [ 6 ] [ 7 ]
91
+ */
92
+
93
+ inline void ndsrvp_u8_u16_expand8 (const unsigned long vs, ushort* dst)
94
+ {
95
+ unsigned long vs810 = __nds__zunpkd810 (vs);
96
+ unsigned long vs832 = __nds__zunpkd832 (vs);
97
+ *(unsigned long *)dst = __nds__pkbb32 (vs832, vs810);
98
+ *(unsigned long *)(dst + 4 ) = __nds__pktt32 (vs832, vs810);
99
+ }
100
+
101
+ /*
102
+ [0] [1] [2] [3] [4] [5] [6] [7]
103
+ 820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
104
+ 831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
105
+ bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
106
+ tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
107
+ */
108
+
109
+ inline void ndsrvp_u8_u16_eswap8 (const unsigned long vs, ushort* dst)
110
+ {
111
+ unsigned long vs820 = __nds__zunpkd820 (vs);
112
+ unsigned long vs831 = __nds__zunpkd831 (vs);
113
+ *(unsigned long *)dst = __nds__pkbb32 (vs831, vs820);
114
+ *(unsigned long *)(dst + 4 ) = __nds__pktt32 (vs831, vs820);
115
+ }
116
+
117
+ /*
118
+ [0] [1] [2] [3] [4] [5] [6] [7]
119
+ 820 [ 0 ] [ 2 ] [ 4 ] [ 6 ]
120
+ 831 [ 1 ] [ 3 ] [ 5 ] [ 7 ]
121
+ bb [ 0 ] [ 2 ] [ 1 ] [ 3 ]
122
+ tt [ 4 ] [ 6 ] [ 5 ] [ 7 ]
123
+ bbbb[ 0 ] [ 1 ]
124
+ bbtt[ 2 ] [ 3 ]
125
+ ttbb[ 4 ] [ 5 ]
126
+ tttt[ 6 ] [ 7 ]
127
+ */
128
+
129
+
130
+ inline void ndsrvp_u8_u32_expand8 (const unsigned long vs, uint* dst)
131
+ {
132
+ unsigned long vs820 = __nds__zunpkd820 (vs);
133
+ unsigned long vs831 = __nds__zunpkd831 (vs);
134
+ unsigned long vsbb = __nds__pkbb32 (vs831, vs820);
135
+ unsigned long vstt = __nds__pktt32 (vs831, vs820);
136
+ *(unsigned long *)dst = __nds__pkbb16 (0 , vsbb);
137
+ *(unsigned long *)(dst + 2 ) = __nds__pktt16 (0 , vsbb);
138
+ *(unsigned long *)(dst + 4 ) = __nds__pkbb16 (0 , vstt);
139
+ *(unsigned long *)(dst + 6 ) = __nds__pktt16 (0 , vstt);
140
+ }
141
+
142
+ // float replacement
143
+
144
// Element-wise sum of two groups of eight floats: c[i] = a[i] + b[i] for
// i = 0..7, evaluated in increasing index order.
inline void ndsrvp_f32_add8(const float* a, const float* b, float* c)
{
    for (int lane = 0; lane < 8; ++lane)
        c[lane] = a[lane] + b[lane];
}
155
+
156
+ /*
157
+ [1] [8] [23]
158
+ [24] [8]
159
+ */
160
+
161
+ inline void ndsrvp_f32_u8_mul8 (const float * a, const unsigned long b, float * c) // experimental, not bit exact
162
+ {
163
+ const int mask_frac = 0x007FFFFF ;
164
+ const int mask_sign = 0x7FFFFFFF ;
165
+ const int mask_lead = 0x40000000 ;
166
+ const int ofs_exp = 23 ;
167
+
168
+ uint32x2_t va01 = *(uint32x2_t *)a;
169
+ uint32x2_t va23 = *(uint32x2_t *)(a + 2 );
170
+ uint32x2_t va45 = *(uint32x2_t *)(a + 4 );
171
+ uint32x2_t va67 = *(uint32x2_t *)(a + 6 );
172
+
173
+ uint32x2_t vaexp01 = va01 >> ofs_exp;
174
+ uint32x2_t vaexp23 = va23 >> ofs_exp;
175
+ uint32x2_t vaexp45 = va45 >> ofs_exp;
176
+ uint32x2_t vaexp67 = va67 >> ofs_exp;
177
+
178
+ uint32x2_t vafrac01 = ((va01 << 7 ) & mask_sign) | mask_lead;
179
+ uint32x2_t vafrac23 = ((va23 << 7 ) & mask_sign) | mask_lead;
180
+ uint32x2_t vafrac45 = ((va45 << 7 ) & mask_sign) | mask_lead;
181
+ uint32x2_t vafrac67 = ((va67 << 7 ) & mask_sign) | mask_lead;
182
+
183
+ int16x4_t vb[2 ]; // fake signed for signed multiply
184
+ ndsrvp_u8_u16_eswap8 (b, (ushort*)vb);
185
+
186
+ vafrac01 = (uint32x2_t )__nds__kmmwb2_u ((long )vafrac01, (unsigned long )vb[0 ]);
187
+ vafrac23 = (uint32x2_t )__nds__kmmwt2_u ((long )vafrac23, (unsigned long )vb[0 ]);
188
+ vafrac45 = (uint32x2_t )__nds__kmmwb2_u ((long )vafrac45, (unsigned long )vb[1 ]);
189
+ vafrac67 = (uint32x2_t )__nds__kmmwt2_u ((long )vafrac67, (unsigned long )vb[1 ]);
190
+
191
+ uint32x2_t vaclz01 = __nds__v_clz32 (vafrac01) - 8 ;
192
+ uint32x2_t vaclz23 = __nds__v_clz32 (vafrac23) - 8 ;
193
+ uint32x2_t vaclz45 = __nds__v_clz32 (vafrac45) - 8 ;
194
+ uint32x2_t vaclz67 = __nds__v_clz32 (vafrac67) - 8 ;
195
+
196
+ vaexp01 += 8 - vaclz01;
197
+ vaexp23 += 8 - vaclz23;
198
+ vaexp45 += 8 - vaclz45;
199
+ vaexp67 += 8 - vaclz67;
200
+
201
+ vafrac01 <<= vaclz01;
202
+ vafrac23 <<= vaclz23;
203
+ vafrac45 <<= vaclz45;
204
+ vafrac67 <<= vaclz67;
205
+
206
+ *(uint32x2_t *)c = (vaexp01 << ofs_exp) | (vafrac01 & mask_frac);
207
+ *(uint32x2_t *)(c + 2 ) = (vaexp23 << ofs_exp) | (vafrac23 & mask_frac);
208
+ *(uint32x2_t *)(c + 4 ) = (vaexp45 << ofs_exp) | (vafrac45 & mask_frac);
209
+ *(uint32x2_t *)(c + 6 ) = (vaexp67 << ofs_exp) | (vafrac67 & mask_frac);
210
+ }
211
+
72
212
// saturate
73
213
74
214
// Generic int -> _Tp conversion: a plain cast with no range clamping.
template <typename _Tp>
static inline _Tp saturate_cast(int v)
{
    return (_Tp)v;
}
@@ -94,6 +234,26 @@ template<> inline short saturate_cast<short>(double v) { return saturate_cas
94
234
// float -> int: rounds with lrintf, then narrows the long result to int.
template <>
inline int saturate_cast<int>(float v)
{
    const long rounded = lrintf(v);
    return (int)rounded;
}
95
235
// double -> int: rounds with lrint, then narrows the long result to int.
template <>
inline int saturate_cast<int>(double v)
{
    const long rounded = lrint(v);
    return (int)rounded;
}
96
236
237
+ inline double cast_ptr_to_double (const uchar* v, int depth) {
238
+ switch (depth) {
239
+ case CV_8U: return (double )*(uchar*)v;
240
+ case CV_8S: return (double )*(char *)v;
241
+ case CV_16U: return (double )*(ushort*)v;
242
+ case CV_16S: return (double )*(short *)v;
243
+ case CV_32S: return (double )*(int *)v;
244
+ case CV_32F: return (double )*(float *)v;
245
+ case CV_64F: return (double )*(double *)v;
246
+ case CV_16F: return (double )*(float *)v;
247
+ default : return 0 ;
248
+ }
249
+ }
250
+
251
+ template <typename _Tp>
252
+ inline _Tp data_at (const uchar* data, int step, int y, int x, int cn)
253
+ {
254
+ return ((_Tp*)(data + y * step))[x * cn];
255
+ }
256
+
97
257
// align
98
258
99
259
inline long align (size_t v, int n)
0 commit comments