@@ -182,29 +182,34 @@ __kernel void depth_conv2d_3x3(
182
182
in_pos_in_one_block .y + dilation >= input_height )
183
183
));
184
184
185
- CL_DTYPE4 filters [9 ];
186
- filters [0 ] =
185
+ CL_DTYPE4 filters_0 =
187
186
READ_IMG_TYPE (CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x , filter_y ));
188
- filters [ 1 ] = READ_IMG_TYPE (
187
+ CL_DTYPE4 filters_1 = READ_IMG_TYPE (
189
188
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 1 , filter_y ));
190
- filters [ 2 ] = READ_IMG_TYPE (
189
+ CL_DTYPE4 filters_2 = READ_IMG_TYPE (
191
190
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 2 , filter_y ));
192
- filters [ 3 ] = READ_IMG_TYPE (
191
+ CL_DTYPE4 filters_3 = READ_IMG_TYPE (
193
192
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x , filter_y + 1 ));
194
- filters [ 4 ] = READ_IMG_TYPE (
193
+ CL_DTYPE4 filters_4 = READ_IMG_TYPE (
195
194
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 1 , filter_y + 1 ));
196
- filters [ 5 ] = READ_IMG_TYPE (
195
+ CL_DTYPE4 filters_5 = READ_IMG_TYPE (
197
196
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 2 , filter_y + 1 ));
198
- filters [ 6 ] = READ_IMG_TYPE (
197
+ CL_DTYPE4 filters_6 = READ_IMG_TYPE (
199
198
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x , filter_y + 2 ));
200
- filters [ 7 ] = READ_IMG_TYPE (
199
+ CL_DTYPE4 filters_7 = READ_IMG_TYPE (
201
200
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 1 , filter_y + 2 ));
202
- filters [ 8 ] = READ_IMG_TYPE (
201
+ CL_DTYPE4 filters_8 = READ_IMG_TYPE (
203
202
CL_DTYPE_CHAR , filter , SAMPLER , (int2 )(filter_x + 2 , filter_y + 2 ));
204
203
205
- for (int i = 0 ; i < 9 ; i ++ ) {
206
- output += inputs [i ] * filters [i ];
207
- }
204
+ output += inputs [0 ] * filters_0 ;
205
+ output += inputs [1 ] * filters_1 ;
206
+ output += inputs [2 ] * filters_2 ;
207
+ output += inputs [3 ] * filters_3 ;
208
+ output += inputs [4 ] * filters_4 ;
209
+ output += inputs [5 ] * filters_5 ;
210
+ output += inputs [6 ] * filters_6 ;
211
+ output += inputs [7 ] * filters_7 ;
212
+ output += inputs [8 ] * filters_8 ;
208
213
209
214
CL_DTYPE4 alpha0 ;
210
215
#ifdef PRELU_CH //{
0 commit comments