Skip to content

Commit 0a866ad

Browse files
committed
SepConv1d/2d for io_parallel w/ Latency strategy
1 parent a357b7a commit 0a866ad

File tree

6 files changed

+360
-14
lines changed

6 files changed

+360
-14
lines changed

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from hls4ml.model.layers import Conv1D, Conv2D
1+
from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
22
from hls4ml.model.optimizer import OptimizerPass
33
from hls4ml.model.types import Source
44

@@ -7,16 +7,27 @@ class GenerateConvIm2col(OptimizerPass):
77
'''Generates code for im2col step of 1D/2D convolution'''
88

99
def match(self, node):
10-
return isinstance(node, (Conv1D, Conv2D)) and node.model.config.get_config_value('IOType') == 'io_parallel'
10+
return (
11+
isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
12+
and node.model.config.get_config_value('IOType') == 'io_parallel'
13+
)
1114

1215
def transform(self, model, node):
13-
node_class = node.__class__.__name__
14-
if '1D' in node_class:
15-
self._generate_im2col_1d(node)
16-
elif '2D' in node_class:
17-
self._generate_im2col_2d(node)
16+
node_class = node.class_name
17+
if 'Separable' in node_class:
18+
if '1D' in node_class:
19+
self._generate_separable_im2col_1d(node)
20+
elif '2D' in node_class:
21+
self._generate_separable_im2col_2d(node)
22+
else:
23+
raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
1824
else:
19-
raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
25+
if '1D' in node_class:
26+
self._generate_im2col_1d(node)
27+
elif '2D' in node_class:
28+
self._generate_im2col_2d(node)
29+
else:
30+
raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
2031

2132
def _generate_im2col_1d(self, node):
2233
code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
@@ -49,3 +60,56 @@ def _generate_im2col_2d(self, node):
4960
)
5061

5162
node.set_attr('line_buffer_codegen', Source(code_str))
63+
64+
def _generate_separable_im2col_1d(self, node):
65+
dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
66+
str(node.get_attr('index')) + '_dw',
67+
node.get_attr('n_partitions'),
68+
node.get_input_variable().shape[0],
69+
node.get_input_variable().shape[1],
70+
kernel=node.get_attr('filt_width'),
71+
stride=node.get_attr('stride_width'),
72+
pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
73+
)
74+
75+
node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
76+
77+
pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
78+
str(node.get_attr('index')) + '_pw',
79+
node.get_attr('n_partitions'),
80+
node.get_output_variable().shape[0],
81+
node.get_input_variable().shape[1],
82+
kernel=1,
83+
)
84+
85+
node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
86+
87+
def _generate_separable_im2col_2d(self, node):
88+
dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
89+
str(node.get_attr('index')) + '_dw',
90+
node.get_attr('n_partitions'),
91+
node.get_input_variable().shape[0],
92+
node.get_input_variable().shape[1],
93+
node.get_input_variable().shape[2],
94+
kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
95+
stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
96+
pad=(
97+
node.get_attr('pad_top'),
98+
node.get_attr('pad_bottom'),
99+
node.get_attr('pad_left'),
100+
node.get_attr('pad_right'),
101+
),
102+
)
103+
104+
node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
105+
106+
pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
107+
str(node.get_attr('index')) + '_pw',
108+
node.get_attr('n_partitions'),
109+
node.get_output_variable().shape[0],
110+
node.get_output_variable().shape[1],
111+
node.get_input_variable().shape[2],
112+
kernel=(1, 1),
113+
)
114+
115+
node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,8 @@ def __init__(self):
254254
'{input}, {output}, {d}, {p}, {z}, {b});'
255255
)
256256

257-
sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
258-
sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
257+
sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
258+
sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
259259

260260

261261
class SeparableConv1DConfigTemplate(LayerConfigTemplate):
@@ -286,7 +286,10 @@ def format(self, node):
286286
params['index'] = str(node.index) + '_depthwise'
287287
params['weight_t'] = node.get_weights('depthwise').type
288288
params['bias_t'] = node.get_weights('zero_bias').type
289-
params['fill_fn'] = 'FillConv1DBuffer'
289+
if node.model.config.get_config_value('IOType') == 'io_parallel':
290+
params['fill_fn'] = f'fill_buffer_{node.index}_dw'
291+
else:
292+
params['fill_fn'] = 'FillConv1DBuffer'
290293

291294
if node.get_attr('unscaled'):
292295
params['scale_index_type'] = 'scale_index_unscaled'
@@ -323,7 +326,10 @@ def format(self, node):
323326
params['weight_t'] = node.get_weights('pointwise').type
324327
params['min_width'] = params['in_width']
325328
params['instructions'] = '0'
326-
params['fill_fn'] = 'FillConv1DBuffer'
329+
if node.model.config.get_config_value('IOType') == 'io_parallel':
330+
params['fill_fn'] = f'fill_buffer_{node.index}_pw'
331+
else:
332+
params['fill_fn'] = 'FillConv1DBuffer'
327333

328334
if node.get_attr('unscaled'):
329335
params['scale_index_type'] = 'scale_index_unscaled'
@@ -402,7 +408,10 @@ def format(self, node):
402408
params['nzeros'] = node.get_weights('depthwise').nzeros
403409
params['index'] = str(node.index) + '_depthwise'
404410
params['weight_t'] = node.get_weights('depthwise').type
405-
params['fill_fn'] = 'FillConv2DBuffer'
411+
if node.model.config.get_config_value('IOType') == 'io_parallel':
412+
params['fill_fn'] = f'fill_buffer_{node.index}_dw'
413+
else:
414+
params['fill_fn'] = 'FillConv2DBuffer'
406415

407416
if node.get_attr('unscaled_h'):
408417
params['scale_index_height_type'] = 'scale_index_unscaled'
@@ -447,7 +456,10 @@ def format(self, node):
447456
params['min_height'] = params['in_height']
448457
params['min_width'] = params['in_width']
449458
params['instructions'] = '0'
450-
params['fill_fn'] = 'FillConv2DBuffer'
459+
if node.model.config.get_config_value('IOType') == 'io_parallel':
460+
params['fill_fn'] = f'fill_buffer_{node.index}_pw'
461+
else:
462+
params['fill_fn'] = 'FillConv2DBuffer'
451463

452464
if node.get_attr('unscaled_h'):
453465
params['scale_index_height_type'] = 'scale_index_unscaled'
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#ifndef NNET_SEPARABLE_CONV1D_H_
2+
#define NNET_SEPARABLE_CONV1D_H_
3+
4+
#include "nnet_common.h"
5+
#include "nnet_conv1d.h"
6+
#include "nnet_sepconv1d_latency.h"
7+
//#include "nnet_sepconv1d_resource.h"
8+
#include <cstdlib>
9+
10+
namespace nnet {
11+
12+
template <class data_T, class res_T, typename CONFIG_T>
13+
void depthwise_conv_1d_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
14+
res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
15+
typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan],
16+
typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
17+
#pragma HLS INLINE recursive
18+
if (CONFIG_T::strategy == nnet::latency) {
19+
depthwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
20+
} else {
21+
assert("Resource strategy for DepthwiseConv1D is not supported." && false);
22+
}
23+
}
24+
25+
template <class data_T, class dw_res_T, class res_T, typename CONFIG_T>
26+
void separable_conv_1d_cl(data_T data[CONFIG_T::depthwise_config::in_width * CONFIG_T::depthwise_config::n_chan],
27+
res_T res[CONFIG_T::pointwise_config::out_width * CONFIG_T::pointwise_config::n_filt],
28+
typename CONFIG_T::depthwise_config::weight_t
29+
depthwise_weights[CONFIG_T::depthwise_config::filt_width * CONFIG_T::depthwise_config::n_chan],
30+
typename CONFIG_T::pointwise_config::weight_t
31+
pointwise_weights[CONFIG_T::pointwise_config::n_chan * CONFIG_T::pointwise_config::n_filt],
32+
typename CONFIG_T::depthwise_config::bias_t depthwise_biases[CONFIG_T::depthwise_config::n_chan],
33+
typename CONFIG_T::pointwise_config::bias_t pointwise_biases[CONFIG_T::pointwise_config::n_filt]) {
34+
#pragma HLS INLINE recursive
35+
36+
dw_res_T depthwise_res[CONFIG_T::depthwise_config::out_width * CONFIG_T::depthwise_config::n_filt];
37+
38+
depthwise_conv_1d_cl<data_T, dw_res_T, typename CONFIG_T::depthwise_config>(data, depthwise_res, depthwise_weights,
39+
depthwise_biases);
40+
pointwise_conv_1d_cl<dw_res_T, res_T, typename CONFIG_T::pointwise_config>(depthwise_res, res, pointwise_weights,
41+
pointwise_biases);
42+
}
43+
44+
} // namespace nnet
45+
46+
#endif
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#ifndef NNET_SEPARABLE_CONV1D_LATENCY_H_
2+
#define NNET_SEPARABLE_CONV1D_LATENCY_H_
3+
4+
#include "nnet_common.h"
5+
#include "nnet_mult.h"
6+
#include <cstdlib>
7+
8+
namespace nnet {
9+
10+
template <class data_T, class res_T, typename CONFIG_T>
11+
void depthwise_conv_1d_latency_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
12+
res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
13+
typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan],
14+
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {
15+
16+
constexpr unsigned mult_n_in = CONFIG_T::filt_width * CONFIG_T::n_chan;
17+
constexpr unsigned mult_n_acc = CONFIG_T::filt_width;
18+
constexpr unsigned mult_n_out = CONFIG_T::n_filt;
19+
20+
data_T data_buf[CONFIG_T::n_pixels][mult_n_in];
21+
#pragma HLS ARRAY_PARTITION variable=data_buf complete dim=0
22+
23+
typename CONFIG_T::accum_t mult[mult_n_in];
24+
#pragma HLS ARRAY_PARTITION variable=mult complete
25+
26+
typename CONFIG_T::accum_t acc[mult_n_out];
27+
#pragma HLS ARRAY_PARTITION variable=acc complete
28+
29+
#pragma HLS ARRAY_PARTITION variable=weights complete
30+
#pragma HLS ARRAY_PARTITION variable=biases complete
31+
32+
// Limit multipliers to control parallelization
33+
#pragma HLS ALLOCATION operation instances=mul limit=CONFIG_T::mult_config::multiplier_limit
34+
35+
PartitionLoop:
36+
for (int i_part = 0; i_part < CONFIG_T::n_partitions; i_part++) {
37+
#pragma HLS PIPELINE II=CONFIG_T::reuse_factor rewind
38+
39+
CONFIG_T::template fill_buffer<data_T, CONFIG_T>::fill_buffer(data, data_buf, i_part);
40+
41+
PixelLoop:
42+
for (unsigned i_pxl = 0; i_pxl < CONFIG_T::n_pixels; i_pxl++) {
43+
#pragma HLS UNROLL
44+
45+
data_T cache;
46+
47+
// Do the matrix-multiply
48+
Product:
49+
for (int i_in = 0; i_in < mult_n_in; i_in++) {
50+
#pragma HLS UNROLL
51+
cache = data_buf[i_pxl][i_in];
52+
mult[i_in] =
53+
CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(
54+
cache, weights[i_in]);
55+
}
56+
57+
// Initialize accumulator with input biases
58+
ResetAccum:
59+
for (int i_acc = 0; i_acc < mult_n_out; i_acc++) {
60+
#pragma HLS UNROLL
61+
acc[i_acc] = (typename CONFIG_T::accum_t)biases[i_acc];
62+
}
63+
64+
// Accumulate multiplication result
65+
Accum1:
66+
for (int i_in = 0; i_in < mult_n_acc; i_in++) {
67+
#pragma HLS UNROLL
68+
Accum2:
69+
for (int i_out = 0; i_out < mult_n_out; i_out++) {
70+
#pragma HLS UNROLL
71+
acc[i_out] += mult[i_in * mult_n_out + i_out];
72+
}
73+
}
74+
75+
// Cast to "res_t" type
76+
Result:
77+
for (int i_res = 0; i_res < mult_n_out; i_res++) {
78+
#pragma HLS UNROLL
79+
*(res++) = cast<data_T, res_T, typename CONFIG_T::mult_config>(acc[i_res]);
80+
}
81+
}
82+
}
83+
}
84+
85+
} // namespace nnet
86+
#endif
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#ifndef NNET_SEPARABLE_CONV2D_H_
2+
#define NNET_SEPARABLE_CONV2D_H_
3+
4+
#include "nnet_common.h"
5+
#include "nnet_conv2d.h"
6+
#include "nnet_sepconv2d_latency.h"
7+
//#include "nnet_sepconv2d_resource.h"
8+
#include <cstdlib>
9+
10+
namespace nnet {
11+
12+
template <class data_T, class res_T, typename CONFIG_T>
13+
void depthwise_conv_2d_cl(
14+
data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
15+
res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt],
16+
typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan],
17+
typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
18+
#pragma HLS INLINE recursive
19+
if (CONFIG_T::strategy == nnet::latency) {
20+
depthwise_conv_2d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
21+
} else {
22+
assert("Resource strategy for DepthwiseConv2D is not supported." && false);
23+
}
24+
}
25+
26+
template <class data_T, class dw_res_T, class res_T, typename CONFIG_T>
27+
void separable_conv_2d_cl(data_T data[CONFIG_T::depthwise_config::in_height * CONFIG_T::depthwise_config::in_width *
28+
CONFIG_T::depthwise_config::n_chan],
29+
res_T res[CONFIG_T::pointwise_config::out_height * CONFIG_T::pointwise_config::out_width *
30+
CONFIG_T::pointwise_config::n_filt],
31+
typename CONFIG_T::depthwise_config::weight_t
32+
depthwise_weights[CONFIG_T::depthwise_config::filt_height *
33+
CONFIG_T::depthwise_config::filt_width * CONFIG_T::depthwise_config::n_chan],
34+
typename CONFIG_T::pointwise_config::weight_t
35+
pointwise_weights[CONFIG_T::pointwise_config::n_chan * CONFIG_T::pointwise_config::n_filt],
36+
typename CONFIG_T::depthwise_config::bias_t depthwise_biases[CONFIG_T::depthwise_config::n_chan],
37+
typename CONFIG_T::pointwise_config::bias_t pointwise_biases[CONFIG_T::pointwise_config::n_filt]) {
38+
#pragma HLS INLINE recursive
39+
40+
dw_res_T depthwise_res[CONFIG_T::depthwise_config::out_height * CONFIG_T::depthwise_config::out_width *
41+
CONFIG_T::depthwise_config::n_filt];
42+
43+
depthwise_conv_2d_cl<data_T, dw_res_T, typename CONFIG_T::depthwise_config>(data, depthwise_res, depthwise_weights,
44+
depthwise_biases);
45+
pointwise_conv_2d_cl<dw_res_T, res_T, typename CONFIG_T::pointwise_config>(depthwise_res, res, pointwise_weights,
46+
pointwise_biases);
47+
}
48+
49+
} // namespace nnet
50+
51+
#endif

0 commit comments

Comments
 (0)