Skip to content

Commit fcafe5d

Browse files
change 2d transpose weight input to be 3-dimensional (passed from python code)
1 parent 1c345f7 commit fcafe5d

File tree

8 files changed

+90
-135
lines changed

8 files changed

+90
-135
lines changed

hls4ml/backends/vivado/passes/resource_strategy.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ class ApplyResourceStrategy(OptimizerPass):
77
''' Transposes the weights to use the dense_resource matrix multiply routine '''
88
def match(self, node):
99

10-
node_matches = isinstance(node, (Dense, Conv1D, SeparableConv1D, Conv2D, Conv1DTranspose, Conv2DTranspose, SeparableConv2D, LSTM, GRU))
10+
node_matches = isinstance(node, (Dense, Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, LSTM, GRU))
1111

1212
is_resource_strategy = node.get_attr('strategy', '').lower() == 'resource'
1313
already_transformed = node.get_attr('_weights_transposed', False) == True
@@ -19,30 +19,11 @@ def transform(self, model, node):
1919
node.weights['weight'].data = np.transpose(node.weights['weight'].data)
2020
elif isinstance(node, Conv1D):
2121
node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
22-
elif isinstance(node, Conv1DTranspose):
23-
pass
24-
# #(W,F,C) => (F,W,C)
25-
# node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[1, 0, 2])
26-
# # now split the kernel into stride width kernels (F, W, C) -> (S, F, W/S, C)
27-
# n_filts, kern_width, n_chan = node.weights['weight'].data.shape
28-
# new_weights = np.zeros((node.get_attr('stride_width'), n_filts, node.get_attr('trfilt_width'), n_chan))
29-
# for i_sw in range(node.get_attr('stride_width')):
30-
# for i_fw in range(node.get_attr('trfilt_width')):
31-
# filt_ind = i_sw + (node.get_attr('trfilt_width')-i_fw-1) * node.get_attr('stride_width')
32-
# for i_nf in range(n_filts):
33-
# for i_nc in range(n_chan):
34-
# if filt_ind < kern_width:
35-
# new_weights[i_sw][i_nf][i_fw][i_nc] = \
36-
# node.weights['weight'].data[i_nf][filt_ind][i_nc]
37-
# node.weights['weight'].data = new_weights
38-
# print("Updated shape:", node.weights['weight'].data.shape)
3922
elif isinstance(node, SeparableConv1D):
4023
node.weights['depthwise'].data = np.transpose(node.weights['depthwise'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
4124
node.weights['pointwise'].data = np.transpose(node.weights['pointwise'].data, axes=[2, 0, 1]) #(W,C,F) => (F,W,C)
4225
elif isinstance(node, Conv2D):
4326
node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[3, 0, 1, 2]) #(H,W,C,F) => (F,H,W,C)
44-
elif isinstance(node, Conv2DTranspose):
45-
node.weights['weight'].data = np.transpose(node.weights['weight'].data, axes=[2, 0, 1, 3]) #(H,W,F,C) => (F,H,W,C)
4627
elif isinstance(node, SeparableConv2D):
4728
node.weights['depthwise'].data = np.transpose(node.weights['depthwise'].data, axes=[3, 0, 1, 2]) #(H,W,C,F) => (F,H,W,C)
4829
node.weights['pointwise'].data = np.transpose(node.weights['pointwise'].data, axes=[3, 0, 1, 2]) #(H,W,C,F) => (F,H,W,C)

hls4ml/converters/keras/convolution.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ def parse_conv2dtranspose_layer(keras_layer, input_names, input_shapes, data_rea
138138
layer['stride_height'] = keras_layer['config']['strides'][0]
139139
layer['stride_width'] = keras_layer['config']['strides'][1]
140140
layer['padding'] = keras_layer['config']['padding']
141+
layer['trfilt_height'] = (layer['filt_height'] + layer['stride_height'] - 1)//layer['stride_height']
142+
layer['trfilt_width'] = (layer['filt_width'] + layer['stride_width'] - 1)//layer['stride_width']
141143

142144
(
143145
layer['out_height'],

hls4ml/model/layers.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,8 +509,32 @@ def initialize(self):
509509
else:
510510
shape = [self.attributes['n_filt'], self.attributes['out_height'], self.attributes['out_width']]
511511
dims = ['N_FILT_{}'.format(self.index), 'OUT_HEIGHT_{}'.format(self.index), 'OUT_WIDTH_{}'.format(self.index)]
512+
513+
data = self.model.get_weights_data(self.name, 'kernel')
514+
# now we transform the entire kernel
515+
516+
#(H,W,F,C) => (F,H,W,C)
517+
data = np.transpose(data, axes=[2, 0, 1, 3])
518+
# now split the kernel into per-stride sub-kernels: (F, H, W, C) -> (Sh, Sw, F, ceil(H/Sh), ceil(W/Sw), C)
519+
n_filts, kern_height, kern_width, n_chan = data.shape
520+
new_weights = np.zeros((self.attributes['stride_height'], self.attributes['stride_width'], \
521+
n_filts, self.attributes['trfilt_height'], self.attributes['trfilt_width'], n_chan))
522+
for i_sh in range(self.attributes['stride_height']):
523+
for i_sw in range(self.attributes['stride_width']):
524+
for i_fh in range(self.attributes['trfilt_height']):
525+
for i_fw in range(self.attributes['trfilt_width']):
526+
filt_h_ind = i_sh + (self.attributes['trfilt_height']-i_fh-1)*self.attributes['stride_height']
527+
filt_w_ind = i_sw + (self.attributes['trfilt_width']-i_fw-1)*self.attributes['stride_width']
528+
for i_nf in range(n_filts):
529+
for i_nc in range(n_chan):
530+
if filt_h_ind < kern_height and filt_w_ind < kern_width:
531+
new_weights[i_sh][i_sw][i_nf][i_fh][i_fw][i_nc] = \
532+
data[i_nf][filt_h_ind][filt_w_ind][i_nc]
533+
data = new_weights
534+
512535
self.add_output_variable(shape, dims)
513-
self.add_weights(quantizer=self.get_attr('weight_quantizer'))
536+
self.add_weights_variable(name='weight', var_name='w{index}', \
537+
data=data, quantizer=self.get_attr('weight_quantizer'), keep_dims=2)
514538
self.add_bias(quantizer=self.get_attr('bias_quantizer'))
515539

516540

hls4ml/templates/vivado/nnet_utils/nnet_conv2dtranspose.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ template<class data_T, class res_T, typename CONFIG_T>
4444
void conv_2d_transpose_cl(
4545
data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
4646
res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt],
47-
typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
47+
typename CONFIG_T::weight_t weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
48+
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
49+
],
4850
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
4951
)
5052
{

hls4ml/templates/vivado/nnet_utils/nnet_conv2dtranspose_resource.h

Lines changed: 5 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ template<class data_T, class res_T, typename CONFIG_T>
1010
void conv_2d_transpose_resource_cl(
1111
data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],
1212
res_T res[CONFIG_T::out_height * CONFIG_T::out_width * CONFIG_T::n_filt],
13-
typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
13+
typename CONFIG_T::weight_t weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
14+
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
15+
],
1416
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
1517
)
1618
{
@@ -32,51 +34,7 @@ void conv_2d_transpose_resource_cl(
3234
typename CONFIG_T::accum_t acc[CONFIG_T::n_pixels][mult_n_out][CONFIG_T::stride_height][CONFIG_T::stride_width];
3335
#pragma HLS ARRAY_PARTITION variable=acc complete dim=0
3436

35-
typename CONFIG_T::weight_t trfilt_weights[
36-
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
37-
][CONFIG_T::stride_height][CONFIG_T::stride_width];
38-
39-
//pull out the individual filter weights (split kernel into stride_height x stride_width kernels)
40-
for (unsigned i_sh = 0; i_sh < CONFIG_T::stride_height; i_sh++) {
41-
#pragma HLS UNROLL
42-
for (unsigned i_sw = 0; i_sw < CONFIG_T::stride_width; i_sw++) {
43-
#pragma HLS UNROLL
44-
for (unsigned i_fh = 0; i_fh < CONFIG_T::trfilt_height; i_fh++) {
45-
#pragma HLS UNROLL
46-
for (unsigned i_fw = 0; i_fw < CONFIG_T::trfilt_width; i_fw++) {
47-
#pragma HLS UNROLL
48-
unsigned filt_h_ind = i_sh + (CONFIG_T::trfilt_height-i_fh-1)*CONFIG_T::stride_height;
49-
unsigned filt_w_ind = i_sw + (CONFIG_T::trfilt_width-i_fw-1)*CONFIG_T::stride_width;
50-
for (unsigned i_nf = 0; i_nf < CONFIG_T::n_filt; i_nf++) {
51-
#pragma HLS UNROLL
52-
for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
53-
#pragma HLS UNROLL
54-
if (filt_h_ind < CONFIG_T::filt_height && filt_w_ind < CONFIG_T::filt_width) {
55-
trfilt_weights[
56-
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_height * CONFIG_T::trfilt_width +
57-
i_fh * CONFIG_T::trfilt_width * CONFIG_T::n_chan +
58-
i_fw * CONFIG_T::n_chan + i_nc
59-
][i_sh][i_sw] = weights[
60-
i_nf * CONFIG_T::n_chan * CONFIG_T::filt_height * CONFIG_T::filt_width +
61-
filt_h_ind * CONFIG_T::n_chan * CONFIG_T::filt_width +
62-
filt_w_ind * CONFIG_T::n_chan + i_nc
63-
];
64-
}
65-
else {
66-
trfilt_weights[
67-
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_height * CONFIG_T::trfilt_width +
68-
i_fh * CONFIG_T::trfilt_width * CONFIG_T::n_chan +
69-
i_fw * CONFIG_T::n_chan + i_nc
70-
][i_sh][i_sw] = 0;
71-
}
72-
}
73-
}
74-
}
75-
}
76-
}
77-
}
78-
79-
#pragma HLS ARRAY_RESHAPE variable=trfilt_weights block factor=block_factor dim=1
37+
#pragma HLS ARRAY_RESHAPE variable=weights block factor=block_factor dim=3
8038

8139
PartitionLoop:
8240
for (unsigned i_part = 0; i_part < CONFIG_T::n_partitions; i_part++) {
@@ -127,7 +85,7 @@ void conv_2d_transpose_resource_cl(
12785

12886
acc[i_pxl][i_out][i_sh][i_sw] += static_cast<typename CONFIG_T::accum_t>(
12987
CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(
130-
data_buf[i_pxl][i_in], trfilt_weights[i_w][i_sh][i_sw]
88+
data_buf[i_pxl][i_in], weights[i_sh][i_sw][i_w]
13189
)
13290
);
13391
}

hls4ml/templates/vivado/nnet_utils/nnet_conv2dtranspose_stream.h

Lines changed: 11 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -68,64 +68,14 @@ void shift_line_buffer_tr(const data_T& in_elem,
6868
kernel_shift_tr_2d<data_T, CONFIG_T>(shift_buffer, kernel_window);
6969
}
7070

71-
template<typename CONFIG_T>
72-
void load_trfilt_weights(
73-
typename CONFIG_T::weight_t trfilt_weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
74-
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
75-
],
76-
typename CONFIG_T::weight_t weights[
77-
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan * CONFIG_T::n_filt
78-
]
79-
)
80-
{
81-
#pragma HLS INLINE
82-
//pull out the individual filter weights (split kernel into stride_height x stride_width kernels)
83-
TrfiltWeightsLoop: for (unsigned i_sh = 0; i_sh < CONFIG_T::stride_height; i_sh++) {
84-
#pragma HLS UNROLL
85-
for (unsigned i_sw = 0; i_sw < CONFIG_T::stride_width; i_sw++) {
86-
#pragma HLS UNROLL
87-
for (unsigned i_fh = 0; i_fh < CONFIG_T::trfilt_height; i_fh++) {
88-
#pragma HLS UNROLL
89-
for (unsigned i_fw = 0; i_fw < CONFIG_T::trfilt_width; i_fw++) {
90-
#pragma HLS UNROLL
91-
unsigned filt_h_ind = i_sh + (CONFIG_T::trfilt_height-i_fh-1)*CONFIG_T::stride_height;
92-
unsigned filt_w_ind = i_sw + (CONFIG_T::trfilt_width-i_fw-1)*CONFIG_T::stride_width;
93-
for (unsigned i_nf = 0; i_nf < CONFIG_T::n_filt; i_nf++) {
94-
#pragma HLS UNROLL
95-
for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
96-
#pragma HLS UNROLL
97-
if (filt_h_ind < CONFIG_T::filt_height && filt_w_ind < CONFIG_T::filt_width) {
98-
trfilt_weights[i_sh][i_sw][
99-
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_height * CONFIG_T::trfilt_width +
100-
i_fh * CONFIG_T::trfilt_width * CONFIG_T::n_chan +
101-
i_fw * CONFIG_T::n_chan + i_nc
102-
]= weights[
103-
i_nf * CONFIG_T::n_chan * CONFIG_T::filt_height * CONFIG_T::filt_width +
104-
filt_h_ind * CONFIG_T::n_chan * CONFIG_T::filt_width +
105-
filt_w_ind * CONFIG_T::n_chan + i_nc
106-
];
107-
}
108-
else {
109-
trfilt_weights[i_sh][i_sw][
110-
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_height * CONFIG_T::trfilt_width +
111-
i_fh * CONFIG_T::trfilt_width * CONFIG_T::n_chan +
112-
i_fw * CONFIG_T::n_chan + i_nc
113-
] = 0;
114-
}
115-
}
116-
}
117-
}
118-
}
119-
}
120-
}
121-
}
122-
12371
template<class data_T, class res_T, typename CONFIG_T>
12472
void compute_output_buffer_tr_2d(
12573
const data_T& in_elem,
12674
ap_shift_reg<typename data_T::value_type, CONFIG_T::in_width> line_buffer[MAX(CONFIG_T::trfilt_height-1, 1)][CONFIG_T::n_chan],
12775
hls::stream<res_T> &res_stream,
128-
typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan * CONFIG_T::n_filt],
76+
typename CONFIG_T::weight_t weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
77+
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
78+
],
12979
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
13080
)
13181
{
@@ -138,12 +88,6 @@ void compute_output_buffer_tr_2d(
13888
static typename data_T::value_type kernel_data[CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan];
13989
#pragma HLS ARRAY_PARTITION variable=kernel_data complete
14090

141-
static typename CONFIG_T::weight_t trfilt_weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
142-
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
143-
];
144-
145-
load_trfilt_weights<CONFIG_T>(trfilt_weights, weights);
146-
14791
typename res_T::value_type res_out[CONFIG_T::n_filt];
14892
#pragma HLS ARRAY_PARTITION variable=res_out complete dim = 0
14993

@@ -168,11 +112,11 @@ void compute_output_buffer_tr_2d(
168112

169113
if (CONFIG_T::strategy == nnet::latency) {
170114
dense_latency<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(
171-
kernel_data, res_out, trfilt_weights[h_idx][w_idx], biases
115+
kernel_data, res_out, weights[h_idx][w_idx], biases
172116
);
173117
} else {
174118
dense_resource<typename data_T::value_type, typename res_T::value_type, typename CONFIG_T::mult_config>(
175-
kernel_data, res_out, trfilt_weights[h_idx][w_idx], biases
119+
kernel_data, res_out, weights[h_idx][w_idx], biases
176120
);
177121
}
178122

@@ -182,10 +126,7 @@ void compute_output_buffer_tr_2d(
182126
(pX*CONFIG_T::stride_width+w_idx)*CONFIG_T::stride_height*CONFIG_T::n_filt +
183127
h_idx*CONFIG_T::n_filt + i_ic
184128
] = res_out[i_ic];
185-
// res_pack[i_ic] = res_out[i_ic];
186129
}
187-
// res_stream.write(res_pack);
188-
189130
}
190131
}
191132

@@ -226,7 +167,9 @@ template<class data_T, class res_T, typename CONFIG_T>
226167
void conv_2d_transpose_buffer_cl(
227168
hls::stream<data_T> &data,
228169
hls::stream<res_T> &res,
229-
typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
170+
typename CONFIG_T::weight_t weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
171+
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
172+
],
230173
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
231174
)
232175
{
@@ -248,7 +191,9 @@ template<class data_T, class res_T, typename CONFIG_T>
248191
void conv_2d_transpose_cl(
249192
hls::stream<data_T> &data,
250193
hls::stream<res_T> &res,
251-
typename CONFIG_T::weight_t weights[CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
194+
typename CONFIG_T::weight_t weights[CONFIG_T::stride_height][CONFIG_T::stride_width][
195+
CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_filt * CONFIG_T::n_chan
196+
],
252197
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]
253198
)
254199
{

hls4ml/templates/vivado/nnet_utils/nnet_helpers.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,47 @@ void load_weights_from_txt(T w[DIM_1][DIM_2], const char* fname) {
103103
}
104104
}
105105

106+
template<class T, size_t DIM_1, size_t DIM_2, size_t DIM_3>
107+
void load_weights_from_txt(T w[DIM_1][DIM_2][DIM_3], const char* fname) {
108+
109+
std::string full_path = std::string(WEIGHTS_DIR) + "/" + std::string(fname);
110+
std::ifstream infile(full_path.c_str(), std::ios::binary);
111+
112+
if (infile.fail()) {
113+
std::cerr << "ERROR: file " << std::string(fname) << " does not exist" << std::endl;
114+
exit(1);
115+
}
116+
117+
std::string line;
118+
if (std::getline(infile, line)) {
119+
std::istringstream iss(line);
120+
std::string token;
121+
122+
size_t i = 0;
123+
size_t j = 0;
124+
size_t k = 0;
125+
size_t tot = 0;
126+
while(std::getline(iss, token, ',')) {
127+
std::istringstream(token) >> w[i][j][k];
128+
k++;
129+
if (k == DIM_3) {
130+
k = 0;
131+
j++;
132+
if (j == DIM_2) {
133+
j = 0;
134+
i++;
135+
}
136+
}
137+
tot++;
138+
}
139+
140+
if (DIM_1*DIM_2*DIM_3 != tot) {
141+
std::cerr << "ERROR: Expected " << DIM_1*DIM_2*DIM_3 << " values";
142+
std::cerr << " but read only " << tot << " values" << std::endl;
143+
}
144+
}
145+
}
146+
106147
template<class T, size_t SIZE>
107148
void load_compressed_weights_from_txt(T *w, const char* fname) {
108149

hls4ml/writer/vivado_writer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ def write_project_cpp(self, model):
149149
dim_info = w.data_length
150150
if w.keep_dims == 1:
151151
dim_info = '{}, {}'.format(w.shape[0], w.data_length//w.shape[0])
152+
if w.keep_dims == 2:
153+
dim_info = '{}, {}, {}'.format(w.shape[0], w.shape[1], w.data_length//(w.shape[0]*w.shape[1]))
152154
newline += indent + ' nnet::load_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, dim_info, w.name, w.name)
153155

154156
#Add input/output type

0 commit comments

Comments
 (0)