Skip to content

Commit c1ea360

Browse files
Jonathan-Shoemaker
authored and jmduarte committed
attempt to add support for conv1d transpose
add new files for conv1dtranspose resource clean up so that conv code is reached. Still need to get the actual implementation matching keras implement conv1dtranspose super inefficiently (gets correct answer though) try to fix indices to make code work make the c code work for conv1dtranspose reduce weight dimensions to properly reflect transposed kernel size clean up so that transpose filter width is passes around from config fix code such that simple transpose layer gets synthesized move variables out of loops, optimize slightly and add in alternative method of computation to compute by kernel (that option is not optimized as of now) add in conv1d transpose linebuffer format code. seems to work, unsure of if it is optimized yet trying to fix stream behavior get transpose compilation working mostly as expected. weird jump in latency from reuse 1 to 2 still exists initial conv2dtranspose addition. Output is permuted as of now. output in correct order. using large array to buffer output though fix up conv1dtranspose a bit to pad correctly. fix up stream instructions for both 1d and 2d transposes fix allowed reuse factors for transpose layers update to new conv methods for io_parallel. Still some issues with multiple filters as well as some padding issues clean up error with multiple filters and larger kernels optimize conv transpose resource to get it working reasonably well. may still have slight optimization left fix output to conv1d transpose resource add conv2dtranspose io_parallel implementation. Can still be optimized small changeup to data storage in conv1d parallel fix zero padding pass addition for transpose stream layers move transposing of weight matrix to resource_strategy for transpose layers change how stream loads in weights to be like parallel for conv transposes. 
unroll all stride steps completely fix output of 1d transpose parallel to be faster change 1d transpose weight input to be 2-dimensional (passed from python code) change 2d transpose weight input to be 3-dimensional (passed from python code) small changes to transposes Revert "fix nondefault project name handling (#626)". The commit breaks the Vivado Accelerator workflow, and the fix is unclear to me right now. This reverts commit e8f048a. steps towards getting integer inputs to work
1 parent a4b0e0c commit c1ea360

24 files changed

+1808
-177
lines changed

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,22 @@ def get_layer_mult_size(self, layer):
158158
n_out = layer.get_attr('n_out')
159159
return n_in, n_out
160160

161+
if 'Conv1DTranspose' in layer.class_name:
162+
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
163+
// layer.get_attr('stride_width')
164+
n_in = layer.get_attr('n_chan') * trfilt_width
165+
n_out = layer.get_attr('n_filt')
166+
return n_in, n_out
167+
168+
if 'Conv2DTranspose' in layer.class_name:
169+
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
170+
// layer.get_attr('stride_width')
171+
trfilt_height = (layer.get_attr('filt_height') + layer.get_attr('stride_height') - 1) \
172+
// layer.get_attr('stride_height')
173+
n_in = layer.get_attr('n_chan') * trfilt_height * trfilt_width
174+
n_out = layer.get_attr('n_filt')
175+
return n_in, n_out
176+
161177
if 'Conv1D' in layer.class_name:
162178
n_in = layer.get_attr('n_chan') * layer.get_attr('filt_width')
163179
n_out = layer.get_attr('n_filt')
@@ -713,7 +729,67 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
713729
" ) {{\n"
714730
).format(index=layer_idx)
715731
indent = ' '
732+
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
733+
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
734+
for pixel_idx, arr in enumerate(partition):
735+
buffer_stmts = []
736+
for j, v in enumerate(arr):
737+
if v == 0:
738+
val = '0'
739+
else:
740+
val = 'data[{}]'.format(int(v-1))
741+
buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
742+
generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
743+
generated_code += '\n' + indent * 2 + '}\n'
744+
745+
generated_code += indent + '}\n'
746+
generated_code += '};\n'
747+
748+
return generated_code
749+
750+
def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
751+
W, C = input_shape
752+
753+
tr_kernel = (kernel+stride-1)//stride
754+
755+
input_img = np.arange(1, W * C + 1)
756+
im_matrix = np.zeros((tr_kernel * C * out_w, ))
757+
758+
index = 0
759+
for i_ow in range(out_w):
760+
for i_kw in range(tr_kernel):
761+
for i_c in range(C):
762+
# input column is just the output column shifted
763+
input_col = i_ow - (tr_kernel-1) + i_kw
764+
if (input_col >= 0 and input_col < W):
765+
im_matrix[index] = input_img[input_col * C + i_c]
766+
else:
767+
im_matrix[index] = 0
768+
index += 1
769+
im_matrix = im_matrix.reshape(out_w, -1)
770+
return im_matrix
771+
772+
773+
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):
716774

775+
im2col_matrix = self._compute_conv1d_tr_im2col(
776+
(in_W, in_C),
777+
out_W,
778+
kernel,
779+
stride,
780+
)
781+
782+
generated_code = (
783+
"template<class data_T, typename CONFIG_T>\n"
784+
"class fill_buffer_{index} : public FillConv1DBuffer<data_T, CONFIG_T> {{\n"
785+
" public:\n"
786+
" static void fill_buffer(\n"
787+
" data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n"
788+
" data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
789+
" const unsigned partition\n"
790+
" ) {{\n"
791+
).format(index=layer_idx)
792+
indent = ' '
717793
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
718794
generated_code += indent * 2 + f'if (partition == {partition_idx:>3}) {{\n'
719795
for pixel_idx, arr in enumerate(partition):
@@ -862,6 +938,91 @@ def generate_conv2d_line_buffer_fn(
862938

863939
return generated_code
864940

941+
def _compute_conv2d_tr_im2col(self, input_shape, out_shape, kernel=(3, 3), stride=(1, 1)):
942+
H, W, C = input_shape
943+
kernel_h, kernel_w = kernel
944+
stride_h, stride_w = stride
945+
out_h, out_w = out_shape
946+
947+
tr_kernel_h = (kernel_h+stride_h-1)//stride_h
948+
tr_kernel_w = (kernel_w+stride_w-1)//stride_w
949+
950+
input_img = np.arange(1, H * W * C + 1)
951+
im_matrix = np.zeros((tr_kernel_h * tr_kernel_w * C * out_h * out_w, ))
952+
953+
index = 0
954+
for i_oh in range(out_h):
955+
for i_ow in range(out_w):
956+
for i_kh in range(tr_kernel_h):
957+
input_row = i_oh - (tr_kernel_h-1) + i_kh
958+
for i_kw in range(tr_kernel_w):
959+
for i_c in range(C):
960+
if (input_row < 0 or input_row >= H):
961+
im_matrix[index] = 0
962+
else:
963+
input_col = i_ow - (tr_kernel_w-1) + i_kw
964+
if (input_col >= 0 and input_col < W):
965+
im_matrix[index] = input_img[input_row * W * C + input_col * C + i_c]
966+
else:
967+
im_matrix[index] = 0
968+
index += 1
969+
970+
im_matrix = im_matrix.reshape(out_h * out_w, -1)
971+
return im_matrix
972+
973+
974+
def generate_conv2d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_H, in_W, in_C, out_H, out_W, kernel=(3, 3), stride=(1, 1)):
    """Generate the C++ ``fill_buffer_{index}`` class implementing the im2col
    line buffer for a Conv2DTranspose layer (io_parallel).

    Args:
        layer_idx: layer index, used to name the generated class.
        n_partitions: number of partitions the output pixels are split into;
            must evenly divide the number of output pixels (np.split requires
            an even split).
        in_H, in_W, in_C: input height, width and channel count.
        out_H, out_W: processed output height and width.
        kernel: kernel size; a scalar is broadcast to both dimensions.
        stride: stride; a scalar is broadcast to both dimensions.

    Returns:
        str: the generated C++ source code.
    """
    if isinstance(kernel, Iterable):
        kernel_height = kernel[0]
        kernel_width = kernel[1]
    else:
        kernel_height = kernel
        kernel_width = kernel

    if isinstance(stride, Iterable):
        stride_height = stride[0]
        stride_width = stride[1]
    else:
        stride_height = stride
        stride_width = stride

    # Bug fix: the output shape is (out_H, out_W). This previously passed
    # (out_W, out_W), which generated a wrong-sized im2col matrix (and thus
    # wrong buffer-fill code) whenever out_H != out_W.
    im2col_matrix = self._compute_conv2d_tr_im2col(
        (in_H, in_W, in_C),
        (out_H, out_W),
        (kernel_height, kernel_width),
        (stride_height, stride_width),
    )

    generated_code = (
        "template<class data_T, typename CONFIG_T>\n"
        "class fill_buffer_{index} : public FillConv2DBuffer<data_T, CONFIG_T> {{\n"
        "   public:\n"
        "    static void fill_buffer(\n"
        "        data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],\n"
        "        data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
        "        const unsigned partition\n"
        "    ) {{\n"
    ).format(index=layer_idx)
    indent = '    '

    # One 'if (partition == k)' branch per partition; each branch assigns
    # every buffer element for the pixels in that partition. A 0 entry in the
    # im2col matrix means the tap is outside the input and is filled with 0;
    # otherwise the 1-based index is converted back to a 0-based data[] index.
    for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
        generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
        for pixel_idx, arr in enumerate(partition):
            buffer_stmts = []
            for j, v in enumerate(arr):
                if v == 0:
                    val = '0'
                else:
                    val = 'data[{}]'.format(int(v - 1))
                buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
            generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
        generated_code += '\n' + indent * 2 + '}\n'

    generated_code += indent + '}\n'
    generated_code += '};\n'

    return generated_code
1025+
8651026
@model_optimizer()
8661027
def write_hls(self, model):
8671028
self.writer.write_hls(model)

hls4ml/backends/fpga/fpga_types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,15 @@ def __init__(self, type_converter):
326326

327327
class StaticWeightVariableDefinition(VariableDefinition):
328328
def definition_cpp(self, name_suffix='', as_reference=False):
    """Return the C++ array definition string for this weight variable.

    When ``keep_dims`` is positive, the first ``keep_dims`` axes of the
    shape are kept as separate C array dimensions and all remaining axes
    are flattened into a single trailing dimension. Otherwise the weights
    are emitted as one flat array of ``data_length`` elements.
    """
    if self.keep_dims > 0:
        # Keep the leading axes as individual array dimensions.
        dim_strs = ['[{cur_dim}]'.format(cur_dim=self.shape[d]) for d in range(self.keep_dims)]
        # Collapse every remaining axis into one trailing dimension.
        final_dim = 1
        for extent in self.shape[self.keep_dims:]:
            final_dim *= extent
        dim_strs.append('[{last_dim}]'.format(last_dim=final_dim))
        return '{type} {name}{sizes}'.format(type=self.type.name, name=self.name, sizes=''.join(dim_strs))
    return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length)
330339

331340
class StaticWeightVariableConverter(object):

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
from hls4ml.model.optimizer import OptimizerPass
2-
from hls4ml.model.layers import Conv1D, Conv2D
2+
from hls4ml.model.layers import Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose
33
from hls4ml.model.types import Source
44

55
class GenerateConvIm2col(OptimizerPass):
66
''' Generates code for im2col step of 1D/2D convolution '''
77
def match(self, node):
8-
return isinstance(node, (Conv1D, Conv2D)) and \
8+
return isinstance(node, (Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose)) and \
99
node.model.config.get_config_value('IOType') == 'io_parallel'
1010

1111
def transform(self, model, node):
1212
node_class = node.__class__.__name__
13-
if '1D' in node_class:
13+
if '1DTranspose' in node_class:
14+
self._generate_im2col_1d_transpose(node)
15+
elif '1D' in node_class:
1416
self._generate_im2col_1d(node)
17+
elif '2DTranspose' in node_class:
18+
self._generate_im2col_2d_transpose(node)
1519
elif '2D' in node_class:
1620
self._generate_im2col_2d(node)
1721
else:
@@ -30,6 +34,19 @@ def _generate_im2col_1d(self, node):
3034

3135
node.set_attr('line_buffer_codegen', Source(code_str))
3236

37+
def _generate_im2col_1d_transpose(self, node):
    """Generate the im2col line-buffer code for a Conv1DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    in_shape = node.get_input_variable().shape
    code_str = node.model.config.backend.generate_conv1d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],  # input width
        in_shape[1],  # input channels
        node.get_attr('proc_width'),
        kernel=node.get_attr('filt_width'),
        stride=node.get_attr('stride_width'),
    )

    node.set_attr('line_buffer_codegen', Source(code_str))
49+
3350
def _generate_im2col_2d(self, node):
3451
code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
3552
node.get_attr('index'),
@@ -43,3 +60,18 @@ def _generate_im2col_2d(self, node):
4360
)
4461

4562
node.set_attr('line_buffer_codegen', Source(code_str))
63+
64+
def _generate_im2col_2d_transpose(self, node):
    """Generate the im2col line-buffer code for a Conv2DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    in_shape = node.get_input_variable().shape
    code_str = node.model.config.backend.generate_conv2d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],  # input height
        in_shape[1],  # input width
        in_shape[2],  # input channels
        node.get_attr('proc_height'),
        node.get_attr('proc_width'),
        kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
        stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
    )

    node.set_attr('line_buffer_codegen', Source(code_str))

hls4ml/backends/vivado/passes/conv_same_pad.py

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from hls4ml.model.optimizer import OptimizerPass
2-
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D
2+
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, Conv1DTranspose, Conv2DTranspose
33

44
class InsertZeroPaddingBeforeConv1D(OptimizerPass):
55
name = 'insert_zero_padding_before_conv1d'
@@ -46,6 +46,53 @@ def transform(self, model, node):
4646

4747
return True
4848

49+
class InsertZeroPaddingBeforeConv1DTranspose(OptimizerPass):
    """Replace 'same' padding on a Conv1DTranspose with an explicit
    ZeroPadding1D layer inserted before it (io_stream only), switching the
    transposed convolution itself to 'valid' padding."""
    name = 'insert_zero_padding_before_conv1dtranspose'

    def match(self, node):
        # Only 'same'-padded transposed convolutions with a non-trivial
        # kernel need an explicit padding layer.
        is_match = isinstance(node, (Conv1DTranspose)) and \
            node.get_attr('padding') == 'same' and \
            node.get_attr('filt_width') != 1
        return is_match

    def transform(self, model, node):
        if model.config.get_config_value('IOType') != 'io_stream':
            return False

        # Geometry of the transposed convolution.
        # (Removed an unused fetch of 'pad_right' -- only 'pad_left' feeds
        # the computation below.)
        pad_left = node.get_attr('pad_left')
        convtr_out_width = node.get_attr('out_width')
        in_width = node.get_attr('in_width')
        stride_width = node.get_attr('stride_width')
        # Width of the stride-collapsed ("transposed") kernel.
        trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
            // node.get_attr('stride_width')

        # Extra columns required on the right so every output pixel is covered.
        add_right = (convtr_out_width + pad_left) // stride_width - (in_width - 1)

        out_width = in_width + add_right + trfilt_width - 1

        attrs = {
            'pad_left': trfilt_width - 1,
            'pad_right': add_right,
            'in_width': in_width,
            'out_width': out_width,
            'n_chan': node.get_attr('n_chan'),
            'data_format': node.get_attr('data_format', 'channels_last')
        }

        # Switch Conv1DTranspose to 'valid' now that padding is explicit.
        # NOTE(review): the original author flagged this as possibly wrong --
        # verify against Keras 'same' semantics.
        node.set_attr('padding', 'valid')
        node.set_attr('in_width', out_width)
        node.set_attr('pad_left', pad_left + (trfilt_width - 1) * stride_width)

        # Insert the new ZeroPadding1D node above the Conv1DTranspose.
        padding_layer = model.make_node('ZeroPadding1D', 'zp1d_' + node.name, attrs, node.inputs.copy())
        padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
        model.insert_node(padding_layer)

        return True
95+
4996
class InsertZeroPaddingBeforeConv2D(OptimizerPass):
5097
name = 'insert_zero_padding_before_conv2d'
5198

@@ -100,3 +147,66 @@ def transform(self, model, node):
100147
model.insert_node(padding_layer, before=node)
101148

102149
return True
150+
151+
class InsertZeroPaddingBeforeConv2DTranspose(OptimizerPass):
    """Replace 'same' padding on a Conv2DTranspose with an explicit
    ZeroPadding2D layer inserted before it (io_stream only), switching the
    transposed convolution itself to 'valid' padding."""
    name = 'insert_zero_padding_before_conv2dtranspose'

    def match(self, node):
        # NOTE(review): only filt_width is checked here (mirroring the 1D
        # pass); a layer with filt_width == 1 but filt_height > 1 would be
        # skipped -- confirm whether filt_height should also be tested.
        is_match = isinstance(node, Conv2DTranspose) and \
            node.get_attr('padding') == 'same' and \
            node.get_attr('filt_width') != 1
        return is_match

    def transform(self, model, node):
        if model.config.get_config_value('IOType') != 'io_stream':
            return False

        # Geometry of the transposed convolution.
        # (Removed unused fetches of 'pad_right'/'pad_bottom' -- only
        # 'pad_left'/'pad_top' feed the computation below.)
        pad_left = node.get_attr('pad_left')
        pad_top = node.get_attr('pad_top')
        convtr_out_width = node.get_attr('out_width')
        convtr_out_height = node.get_attr('out_height')
        in_width = node.get_attr('in_width')
        in_height = node.get_attr('in_height')
        stride_width = node.get_attr('stride_width')
        stride_height = node.get_attr('stride_height')
        # Height/width of the stride-collapsed ("transposed") kernel.
        trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
            // node.get_attr('stride_width')
        trfilt_height = (node.get_attr('filt_height') + node.get_attr('stride_height') - 1) \
            // node.get_attr('stride_height')

        # Extra rows/columns needed on the bottom/right so every output
        # pixel is covered.
        add_right = (convtr_out_width + pad_left) // stride_width - (in_width - 1)
        add_bottom = (convtr_out_height + pad_top) // stride_height - (in_height - 1)

        out_width = in_width + add_right + trfilt_width - 1
        out_height = in_height + add_bottom + trfilt_height - 1

        attrs = {
            'pad_left': trfilt_width - 1,
            'pad_right': add_right,
            'pad_top': trfilt_height - 1,
            'pad_bottom': add_bottom,
            'in_width': in_width,
            'in_height': in_height,
            'out_width': out_width,
            'out_height': out_height,
            'n_chan': node.get_attr('n_chan'),
            'data_format': node.get_attr('data_format', 'channels_last')
        }

        # Switch Conv2DTranspose to 'valid' now that padding is explicit.
        # NOTE(review): the original author flagged this as technically
        # untrue -- verify against Keras 'same' semantics.
        node.set_attr('padding', 'valid')
        node.set_attr('in_width', out_width)
        node.set_attr('in_height', out_height)
        node.set_attr('pad_left', pad_left + (trfilt_width - 1) * stride_width)
        node.set_attr('pad_top', pad_top + (trfilt_height - 1) * stride_height)

        # Insert the new ZeroPadding2D node above the Conv2DTranspose.
        padding_layer = model.make_node('ZeroPadding2D', 'zp2d_' + node.name, attrs, node.inputs.copy())
        padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
        model.insert_node(padding_layer, before=node)

        return True
212+

0 commit comments

Comments
 (0)