Commit 5299392
Merge branch 'main' into GRUv1
2 parents: 774072f + 4f4b164

24 files changed: +741, -288 lines

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ repos:
         args: ["--profile", "black", --line-length=125]

 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.2
+    rev: v3.16.0
     hooks:
     -   id: pyupgrade
         args: ["--py36-plus"]
@@ -41,7 +41,7 @@ repos:
     -   id: setup-cfg-fmt

 -   repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.0
     hooks:
     -   id: flake8
         exclude: docs/conf.py

Jenkinsfile

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,7 @@
 pipeline {
     agent {
         docker {
-            image 'vivado-el7:3'
+            image 'vivado-alma9:1'
             args '-v /data/Xilinx:/data/Xilinx'
         }
     }
@@ -14,8 +14,9 @@ pipeline {
         steps {
             dir(path: 'test') {
                 sh '''#!/bin/bash --login
-                conda activate hls4ml-py38
-                pip install tensorflow pyparsing
+                conda activate hls4ml-py310
+                conda install -y jupyterhub pydot graphviz pytest pytest-cov
+                pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/google/qkeras.git pyparsing
                 pip install -U ../ --user
                 ./convert-keras-models.sh -x -f keras-models.txt
                 pip uninstall hls4ml -y'''

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 2 additions & 2 deletions
@@ -685,7 +685,7 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke

     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.

     Args:
@@ -782,7 +782,7 @@ def generate_conv2d_line_buffer_fn(

     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.

     Args:
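For orientation, the "trick" the docstring describes is unrolling the im2col gather at code-generation time. A minimal NumPy sketch of the underlying im2col transformation (an illustration of the concept only, not the HLS code these functions emit):

    import numpy as np

    def im2col_1d(x, kernel=3, stride=1, pad=(1, 1)):
        """x has shape (in_W, in_C); returns (out_W, kernel * in_C)."""
        x = np.pad(x, (pad, (0, 0)))  # zero-pad along the width axis only
        out_w = (x.shape[0] - kernel) // stride + 1
        # Each output position gathers the flattened input window it needs, so the
        # convolution becomes one matrix product:
        #     im2col_1d(x) @ weights.reshape(kernel * in_C, n_filt)
        return np.stack([x[i * stride : i * stride + kernel].ravel() for i in range(out_w)])

Because the gathered indices depend only on the layer parameters (input size, kernel size, stride, padding), the backend can emit them as straight-line code once per layer.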

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 72 additions & 8 deletions
@@ -1,4 +1,4 @@
-from hls4ml.model.layers import Conv1D, Conv2D
+from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
 from hls4ml.model.optimizer import OptimizerPass
 from hls4ml.model.types import Source

@@ -7,16 +7,27 @@ class GenerateConvIm2col(OptimizerPass):
     '''Generates tcode for im2col step of 1D/2d convolution'''

     def match(self, node):
-        return isinstance(node, (Conv1D, Conv2D)) and node.model.config.get_config_value('IOType') == 'io_parallel'
+        return (
+            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+        )

     def transform(self, model, node):
-        node_class = node.__class__.__name__
-        if '1D' in node_class:
-            self._generate_im2col_1d(node)
-        elif '2D' in node_class:
-            self._generate_im2col_2d(node)
+        node_class = node.class_name
+        if 'Separable' in node_class:
+            if '1D' in node_class:
+                self._generate_separable_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_separable_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
         else:
-            raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
+            if '1D' in node_class:
+                self._generate_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')

     def _generate_im2col_1d(self, node):
         code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
@@ -49,3 +60,56 @@ def _generate_im2col_2d(self, node):
         )

         node.set_attr('line_buffer_codegen', Source(code_str))
+
+    def _generate_separable_im2col_1d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=node.get_attr('filt_width'),
+            stride=node.get_attr('stride_width'),
+            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=1,
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
+
+    def _generate_separable_im2col_2d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
+            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
+            pad=(
+                node.get_attr('pad_top'),
+                node.get_attr('pad_bottom'),
+                node.get_attr('pad_left'),
+                node.get_attr('pad_right'),
+            ),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_output_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(1, 1),
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
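The two generated buffer-fill functions per layer reflect how a separable convolution factorizes: a depthwise pass (one kernel per input channel, the '_dw' buffer) followed by a 1x1 pointwise pass that mixes channels (the '_pw' buffer), which is why the pointwise call above uses the output width and kernel=1. A NumPy sketch of the decomposition (illustrative shapes only; no padding, stride 1):

    import numpy as np

    def separable_conv1d(x, dw_w, pw_w):
        """x: (in_W, in_C); dw_w: (filt_W, in_C); pw_w: (in_C, n_filt)."""
        filt_w = dw_w.shape[0]
        out_w = x.shape[0] - filt_w + 1
        # Depthwise: each channel is convolved with its own kernel.
        dw_out = np.stack([(x[i : i + filt_w] * dw_w).sum(axis=0) for i in range(out_w)])
        # Pointwise: a kernel-size-1 convolution is just a channel-mixing matmul.
        return dw_out @ pw_w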

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 2 additions & 2 deletions
@@ -274,7 +274,7 @@ def init_conv1d(self, layer):
         # - combination - at compile-time, the decision between Winograd and im2col is made
         # - im2col - specifically use im2col
         # - Winograd - use Winograd, if possible
-        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'combination'))
+        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'im2col'))

         layer.set_attr(
             'n_partitions', 1
@@ -305,7 +305,7 @@ def init_conv2d(self, layer):
         # - combination - at compile-time, the decision between Winograd and im2col is made
         # - im2col - specifically use im2col
         # - Winograd - use Winograd, if possible
-        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'combination'))
+        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'im2col'))

         layer.set_attr(
             'n_partitions', 1
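With the Quartus default switched from 'combination' to 'im2col', Winograd now has to be requested explicitly through the layer config. A sketch of how a user might opt back in (the layer name 'conv1' and the model file are hypothetical; granularity='name' is what exposes the per-layer keys):

    import hls4ml
    from tensorflow import keras

    model = keras.models.load_model('model.h5')  # hypothetical model file
    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
    # Accepted values, per the comment above: 'im2col', 'Winograd', 'combination'
    config['LayerName']['conv1']['Implementation'] = 'Winograd'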

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 21 additions & 6 deletions
@@ -254,8 +254,8 @@ def __init__(self):
             '{input}, {output}, {d}, {p}, {z}, {b});'
         )

-sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
-sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
+sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
+sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']


 class SeparableConv1DConfigTemplate(LayerConfigTemplate):
@@ -286,7 +286,10 @@ def format(self, node):
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
         params['bias_t'] = node.get_weights('zero_bias').type
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'

         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -317,13 +320,17 @@ def format(self, node):

         params['filt_width'] = 1
         params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'

         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -402,7 +409,10 @@ def format(self, node):
         params['nzeros'] = node.get_weights('depthwise').nzeros
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'

         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
@@ -440,14 +450,19 @@ def format(self, node):

         params['filt_height'] = params['filt_width'] = 1
         params['stride_height'] = params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
+        params['pad_top'] = params['pad_bottom'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_height'] = params['in_height']
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'

         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
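Note how the io_parallel branches wire these templates to the codegen pass above: GenerateConvIm2col passes the layer index plus a '_dw' or '_pw' suffix into the line-buffer generator, and the f-strings here must reproduce exactly the resulting function name. A sketch of the implied naming contract (illustrative, not the actual generator):

    def fill_fn_name(layer_index: int, phase: str) -> str:
        """phase is 'dw' (depthwise) or 'pw' (pointwise)."""
        assert phase in ('dw', 'pw')
        return f'fill_buffer_{layer_index}_{phase}'  # e.g. 'fill_buffer_4_dw'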

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 44 additions & 35 deletions
@@ -18,8 +18,6 @@
     Embedding,
     GarNet,
     GarNetStack,
-    GlobalPooling1D,
-    GlobalPooling2D,
     Layer,
     Pooling1D,
     Pooling2D,
@@ -31,7 +29,6 @@
 from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
 from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType
 from hls4ml.report import parse_vivado_report
-from hls4ml.utils.fixed_point_utils import ceil_log2


 class VivadoBackend(FPGABackend):
@@ -295,9 +292,20 @@ def init_sepconv1d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_width = layer.get_output_variable().shape[0]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(1, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

         # Set the output type of the depthwise phase
@@ -350,9 +358,21 @@ def init_sepconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

         # Set the output type of the depthwise phase
@@ -373,42 +393,31 @@ def init_depconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
-        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)

-    def _set_pooling_accum_t(self, layer, pool_size):
-        extra_bits = ceil_log2(pool_size)
-        accum_t = layer.get_attr('accum_t')
-        accum_t.precision.width += extra_bits * 2
-        if isinstance(accum_t.precision, FixedPrecisionType):
-            accum_t.precision.integer += extra_bits
+        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

     @layer_optimizer(Pooling1D)
     def init_pooling1d(self, layer):
-        pool_size = layer.get_attr('pool_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

     @layer_optimizer(Pooling2D)
     def init_pooling2d(self, layer):
-        pool_size = layer.get_attr('pool_height') * layer.get_attr('pool_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

-    @layer_optimizer(GlobalPooling1D)
-    def init_global_pooling1d(self, layer):
-        pool_size = layer.get_attr('n_in')
-        self._set_pooling_accum_t(layer, pool_size)
-
-    @layer_optimizer(GlobalPooling2D)
-    def init_global_pooling2d(self, layer):
-        pool_size = layer.get_attr('in_height') * layer.get_attr('in_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
     @layer_optimizer(Softmax)
     def init_softmax(self, layer):
         if layer.model.config.get_config_value('IOType') == 'io_parallel':
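The ParallelizationFactor block repeated in the three initializers above enforces one rule: n_partitions must evenly divide the number of output pixels, so an invalid user-chosen factor is snapped to the nearest valid one. A standalone sketch of the assumed semantics (get_valid_conv_partition_splits and get_closest_reuse_factor are the real helpers; this divisor-based model is an approximation for illustration):

    def n_partitions(out_height: int, out_width: int, pf: int) -> int:
        total = out_height * out_width
        valid = [d for d in range(1, total + 1) if total % d == 0]
        if pf not in valid:
            pf = min(valid, key=lambda v: abs(v - pf))  # snap to the closest valid factor
        return total // pf

    assert n_partitions(4, 4, 8) == 2  # pf=8 is valid for 16 output pixels
    assert n_partitions(4, 4, 5) == 4  # pf=5 snaps to a nearby divisor (4)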
